about | summary | refs | log | tree | commit | diff
path: root/deps/lightening
diff options
context:
space:
mode:
Diffstat (limited to 'deps/lightening')
-rw-r--r--deps/lightening/.gitignore4
-rw-r--r--deps/lightening/.gitlab-ci.yml71
-rw-r--r--deps/lightening/AUTHORS14
-rw-r--r--deps/lightening/COPYING676
-rw-r--r--deps/lightening/COPYING.DOC355
-rw-r--r--deps/lightening/COPYING.LESSER165
-rw-r--r--deps/lightening/ChangeLog17
-rw-r--r--deps/lightening/ChangeLog.lightning4018
-rw-r--r--deps/lightening/NEWS199
-rw-r--r--deps/lightening/README.md57
-rw-r--r--deps/lightening/THANKS19
-rw-r--r--deps/lightening/lightening.am58
-rw-r--r--deps/lightening/lightening.h715
-rw-r--r--deps/lightening/lightening/aarch64-cpu.c2584
-rw-r--r--deps/lightening/lightening/aarch64-fpu.c810
-rw-r--r--deps/lightening/lightening/aarch64.c235
-rw-r--r--deps/lightening/lightening/aarch64.h170
-rw-r--r--deps/lightening/lightening/arm-cpu.c3116
-rw-r--r--deps/lightening/lightening/arm-vfp.c1168
-rw-r--r--deps/lightening/lightening/arm.c148
-rw-r--r--deps/lightening/lightening/arm.h135
-rw-r--r--deps/lightening/lightening/endian.h95
-rw-r--r--deps/lightening/lightening/lightening.c1762
-rw-r--r--deps/lightening/lightening/mips-cpu.c2674
-rw-r--r--deps/lightening/lightening/mips-fpu.c1015
-rw-r--r--deps/lightening/lightening/mips.c282
-rw-r--r--deps/lightening/lightening/mips.h203
-rw-r--r--deps/lightening/lightening/ppc-cpu.c3136
-rw-r--r--deps/lightening/lightening/ppc-fpu.c935
-rw-r--r--deps/lightening/lightening/ppc.c476
-rw-r--r--deps/lightening/lightening/ppc.h193
-rw-r--r--deps/lightening/lightening/s390-cpu.c3848
-rw-r--r--deps/lightening/lightening/s390-fpu.c1316
-rw-r--r--deps/lightening/lightening/s390.c1691
-rw-r--r--deps/lightening/lightening/s390.h68
-rw-r--r--deps/lightening/lightening/x86-cpu.c2789
-rw-r--r--deps/lightening/lightening/x86-sse.c1016
-rw-r--r--deps/lightening/lightening/x86.c407
-rw-r--r--deps/lightening/lightening/x86.h163
-rw-r--r--deps/lightening/lightning.texi1760
-rw-r--r--deps/lightening/tests/Makefile87
-rw-r--r--deps/lightening/tests/absr_d.c26
-rw-r--r--deps/lightening/tests/absr_f.c26
-rw-r--r--deps/lightening/tests/addi.c25
-rw-r--r--deps/lightening/tests/addr.c26
-rw-r--r--deps/lightening/tests/addr_d.c27
-rw-r--r--deps/lightening/tests/addr_f.c27
-rw-r--r--deps/lightening/tests/addx.c63
-rw-r--r--deps/lightening/tests/andi.c31
-rw-r--r--deps/lightening/tests/andr.c48
-rw-r--r--deps/lightening/tests/beqi.c32
-rw-r--r--deps/lightening/tests/beqr.c36
-rw-r--r--deps/lightening/tests/beqr_d.c35
-rw-r--r--deps/lightening/tests/beqr_f.c35
-rw-r--r--deps/lightening/tests/bgei.c32
-rw-r--r--deps/lightening/tests/bgei_u.c32
-rw-r--r--deps/lightening/tests/bger.c34
-rw-r--r--deps/lightening/tests/bger_d.c34
-rw-r--r--deps/lightening/tests/bger_f.c34
-rw-r--r--deps/lightening/tests/bger_u.c35
-rw-r--r--deps/lightening/tests/bgti.c33
-rw-r--r--deps/lightening/tests/bgti_u.c31
-rw-r--r--deps/lightening/tests/bgtr.c34
-rw-r--r--deps/lightening/tests/bgtr_d.c34
-rw-r--r--deps/lightening/tests/bgtr_f.c34
-rw-r--r--deps/lightening/tests/bgtr_u.c34
-rw-r--r--deps/lightening/tests/blei.c31
-rw-r--r--deps/lightening/tests/blei_u.c31
-rw-r--r--deps/lightening/tests/bler.c34
-rw-r--r--deps/lightening/tests/bler_d.c34
-rw-r--r--deps/lightening/tests/bler_f.c34
-rw-r--r--deps/lightening/tests/bler_u.c34
-rw-r--r--deps/lightening/tests/bltgtr_d.c36
-rw-r--r--deps/lightening/tests/bltgtr_f.c36
-rw-r--r--deps/lightening/tests/blti.c31
-rw-r--r--deps/lightening/tests/blti_u.c31
-rw-r--r--deps/lightening/tests/bltr.c34
-rw-r--r--deps/lightening/tests/bltr_d.c34
-rw-r--r--deps/lightening/tests/bltr_f.c34
-rw-r--r--deps/lightening/tests/bltr_u.c34
-rw-r--r--deps/lightening/tests/bmci.c36
-rw-r--r--deps/lightening/tests/bmcr.c38
-rw-r--r--deps/lightening/tests/bmsi.c36
-rw-r--r--deps/lightening/tests/bmsr.c38
-rw-r--r--deps/lightening/tests/bnei.c31
-rw-r--r--deps/lightening/tests/bner.c34
-rw-r--r--deps/lightening/tests/bner_d.c36
-rw-r--r--deps/lightening/tests/bner_f.c36
-rw-r--r--deps/lightening/tests/boaddi.c41
-rw-r--r--deps/lightening/tests/boaddi_u.c41
-rw-r--r--deps/lightening/tests/boaddr.c51
-rw-r--r--deps/lightening/tests/boaddr_u.c51
-rw-r--r--deps/lightening/tests/bordr_d.c36
-rw-r--r--deps/lightening/tests/bordr_f.c36
-rw-r--r--deps/lightening/tests/bosubi.c41
-rw-r--r--deps/lightening/tests/bosubi_u.c37
-rw-r--r--deps/lightening/tests/bosubr.c48
-rw-r--r--deps/lightening/tests/bosubr_u.c47
-rw-r--r--deps/lightening/tests/bswapr_ui.c28
-rw-r--r--deps/lightening/tests/bswapr_ul.c27
-rw-r--r--deps/lightening/tests/bswapr_us.c24
-rw-r--r--deps/lightening/tests/buneqr_d.c35
-rw-r--r--deps/lightening/tests/buneqr_f.c35
-rw-r--r--deps/lightening/tests/bunger_d.c34
-rw-r--r--deps/lightening/tests/bunger_f.c34
-rw-r--r--deps/lightening/tests/bungtr_d.c34
-rw-r--r--deps/lightening/tests/bungtr_f.c34
-rw-r--r--deps/lightening/tests/bunler_d.c34
-rw-r--r--deps/lightening/tests/bunler_f.c34
-rw-r--r--deps/lightening/tests/bunltr_d.c34
-rw-r--r--deps/lightening/tests/bunltr_f.c34
-rw-r--r--deps/lightening/tests/bunordr_d.c36
-rw-r--r--deps/lightening/tests/bunordr_f.c36
-rw-r--r--deps/lightening/tests/bxaddi.c39
-rw-r--r--deps/lightening/tests/bxaddi_u.c39
-rw-r--r--deps/lightening/tests/bxaddr.c49
-rw-r--r--deps/lightening/tests/bxaddr_u.c49
-rw-r--r--deps/lightening/tests/bxsubi.c39
-rw-r--r--deps/lightening/tests/bxsubi_u.c35
-rw-r--r--deps/lightening/tests/bxsubr.c46
-rw-r--r--deps/lightening/tests/bxsubr_u.c45
-rw-r--r--deps/lightening/tests/call_10.c54
-rw-r--r--deps/lightening/tests/call_double.c38
-rw-r--r--deps/lightening/tests/call_float.c38
-rw-r--r--deps/lightening/tests/callee_9.c68
-rw-r--r--deps/lightening/tests/cas_atomic.c33
-rw-r--r--deps/lightening/tests/comr.c41
-rw-r--r--deps/lightening/tests/divr.c60
-rw-r--r--deps/lightening/tests/divr_d.c27
-rw-r--r--deps/lightening/tests/divr_f.c27
-rw-r--r--deps/lightening/tests/divr_u.c55
-rw-r--r--deps/lightening/tests/extr_c.c27
-rw-r--r--deps/lightening/tests/extr_d.c25
-rw-r--r--deps/lightening/tests/extr_d_f.c26
-rw-r--r--deps/lightening/tests/extr_f.c25
-rw-r--r--deps/lightening/tests/extr_f_d.c26
-rw-r--r--deps/lightening/tests/extr_i.c30
-rw-r--r--deps/lightening/tests/extr_s.c28
-rw-r--r--deps/lightening/tests/extr_uc.c27
-rw-r--r--deps/lightening/tests/extr_ui.c29
-rw-r--r--deps/lightening/tests/extr_us.c27
-rw-r--r--deps/lightening/tests/jmp0.c24
-rw-r--r--deps/lightening/tests/jmp_table.c61
-rw-r--r--deps/lightening/tests/jmpi.c41
-rw-r--r--deps/lightening/tests/jmpi_local.c25
-rw-r--r--deps/lightening/tests/jmpr.c23
-rw-r--r--deps/lightening/tests/ldi_c.c24
-rw-r--r--deps/lightening/tests/ldi_d.c24
-rw-r--r--deps/lightening/tests/ldi_f.c24
-rw-r--r--deps/lightening/tests/ldi_i.c24
-rw-r--r--deps/lightening/tests/ldi_l.c26
-rw-r--r--deps/lightening/tests/ldi_s.c24
-rw-r--r--deps/lightening/tests/ldi_uc.c24
-rw-r--r--deps/lightening/tests/ldi_ui.c26
-rw-r--r--deps/lightening/tests/ldi_us.c24
-rw-r--r--deps/lightening/tests/ldr_atomic.c28
-rw-r--r--deps/lightening/tests/ldr_c.c27
-rw-r--r--deps/lightening/tests/ldr_d.c27
-rw-r--r--deps/lightening/tests/ldr_f.c27
-rw-r--r--deps/lightening/tests/ldr_i.c27
-rw-r--r--deps/lightening/tests/ldr_l.c29
-rw-r--r--deps/lightening/tests/ldr_s.c27
-rw-r--r--deps/lightening/tests/ldr_uc.c27
-rw-r--r--deps/lightening/tests/ldr_ui.c29
-rw-r--r--deps/lightening/tests/ldr_us.c27
-rw-r--r--deps/lightening/tests/ldxi_c.c27
-rw-r--r--deps/lightening/tests/ldxi_d.c27
-rw-r--r--deps/lightening/tests/ldxi_f.c27
-rw-r--r--deps/lightening/tests/ldxi_i.c27
-rw-r--r--deps/lightening/tests/ldxi_l.c29
-rw-r--r--deps/lightening/tests/ldxi_s.c27
-rw-r--r--deps/lightening/tests/ldxi_uc.c27
-rw-r--r--deps/lightening/tests/ldxi_ui.c29
-rw-r--r--deps/lightening/tests/ldxi_us.c27
-rw-r--r--deps/lightening/tests/ldxr_c.c28
-rw-r--r--deps/lightening/tests/ldxr_d.c28
-rw-r--r--deps/lightening/tests/ldxr_f.c28
-rw-r--r--deps/lightening/tests/ldxr_i.c28
-rw-r--r--deps/lightening/tests/ldxr_l.c30
-rw-r--r--deps/lightening/tests/ldxr_s.c28
-rw-r--r--deps/lightening/tests/ldxr_uc.c28
-rw-r--r--deps/lightening/tests/ldxr_ui.c30
-rw-r--r--deps/lightening/tests/ldxr_us.c28
-rw-r--r--deps/lightening/tests/link-register.c35
-rw-r--r--deps/lightening/tests/lshi.c27
-rw-r--r--deps/lightening/tests/lshr.c69
-rw-r--r--deps/lightening/tests/mov_addr.c25
-rw-r--r--deps/lightening/tests/movi.c22
-rw-r--r--deps/lightening/tests/movi_d.c22
-rw-r--r--deps/lightening/tests/movi_f.c22
-rw-r--r--deps/lightening/tests/mulr.c64
-rw-r--r--deps/lightening/tests/mulr_d.c27
-rw-r--r--deps/lightening/tests/mulr_f.c27
-rw-r--r--deps/lightening/tests/negr.c39
-rw-r--r--deps/lightening/tests/negr_d.c26
-rw-r--r--deps/lightening/tests/negr_f.c26
-rw-r--r--deps/lightening/tests/ori.c31
-rw-r--r--deps/lightening/tests/orr.c48
-rw-r--r--deps/lightening/tests/qdivr.c44
-rw-r--r--deps/lightening/tests/qdivr_u.c42
-rw-r--r--deps/lightening/tests/qmulr.c58
-rw-r--r--deps/lightening/tests/qmulr_u.c46
-rw-r--r--deps/lightening/tests/remr.c60
-rw-r--r--deps/lightening/tests/remr_u.c56
-rw-r--r--deps/lightening/tests/rshi.c28
-rw-r--r--deps/lightening/tests/rshi_u.c28
-rw-r--r--deps/lightening/tests/rshr.c63
-rw-r--r--deps/lightening/tests/rshr_u.c62
-rw-r--r--deps/lightening/tests/sqrtr_d.c25
-rw-r--r--deps/lightening/tests/sqrtr_f.c25
-rw-r--r--deps/lightening/tests/sti_c.c31
-rw-r--r--deps/lightening/tests/sti_d.c31
-rw-r--r--deps/lightening/tests/sti_f.c31
-rw-r--r--deps/lightening/tests/sti_i.c31
-rw-r--r--deps/lightening/tests/sti_l.c33
-rw-r--r--deps/lightening/tests/sti_s.c31
-rw-r--r--deps/lightening/tests/str_atomic.c32
-rw-r--r--deps/lightening/tests/str_c.c32
-rw-r--r--deps/lightening/tests/str_d.c32
-rw-r--r--deps/lightening/tests/str_f.c32
-rw-r--r--deps/lightening/tests/str_i.c32
-rw-r--r--deps/lightening/tests/str_l.c34
-rw-r--r--deps/lightening/tests/str_s.c32
-rw-r--r--deps/lightening/tests/stxi_c.c32
-rw-r--r--deps/lightening/tests/stxi_d.c32
-rw-r--r--deps/lightening/tests/stxi_f.c32
-rw-r--r--deps/lightening/tests/stxi_i.c32
-rw-r--r--deps/lightening/tests/stxi_l.c34
-rw-r--r--deps/lightening/tests/stxi_s.c32
-rw-r--r--deps/lightening/tests/stxr_c.c33
-rw-r--r--deps/lightening/tests/stxr_d.c33
-rw-r--r--deps/lightening/tests/stxr_f.c33
-rw-r--r--deps/lightening/tests/stxr_i.c33
-rw-r--r--deps/lightening/tests/stxr_l.c35
-rw-r--r--deps/lightening/tests/stxr_s.c33
-rw-r--r--deps/lightening/tests/subr.c26
-rw-r--r--deps/lightening/tests/subr_d.c27
-rw-r--r--deps/lightening/tests/subr_f.c27
-rw-r--r--deps/lightening/tests/subx.c63
-rw-r--r--deps/lightening/tests/swap_atomic.c32
-rw-r--r--deps/lightening/tests/test.h79
-rw-r--r--deps/lightening/tests/truncr_d_i.c30
-rw-r--r--deps/lightening/tests/truncr_d_l.c32
-rw-r--r--deps/lightening/tests/truncr_f_i.c30
-rw-r--r--deps/lightening/tests/truncr_f_l.c32
-rw-r--r--deps/lightening/tests/xori.c31
-rw-r--r--deps/lightening/tests/xorr.c48
-rw-r--r--deps/lightening/tests/z_atomic.c88
-rw-r--r--deps/lightening/tests/z_bp.c61
-rw-r--r--deps/lightening/tests/z_branch.c584
-rw-r--r--deps/lightening/tests/z_call.c307
-rw-r--r--deps/lightening/tests/z_ccall.c1000
-rw-r--r--deps/lightening/tests/z_clobber.c1145
-rw-r--r--deps/lightening/tests/z_range.c577
-rw-r--r--deps/lightening/tests/z_ranger.c580
-rw-r--r--deps/lightening/tests/z_stack.c374
256 files changed, 50415 insertions, 0 deletions
diff --git a/deps/lightening/.gitignore b/deps/lightening/.gitignore
new file mode 100644
index 0000000..d2a82cf
--- /dev/null
+++ b/deps/lightening/.gitignore
@@ -0,0 +1,4 @@
+*.o
++*
+/lightning.info
+/tests/test-*
diff --git a/deps/lightening/.gitlab-ci.yml b/deps/lightening/.gitlab-ci.yml
new file mode 100644
index 0000000..955a8c8
--- /dev/null
+++ b/deps/lightening/.gitlab-ci.yml
@@ -0,0 +1,71 @@
+image: debian:stable
+
+before_script:
+ - apt-get update -qq
+ - apt-get install -y
+     make qemu binfmt-support qemu-user-static qemu-user
+ - update-binfmts --enable
+
+x86-64:
+ stage: test
+ script:
+ - dpkg --add-architecture amd64
+ - apt-get update -qq
+ - apt-get install -y libc6-dev:amd64 gcc
+ - make -C tests test-native
+
+i686:
+ stage: test
+ script:
+ - dpkg --add-architecture i386
+ - apt-get update -qq
+ - apt-get install -y gcc-i686-linux-gnu libc6-dev-i386-cross libc6:i386
+ - make -C tests test-ia32 CC_IA32=i686-linux-gnu-gcc
+
+aarch64:
+ stage: test
+ script:
+ - dpkg --add-architecture arm64
+ - apt-get update -qq
+ - apt-get install -y gcc-aarch64-linux-gnu libc6-dev-arm64-cross libc6:arm64
+ - make -C tests test-aarch64 CC_AARCH64=aarch64-linux-gnu-gcc
+
+armhf:
+ stage: test
+ script:
+ - dpkg --add-architecture armhf
+ - apt-get update -qq
+ - apt-get install -y gcc-arm-linux-gnueabihf libc6-dev-armhf-cross libc6:armhf
+ - make -C tests test-armv7 CC_ARMv7="arm-linux-gnueabihf-gcc -marm"
+
+armhf-thumb:
+ stage: test
+ script:
+ - dpkg --add-architecture armhf
+ - apt-get update -qq
+ - apt-get install -y gcc-arm-linux-gnueabihf libc6-dev-armhf-cross libc6:armhf
+ - make -C tests test-armv7 CC_ARMv7="arm-linux-gnueabihf-gcc -mthumb"
+
+mipsel:
+ stage: test
+ script:
+ - dpkg --add-architecture mipsel
+ - apt-get update -qq
+ - apt-get install -y gcc-mipsel-linux-gnu libc6-dev-mipsel-cross libc6:mipsel
+ - make -C tests test-mipsel CC_MIPSEL="mipsel-linux-gnu-gcc"
+
+mips64el:
+ stage: test
+ script:
+ - dpkg --add-architecture mips64el
+ - apt-get update -qq
+ - apt-get install -y gcc-mips64el-linux-gnuabi64 libc6-dev-mips64el-cross libc6:mips64el
+ - make -C tests test-mips64el CC_MIPS64EL="mips64el-linux-gnuabi64-gcc"
+
+ppc64el:
+ stage: test
+ script:
+ - dpkg --add-architecture ppc64el
+ - apt-get update -qq
+ - apt-get install -y gcc-powerpc64le-linux-gnu libc6-dev-ppc64el-cross libc6:ppc64el
+ - make -C tests test-ppc64le CC_PPC64LE="powerpc64le-linux-gnu-gcc"
diff --git a/deps/lightening/AUTHORS b/deps/lightening/AUTHORS
new file mode 100644
index 0000000..2097c63
--- /dev/null
+++ b/deps/lightening/AUTHORS
@@ -0,0 +1,14 @@
+Paulo Cesar Pereira de Andrade <pcpa@gnu.org>
+
+Paolo Bonzini <bonzini@gnu.org>
+
+PPC assembler by Ian Piumarta <piumarta@inria.fr>
+
+i386 assembler by Ian Piumarta <piumarta@inria.fr>
+and Gwenole Beauchesne <gb.public@free.fr>
+
+x86-64 backend by Matthew Flatt <mflatt@cs.utah.edu>
+
+Major PPC contributions by Laurent Michel <ldm@thorgal.homelinux.org>
+
+Major SPARC contributions by Ludovic Courtes <ludo@chbouib.org>
diff --git a/deps/lightening/COPYING b/deps/lightening/COPYING
new file mode 100644
index 0000000..4432540
--- /dev/null
+++ b/deps/lightening/COPYING
@@ -0,0 +1,676 @@
+
+ GNU GENERAL PUBLIC LICENSE
+ Version 3, 29 June 2007
+
+ Copyright (C) 2007 Free Software Foundation, Inc. <http://fsf.org/>
+ Everyone is permitted to copy and distribute verbatim copies
+ of this license document, but changing it is not allowed.
+
+ Preamble
+
+ The GNU General Public License is a free, copyleft license for
+software and other kinds of works.
+
+ The licenses for most software and other practical works are designed
+to take away your freedom to share and change the works. By contrast,
+the GNU General Public License is intended to guarantee your freedom to
+share and change all versions of a program--to make sure it remains free
+software for all its users. We, the Free Software Foundation, use the
+GNU General Public License for most of our software; it applies also to
+any other work released this way by its authors. You can apply it to
+your programs, too.
+
+ When we speak of free software, we are referring to freedom, not
+price. Our General Public Licenses are designed to make sure that you
+have the freedom to distribute copies of free software (and charge for
+them if you wish), that you receive source code or can get it if you
+want it, that you can change the software or use pieces of it in new
+free programs, and that you know you can do these things.
+
+ To protect your rights, we need to prevent others from denying you
+these rights or asking you to surrender the rights. Therefore, you have
+certain responsibilities if you distribute copies of the software, or if
+you modify it: responsibilities to respect the freedom of others.
+
+ For example, if you distribute copies of such a program, whether
+gratis or for a fee, you must pass on to the recipients the same
+freedoms that you received. You must make sure that they, too, receive
+or can get the source code. And you must show them these terms so they
+know their rights.
+
+ Developers that use the GNU GPL protect your rights with two steps:
+(1) assert copyright on the software, and (2) offer you this License
+giving you legal permission to copy, distribute and/or modify it.
+
+ For the developers' and authors' protection, the GPL clearly explains
+that there is no warranty for this free software. For both users' and
+authors' sake, the GPL requires that modified versions be marked as
+changed, so that their problems will not be attributed erroneously to
+authors of previous versions.
+
+ Some devices are designed to deny users access to install or run
+modified versions of the software inside them, although the manufacturer
+can do so. This is fundamentally incompatible with the aim of
+protecting users' freedom to change the software. The systematic
+pattern of such abuse occurs in the area of products for individuals to
+use, which is precisely where it is most unacceptable. Therefore, we
+have designed this version of the GPL to prohibit the practice for those
+products. If such problems arise substantially in other domains, we
+stand ready to extend this provision to those domains in future versions
+of the GPL, as needed to protect the freedom of users.
+
+ Finally, every program is threatened constantly by software patents.
+States should not allow patents to restrict development and use of
+software on general-purpose computers, but in those that do, we wish to
+avoid the special danger that patents applied to a free program could
+make it effectively proprietary. To prevent this, the GPL assures that
+patents cannot be used to render the program non-free.
+
+ The precise terms and conditions for copying, distribution and
+modification follow.
+
+ TERMS AND CONDITIONS
+
+ 0. Definitions.
+
+ "This License" refers to version 3 of the GNU General Public License.
+
+ "Copyright" also means copyright-like laws that apply to other kinds of
+works, such as semiconductor masks.
+
+ "The Program" refers to any copyrightable work licensed under this
+License. Each licensee is addressed as "you". "Licensees" and
+"recipients" may be individuals or organizations.
+
+ To "modify" a work means to copy from or adapt all or part of the work
+in a fashion requiring copyright permission, other than the making of an
+exact copy. The resulting work is called a "modified version" of the
+earlier work or a work "based on" the earlier work.
+
+ A "covered work" means either the unmodified Program or a work based
+on the Program.
+
+ To "propagate" a work means to do anything with it that, without
+permission, would make you directly or secondarily liable for
+infringement under applicable copyright law, except executing it on a
+computer or modifying a private copy. Propagation includes copying,
+distribution (with or without modification), making available to the
+public, and in some countries other activities as well.
+
+ To "convey" a work means any kind of propagation that enables other
+parties to make or receive copies. Mere interaction with a user through
+a computer network, with no transfer of a copy, is not conveying.
+
+ An interactive user interface displays "Appropriate Legal Notices"
+to the extent that it includes a convenient and prominently visible
+feature that (1) displays an appropriate copyright notice, and (2)
+tells the user that there is no warranty for the work (except to the
+extent that warranties are provided), that licensees may convey the
+work under this License, and how to view a copy of this License. If
+the interface presents a list of user commands or options, such as a
+menu, a prominent item in the list meets this criterion.
+
+ 1. Source Code.
+
+ The "source code" for a work means the preferred form of the work
+for making modifications to it. "Object code" means any non-source
+form of a work.
+
+ A "Standard Interface" means an interface that either is an official
+standard defined by a recognized standards body, or, in the case of
+interfaces specified for a particular programming language, one that
+is widely used among developers working in that language.
+
+ The "System Libraries" of an executable work include anything, other
+than the work as a whole, that (a) is included in the normal form of
+packaging a Major Component, but which is not part of that Major
+Component, and (b) serves only to enable use of the work with that
+Major Component, or to implement a Standard Interface for which an
+implementation is available to the public in source code form. A
+"Major Component", in this context, means a major essential component
+(kernel, window system, and so on) of the specific operating system
+(if any) on which the executable work runs, or a compiler used to
+produce the work, or an object code interpreter used to run it.
+
+ The "Corresponding Source" for a work in object code form means all
+the source code needed to generate, install, and (for an executable
+work) run the object code and to modify the work, including scripts to
+control those activities. However, it does not include the work's
+System Libraries, or general-purpose tools or generally available free
+programs which are used unmodified in performing those activities but
+which are not part of the work. For example, Corresponding Source
+includes interface definition files associated with source files for
+the work, and the source code for shared libraries and dynamically
+linked subprograms that the work is specifically designed to require,
+such as by intimate data communication or control flow between those
+subprograms and other parts of the work.
+
+ The Corresponding Source need not include anything that users
+can regenerate automatically from other parts of the Corresponding
+Source.
+
+ The Corresponding Source for a work in source code form is that
+same work.
+
+ 2. Basic Permissions.
+
+ All rights granted under this License are granted for the term of
+copyright on the Program, and are irrevocable provided the stated
+conditions are met. This License explicitly affirms your unlimited
+permission to run the unmodified Program. The output from running a
+covered work is covered by this License only if the output, given its
+content, constitutes a covered work. This License acknowledges your
+rights of fair use or other equivalent, as provided by copyright law.
+
+ You may make, run and propagate covered works that you do not
+convey, without conditions so long as your license otherwise remains
+in force. You may convey covered works to others for the sole purpose
+of having them make modifications exclusively for you, or provide you
+with facilities for running those works, provided that you comply with
+the terms of this License in conveying all material for which you do
+not control copyright. Those thus making or running the covered works
+for you must do so exclusively on your behalf, under your direction
+and control, on terms that prohibit them from making any copies of
+your copyrighted material outside their relationship with you.
+
+ Conveying under any other circumstances is permitted solely under
+the conditions stated below. Sublicensing is not allowed; section 10
+makes it unnecessary.
+
+ 3. Protecting Users' Legal Rights From Anti-Circumvention Law.
+
+ No covered work shall be deemed part of an effective technological
+measure under any applicable law fulfilling obligations under article
+11 of the WIPO copyright treaty adopted on 20 December 1996, or
+similar laws prohibiting or restricting circumvention of such
+measures.
+
+ When you convey a covered work, you waive any legal power to forbid
+circumvention of technological measures to the extent such circumvention
+is effected by exercising rights under this License with respect to
+the covered work, and you disclaim any intention to limit operation or
+modification of the work as a means of enforcing, against the work's
+users, your or third parties' legal rights to forbid circumvention of
+technological measures.
+
+ 4. Conveying Verbatim Copies.
+
+ You may convey verbatim copies of the Program's source code as you
+receive it, in any medium, provided that you conspicuously and
+appropriately publish on each copy an appropriate copyright notice;
+keep intact all notices stating that this License and any
+non-permissive terms added in accord with section 7 apply to the code;
+keep intact all notices of the absence of any warranty; and give all
+recipients a copy of this License along with the Program.
+
+ You may charge any price or no price for each copy that you convey,
+and you may offer support or warranty protection for a fee.
+
+ 5. Conveying Modified Source Versions.
+
+ You may convey a work based on the Program, or the modifications to
+produce it from the Program, in the form of source code under the
+terms of section 4, provided that you also meet all of these conditions:
+
+ a) The work must carry prominent notices stating that you modified
+ it, and giving a relevant date.
+
+ b) The work must carry prominent notices stating that it is
+ released under this License and any conditions added under section
+ 7. This requirement modifies the requirement in section 4 to
+ "keep intact all notices".
+
+ c) You must license the entire work, as a whole, under this
+ License to anyone who comes into possession of a copy. This
+ License will therefore apply, along with any applicable section 7
+ additional terms, to the whole of the work, and all its parts,
+ regardless of how they are packaged. This License gives no
+ permission to license the work in any other way, but it does not
+ invalidate such permission if you have separately received it.
+
+ d) If the work has interactive user interfaces, each must display
+ Appropriate Legal Notices; however, if the Program has interactive
+ interfaces that do not display Appropriate Legal Notices, your
+ work need not make them do so.
+
+ A compilation of a covered work with other separate and independent
+works, which are not by their nature extensions of the covered work,
+and which are not combined with it such as to form a larger program,
+in or on a volume of a storage or distribution medium, is called an
+"aggregate" if the compilation and its resulting copyright are not
+used to limit the access or legal rights of the compilation's users
+beyond what the individual works permit. Inclusion of a covered work
+in an aggregate does not cause this License to apply to the other
+parts of the aggregate.
+
+ 6. Conveying Non-Source Forms.
+
+ You may convey a covered work in object code form under the terms
+of sections 4 and 5, provided that you also convey the
+machine-readable Corresponding Source under the terms of this License,
+in one of these ways:
+
+ a) Convey the object code in, or embodied in, a physical product
+ (including a physical distribution medium), accompanied by the
+ Corresponding Source fixed on a durable physical medium
+ customarily used for software interchange.
+
+ b) Convey the object code in, or embodied in, a physical product
+ (including a physical distribution medium), accompanied by a
+ written offer, valid for at least three years and valid for as
+ long as you offer spare parts or customer support for that product
+ model, to give anyone who possesses the object code either (1) a
+ copy of the Corresponding Source for all the software in the
+ product that is covered by this License, on a durable physical
+ medium customarily used for software interchange, for a price no
+ more than your reasonable cost of physically performing this
+ conveying of source, or (2) access to copy the
+ Corresponding Source from a network server at no charge.
+
+ c) Convey individual copies of the object code with a copy of the
+ written offer to provide the Corresponding Source. This
+ alternative is allowed only occasionally and noncommercially, and
+ only if you received the object code with such an offer, in accord
+ with subsection 6b.
+
+ d) Convey the object code by offering access from a designated
+ place (gratis or for a charge), and offer equivalent access to the
+ Corresponding Source in the same way through the same place at no
+ further charge. You need not require recipients to copy the
+ Corresponding Source along with the object code. If the place to
+ copy the object code is a network server, the Corresponding Source
+ may be on a different server (operated by you or a third party)
+ that supports equivalent copying facilities, provided you maintain
+ clear directions next to the object code saying where to find the
+ Corresponding Source. Regardless of what server hosts the
+ Corresponding Source, you remain obligated to ensure that it is
+ available for as long as needed to satisfy these requirements.
+
+ e) Convey the object code using peer-to-peer transmission, provided
+ you inform other peers where the object code and Corresponding
+ Source of the work are being offered to the general public at no
+ charge under subsection 6d.
+
+ A separable portion of the object code, whose source code is excluded
+from the Corresponding Source as a System Library, need not be
+included in conveying the object code work.
+
+ A "User Product" is either (1) a "consumer product", which means any
+tangible personal property which is normally used for personal, family,
+or household purposes, or (2) anything designed or sold for incorporation
+into a dwelling. In determining whether a product is a consumer product,
+doubtful cases shall be resolved in favor of coverage. For a particular
+product received by a particular user, "normally used" refers to a
+typical or common use of that class of product, regardless of the status
+of the particular user or of the way in which the particular user
+actually uses, or expects or is expected to use, the product. A product
+is a consumer product regardless of whether the product has substantial
+commercial, industrial or non-consumer uses, unless such uses represent
+the only significant mode of use of the product.
+
+ "Installation Information" for a User Product means any methods,
+procedures, authorization keys, or other information required to install
+and execute modified versions of a covered work in that User Product from
+a modified version of its Corresponding Source. The information must
+suffice to ensure that the continued functioning of the modified object
+code is in no case prevented or interfered with solely because
+modification has been made.
+
+ If you convey an object code work under this section in, or with, or
+specifically for use in, a User Product, and the conveying occurs as
+part of a transaction in which the right of possession and use of the
+User Product is transferred to the recipient in perpetuity or for a
+fixed term (regardless of how the transaction is characterized), the
+Corresponding Source conveyed under this section must be accompanied
+by the Installation Information. But this requirement does not apply
+if neither you nor any third party retains the ability to install
+modified object code on the User Product (for example, the work has
+been installed in ROM).
+
+ The requirement to provide Installation Information does not include a
+requirement to continue to provide support service, warranty, or updates
+for a work that has been modified or installed by the recipient, or for
+the User Product in which it has been modified or installed. Access to a
+network may be denied when the modification itself materially and
+adversely affects the operation of the network or violates the rules and
+protocols for communication across the network.
+
+ Corresponding Source conveyed, and Installation Information provided,
+in accord with this section must be in a format that is publicly
+documented (and with an implementation available to the public in
+source code form), and must require no special password or key for
+unpacking, reading or copying.
+
+ 7. Additional Terms.
+
+ "Additional permissions" are terms that supplement the terms of this
+License by making exceptions from one or more of its conditions.
+Additional permissions that are applicable to the entire Program shall
+be treated as though they were included in this License, to the extent
+that they are valid under applicable law. If additional permissions
+apply only to part of the Program, that part may be used separately
+under those permissions, but the entire Program remains governed by
+this License without regard to the additional permissions.
+
+ When you convey a copy of a covered work, you may at your option
+remove any additional permissions from that copy, or from any part of
+it. (Additional permissions may be written to require their own
+removal in certain cases when you modify the work.) You may place
+additional permissions on material, added by you to a covered work,
+for which you have or can give appropriate copyright permission.
+
+ Notwithstanding any other provision of this License, for material you
+add to a covered work, you may (if authorized by the copyright holders of
+that material) supplement the terms of this License with terms:
+
+ a) Disclaiming warranty or limiting liability differently from the
+ terms of sections 15 and 16 of this License; or
+
+ b) Requiring preservation of specified reasonable legal notices or
+ author attributions in that material or in the Appropriate Legal
+ Notices displayed by works containing it; or
+
+ c) Prohibiting misrepresentation of the origin of that material, or
+ requiring that modified versions of such material be marked in
+ reasonable ways as different from the original version; or
+
+ d) Limiting the use for publicity purposes of names of licensors or
+ authors of the material; or
+
+ e) Declining to grant rights under trademark law for use of some
+ trade names, trademarks, or service marks; or
+
+ f) Requiring indemnification of licensors and authors of that
+ material by anyone who conveys the material (or modified versions of
+ it) with contractual assumptions of liability to the recipient, for
+ any liability that these contractual assumptions directly impose on
+ those licensors and authors.
+
+ All other non-permissive additional terms are considered "further
+restrictions" within the meaning of section 10. If the Program as you
+received it, or any part of it, contains a notice stating that it is
+governed by this License along with a term that is a further
+restriction, you may remove that term. If a license document contains
+a further restriction but permits relicensing or conveying under this
+License, you may add to a covered work material governed by the terms
+of that license document, provided that the further restriction does
+not survive such relicensing or conveying.
+
+ If you add terms to a covered work in accord with this section, you
+must place, in the relevant source files, a statement of the
+additional terms that apply to those files, or a notice indicating
+where to find the applicable terms.
+
+ Additional terms, permissive or non-permissive, may be stated in the
+form of a separately written license, or stated as exceptions;
+the above requirements apply either way.
+
+ 8. Termination.
+
+ You may not propagate or modify a covered work except as expressly
+provided under this License. Any attempt otherwise to propagate or
+modify it is void, and will automatically terminate your rights under
+this License (including any patent licenses granted under the third
+paragraph of section 11).
+
+ However, if you cease all violation of this License, then your
+license from a particular copyright holder is reinstated (a)
+provisionally, unless and until the copyright holder explicitly and
+finally terminates your license, and (b) permanently, if the copyright
+holder fails to notify you of the violation by some reasonable means
+prior to 60 days after the cessation.
+
+ Moreover, your license from a particular copyright holder is
+reinstated permanently if the copyright holder notifies you of the
+violation by some reasonable means, this is the first time you have
+received notice of violation of this License (for any work) from that
+copyright holder, and you cure the violation prior to 30 days after
+your receipt of the notice.
+
+ Termination of your rights under this section does not terminate the
+licenses of parties who have received copies or rights from you under
+this License. If your rights have been terminated and not permanently
+reinstated, you do not qualify to receive new licenses for the same
+material under section 10.
+
+ 9. Acceptance Not Required for Having Copies.
+
+ You are not required to accept this License in order to receive or
+run a copy of the Program. Ancillary propagation of a covered work
+occurring solely as a consequence of using peer-to-peer transmission
+to receive a copy likewise does not require acceptance. However,
+nothing other than this License grants you permission to propagate or
+modify any covered work. These actions infringe copyright if you do
+not accept this License. Therefore, by modifying or propagating a
+covered work, you indicate your acceptance of this License to do so.
+
+ 10. Automatic Licensing of Downstream Recipients.
+
+ Each time you convey a covered work, the recipient automatically
+receives a license from the original licensors, to run, modify and
+propagate that work, subject to this License. You are not responsible
+for enforcing compliance by third parties with this License.
+
+ An "entity transaction" is a transaction transferring control of an
+organization, or substantially all assets of one, or subdividing an
+organization, or merging organizations. If propagation of a covered
+work results from an entity transaction, each party to that
+transaction who receives a copy of the work also receives whatever
+licenses to the work the party's predecessor in interest had or could
+give under the previous paragraph, plus a right to possession of the
+Corresponding Source of the work from the predecessor in interest, if
+the predecessor has it or can get it with reasonable efforts.
+
+ You may not impose any further restrictions on the exercise of the
+rights granted or affirmed under this License. For example, you may
+not impose a license fee, royalty, or other charge for exercise of
+rights granted under this License, and you may not initiate litigation
+(including a cross-claim or counterclaim in a lawsuit) alleging that
+any patent claim is infringed by making, using, selling, offering for
+sale, or importing the Program or any portion of it.
+
+ 11. Patents.
+
+ A "contributor" is a copyright holder who authorizes use under this
+License of the Program or a work on which the Program is based. The
+work thus licensed is called the contributor's "contributor version".
+
+ A contributor's "essential patent claims" are all patent claims
+owned or controlled by the contributor, whether already acquired or
+hereafter acquired, that would be infringed by some manner, permitted
+by this License, of making, using, or selling its contributor version,
+but do not include claims that would be infringed only as a
+consequence of further modification of the contributor version. For
+purposes of this definition, "control" includes the right to grant
+patent sublicenses in a manner consistent with the requirements of
+this License.
+
+ Each contributor grants you a non-exclusive, worldwide, royalty-free
+patent license under the contributor's essential patent claims, to
+make, use, sell, offer for sale, import and otherwise run, modify and
+propagate the contents of its contributor version.
+
+ In the following three paragraphs, a "patent license" is any express
+agreement or commitment, however denominated, not to enforce a patent
+(such as an express permission to practice a patent or covenant not to
+sue for patent infringement). To "grant" such a patent license to a
+party means to make such an agreement or commitment not to enforce a
+patent against the party.
+
+ If you convey a covered work, knowingly relying on a patent license,
+and the Corresponding Source of the work is not available for anyone
+to copy, free of charge and under the terms of this License, through a
+publicly available network server or other readily accessible means,
+then you must either (1) cause the Corresponding Source to be so
+available, or (2) arrange to deprive yourself of the benefit of the
+patent license for this particular work, or (3) arrange, in a manner
+consistent with the requirements of this License, to extend the patent
+license to downstream recipients. "Knowingly relying" means you have
+actual knowledge that, but for the patent license, your conveying the
+covered work in a country, or your recipient's use of the covered work
+in a country, would infringe one or more identifiable patents in that
+country that you have reason to believe are valid.
+
+ If, pursuant to or in connection with a single transaction or
+arrangement, you convey, or propagate by procuring conveyance of, a
+covered work, and grant a patent license to some of the parties
+receiving the covered work authorizing them to use, propagate, modify
+or convey a specific copy of the covered work, then the patent license
+you grant is automatically extended to all recipients of the covered
+work and works based on it.
+
+ A patent license is "discriminatory" if it does not include within
+the scope of its coverage, prohibits the exercise of, or is
+conditioned on the non-exercise of one or more of the rights that are
+specifically granted under this License. You may not convey a covered
+work if you are a party to an arrangement with a third party that is
+in the business of distributing software, under which you make payment
+to the third party based on the extent of your activity of conveying
+the work, and under which the third party grants, to any of the
+parties who would receive the covered work from you, a discriminatory
+patent license (a) in connection with copies of the covered work
+conveyed by you (or copies made from those copies), or (b) primarily
+for and in connection with specific products or compilations that
+contain the covered work, unless you entered into that arrangement,
+or that patent license was granted, prior to 28 March 2007.
+
+ Nothing in this License shall be construed as excluding or limiting
+any implied license or other defenses to infringement that may
+otherwise be available to you under applicable patent law.
+
+ 12. No Surrender of Others' Freedom.
+
+ If conditions are imposed on you (whether by court order, agreement or
+otherwise) that contradict the conditions of this License, they do not
+excuse you from the conditions of this License. If you cannot convey a
+covered work so as to satisfy simultaneously your obligations under this
+License and any other pertinent obligations, then as a consequence you may
+not convey it at all. For example, if you agree to terms that obligate you
+to collect a royalty for further conveying from those to whom you convey
+the Program, the only way you could satisfy both those terms and this
+License would be to refrain entirely from conveying the Program.
+
+ 13. Use with the GNU Affero General Public License.
+
+ Notwithstanding any other provision of this License, you have
+permission to link or combine any covered work with a work licensed
+under version 3 of the GNU Affero General Public License into a single
+combined work, and to convey the resulting work. The terms of this
+License will continue to apply to the part which is the covered work,
+but the special requirements of the GNU Affero General Public License,
+section 13, concerning interaction through a network will apply to the
+combination as such.
+
+ 14. Revised Versions of this License.
+
+ The Free Software Foundation may publish revised and/or new versions of
+the GNU General Public License from time to time. Such new versions will
+be similar in spirit to the present version, but may differ in detail to
+address new problems or concerns.
+
+ Each version is given a distinguishing version number. If the
+Program specifies that a certain numbered version of the GNU General
+Public License "or any later version" applies to it, you have the
+option of following the terms and conditions either of that numbered
+version or of any later version published by the Free Software
+Foundation. If the Program does not specify a version number of the
+GNU General Public License, you may choose any version ever published
+by the Free Software Foundation.
+
+ If the Program specifies that a proxy can decide which future
+versions of the GNU General Public License can be used, that proxy's
+public statement of acceptance of a version permanently authorizes you
+to choose that version for the Program.
+
+ Later license versions may give you additional or different
+permissions. However, no additional obligations are imposed on any
+author or copyright holder as a result of your choosing to follow a
+later version.
+
+ 15. Disclaimer of Warranty.
+
+ THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY
+APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT
+HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY
+OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO,
+THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM
+IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF
+ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
+
+ 16. Limitation of Liability.
+
+ IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
+WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS
+THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY
+GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE
+USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF
+DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD
+PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS),
+EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF
+SUCH DAMAGES.
+
+ 17. Interpretation of Sections 15 and 16.
+
+ If the disclaimer of warranty and limitation of liability provided
+above cannot be given local legal effect according to their terms,
+reviewing courts shall apply local law that most closely approximates
+an absolute waiver of all civil liability in connection with the
+Program, unless a warranty or assumption of liability accompanies a
+copy of the Program in return for a fee.
+
+ END OF TERMS AND CONDITIONS
+
+ How to Apply These Terms to Your New Programs
+
+ If you develop a new program, and you want it to be of the greatest
+possible use to the public, the best way to achieve this is to make it
+free software which everyone can redistribute and change under these terms.
+
+ To do so, attach the following notices to the program. It is safest
+to attach them to the start of each source file to most effectively
+state the exclusion of warranty; and each file should have at least
+the "copyright" line and a pointer to where the full notice is found.
+
+ <one line to give the program's name and a brief idea of what it does.>
+ Copyright (C) <year> <name of author>
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+Also add information on how to contact you by electronic and paper mail.
+
+ If the program does terminal interaction, make it output a short
+notice like this when it starts in an interactive mode:
+
+ <program> Copyright (C) <year> <name of author>
+ This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
+ This is free software, and you are welcome to redistribute it
+ under certain conditions; type `show c' for details.
+
+The hypothetical commands `show w' and `show c' should show the appropriate
+parts of the General Public License. Of course, your program's commands
+might be different; for a GUI interface, you would use an "about box".
+
+ You should also get your employer (if you work as a programmer) or school,
+if any, to sign a "copyright disclaimer" for the program, if necessary.
+For more information on this, and how to apply and follow the GNU GPL, see
+<http://www.gnu.org/licenses/>.
+
+ The GNU General Public License does not permit incorporating your program
+into proprietary programs. If your program is a subroutine library, you
+may consider it more useful to permit linking proprietary applications with
+the library. If this is what you want to do, use the GNU Lesser General
+Public License instead of this License. But first, please read
+<http://www.gnu.org/philosophy/why-not-lgpl.html>.
+
diff --git a/deps/lightening/COPYING.DOC b/deps/lightening/COPYING.DOC
new file mode 100644
index 0000000..1a86456
--- /dev/null
+++ b/deps/lightening/COPYING.DOC
@@ -0,0 +1,355 @@
+ GNU Free Documentation License
+ Version 1.1, March 2000
+
+ Copyright (C) 2000 Free Software Foundation, Inc.
+ 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ Everyone is permitted to copy and distribute verbatim copies
+ of this license document, but changing it is not allowed.
+
+
+0. PREAMBLE
+
+The purpose of this License is to make a manual, textbook, or other
+written document "free" in the sense of freedom: to assure everyone
+the effective freedom to copy and redistribute it, with or without
+modifying it, either commercially or noncommercially. Secondarily,
+this License preserves for the author and publisher a way to get
+credit for their work, while not being considered responsible for
+modifications made by others.
+
+This License is a kind of "copyleft", which means that derivative
+works of the document must themselves be free in the same sense. It
+complements the GNU General Public License, which is a copyleft
+license designed for free software.
+
+We have designed this License in order to use it for manuals for free
+software, because free software needs free documentation: a free
+program should come with manuals providing the same freedoms that the
+software does. But this License is not limited to software manuals;
+it can be used for any textual work, regardless of subject matter or
+whether it is published as a printed book. We recommend this License
+principally for works whose purpose is instruction or reference.
+
+
+1. APPLICABILITY AND DEFINITIONS
+
+This License applies to any manual or other work that contains a
+notice placed by the copyright holder saying it can be distributed
+under the terms of this License. The "Document", below, refers to any
+such manual or work. Any member of the public is a licensee, and is
+addressed as "you".
+
+A "Modified Version" of the Document means any work containing the
+Document or a portion of it, either copied verbatim, or with
+modifications and/or translated into another language.
+
+A "Secondary Section" is a named appendix or a front-matter section of
+the Document that deals exclusively with the relationship of the
+publishers or authors of the Document to the Document's overall subject
+(or to related matters) and contains nothing that could fall directly
+within that overall subject. (For example, if the Document is in part a
+textbook of mathematics, a Secondary Section may not explain any
+mathematics.) The relationship could be a matter of historical
+connection with the subject or with related matters, or of legal,
+commercial, philosophical, ethical or political position regarding
+them.
+
+The "Invariant Sections" are certain Secondary Sections whose titles
+are designated, as being those of Invariant Sections, in the notice
+that says that the Document is released under this License.
+
+The "Cover Texts" are certain short passages of text that are listed,
+as Front-Cover Texts or Back-Cover Texts, in the notice that says that
+the Document is released under this License.
+
+A "Transparent" copy of the Document means a machine-readable copy,
+represented in a format whose specification is available to the
+general public, whose contents can be viewed and edited directly and
+straightforwardly with generic text editors or (for images composed of
+pixels) generic paint programs or (for drawings) some widely available
+drawing editor, and that is suitable for input to text formatters or
+for automatic translation to a variety of formats suitable for input
+to text formatters. A copy made in an otherwise Transparent file
+format whose markup has been designed to thwart or discourage
+subsequent modification by readers is not Transparent. A copy that is
+not "Transparent" is called "Opaque".
+
+Examples of suitable formats for Transparent copies include plain
+ASCII without markup, Texinfo input format, LaTeX input format, SGML
+or XML using a publicly available DTD, and standard-conforming simple
+HTML designed for human modification. Opaque formats include
+PostScript, PDF, proprietary formats that can be read and edited only
+by proprietary word processors, SGML or XML for which the DTD and/or
+processing tools are not generally available, and the
+machine-generated HTML produced by some word processors for output
+purposes only.
+
+The "Title Page" means, for a printed book, the title page itself,
+plus such following pages as are needed to hold, legibly, the material
+this License requires to appear in the title page. For works in
+formats which do not have any title page as such, "Title Page" means
+the text near the most prominent appearance of the work's title,
+preceding the beginning of the body of the text.
+
+
+2. VERBATIM COPYING
+
+You may copy and distribute the Document in any medium, either
+commercially or noncommercially, provided that this License, the
+copyright notices, and the license notice saying this License applies
+to the Document are reproduced in all copies, and that you add no other
+conditions whatsoever to those of this License. You may not use
+technical measures to obstruct or control the reading or further
+copying of the copies you make or distribute. However, you may accept
+compensation in exchange for copies. If you distribute a large enough
+number of copies you must also follow the conditions in section 3.
+
+You may also lend copies, under the same conditions stated above, and
+you may publicly display copies.
+
+
+3. COPYING IN QUANTITY
+
+If you publish printed copies of the Document numbering more than 100,
+and the Document's license notice requires Cover Texts, you must enclose
+the copies in covers that carry, clearly and legibly, all these Cover
+Texts: Front-Cover Texts on the front cover, and Back-Cover Texts on
+the back cover. Both covers must also clearly and legibly identify
+you as the publisher of these copies. The front cover must present
+the full title with all words of the title equally prominent and
+visible. You may add other material on the covers in addition.
+Copying with changes limited to the covers, as long as they preserve
+the title of the Document and satisfy these conditions, can be treated
+as verbatim copying in other respects.
+
+If the required texts for either cover are too voluminous to fit
+legibly, you should put the first ones listed (as many as fit
+reasonably) on the actual cover, and continue the rest onto adjacent
+pages.
+
+If you publish or distribute Opaque copies of the Document numbering
+more than 100, you must either include a machine-readable Transparent
+copy along with each Opaque copy, or state in or with each Opaque copy
+a publicly-accessible computer-network location containing a complete
+Transparent copy of the Document, free of added material, which the
+general network-using public has access to download anonymously at no
+charge using public-standard network protocols. If you use the latter
+option, you must take reasonably prudent steps, when you begin
+distribution of Opaque copies in quantity, to ensure that this
+Transparent copy will remain thus accessible at the stated location
+until at least one year after the last time you distribute an Opaque
+copy (directly or through your agents or retailers) of that edition to
+the public.
+
+It is requested, but not required, that you contact the authors of the
+Document well before redistributing any large number of copies, to give
+them a chance to provide you with an updated version of the Document.
+
+
+4. MODIFICATIONS
+
+You may copy and distribute a Modified Version of the Document under
+the conditions of sections 2 and 3 above, provided that you release
+the Modified Version under precisely this License, with the Modified
+Version filling the role of the Document, thus licensing distribution
+and modification of the Modified Version to whoever possesses a copy
+of it. In addition, you must do these things in the Modified Version:
+
+A. Use in the Title Page (and on the covers, if any) a title distinct
+ from that of the Document, and from those of previous versions
+ (which should, if there were any, be listed in the History section
+ of the Document). You may use the same title as a previous version
+ if the original publisher of that version gives permission.
+B. List on the Title Page, as authors, one or more persons or entities
+ responsible for authorship of the modifications in the Modified
+ Version, together with at least five of the principal authors of the
+ Document (all of its principal authors, if it has less than five).
+C. State on the Title page the name of the publisher of the
+ Modified Version, as the publisher.
+D. Preserve all the copyright notices of the Document.
+E. Add an appropriate copyright notice for your modifications
+ adjacent to the other copyright notices.
+F. Include, immediately after the copyright notices, a license notice
+ giving the public permission to use the Modified Version under the
+ terms of this License, in the form shown in the Addendum below.
+G. Preserve in that license notice the full lists of Invariant Sections
+ and required Cover Texts given in the Document's license notice.
+H. Include an unaltered copy of this License.
+I. Preserve the section entitled "History", and its title, and add to
+ it an item stating at least the title, year, new authors, and
+ publisher of the Modified Version as given on the Title Page. If
+ there is no section entitled "History" in the Document, create one
+ stating the title, year, authors, and publisher of the Document as
+ given on its Title Page, then add an item describing the Modified
+ Version as stated in the previous sentence.
+J. Preserve the network location, if any, given in the Document for
+ public access to a Transparent copy of the Document, and likewise
+ the network locations given in the Document for previous versions
+ it was based on. These may be placed in the "History" section.
+ You may omit a network location for a work that was published at
+ least four years before the Document itself, or if the original
+ publisher of the version it refers to gives permission.
+K. In any section entitled "Acknowledgements" or "Dedications",
+ preserve the section's title, and preserve in the section all the
+ substance and tone of each of the contributor acknowledgements
+ and/or dedications given therein.
+L. Preserve all the Invariant Sections of the Document,
+ unaltered in their text and in their titles. Section numbers
+ or the equivalent are not considered part of the section titles.
+M. Delete any section entitled "Endorsements". Such a section
+ may not be included in the Modified Version.
+N. Do not retitle any existing section as "Endorsements"
+ or to conflict in title with any Invariant Section.
+
+If the Modified Version includes new front-matter sections or
+appendices that qualify as Secondary Sections and contain no material
+copied from the Document, you may at your option designate some or all
+of these sections as invariant. To do this, add their titles to the
+list of Invariant Sections in the Modified Version's license notice.
+These titles must be distinct from any other section titles.
+
+You may add a section entitled "Endorsements", provided it contains
+nothing but endorsements of your Modified Version by various
+parties--for example, statements of peer review or that the text has
+been approved by an organization as the authoritative definition of a
+standard.
+
+You may add a passage of up to five words as a Front-Cover Text, and a
+passage of up to 25 words as a Back-Cover Text, to the end of the list
+of Cover Texts in the Modified Version. Only one passage of
+Front-Cover Text and one of Back-Cover Text may be added by (or
+through arrangements made by) any one entity. If the Document already
+includes a cover text for the same cover, previously added by you or
+by arrangement made by the same entity you are acting on behalf of,
+you may not add another; but you may replace the old one, on explicit
+permission from the previous publisher that added the old one.
+
+The author(s) and publisher(s) of the Document do not by this License
+give permission to use their names for publicity for or to assert or
+imply endorsement of any Modified Version.
+
+
+5. COMBINING DOCUMENTS
+
+You may combine the Document with other documents released under this
+License, under the terms defined in section 4 above for modified
+versions, provided that you include in the combination all of the
+Invariant Sections of all of the original documents, unmodified, and
+list them all as Invariant Sections of your combined work in its
+license notice.
+
+The combined work need only contain one copy of this License, and
+multiple identical Invariant Sections may be replaced with a single
+copy. If there are multiple Invariant Sections with the same name but
+different contents, make the title of each such section unique by
+adding at the end of it, in parentheses, the name of the original
+author or publisher of that section if known, or else a unique number.
+Make the same adjustment to the section titles in the list of
+Invariant Sections in the license notice of the combined work.
+
+In the combination, you must combine any sections entitled "History"
+in the various original documents, forming one section entitled
+"History"; likewise combine any sections entitled "Acknowledgements",
+and any sections entitled "Dedications". You must delete all sections
+entitled "Endorsements."
+
+
+6. COLLECTIONS OF DOCUMENTS
+
+You may make a collection consisting of the Document and other documents
+released under this License, and replace the individual copies of this
+License in the various documents with a single copy that is included in
+the collection, provided that you follow the rules of this License for
+verbatim copying of each of the documents in all other respects.
+
+You may extract a single document from such a collection, and distribute
+it individually under this License, provided you insert a copy of this
+License into the extracted document, and follow this License in all
+other respects regarding verbatim copying of that document.
+
+
+7. AGGREGATION WITH INDEPENDENT WORKS
+
+A compilation of the Document or its derivatives with other separate
+and independent documents or works, in or on a volume of a storage or
+distribution medium, does not as a whole count as a Modified Version
+of the Document, provided no compilation copyright is claimed for the
+compilation. Such a compilation is called an "aggregate", and this
+License does not apply to the other self-contained works thus compiled
+with the Document, on account of their being thus compiled, if they
+are not themselves derivative works of the Document.
+
+If the Cover Text requirement of section 3 is applicable to these
+copies of the Document, then if the Document is less than one quarter
+of the entire aggregate, the Document's Cover Texts may be placed on
+covers that surround only the Document within the aggregate.
+Otherwise they must appear on covers around the whole aggregate.
+
+
+8. TRANSLATION
+
+Translation is considered a kind of modification, so you may
+distribute translations of the Document under the terms of section 4.
+Replacing Invariant Sections with translations requires special
+permission from their copyright holders, but you may include
+translations of some or all Invariant Sections in addition to the
+original versions of these Invariant Sections. You may include a
+translation of this License provided that you also include the
+original English version of this License. In case of a disagreement
+between the translation and the original English version of this
+License, the original English version will prevail.
+
+
+9. TERMINATION
+
+You may not copy, modify, sublicense, or distribute the Document except
+as expressly provided for under this License. Any other attempt to
+copy, modify, sublicense or distribute the Document is void, and will
+automatically terminate your rights under this License. However,
+parties who have received copies, or rights, from you under this
+License will not have their licenses terminated so long as such
+parties remain in full compliance.
+
+
+10. FUTURE REVISIONS OF THIS LICENSE
+
+The Free Software Foundation may publish new, revised versions
+of the GNU Free Documentation License from time to time. Such new
+versions will be similar in spirit to the present version, but may
+differ in detail to address new problems or concerns. See
+http://www.gnu.org/copyleft/.
+
+Each version of the License is given a distinguishing version number.
+If the Document specifies that a particular numbered version of this
+License "or any later version" applies to it, you have the option of
+following the terms and conditions either of that specified version or
+of any later version that has been published (not as a draft) by the
+Free Software Foundation. If the Document does not specify a version
+number of this License, you may choose any version ever published (not
+as a draft) by the Free Software Foundation.
+
+
+ADDENDUM: How to use this License for your documents
+
+To use this License in a document you have written, include a copy of
+the License in the document and put the following copyright and
+license notices just after the title page:
+
+ Copyright (c) YEAR YOUR NAME.
+ Permission is granted to copy, distribute and/or modify this document
+ under the terms of the GNU Free Documentation License, Version 1.1
+ or any later version published by the Free Software Foundation;
+ with the Invariant Sections being LIST THEIR TITLES, with the
+ Front-Cover Texts being LIST, and with the Back-Cover Texts being LIST.
+ A copy of the license is included in the section entitled "GNU
+ Free Documentation License".
+
+If you have no Invariant Sections, write "with no Invariant Sections"
+instead of saying which ones are invariant. If you have no
+Front-Cover Texts, write "no Front-Cover Texts" instead of
+"Front-Cover Texts being LIST"; likewise for Back-Cover Texts.
+
+If your document contains nontrivial examples of program code, we
+recommend releasing these examples in parallel under your choice of
+free software license, such as the GNU General Public License,
+to permit their use in free software.
diff --git a/deps/lightening/COPYING.LESSER b/deps/lightening/COPYING.LESSER
new file mode 100644
index 0000000..fc8a5de
--- /dev/null
+++ b/deps/lightening/COPYING.LESSER
@@ -0,0 +1,165 @@
+ GNU LESSER GENERAL PUBLIC LICENSE
+ Version 3, 29 June 2007
+
+ Copyright (C) 2007 Free Software Foundation, Inc. <http://fsf.org/>
+ Everyone is permitted to copy and distribute verbatim copies
+ of this license document, but changing it is not allowed.
+
+
+ This version of the GNU Lesser General Public License incorporates
+the terms and conditions of version 3 of the GNU General Public
+License, supplemented by the additional permissions listed below.
+
+ 0. Additional Definitions.
+
+ As used herein, "this License" refers to version 3 of the GNU Lesser
+General Public License, and the "GNU GPL" refers to version 3 of the GNU
+General Public License.
+
+ "The Library" refers to a covered work governed by this License,
+other than an Application or a Combined Work as defined below.
+
+ An "Application" is any work that makes use of an interface provided
+by the Library, but which is not otherwise based on the Library.
+Defining a subclass of a class defined by the Library is deemed a mode
+of using an interface provided by the Library.
+
+ A "Combined Work" is a work produced by combining or linking an
+Application with the Library. The particular version of the Library
+with which the Combined Work was made is also called the "Linked
+Version".
+
+ The "Minimal Corresponding Source" for a Combined Work means the
+Corresponding Source for the Combined Work, excluding any source code
+for portions of the Combined Work that, considered in isolation, are
+based on the Application, and not on the Linked Version.
+
+ The "Corresponding Application Code" for a Combined Work means the
+object code and/or source code for the Application, including any data
+and utility programs needed for reproducing the Combined Work from the
+Application, but excluding the System Libraries of the Combined Work.
+
+ 1. Exception to Section 3 of the GNU GPL.
+
+ You may convey a covered work under sections 3 and 4 of this License
+without being bound by section 3 of the GNU GPL.
+
+ 2. Conveying Modified Versions.
+
+ If you modify a copy of the Library, and, in your modifications, a
+facility refers to a function or data to be supplied by an Application
+that uses the facility (other than as an argument passed when the
+facility is invoked), then you may convey a copy of the modified
+version:
+
+ a) under this License, provided that you make a good faith effort to
+ ensure that, in the event an Application does not supply the
+ function or data, the facility still operates, and performs
+ whatever part of its purpose remains meaningful, or
+
+ b) under the GNU GPL, with none of the additional permissions of
+ this License applicable to that copy.
+
+ 3. Object Code Incorporating Material from Library Header Files.
+
+ The object code form of an Application may incorporate material from
+a header file that is part of the Library. You may convey such object
+code under terms of your choice, provided that, if the incorporated
+material is not limited to numerical parameters, data structure
+layouts and accessors, or small macros, inline functions and templates
+(ten or fewer lines in length), you do both of the following:
+
+ a) Give prominent notice with each copy of the object code that the
+ Library is used in it and that the Library and its use are
+ covered by this License.
+
+ b) Accompany the object code with a copy of the GNU GPL and this license
+ document.
+
+ 4. Combined Works.
+
+ You may convey a Combined Work under terms of your choice that,
+taken together, effectively do not restrict modification of the
+portions of the Library contained in the Combined Work and reverse
+engineering for debugging such modifications, if you also do each of
+the following:
+
+ a) Give prominent notice with each copy of the Combined Work that
+ the Library is used in it and that the Library and its use are
+ covered by this License.
+
+ b) Accompany the Combined Work with a copy of the GNU GPL and this license
+ document.
+
+ c) For a Combined Work that displays copyright notices during
+ execution, include the copyright notice for the Library among
+ these notices, as well as a reference directing the user to the
+ copies of the GNU GPL and this license document.
+
+ d) Do one of the following:
+
+ 0) Convey the Minimal Corresponding Source under the terms of this
+ License, and the Corresponding Application Code in a form
+ suitable for, and under terms that permit, the user to
+ recombine or relink the Application with a modified version of
+ the Linked Version to produce a modified Combined Work, in the
+ manner specified by section 6 of the GNU GPL for conveying
+ Corresponding Source.
+
+ 1) Use a suitable shared library mechanism for linking with the
+ Library. A suitable mechanism is one that (a) uses at run time
+ a copy of the Library already present on the user's computer
+ system, and (b) will operate properly with a modified version
+ of the Library that is interface-compatible with the Linked
+ Version.
+
+ e) Provide Installation Information, but only if you would otherwise
+ be required to provide such information under section 6 of the
+ GNU GPL, and only to the extent that such information is
+ necessary to install and execute a modified version of the
+ Combined Work produced by recombining or relinking the
+ Application with a modified version of the Linked Version. (If
+ you use option 4d0, the Installation Information must accompany
+ the Minimal Corresponding Source and Corresponding Application
+ Code. If you use option 4d1, you must provide the Installation
+ Information in the manner specified by section 6 of the GNU GPL
+ for conveying Corresponding Source.)
+
+ 5. Combined Libraries.
+
+ You may place library facilities that are a work based on the
+Library side by side in a single library together with other library
+facilities that are not Applications and are not covered by this
+License, and convey such a combined library under terms of your
+choice, if you do both of the following:
+
+ a) Accompany the combined library with a copy of the same work based
+ on the Library, uncombined with any other library facilities,
+ conveyed under the terms of this License.
+
+ b) Give prominent notice with the combined library that part of it
+ is a work based on the Library, and explaining where to find the
+ accompanying uncombined form of the same work.
+
+ 6. Revised Versions of the GNU Lesser General Public License.
+
+ The Free Software Foundation may publish revised and/or new versions
+of the GNU Lesser General Public License from time to time. Such new
+versions will be similar in spirit to the present version, but may
+differ in detail to address new problems or concerns.
+
+ Each version is given a distinguishing version number. If the
+Library as you received it specifies that a certain numbered version
+of the GNU Lesser General Public License "or any later version"
+applies to it, you have the option of following the terms and
+conditions either of that published version or of any later version
+published by the Free Software Foundation. If the Library as you
+received it does not specify a version number of the GNU Lesser
+General Public License, you may choose any version of the GNU Lesser
+General Public License ever published by the Free Software Foundation.
+
+ If the Library as you received it specifies that a proxy can decide
+whether future versions of the GNU Lesser General Public License shall
+apply, that proxy's public statement of acceptance of any version is
+permanent authorization for you to choose that version for the
+Library.
diff --git a/deps/lightening/ChangeLog b/deps/lightening/ChangeLog
new file mode 100644
index 0000000..cc7c8e9
--- /dev/null
+++ b/deps/lightening/ChangeLog
@@ -0,0 +1,17 @@
+-*- text -*-
+
+Starting from October 30, 2018, the Lightening project no longer stores
+change logs in `ChangeLog' files. Instead, changes are detailed in the
+version control system's logs. They can be seen by downloading a copy
+of the Git repository:
+
+ $ git clone https://gitlab.com/wingo/lightening
+ $ git log
+
+Alternatively, they can be seen on the web, using the Gitweb interface
+at:
+
+ https://gitlab.com/wingo/lightening
+
+Change logs from upstream GNU Lightning are still available in
+ChangeLog.lightning.
diff --git a/deps/lightening/ChangeLog.lightning b/deps/lightening/ChangeLog.lightning
new file mode 100644
index 0000000..19b3335
--- /dev/null
+++ b/deps/lightening/ChangeLog.lightning
@@ -0,0 +1,4018 @@
+2018-04-20 Paulo Andrade <pcpa@gnu.org>
+
+ * include/lightning/jit_private.h: Add new register classes to
+ flag float registers and double only registers, required for sparc64
+ where only low 32 bit fpr registers can be used for single precision
+ operations.
+ Add new 128 bit jit_regset_t type for sparc64 register set.
+
+ * include/lightning/jit_sparc.h, lib/jit_sparc-cpu.c, lib/jit_sparc-fpu.c,
+ lib/jit_sparc-sz.c, lib/jit_sparc.c: Update for 64 bits sparc.
+
+ * lib/lightning.c: Update for new jit_regset_t required for sparc64.
+
+2018-02-26 Paulo Andrade <pcpa@gnu.org>
+
+ * check/lightning.c, include/lightning.h: Add the new jit_va_push
+ interface. That should be called when passing a va_list to a C
+ function. This is required because on Alpha a va_list is passed
+ by value, and lightning does not know about data types, so, cannot
+ understand it is pushing a va_list as argument.
+
+ * lib/jit_names.c, lib/lightning.c: Minor changes for the new
+ jit_code_va_push.
+
+ * check/cva_list.c: Update only test case using jit_va_push, to
+ pass a va_list to a C function.
+
+ * doc/body.texi: Better documentation of the varargs interface.
+
+ * lib/jit_alpha.c, lib/jit_alpha-cpu.c: Update to properly push a
+ C va_list and correctly calculate varargs offset.
+
+ * lib/jit_aarch64-sz.c, lib/jit_aarch64.c, lib/jit_alpha-sz.c,
+ lib/jit_arm-sz.c, lib/jit_arm.c, lib/jit_hppa-sz.c, lib/jit_hppa.c,
+ lib/jit_ia64-sz.c, lib/jit_ia64.c, lib/jit_mips-sz.c, lib/jit_mips.c,
+ lib/jit_ppc-sz.c, lib/jit_ppc.c, lib/jit_s390-sz.c, lib/jit_s390.c,
+ lib/jit_sparc-sz.c, lib/jit_sparc.c, lib/jit_x86-sz.c, lib/jit_x86.c:
+ Update for the new jit_va_push interface.
+
+2018-02-22 Paulo Andrade <pcpa@gnu.org>
+
+ * lib/jit_alpha-cpu.c: Always set t12 to the address of the
+ current function, to properly work on all systems. Previously
+ the shortcut did only work on Tru64. For Linux and glibc the
+ change is required.
+
+2018-02-22 Paulo Andrade <pcpa@gnu.org>
+
+ * lib/jit_aarch64.c, lib/jit_alpha.c, lib/jit_arm.c,
+ lib/jit_mips.c, lib/jit_ppc.c, lib/jit_sparc.c, lib/jit_x86.c:
+ Correct wrong logic in usage of jit_live in jit_retr. The
+ problem is that if a temporary is required during epilog,
+ the return register might be allocated, so, jit_live must always
+ be used.
+
+2018-01-31 Paulo Andrade <pcpa@gnu.org>
+
+ * lib/lightning.c: Avoid deep recursions when computing live
+ register ranges.
+
+2018-01-31 Paulo Andrade <pcpa@gnu.org>
+
+ * lib/jit_mips-cpu.c: Correct frame size and varargs
+ initialization for the n32 abi.
+ * lib/jit_mips.c, lib/jit_mips-fpu.c: Correct 32 bit abis
+ in big-endian.
+
+2017-09-13 Paulo Andrade <pcpa@gnu.org>
+
+ * configure.ac: Add check for binutils 2.29 prototype to the
+ disassembler function.
+ * lib/jit_disasm.c: Adapt for binutils 2.29 change.
+
+2017-06-09 Paulo Andrade <pcpa@gnu.org>
+
+ * include/lightning/jit_private.h, lib/lightning.c: Add a
+ second pass from start when computing register live ranges.
+ This should be used temporarily, and is required for certain
+ loop constructs, with several consecutive blocks not referencing
+ a live register.
+
+2016-05-05 Paulo Andrade <pcpa@gnu.org>
+
+ * lib/lightning.c: Correct wrong movr simplification,
+ remove no longer needed code to set return registers live
+ and update live register set when reaching a label boundary,
+ but do not descend if the block has been already visited.
+ The latter needs some tuning for complex code generation, where
+ it will still have issues.
+
+2015-11-30 Paulo Andrade <pcpa@gnu.org>
+
+ * doc/body.texi: Change documentation to no longer say
+ it is a variant of the Fibonacci sequence, and document
+ a proper implementation.
+ Thanks to Jon Arintok for pointing out that the Fibonacci
+ sequence generation was incorrect. It was documented, but
+ still confusing.
+
+ * check/fib.tst, check/fib.ok, check/bp.tst, check/bp.ok,
+ doc/ifib.c, doc/rbif.c: Implement a proper Fibonacci
+ sequence implementation.
+
+2015-07-03 Paulo Andrade <pcpa@gnu.org>
+
+ * lib/jit_mips-cpu.c: Correct definition of htonr_ul.
+ Correct prolog/epilog/va* routines to work on o64 abi.
+
+ * lib/jit_mips-fpu.c: Correct load of double literal
+ argument when not using a data buffer.
+ Remove alignment correction in vaarg_d if using the
+ new mips abi.
+
+ * lib/jit_mips.c: Correct code to allow creating variadic
+ jit functions when using the new mips abi.
+
+ * lib/jit_rewind.c: Minor adjust for rewind when using
+ the new mips abi, if there are varargs arguments in
+ registers.
+
+2015-06-06 Paulo Andrade <pcpa@gnu.org>
+
+ * lib/jit_ia64-cpu.c: Search backward for the last output
+ register used, otherwise would stop too early if a float
+ argument used the slot.
+ Correct offset of first va_list argument, and use proper
+ va_list abi.
+
+ * lib/jit_ia64-fpu.c: Add new functions to move a gpr
+ to a fpr register, to counterpart the ones that move a
+ fpr to a gpr. These are required to properly implement
+ jit_getarg*_{f,d} on complex prototypes, or variadic
+ jit functions.
+
+ * lib/jit_ia64-sz.c: Update for support to jit variadic
+ functions.
+
+ * lib/jit_ia64.c: Implement proper abi for variadic
+ jit functions.
+
+2015-06-04 Paulo Andrade <pcpa@gnu.org>
+
+ * lib/jit_rewind.c: New file implementing generic functions
+ to "rewind", or rewrite IR code sequences.
+
+ * include/lightning.h: Add several new codes, that previously
+ were a function call, that would synthesize the operation.
+ Now, there is a code for the operation, and a new flag to
+ know an operation is synthesized.
+
+ * include/lightning/jit_private.h: Add several new macros to
+ help construct synthesized IR code sequences.
+
+ * lib/Makefile.am: Update for lib/jit_rewind.c.
+
+ * lib/jit_disasm.c: Update for a small rework on jit_node_t,
+ so that --enable-devel-disassembler does not need a change
+ in the layout of jit_node_t.
+
+ * lib/jit_names.c: Update for the new codes.
+
+ * lib/jit_print.c: Update to print more readable output, and
+ flag synthesized IR code sequences.
+
+ * lib/jit_aarch64-sz.c, lib/jit_aarch64.c,
+ lib/jit_arm-sz.c, lib/jit_arm.c, lib/jit_x86-sz.c,
+ lib/jit_x86.c: Update for new synthesized IR code sequences.
+
+ * lib/jit_ppc-cpu.c, lib/jit_ppc-fpu.c, lib/jit_ppc-sz.c,
+ lib/jit_ppc.c, lib/jit_mips-cpu.c, lib/jit_mips-fpu.c,
+ lib/jit_mips-sz.c, lib/jit_mips.c, lib/jit_s390-fpu.c,
+ lib/jit_s390-sz.c, lib/jit_s390.c: Update for new synthesized
+ IR code sequences and correct bugs in the initial varargs
+ implementation support.
+
+ * lib/jit_alpha-sz.c, lib/jit_alpha.c, lib/jit_hppa-sz.c,
+ lib/jit_hppa.c, lib/jit_ia64-sz.c, lib/jit_ia64.c,
+ lib/jit_sparc-sz.c, lib/jit_sparc.c: Add generic, untested
+ support for the new synthesized IR code sequences. Known
+ most likely broken right now, and should be corrected once
+ access to these hosts is available.
+
+ * lib/lightning.c: Update for new IR codes, and add support
+ for not yet existing instructions that change third argument.
+
+ * size.c: Change to use different tables for LE and BE PowerPC.
+ Correct a wrong endif for x32.
+
+2015-05-25 Paulo Andrade <pcpa@gnu.org>
+
+ * check/cva_list.c: New file implementing a test to ensure
+ the value returned by jit_va_start is a valid C va_list.
+
+ * check/va_list.ok: New simple helper file, as now the
+ va_list.tst test is enabled.
+
+ * check/va_list.tst: Rewritten for an extensive variadic
+ jit functions test.
+
+ * check/Makefile.am: Update for the new tests.
+
+ * lib/jit_arm-cpu.c, lib/jit_arm-swf.c, lib/jit_arm-vfp.c,
+ lib/jit_arm.c: Correct broken software float in a previous
+ commit. Note that the hard float abi implementation is known
+ broken at this time, for special cases involving variadic
+ functions, and should be corrected next.
+
+ * lib/jit_x86-cpu.c, lib/jit_x86-sz.c, lib/jit_x86.c: Correct
+ the jit_va_list_t semantics to match C va_list.
+
+2015-05-24 Paulo Andrade <pcpa@gnu.org>
+
+ * lib/Makefile.am: Bump library major. This is a preparation
+ for a rework that was due for quite some time, but that is
+ now required to properly implement variadic jit functions.
+ The rework is mainly required to know at prolog parsing, if
+ a function is variadic or not. This will benefit a few
+ backends, and is mandatory for the hard float arm abi.
+ The rework was already planned for quite some time, to
+ be able to use a variable stack framesize, and for leaf
+ functions optimization where applicable.
+ The change will be source compatible, but will change
+ some internals, and jit_code_t values, as some new will
+ be added.
+ The only behavior change is that, jit_arg_register_p may
+ change return value on hard float arm abi, if called before
+ or after jit_ellipsis. Common sense anyway, would say to
+ make that call after jit_ellipsis, but documentation
+ should be updated for it.
+
+2015-05-24 Paulo Andrade <pcpa@gnu.org>
+
+ * lib/jit_aarch64-fpu.c, lib/jit_aarch64.c: Correct base
+ aarch64 varargs code.
+
+2015-05-24 Paulo Andrade <pcpa@gnu.org>
+
+ * check/lightning.c: Clearly run check if clang is the system
+ compiler.
+
+2015-05-20 Paulo Andrade <pcpa@gnu.org>
+
+ * lib/jit_sparc-cpu.c, lib/jit_sparc-fpu.c, lib/jit_sparc.c:
+ Add base support to jit vararg functions to the sparc backend.
+
+2015-05-20 Paulo Andrade <pcpa@gnu.org>
+
+ * lib/jit_alpha-cpu.c, lib/jit_alpha-fpu.c, lib/jit_alpha.c:
+ Add base support to jit vararg functions to the alpha backend.
+
+2015-05-19 Paulo Andrade <pcpa@gnu.org>
+
+ * lib/jit_hppa-cpu.c, lib/jit_hppa-fpu.c, lib/jit_hppa.c:
+ Add base support to jit vararg functions to the hppa backend.
+
+2015-05-10 Paulo Andrade <pcpa@gnu.org>
+
+ * lib/jit_ia64-cpu.c, lib/jit_ia64-fpu.c, lib/jit_ia64.c:
+ Add base support to jit vararg functions to the ia64 backend.
+
+2015-05-10 Paulo Andrade <pcpa@gnu.org>
+
+ * lib/jit_ia64-fpu.c, lib/jit_ia64.c: Correct movi_d_w
+ and movi_f_w implementation to work when not using a
+ data buffer. This causes the check varargs.tst to
+ work when passing "-d" to the lightning test tool.
+
+2015-05-10 Paulo Andrade <pcpa@gnu.org>
+
+ * lib/jit_ia64.c: Implement inline assembly cache flush,
+ required on multiprocessor systems.
+
+2015-05-06 Paulo Andrade <pcpa@gnu.org>
+
+ * lib/jit_mips-cpu.c, lib/jit_mips-fpu.c, lib/jit_mips.c:
+ Add base support to jit vararg functions to the mips backend.
+ Currently only supported on the o32 abi, until access to a
+ n32 system is arranged.
+
+2015-05-05 Paulo Andrade <pcpa@gnu.org>
+
+ * lib/jit_ppc-cpu.c, lib/jit_ppc-fpu.c, lib/jit_ppc.c:
+ Add base support to jit vararg functions to the PowerPC backend.
+
+2015-05-02 Paulo Andrade <pcpa@gnu.org>
+
+ * lib/jit_s390-cpu.c, lib/jit_s390-fpu.c, lib/jit_s390.c:
+ Add base support to jit vararg functions to the s390 backend.
+
+2015-05-01 Paulo Andrade <pcpa@gnu.org>
+
+ * lib/jit_arm-cpu.c, lib/jit_arm-swf.c, lib/jit_arm-vfp.c,
+ lib/jit_arm.c: Add base support to jit vararg
+ functions to the arm backend.
+
+2015-04-30 Paulo Andrade <pcpa@gnu.org>
+
+ * lib/jit_aarch64-cpu.c, lib/jit_aarch64-fpu.c,
+ lib/jit_aarch64.c: Add base support to jit vararg
+ functions to the aarch64 backend.
+
+2015-04-27 Paulo Andrade <pcpa@gnu.org>
+
+ * include/lightning.h, include/lightning/jit_private.h,
+ lib/jit_names.c, lib/lightning.c: Add initial support
+ for the new jit_va_start, jit_va_arg, jit_va_arg_d, and
+ jit_va_end interfaces. The jit_va_start call is supposed
+ to return a va_list compatible pointer, but not yet
+ decided if it will be "declared" stdarg compatible,
+ as for now only x86 support has been added (and should
+ be compatible), but issues may arise on other backends.
+
+ * check/lightning.c: Add wrappers to call the new jit_va_*
+ interfaces.
+
+ * lib/jit_x86-cpu.c, lib/jit_x86.c: Implement the new
+ jit_va_* for x86.
+
+ * lib/jit_x86-sz.c: Add fields, but not yet fully updated,
+ as this is an intermediate commit.
+
+ * lib/jit_aarch64-sz.c, lib/jit_aarch64.c,
+ lib/jit_alpha-sz.c, lib/jit_alpha.c,
+ lib/jit_arm-sz.c, lib/jit_arm.c,
+ lib/jit_hppa-sz.c, lib/jit_hppa.c,
+ lib/jit_ia64-sz.c, lib/jit_ia64.c,
+ lib/jit_mips-sz.c, lib/jit_mips.c,
+ lib/jit_ppc-sz.c, lib/jit_ppc.c,
+ lib/jit_s390-sz.c, lib/jit_s390.c,
+ lib/jit_sparc-sz.c, lib/jit_sparc.c: Prepare for the
+ new jit_va_* interfaces. Not yet implemented, and will
+ cause an assertion if used.
+
+ * check/va_list.tst: Simple early test case, that works
+ on x86_64, x32, ix86, cygwin, and cygwin64.
+
+2015-02-17 Paulo Andrade <pcpa@gnu.org>
+
+ * include/lightning.h, include/lightning/jit_private.h,
+ lib/jit_aarch64-cpu.c, lib/jit_aarch64.c,
+ lib/jit_alpha-cpu.c, lib/jit_alpha.c,
+ lib/jit_arm-cpu.c, lib/jit_arm.c,
+ lib/jit_hppa-cpu.c, lib/jit_hppa.c,
+ lib/jit_ia64-cpu.c, lib/jit_ia64.c,
+ lib/jit_mips-cpu.c, lib/jit_mips.c,
+ lib/jit_ppc-cpu.c, lib/jit_ppc.c,
+ lib/jit_s390-cpu.c, lib/jit_s390.c,
+ lib/jit_sparc-cpu.c, lib/jit_sparc.c,
+ lib/jit_x86-cpu.c, lib/jit_x86.c: Implement the new
+ jit_allocar(offs, size) interface, that receives
+ two integer registers arguments, allocates space
+ dynamically in the stack, returns the offset in
+ the first argument, and uses the second argument
+ for the size in bytes of the memory to be allocated.
+
+ * check/allocar.ok, check/allocar.tst: New files
+ implementing test cases for the new jit_allocar
+ interface.
+
+ * check/Makefile.am, check/lightning.c: Update for
+ the new test case and interface.
+
+ * doc/body.texi: Add documentation of the new
+ interface.
+
+2015-02-17 Paulo Andrade <pcpa@gnu.org>
+
+ * include/lightning/jit_x86.h, lib/jit_x86-cpu.c,
+ lib/jit_x86-x87.c: No longer make st(7) available.
+ Need to keep one x87 slot empty to avoid exceptions.
+ This has the side effect of no longer needing the
+ hackish emms instruction before a function call.
+
+2015-02-16 Paulo Andrade <pcpa@gnu.org>
+
+ * lib/lightning.c: Remove the jit_regno_patch bitfield
+ register fields before actual emit, as it is only really
+ used before emit, otherwise, on special conditions it
+ may consider live registers as dead during code emit.
+
+2015-02-15 Paulo Andrade <pcpa@gnu.org>
+
+ * lib/jit_x86-cpu.c, lib/jit_x86-sse.c, lib/jit_x86-x87.c:
+ Correct encoding of ldxr* stxr* in the x32 abi. If the
+ displacement register is negative, it would generate
+ a 64 bit instruction with a 32 bit unsigned displacement.
+
+ * check/ranger.tst, check/ranger.ok: New files, implementing
+ a test case for negative loads and stores. This is range.tst
+ converted to use registers instead of immediate offsets.
+
+ * check/Makefile.am: Update for the new test case.
+
+2015-02-07 Paulo Andrade <pcpa@gnu.org>
+
+ * lib/jit_size.c: Preventively use at least 144 bytes
+ if JIT_INSTR_MAX is less than it. The logic is not
+ guaranteed to be 100% precise, it is mostly heuristics
+ to allocate a buffer with as close as possible size,
+ but a wrong value may cause code generation to write
+ past the end of the buffer.
+
+2015-02-03 Paulo Andrade <pcpa@gnu.org>
+
+ * lib/lightning.c: Correct the reason the bug in
+ simplify_stxi was not triggered before, it was due to
+ incorrectly resetting the value->code field, which was
+ causing it to never properly optimize:
+ stxi Im0 Rb0 Rt0
+ ldxi Rt1 Rb1 Im1
+ when Rb0 == Rb1, Rt0 == Rt1 and Im0 == Im1
+ There was another possible issue, that has been also
+ addressed in this commit, that would be the case of
+ Rbn == Rtn, where no redundancy removal is possible.
+
+2015-02-03 Paulo Andrade <pcpa@gnu.org>
+
+ * lib/lightning.c: Correct wrong check in simplify_stxi.
+ The test was incorrectly comparing the target register
+ and the displacement offset. This was a time bomb bug,
+ that would trigger in code like:
+ stxi Im0 Rb0 Rt0
+ stxi Im1 Rb1 Rt1
+ if Rb0 == Rb1 && Rt0 == Rt1 && Im0 == Rt1, that is,
+ the wrong check was Im0 == Rt1, instead of the supposed
+ Im0 == Im1 (that was what the code meant to do). It
+ was removing the second stxi assuming it was redundantly
+ generated; as that is not an uncommon pattern on
+ translators generating jit.
+
+2015-02-02 Paulo Andrade <pcpa@gnu.org>
+
+ * configure.ac, include/lightning/jit_private.h,
+ lib/jit_aarch64.c, lib/jit_alpha.c, lib/jit_arm.c,
+ lib/jit_disasm.c, lib/jit_hppa.c, lib/jit_ia64.c,
+ lib/jit_mips.c, lib/jit_ppc.c, lib/jit_print.c,
+ lib/jit_s390.c, lib/jit_sparc.c, lib/jit_x86.c: Add a new
+ --enable-devel-disassembler option, that should be used
+ during development, or lightning debug. This option
+ intermixes previous jit_print and jit_disassemble
+ output, making it easier to visualize what lightning
+ call was used, and what code was generated.
+
+2015-01-31 Paulo Andrade <pcpa@gnu.org>
+
+ * lib/jit_arm-cpu.c, lib/jit_arm.c: Only limit to 24 bit
+ displacement non conditional jump in the same jit_state_t.
+
+2015-01-19 Paulo Andrade <pcpa@gnu.org>
+
+ * doc/body.texi: Reorder documentation, making jit_frame
+ and jit_tramp the lightning response to the need of
+ trampolines, continuations and tail call optimizations.
+ A pseudo code example of a factorial function was added.
+ Also added a section for description of the available
+ predicates.
+
+ * doc/fact.c: New file, implementing a simple example of
+ a translation of a trivial, recursive, tail call optimization
+ into lightning calls. This is the conversion to functional C
+ code of the example in doc/body.texi.
+
+ * doc/Makefile.am: Update for the new test case.
+
+2015-01-17 Paulo Andrade <pcpa@gnu.org>
+
+ * include/lightning.h, lib/jit_aarch64.c,
+ lib/jit_alpha.c, lib/jit_arm-vfp.c, lib/jit_arm.c,
+ lib/jit_hppa.c, lib/jit_ia64.c, lib/jit_mips.c,
+ lib/jit_ppc.c, lib/jit_s390.c, lib/jit_sparc.c,
+ lib/jit_x86.c: Add the new jit_arg_register_p predicate.
+ The predicate is expected to be used to know if an
+ argument is in a register, what would need special
+ handling if code that can overwrite non callee save
+ registers is executed.
+
+ * check/carg.c: New test case to check consistency and
+ expected usage of jit_arg_register_p.
+
+ * check/Makefile.am: Update for new test case.
+
+2015-01-17 Paulo Andrade <pcpa@gnu.org>
+
+ * include/lightning/jit_aarch64.h,
+ include/lightning/jit_alpha.h,
+ include/lightning/jit_arm.h,
+ include/lightning/jit_hppa.h,
+ include/lightning/jit_mips.h,
+ include/lightning/jit_ppc.h,
+ include/lightning/jit_s390.h,
+ include/lightning/jit_sparc.h,
+ include/lightning/jit_x86.h,
+ lib/jit_aarch64.c, lib/jit_alpha.c,
+ lib/jit_arm.c, lib/jit_hppa.c,
+ lib/jit_ia64.c, lib/jit_mips.c,
+ lib/jit_ppc.c, lib/jit_s390.c,
+ lib/jit_sparc.c, lib/jit_x86.c: Remove jit_arg_reg_p and
+ jit_arg_f_reg_p from a public header, and define it only
+ on port specific files where an integer offset is used
+ to qualify an argument identifier. Exported code expects
+ an opaque pointer (but of jit_node_t* type) to "qualify"
+ an argument identifier.
+ This patch, and the code review/simplification done during
+ it also corrected some bugs:
+ o Inconsistent jit_arg_d value of double argument after 3
+ integer arguments in arm for jit_functions; tested, C
+ functions were being properly called.
+ o Inconsistent use of getarg_{f,d} and putarg*_{f,d} on
+ s390 (32-bit) that happened to not have a proper test
+ case, as it would only happen for jit functions, and
+ tested, called C functions had proper arguments.
+ o Corrected a "last minute" correction that did not go
+ to the committed version, and would not compile on hppa,
+ due to bad _jit_putargi_d prototype definition.
+
+2015-01-17 Paulo Andrade <pcpa@gnu.org>
+
+ * doc/body.texi: Correct wrong/outdated information for
+ hton*, pusharg* and ret*, and add missing documentation
+ for rsb*, qmul*, qdvi* and putarg*.
+
+2015-01-15 Paulo Andrade <pcpa@gnu.org>
+
+ * configure.ac, lib/jit_disasm.c: Rewrite workaround
+ to apparent problem to initialize powerpc disassembler.
+
+2015-01-15 Paulo Andrade <pcpa@gnu.org>
+
+ * include/lightning.h, lib/jit_aarch64.c,
+ lib/jit_alpha.c, lib/jit_arm.c, lib/jit_hppa.c,
+ lib/jit_ia64.c, lib/jit_mips.c, lib/jit_ppc.c,
+ lib/jit_s390.c, lib/jit_sparc.c, lib/jit_x86.c:
+ Implement jit_putarg*. It works as a mix of jit_getarg*
+ and jit_pusharg*, in the way that the first argument is
+ a register or immediate, and the second is a pointer
+ returned by jit_arg*. The use of the interface is to change
+ values of arguments to the current jit function.
+
+ * check/put.ok, check/put.tst: New test cases exercising
+ the new jit_putarg* interface.
+
+ * check/Makefile.am, check/lightning.c: Update for the
+ new test case and interface.
+
+2015-01-08 Paulo Andrade <pcpa@gnu.org>
+
+ * include/lightning/jit_s390.h, lib/jit_s390-cpu.c,
+ lib/jit_s390-fpu.c, lib/jit_s390-sz.c, lib/jit_s390.c:
+ Renamed s390x* files to s390*.
+
+ * check/float.tst, check/lightning.c, configure.ac,
+ include/lightning.h, include/lightning/Makefile.am,
+ lib/Makefile.am, lib/jit_s390.c, lib/jit_size.c,
+ lib/lightning.c: Update for renamed files.
+
+2015-01-08 Paulo Andrade <pcpa@gnu.org>
+
+ * include/lightning.h, include/lightning/jit_private.h,
+ include/lightning/jit_s390x.h, lib/jit_disasm.c,
+ lib/jit_s390x-cpu.c, lib/jit_s390x-fpu.c, lib/jit_s390x-sz.c,
+ lib/jit_s390x.c, lib/jit_size.c, lib/lightning.c:
+ Add support for generating jit for s390 32 bit. This change
+ also removed %f15 from the list of temporaries fpr registers;
+ it was not being used, but if it were, it would corrupt the
+ stack frame because the spill address would overwrite grp
+ offsets.
+
+2014-12-26 Paulo Andrade <pcpa@gnu.org>
+
+ * lib/jit_ppc-cpu.c, lib/jit_ppc.c: Correct some endianness issues
+ on the powerpc le backend.
+
+2014-12-26 Paulo Andrade <pcpa@gnu.org>
+
+ * lib/jit_ppc-cpu.c: Add mcrxr instruction emulation,
+ as this instruction has been phased out, and should be
+ implemented as a kernel trap.
+
+2014-12-26 Paulo Andrade <pcpa@gnu.org>
+
+ * lib/jit_arm.c: Better check for need to flush constants
+ before the pool being no longer reachable.
+
+2014-12-25 Paulo Andrade <pcpa@gnu.org>
+
+ * include/lightning.h: Split jit_htonr in the new 3 interfaces
+ jit_htonr_us, jit_htonr_ui and jit_htonr_ul, the latter only
+ available on 64 bit. The plain/untyped jit_htonr macro call
+ maps to the wordsize one.
+ * lib/jit_aarch64-cpu.c, lib/jit_aarch64-sz.c, lib/jit_aarch64.c,
+ lib/jit_alpha-cpu.c, lib/jit_alpha-sz.c, lib/jit_alpha.c,
+ lib/jit_arm-cpu.c, lib/jit_arm-sz.c, lib/jit_arm.c,
+ lib/jit_hppa-cpu.c, lib/jit_hppa-sz.c, lib/jit_hppa.c,
+ lib/jit_ia64-cpu.c, lib/jit_ia64-sz.c, lib/jit_ia64.c,
+ lib/jit_mips-cpu.c, lib/jit_mips-sz.c, lib/jit_mips.c,
+ lib/jit_ppc-cpu.c, lib/jit_ppc-sz.c, lib/jit_ppc.c,
+ lib/jit_s390x-cpu.c, lib/jit_s390x-sz.c, lib/jit_s390x.c,
+ lib/jit_sparc-cpu.c, lib/jit_sparc-sz.c, lib/jit_sparc.c,
+ lib/jit_x86-cpu.c, lib/jit_x86-sz.c, lib/jit_x86.c:
+ Update backends for the new jit_htonr*.
+ * check/lightning.c, lib/jit_names.c, lib/lightning.c:
+ Update for the new jit_htonr* interfaces.
+ * check/Makefile.am: Update for new test cases.
+ * check/hton.ok, check/hton.tst: New test cases.
+
+2014-12-24 Paulo Andrade <pcpa@gnu.org>
+
+ * include/lightning/jit_private.h, include/lightning/jit_x86.h,
+ lib/jit_disasm.c, lib/jit_x86-cpu.c, lib/jit_x86-sse.c,
+ lib/jit_x86-sz.c, lib/jit_x86-x87.c, lib/jit_x86.c,
+ size.c: Implement support for the x32 abi. Built and
+ tested on Gentoo default/linux/amd64/13.0/x32 profile.
+
+2014-12-24 Paulo Andrade <pcpa@gnu.org>
+
+ * lib/jit_names.c: Add missing rsbi_f and rsbi_d strings.
+
+2014-12-21 Paulo Andrade <pcpa@gnu.org>
+
+ * lib/jit_arm.c: Call __clear_cache for every page.
+ This should only be required for older boards or
+ toolchain setup, but has been reported to be required
+ for lightning at some point.
+
+2014-12-21 Paulo Andrade <pcpa@gnu.org>
+
+ * lib/jit_arm.c: Correct check to guard overflow of index
+ of constants from program counter.
+
+2014-11-24 Paulo Andrade <pcpa@gnu.org>
+
+ * lib/lightning.c: Remove an optimization to callee save
+ registers that may incorrectly remove a jit_movr under
+ special conditions.
+
+2014-11-20 Paulo Andrade <pcpa@gnu.org>
+
+ * include/lightning/jit_ppc.h, lib/jit_ppc-cpu.c,
+ lib/jit_ppc.c: Add initial powerpc le support.
+
+2014-11-20 Paulo Andrade <pcpa@gnu.org>
+
+ * lib/jit_disasm.c: Change thumb or arm disassemble based on
+ jit code before disassembly.
+
+ * lib/jit_arm-cpu.c: Correct reversed arguments to LDRD and
+ STRD instructions, and correct checking for support of those.
+
+ * lib/jit_arm-swf.c: Correct wrong use of LDRD and STRD and
+ only use those if the register is even.
+
+ * check/check.arm.swf.sh, check/check.arm4.swf.sh: New files
+ to test LDRD and STRD, as well as the alternate code path
+ when those are not available, in the .arm4. test case.
+
+ * check/Makefile.am: Update for the new test cases.
+
+2014-11-08 Paulo Andrade <pcpa@gnu.org>
+
+ * include/lightning/jit_private.h, lib/jit_aarch64.c,
+ lib/jit_alpha.c, lib/jit_arm.c, lib/jit_hppa.c,
+ lib/jit_ia64.c, lib/jit_mips.c, lib/jit_ppc.c,
+ lib/jit_s390x.c, lib/jit_sparc.c, lib/jit_x86.c:
+ Implement a private jit_flush call, that flushes
+ the cache, if applicable, aligning down to the
+ previous and up to the next page boundary.
+
+2014-11-08 Paulo Andrade <pcpa@gnu.org>
+
+ * check/ctramp.c: New file. It just repeats the test
+ of tramp.tst, but using two jit_state_t, what should
+ test possible issues with two contexts, and also validate
+ jit_tramp works on backends with function descriptions.
+
+ * check/Makefile.am: Update for new test case.
+
+2014-11-03 Paulo Andrade <pcpa@gnu.org>
+
+ * include/lightning/jit_mips.h: Do not make the t9 register
+ JIT_R11 (or JIT_R7 for n32 or n64 abi) available. Previously
+ it caused problems if one expects it to not be changed in a
+ function call. For example, calling a jit function, where it
+ really does not need to be changed.
+
+2014-10-26 Paulo Andrade <pcpa@gnu.org>
+
+ * lib/jit_aarch64.c, lib/jit_alpha.c, lib/jit_arm.c,
+ lib/jit_hppa.c, lib/jit_ia64.c, lib/jit_mips.c, lib/jit_ppc.c,
+ lib/jit_s390x.c, lib/jit_sparc.c, lib/jit_x86.c: Add an
+ assertion to all code generation "drivers" to ensure
+ _jitc->regarg is empty or in an expected state, after
+ translation of a lightning instruction to native code.
+ This change was a brute force test to find out other cases
+ of a temporary not being released (like was happening with
+ _bmsi and _bmci on x86), but no other case was found,
+ after running make check, with assertions enabled, on all
+ backends.
+
+2014-10-26 Paulo Andrade <pcpa@gnu.org>
+
+ * lib/jit_x86-cpu.c: Correct a register allocation leak in
+ _bmsi and _bmci.
+
+2014-10-25 Paulo Andrade <pcpa@gnu.org>
+
+ * lib/jit_disasm.c: Do not cause a fatal error if init_jit
+ fails in the jit_init_debug call.
+
+2014-10-24 Paulo Andrade <pcpa@gnu.org>
+
+ * lib/jit_ia64.c, lib/jit_ppc.c: Correct handling of function
+ descriptor when first prolog is a jit_tramp prolog. The
+ test case was using the same jit_context_t, so was not
+ triggering this condition.
+
+ * lib/jit_ppc-cpu.c: Properly handle jump displacements that
+ do not fit on 24 bits on powerpc. This required changing from previous
+ "mtlr reg, blr" to "mtctr reg, bctr" to properly handle
+ the logic to "hide" function descriptors, but that would
+ also be required as the proper jit_jmpr when/if implementing
+ optimizations to leaf functions (was working with blr because
+ it is saved/reloaded in prolog/epilog).
+
+2014-10-21 Paulo Andrade <pcpa@gnu.org>
+
+ * include/lightning.h, lib/lightning.c: Add three predicates
+ to query information about labels. jit_forward_p(label)
+ will return non zero if the label is "forward", that is
+ needs a call to jit_link(label), jit_indirect_p(label)
+ that returns non zero if the label was created with the
+ jit_indirect() call, and jit_target_p(label) that will
+ return non zero if there is at least one jump patched
+ to land at that label.
+
+2014-10-18 Paulo Andrade <pcpa@gnu.org>
+
+ * check/range.ok, check/range.tst: New test case designed
+ to catch incorrect code generation, usually due to incorrect
+ test of immediate size. The test checks a large amount of
+ encodings in "power of two" boundaries. This test exercises
+ a significant amount of code paths that was previously not
+ tested.
+
+ * check/Makefile.am: Add range test to make check target.
+
+ * lib/jit_aarch64-cpu.c: Correct wrong address calculation
+ for stxi_c, stxi_s, stxi_i and stxi_l when the offset is
+ too large.
+
+ * lib/jit_mips-fpu.c: Correct wrong size test to check if
+ an immediate can be encoded in a float or double store.
+
+ * lib/jit_s390x-cpu.c: Correct inverted encoding to stxi_s
+ when the offset cannot be encoded, and falls back to an
+ alternate encoding in 2 instructions.
+
+2014-10-17 Paulo Andrade <pcpa@gnu.org>
+
+ * check/alu_rsb.ok, check/alu_rsb.tst: New files implementing
+ tests for jit_rsb*.
+
+ * check/Makefile.am, check/lightning.c, include/lightning.h,
+ lib/jit_aarch64-cpu.c, lib/jit_aarch64-fpu.c, lib/jit_aarch64-sz.c,
+ lib/jit_aarch64.c, lib/jit_alpha-cpu.c, lib/jit_alpha-fpu.c,
+ lib/jit_alpha-sz.c, lib/jit_alpha.c, lib/jit_arm-cpu.c,
+ lib/jit_arm-swf.c, lib/jit_arm-sz.c, lib/jit_arm-vfp.c,
+ lib/jit_arm.c, lib/jit_hppa-cpu.c, lib/jit_hppa-fpu.c,
+ lib/jit_hppa-sz.c, lib/jit_hppa.c, lib/jit_ia64-cpu.c,
+ lib/jit_ia64-fpu.c, lib/jit_ia64-sz.c, lib/jit_ia64.c,
+ lib/jit_mips-cpu.c, lib/jit_mips-fpu.c, lib/jit_mips-sz.c,
+ lib/jit_mips.c, lib/jit_names.c, lib/jit_ppc-cpu.c,
+ lib/jit_ppc-fpu.c, lib/jit_ppc-sz.c, lib/jit_ppc.c,
+ lib/jit_s390x-cpu.c, lib/jit_s390x-fpu.c, lib/jit_s390x-sz.c,
+ lib/jit_s390x.c, lib/jit_sparc-cpu.c, lib/jit_sparc-fpu.c,
+ lib/jit_sparc-sz.c, lib/jit_sparc.c, lib/jit_x86-cpu.c,
+ lib/jit_x86-sse.c, lib/jit_x86-sz.c, lib/jit_x86-x87.c,
+ lib/jit_x86.c, lib/lightning.c: Implement jit_rsb*. This
+ was a missing lightning 1.x interface, that on most
+ backends is synthesized, but on a few backends (hppa and ia64),
+ it can generate better code as on those there is, or the
+ only instruction with an immediate is in "rsb" format
+ (left operand).
+
+2014-10-17 Paulo Andrade <pcpa@gnu.org>
+
+ * lib/jit_names.c: New file with single definition of string
+ representation of lightning IR codes.
+
+ * size.c: Modified to append the code name in a C comment
+ after the maximum instruction size.
+
+ * lib/jit_print.c: Minor change to not duplicate jit_names.c
+ contents.
+
+ * lib/jit_aarch64-sz.c, lib/jit_alpha-sz.c, lib/jit_arm-sz.c,
+ lib/jit_hppa-sz.c, lib/jit_ia64-sz.c, lib/jit_mips-sz.c,
+ lib/jit_ppc-sz.c, lib/jit_s390x-sz.c, lib/jit_sparc-sz.c,
+ lib/jit_x86-sz.c: Rewritten to add string representation of
+ IR codes in a C comment.
+
+2014-10-14 Paulo Andrade <pcpa@gnu.org>
+
+ * lib/jit_aarch64-cpu.c, lib/jit_alpha-cpu.c, lib/jit_arm-cpu.c,
+ lib/jit_hppa-cpu.c, lib/jit_mips-cpu.c, lib/jit_ppc-cpu.c,
+ lib/jit_sparc-cpu.c: Implement or correct the internal
+ nop(count) call that receives an argument that tells the
+ modulo bytes to align the code for the next instruction.
+
+ * include/lightning.h, lib/lightning.c, lib/jit_aarch64.c,
+ lib/jit_alpha.c, lib/jit_arm.c, lib/jit_hppa.c, lib/jit_ia64.c,
+ lib/jit_mips.c, lib/jit_ppc.c, lib/jit_s390x.c, lib/jit_sparc.c,
+ lib/jit_x86.c: Implement the new jit_align() call that receives
+ an argument, that tells the modulo, in bytes, to align the
+ next instruction. In most backends the only value that makes
+ a difference is a value that matches sizeof(void*), as all
+ other values usually are already automatically aligned in
+ labels, but not guaranteed to be aligned at word size bytes.
+
+ * check/align.ok, check/align.tst: New files, implementing
+ a simple test for the new jit_align() interface.
+
+ * check/Makefile.am, check/lightning.c, lib/jit_aarch64-sz.c,
+ lib/jit_alpha-sz.c, lib/jit_arm-sz.c, lib/jit_hppa-sz.c,
+ lib/jit_ia64-sz.c, lib/jit_mips-sz.c, lib/jit_ppc-sz.c,
+ lib/jit_print.c, lib/jit_s390x-sz.c, lib/jit_sparc-sz.c,
+ lib/jit_x86-sz.c: Update for the new jit_code_align code and
+ the jit_align() interface.
+
+2014-10-13 Paulo Andrade <pcpa@gnu.org>
+
+ * include/lightning.h, lib/jit_size.c, size.c: Use a
+ symbolic value for the last IR code.
+
+2014-10-12 Paulo Andrade <pcpa@gnu.org>
+
+ * include/lightning.h, include/lightning/jit_private.h,
+ lib/jit_aarch64-cpu.c, lib/jit_alpha-cpu.c, lib/jit_arm-cpu.c,
+ lib/jit_hppa-cpu.c, lib/jit_ia64-cpu.c, lib/jit_mips-cpu.c,
+ lib/jit_ppc-cpu.c, lib/jit_s390x-cpu.c, lib/jit_sparc-cpu.c,
+ lib/jit_x86-cpu.c, lib/lightning.c: Implement the new
+ jit_frame and jit_tramp interfaces, that allow writing
+ trampoline like calls, where a single dispatcher jit buffer
+ is written, and later other jit buffers are created, with
+ the same stack frame layout as the dispatcher. This is the
+ logic that GNU Smalltalk used in lightning 1.x, and is required
+ to make a sane port for lightning 2.x.
+
+ * jit_ia64-cpu.c: Implement support for jit_frame and jit_tramp,
+ and also correct wrong encoding for B4 instructions, that
+ implement jmpr, as well as correct reverse logic in _jmpr,
+ that was moving the branch register to the jump register,
+ and not vice-versa.
+ Also, if a stack frame is to be assumed, always assume it may
+ call a function with up to 8 arguments, regardless of the
+ hint frame argument.
+
+ * lib/jit_arm.c: Add a new must_align_p() interface to ensure
+ function prologs are always aligned. This condition was
+ previously always true, somewhat by accident, but with
+ jit_tramp it is not guaranteed.
+
+ * jit_ia64-cpu.c, lib/jit_ppc.c: Add minor special handling
+ required to implement jit_tramp, where a function descriptor
+ should not be added before a prolog, as jit_tramp means omit
+ prolog.
+
+ * check/lightning.c: Update test driver for the new interfaces.
+
+ * check/Makefile.am, check/tramp.tst, check/tramp.ok: Add
+ a simple test and example of the jit_frame and jit_tramp
+ usage implementing a simple Fibonacci function using a
+ simulation of an interpreter stack and how it would handle
+ state in language specific variables.
+
+ * doc/body.texi: Add documentation for jit_frame and
+ jit_tramp.
+
+2014-09-29 Paulo Andrade <pcpa@gnu.org>
+
+ * lib/jit_aarch64.c, lib/jit_alpha.c, lib/jit_arm.c,
+ lib/jit_hppa.c, lib/jit_ia64.c, lib/jit_mips.c,
+ lib/jit_ppc.c, lib/jit_s390x.c, lib/jit_sparc.c,
+ lib/jit_x86.c, lib/lightning.c: Allow jit_jmpi on a
+ target that is not a node. This may lead to hard to
+ debug code generation, but is a required feature for
+ certain generators, like the ones that used lightning
+ 1.2x. Note that previously, but not really well
+ documented, it was instructed to use:
+ jit_movi(rn, addr); jit_jmpr(rn);
+ but now, plain:
+ jit_patch_abs(jit_jmpi(), addr);
+ should also work.
+
+2014-09-24 Paulo Andrade <pcpa@gnu.org>
+
+ * lib/jit_x86-sz.c: Generate information about instruction
+ lengths for more precise calculation of buffer size on
+ Windows x64. This change is especially important because
+ the maximum instruction length is larger than other
+ systems, what could cause an out of bounds write on
+ special conditions without this update.
+
+2014-09-24 Paulo Andrade <pcpa@gnu.org>
+
+ * check/lightning.c: Add workaround to conflicting global
+ optind variable in cygwin binutils that have an internal
+ getopt* implementation.
+
+ * lib/jit_x86-cpu.c: Add a simple define ffsl ffs if building
+ for 32 bit and there is no ffsl function.
+
+2014-09-24 Paulo Andrade <pcpa@gnu.org>
+
+ * check/lightning.c: Add a hopefully temporary kludge to not use
+ sprintf and sscanf returned by dlsym. This is required to pass
+ the varargs test.
+
+ * include/lightning/jit_private.h: Use symbolic name for first
+ integer register argument, as this is different in sysv and
+ win64 abi.
+
+ * include/lightning/jit_x86.h: Add conditionals and definitions
+ for Windows x64 (under __CYGWIN__ preprocessor conditional).
+
+ * lib/jit_x86-cpu.c: Correct one instruction encoding bug, that
+ was working by accident. Only use rax to rdx for some byte
+ operations to work on compatibility mode (that is, to generate
+ the proper encoding, instead of actually generating encoding
+ for high byte registers, e.g. %bh).
+ Add proper prolog and epilog for windows x64.
+
+ * lib/jit_x86-sse.c: Correct a swapped rex prefix for float
+ operations.
+
+ * lib/jit_x86.c: Adjust to support Windows x64 abi.
+
+ * check/check.x87.nodata.sh: New file, previously used but that
+ was missing from git.
+
+2014-09-07 Paulo Andrade <pcpa@gnu.org>
+
+ * lib/lightning.c: Mark all registers advertised as live, as
+ per jit_callee_save_p as live whenever reaching a jump that
+ cannot be tracked. This is a rethink of the previous commit,
+ and is a better approach, otherwise there would not be much
+ sense on relying on jit_callee_save_p if it could not be
+ trusted.
+
+ * check/jmpr.tst, check/jmpr.ok: New files implementing a very
+ simple test case, that would actually cause an assertion on
+ code before the change to only mark as live when reaching a
+ jump that could not be tracked, the actually advertised as callee
+ save registers.
+
+ * check/Makefile.am: Update for new jmpr test case.
+
+2014-09-01 Paulo Andrade <pcpa@gnu.org>
+
+ * lib/lightning.c: Do not mark all registers in unknown state
+ as live on jit_jmpr, or jit_jmpi to an absolute address. Instead,
+ treat it as a function call, and only consider JIT_Vn registers
+ as possibly live.
+
+2014-08-29 Paulo Andrade <pcpa@gnu.org>
+
+ * doc/body.texi: Add a proper info menu entry for
+ GNU lightning.
+
+ * doc/version.texi: Regenerate.
+
+2014-08-16 Paulo Andrade <pcpa@gnu.org>
+
+ * lib/jit_aarch64-cpu.c, lib/jit_aarch64-fpu.c,
+ lib/jit_arm-cpu.c, lib/jit_arm-vfp.c,
+ lib/jit_hppa-cpu.c, lib/jit_hppa-fpu.c,
+ lib/jit_ia64-cpu.c, lib/jit_ia64-fpu.c,
+ lib/jit_mips-cpu.c, lib/jit_mips-fpu.c,
+ lib/jit_ppc-cpu.c, lib/jit_ppc-fpu.c,
+ lib/jit_s390x-cpu.c, lib/jit_s390x-fpu.c,
+ lib/jit_s390x.c, lib/jit_sparc-cpu.c,
+ lib/jit_x86-cpu.c, lib/jit_x86-sse.c,
+ lib/jit_x86-x87.c: Review generation of all branch
+ instructions and always add the jit_class_nospill
+ bitfield for temporary registers that cannot be spilled
+ because the reload would be after a conditional jump; the
+ patch only adds an extra assertion. These conditions do
+ not happen on documented lightning usage, but can happen
+ if one uses the not exported jit_get_reg and jit_unget_reg
+ calls and cause enough register starvation.
+
+2014-08-16 Paulo Andrade <pcpa@gnu.org>
+
+ * lib/jit_alpha.c: Correct wrong bitmask of most argument
+ float register arguments, that were being set as callee
+ save instead of argument registers class.
+
+2014-08-16 Paulo Andrade <pcpa@gnu.org>
+
+ * lib/jit_arm-sz.c: Regenerate table of known maximum
+ instruction sizes for the software float fallback,
+ that implements "virtual" float registers in the stack
+ and operations as calls to libgcc.
+
+ * size.c: Correct typo in the generated jit_arm-sz.c file.
+
+2014-08-10 Paulo Andrade <pcpa@gnu.org>
+
+ * include/lightning/jit_alpha.h, lib/jit_alpha-cpu.c,
+ lib/jit_alpha-fpu.c, lib/jit_alpha-sz.c, lib/jit_alpha.c:
+ New files implementing a lightning Alpha port. Thanks
+ to Trent Nelson and snakebit.net staff for providing access
+ to an Alpha system.
+
+ * check/float.tst, check/lightning.c, configure.ac,
+ include/lightning.h, include/lightning/Makefile.am,
+ include/lightning/jit_private.h, lib/Makefile.am,
+ lib/jit_disasm.c, lib/jit_size.c, lib/lightning.c:
+ Minor changes to adapt for the new Alpha port.
+
+2014-08-10 Paulo Andrade <pcpa@gnu.org>
+
+ * lib/lightning.c: Always mark JIT_RET and JIT_FRET as
+ live in a function epilog. This is required because
+ on some ports a complex sequence, allocating one or more
+ registers, may be required to jump from a ret* to the
+ epilog, and the lightning api does not have annotations
+ to know if a function returns a value, or the type of
+ the return value.
+
+2014-08-10 Paulo Andrade <pcpa@gnu.org>
+
+ * lib/lightning.c: Change the correct live bitmask of
+ return registers after a function call in jit_update.
+
+2014-08-10 Paulo Andrade <pcpa@gnu.org>
+
+ * lib/lightning.c: Change assertions to have an int
+ result and correct a bad bit mask assertion.
+
+2014-08-10 Paulo Andrade <pcpa@gnu.org>
+
+ * lib/jit_aarch64.c: Correct bad setup for assertion
+ of consistency before a patch.
+
+2014-08-10 Paulo Andrade <pcpa@gnu.org>
+
+ * lib/jit_mips-cpu.c: Correct typo in the jit_bmsr
+ implementation that was using the wrong test result
+ register.
+
+2014-07-28 Paulo Andrade <pcpa@gnu.org>
+
+ * lib/jit_memory.c: Do not call free on NULL pointers.
+
+ * include/lightning/jit_private.h, lib/jit_note.c,
+ lib/lightning.c: Add a wrapper to memcpy and memmove
+ to not actually call those functions with a zero size
+ argument, and likely also a null src or dst.
+
+2014-07-27 Paulo Andrade <pcpa@gnu.org>
+
+ * include/lightning/jit_private.h, lib/jit_disasm.c,
+ lib/lightning.c: Remove the global jit_progname variable.
+ It was being only used in jit_init_debug, that is called
+ from init_jit, so, just pass an argument.
+
+2014-07-27 Paulo Andrade <pcpa@gnu.org>
+
+ * doc/body.texi: Add note that jit_set_memory_functions
+ should be called before init_jit, because init_jit
+ itself may call the memory wrappers.
+
+2014-04-22 Paulo Andrade <pcpa@gnu.org>
+
+ * lib/jit_arm.c: Do not get confused with default settings
+ if /proc is not mounted on Linux specific code path.
+
+2014-04-09 Paulo Andrade <pcpa@gnu.org>
+
+ * include/lightning/jit_aarch64.h, include/lightning/jit_arm.h,
+ include/lightning/jit_hppa.h, include/lightning/jit_ia64.h,
+ include/lightning/jit_mips.h, include/lightning/jit_ppc.h,
+ include/lightning/jit_private.h, include/lightning/jit_s390x.h,
+ include/lightning/jit_sparc.h, include/lightning/jit_x86.h:
+ Do not add jit_regset_t, JIT_RA0, and JIT_FA0 to the installed
+ header file. These types and definitions are supposed to be
+ only used internally.
+
+2014-04-05 Paulo Andrade <pcpa@gnu.org>
+
+ * lib/jit_arm-cpu.c: Only adjust stack pointer in prolog if
+ need stack space, that is, do not emit a nop instruction
+ subtracting zero from the stack pointer.
+
+2014-04-04 Paulo Andrade <pcpa@gnu.org>
+
+ * lib/jit_disasm.c: Correct a crash in the doc/printf example
+ on arm due to releasing the data_info information in
+ jit_clear_state. This is a special case for arm only, and
+ actually, only armv5 or older uses the data_info buffer,
+ or when forcing arm instruction set mode besides thumb
+ available.
+
+2014-12-03 Paulo Andrade <pcpa@gnu.org>
+
+ * doc/body.texi: Write detailed description and examples for
+ jit_get_memory_functions, jit_set_memory_functions,
+ jit_get_code, jit_set_code, jit_get_data and jit_set_data.
+
+2014-12-03 Paulo Andrade <pcpa@gnu.org>
+
+ * include/lightning.h, include/lightning/jit_private.h,
+ lib/lightning.c: Implement the new jit_set_data() interface,
+ and the new jit_get_data() helper. Like jit_set_code(),
+ jit_realize() should be called before jit_set_data().
+ The most common usage should be jit_set_data(JIT_DISABLE_DATA
+ | JIT_DISABLE_NOTE), to force synthesize any float/double
+ constant in the stack and not generate any debug information.
+
+ * lib/jit_note.c: Minor change to debug note generation as
+ now it uses an alternate temporary data buffer during constants
+ and debug generation to accommodate the possibility of the user
+ setting an alternate data buffer.
+
+ * lib/jit_hppa-fpu.c, lib/jit_s390x.c, lib/jit_s390x-cpu.c,
+ lib/jit_s390x-fpu.c, lib/jit_sparc.c, lib/jit_sparc-fpu.c,
+ lib/jit_x86-sse.c, lib/jit_x86-x87.c: Implement jit_set_data.
+
+ * lib/jit_hppa-sz.c, lib/jit_sparc-sz.c, lib/jit_x86-sz.c,
+ lib/jit_s390x-sz.c: Update for several instructions that now
+ have a different maximum length due to jit_set_data.
+
+ * lib/jit_mips-fpu.c: Implement jit_set_data, but missing
+ validation on n32 and n64 abis (and/or big endian).
+
+ * lib/jit_mips-sz.c: Update for changes in o32.
+
+ * lib/jit_ppc-fpu.c: Implement jit_set_data, but missing
+ validation on Darwin PPC.
+
+ * lib/jit_ppc-sz.c: Update for changes in powerpc 32 and
+ 64 bit.
+
+ * lib/jit_ia64-fpu.c: Implement untested jit_set_data.
+
+ * TODO: Add note to list ports that were not tested for the
+ new jit_set_data() feature, due to no longer having access
+ to them.
+
+ * check/nodata.c: New file implementing a simple test exercising
+ several different conditions created by jit_set_data().
+
+ * check/check.nodata.sh: New file implementing a wrapper
+ over the existing *.tst files, that runs all tests without
+ using a data buffer for constants; only meaningful (and
+ enabled) on architectures that used to store float/double
+ constants on a read only data buffer.
+
+ * configure.ac, check/Makefile.am: Update for the new test
+ cases.
+
+ * check/lightning.c: Implement the new "-d" option that
+ sets an internal flag to call jit_set_data() to disable
+ constants and debug, that is, using only a pure code
+ buffer.
+
+2014-11-03 Paulo Andrade <pcpa@gnu.org>
+
+ * include/lightning.h, include/lightning/jit_private.h,
+ lib/lightning.c: Implement the new jit_set_code() interface,
+ that allows instructing lightning to use an alternate code
+ buffer. The new jit_realize() function should be called
+ before jit_set_code(), and usually call jit_get_code()
+ to query the amount of bytes expected to be required for
+ the code.
+
+ * lib/jit_size.c: Minor update to have less chances of
+ miscalculating the code buffer by starting the counter
+ with the size of the longest instruction instead of zero,
+ as code emit fails if at any moment less than the longest
+ instruction bytes are available.
+
+ * check/setcode.c: New file implementing some basic tests
+ of the new jit_set_code() interface.
+
+ * check/Makefile.am: Update for newer test case.
+
+2014-06-03 Paulo Andrade <pcpa@gnu.org>
+
+ * include/lightning.h, lib/lightning.c: Add the new
+ jit_indirect() call, that returns a special label node,
+ and tells lightning that the label may be the target of
+ an indirect jump.
+
+ * doc/body.texi: Document the new jit_indirect() call, and
+ add examples of different ways to create labels and branches.
+
+2014-02-23 Paulo Andrade <pcpa@gnu.org>
+
+ * lib/jit_x86.c: Rewrite previous patch to inline save/restore
+ because clobbering %ebx in x86 is treated as an error
+ (jit_x86.c:239:5: error: PIC register clobbered by 'ebx' in 'asm').
+
+2014-02-19 Paulo Andrade <pcpa@gnu.org>
+
+ * lib/jit_x86.c: Rewrite incorrect inline assembly that could
+ truncate a variable in a callee save register. Now it simply
+ tells gcc that the register is clobbered, instead of using a
+ *32 bit* swap with a temporary variable. The problem only
+ happens when compiling with optimization.
+
+2014-02-19 Paulo Andrade <pcpa@gnu.org>
+
+ * include/lightning/jit_aarch64.h, include/lightning/jit_arm.h,
+ include/lightning/jit_hppa.h, include/lightning/jit_ia64.h,
+ include/lightning/jit_mips.h, include/lightning/jit_ppc.h,
+ include/lightning/jit_s390x.h, include/lightning/jit_sparc.h,
+ include/lightning/jit_x86.h: Change jit_regset_t to an
+ unsigned type, to allow safe right shift.
+
+ * lib/lightning.c: Rewrite jit_regset_scan1 to allow easier
+ compiler optimization.
+
+2013-12-03 Paulo Andrade <pcpa@gnu.org>
+
+ * lib/jit_x86-x87.c: Correct wrong optimization when
+ loading the log(2) constant.
+
+2013-12-03 Paulo Andrade <pcpa@gnu.org>
+
+ * lib/jit_x86-cpu.c: Use the emms instruction before
+ calling any function. This is particularly important
+ when using c99 complex functions as it can easily
+ overflow the x87 stack due to the way lightning uses
+ the x87 stack as a flat register file.
+
+2013-12-02 Paulo Andrade <pcpa@gnu.org>
+
+ * lib/jit_x86-x87.c: Correct wrong code generation due
+ to comparing the base and not the value register with
+ %st(0) in stxi_f.
+
+2013-12-02 Paulo Andrade <pcpa@gnu.org>
+
+ * lib/jit_x86-x87.c, lib/jit_x86.c: Use 8 bytes aligned
+ stack offset for float/double x87 to/from sse move.
+
+2013-11-27 Paulo Andrade <pcpa@gnu.org>
+
+ * configure.ac, lib/jit_arm-swf.c, lib/jit_arm.c: Add
+ changes that should at least allow building lightning
+ on Apple iOS7.
+
+2013-10-08 Paulo Andrade <pcpa@gnu.org>
+
+ * lib/jit_ppc-cpu.c: Correct wrong shortcut for ldxi_l with
+ a zero offset, that was calling ldr_i instead of ldr_l.
+
+2013-10-08 Paulo Andrade <pcpa@gnu.org>
+
+ * include/lightning/jit_arm.h, lib/jit_arm-cpu.c: Do not use
+ by default load/store instructions that map to ldrt/strt.
+ There is already the long displacement version for positive
+ offsets, and when using a (shorter) negative offset it does
+ not map to ldrt/strt. At least on qemu strt may cause
+ reproducible, but unexpected SIGILL.
+
+2013-10-08 Paulo Andrade <pcpa@gnu.org>
+
+ * lib/jit_arm-vfp.c: Correct wrong load/store offset
+ calculation when the displacement is constant but too
+ large to use an instruction with an immediate offset.
+
+2013-10-07 Paulo Andrade <pcpa@gnu.org>
+
+ * check/self.c: Extend tests to validate jit_callee_save_p
+ does not cause an assertion on valid arguments, and test
+ extra registers defined on some backends.
+
+ * configure.ac: Do not ignore environment CFLAGS when
+ checking if need to test runtime configurable options,
+ like use x87 when sse2 is available, arm instruction set
+ instead of thumb, etc.
+
+ * include/lightning/jit_arm.h: Correct wrong jit_f macro
+ definition.
+
+ * include/lightning/jit_ia64.h, include/lightning/jit_ppc.h:
+ Correct wrong jit_r macro definition.
+
+ * lib/jit_x86-x87.c, lib/jit_x86.c: Actually use the
+ reserved stack space for integer to/from float conversion.
+ The stack space was also changed to ensure it is 8 bytes
+ aligned. Also, for Solaris x86 in 32 bit mode, an alternate
+ truncr_d was implemented because for some reason it is
+ failing with SIGILL if using the "fisttpl" instructions,
+ that must be available on p6 or newer, but for the sake of
+ making all tests pass, implement a 486 or newer sequence
+ if "sun" is defined.
+
+2013-10-03 Paulo Andrade <pcpa@gnu.org>
+
+ * include/lightning/jit_mips.h, lib/jit_mips-cpu.c,
+ lib/jit_mips-sz.c, lib/jit_mips.c, size: Build and
+ pass all test cases on Irix big endian mips using
+ the 64 bit abi.
+
+2013-10-02 Paulo Andrade <pcpa@gnu.org>
+
+ * include/lightning/jit_mips.h: Add proper mips abi detection.
+
+2013-09-30 Paulo Andrade <pcpa@gnu.org>
+
+ * lib/jit_print.c: Do not crash if calling jit_print from
+ gdb before actually emitting code.
+
+ * lib/lightning.c: Correct misplaced check for already
+ visited blocks on conditional branches, what was preventing
+ proper merge live bit masks of forward blocks.
+
+2013-09-30 Paulo Andrade <pcpa@gnu.org>
+
+ * lib/jit_x86-cpu.c: Correct not properly tested case of using
+ %r12 as index register, what was causing an invalid assertion.
+ %r12 is mapped to the "extra" JIT_R3 register, and test cases
+ only test "standard" lightning registers.
+
+2013-09-28 Paulo Andrade <pcpa@gnu.org>
+
+ * lib/jit_ia64.c: Minor change to force collecting the maximum
+ instruction length in the --enable-devel-get-jit-size build
+ mode. The actual generated file did not change because the
+ sampling was large enough that it had already collected proper
+ information in the previously slightly buggy code (not forcing
+ a sync of the instructions that could be combined).
+
+2013-09-27 Paulo Andrade <pcpa@gnu.org>
+
+ * lib/jit_arm.c: Correct build when disassembler is
+ disabled.
+
+2013-09-25 Paulo Andrade <pcpa@gnu.org>
+
+ * lib/jit_ia64-cpu.c, lib/jit_ia64-fpu.c: Correct some
+ off by one range checks (that were only accepting values
+ one less than the maximum allowed) and an invalid test
+ condition check that was forcing it to always use
+ indirect jumps even when reachable with an immediate
+ displacement.
+
+2013-09-24 Paulo Andrade <pcpa@gnu.org>
+
+ * lib/jit_aarch64-sz.c, lib/jit_arm-sz.c, lib/jit_hppa-sz.c,
+ lib/jit_ia64-sz.c, lib/jit_mips-sz.c, lib/jit_ppc-sz.c,
+ lib/jit_s390x-sz.c, lib/jit_size.c, lib/jit_sparc-sz.c,
+ lib/jit_x86-sz.c: New files implementing static tables
+ with longest known instructions length generated to match
+ a lightning instruction. These tables should make it easier
+ to make it very unlikely to ever miscalculate, or by too
+ much, the size of a code buffer.
+
+ * lib/jit_size.c: New file that aids to either collect
+ jit code size information, or use the information depending
+ on build options.
+
+ * size.c: New helper file that parses input for, and create
+ an initial jit_$arch-sz.c file, that needs some minor edit
+ for arches with multiple configurations.
+
+ * configure.ac, Makefile.am: Add the new, devel mode only
+ --enable-devel-get-jit-size configure option, that sets
+ compile time flags to collect jit code size information,
+ that will be used as input for the "noinst size program".
+
+ * lib/jit_aarch64.c, lib/jit_arm.c, lib/jit_disasm.c,
+ lib/jit_hppa.c, lib/jit_ia64.c, lib/jit_memory.c,
+ lib/jit_mips.c, lib/jit_ppc.c, lib/jit_s390x.c,
+ lib/jit_sparc.c, lib/jit_x86.c, lib/lightning.c: Minor
+ changes for the --enable-devel-get-jit-size build mode,
+ as well as the "production build mode" with jit code
+ size information.
+
+2013-09-14 Paulo Andrade <pcpa@gnu.org>
+
+ * include/lightning.h, lib/lightning.c: Add the new
+ jit_pointer_p interface, that returns a boolean value
+ telling if the pointer argument is inside the jit
+ code buffer. This is useful to avoid the need to add
+ extra labels and calls to jit_address to figure bounds
+ of code buffer, and still keep internal data private.
+
+2013-09-13 Paulo Andrade <pcpa@gnu.org>
+
+ * include/lightning.h, include/lightning/jit_private.h,
+ lib/jit_note.c: Change the code argument of jit_get_note
+ to a jit_pointer_t and make jit_get_note a public interface.
+ It was intended so since start, as a way to map an offset
+ in the code to a function name, file name and line number
+ mapping.
+
+2013-09-11 Paulo Andrade <pcpa@gnu.org>
+
+ * doc/body.texi: Correct reversed arguments in example of
+ usage in a (possibly) multi threaded, multiple jit_state_t
+ environments.
+
+ * include/lightning/jit_arm.h, include/lightning/jit_private.h,
+ lib/jit_arm-cpu.c, lib/jit_arm.c: Make a previously, non
+ documented, global state private to the related jit_state_t
+ generating code.
+
+2013-09-10 Paulo Andrade <pcpa@gnu.org>
+
+ * check/self.c, check/self.ok: New files implementing simple
+ consistency check assertions. At first validating some macros
+ that use values from different sources agree.
+
+ * check/Makefile.am: Update for the new test case.
+
+ * include/lightning.h, lib/lightning.c: Add the new
+ jit_callee_save_p() call, that is intended to be used when
+ writing complex code using lightning, so that one does not
+ need to verify what backend is being used, or have access to
+ private data, to query if a register is callee save or not;
+ on several backends the scratch registers are actually callee
+ save.
+
+ * include/lightning/jit_aarch64.h, include/lightning/jit_arm.h,
+ include/lightning/jit_hppa.h, include/lightning/jit_mips.h,
+ include/lightning/jit_ppc.h, include/lightning/jit_sparc.h,
+ include/lightning/jit_x86.h: Add an explicit definition for
+ JIT_R3-JIT_Rn, JIT_V3-JIT_Vn and JIT_F6-JIT_Fn when applicable.
+ This allows one to write code based on "#if defined(JIT_XN)"
+ and therefore, not need to check what is the current backend
+ or have access to private data structures. This is particularly
+ useful when writing virtual machines with several specialized,
+ global registers.
+
+ * lib/jit_ia64.c: Properly flag the callee save general
+ purpose registers as such, so that jit_callee_save_p() works
+ as intended.
+
+2013-09-10 Paulo Andrade <pcpa@gnu.org>
+
+ * check/lightning.c, configure.ac: Conditionally use the
+ code written to workaround a bug in the Hercules emulator,
+ as isnan and isinf are not available at least on HP-UX ia64.
+
+2013-09-10 Paulo Andrade <pcpa@gnu.org>
+
+ * lib/jit_s390x-cpu.c: Spill/reload correct callee save
+ float registers.
+
+2013-09-10 Paulo Andrade <pcpa@gnu.org>
+
+ * lib/jit_hppa-cpu.c: Correct code to call a function stored
+ in a register or a patched function address.
+
+2013-09-10 Paulo Andrade <pcpa@gnu.org>
+
+ * lib/jit_ia64-cpu.c: Correct incorrect logic when restoring
+ the value of the "r2" callee save register.
+
+2013-08-29 Paulo Andrade <pcpa@gnu.org>
+
+ * lib/jit_arm-cpu.c, lib/jit_arm.c: Correct wrong test and update
+ of the thumb offset information, when checking if needing to
+ patch a jump from arm to thumb mode. The problem would happen when
+ remapping the code buffer, and the new address being lower than
+ the previous one.
+
+2013-08-26 Paulo Andrade <pcpa@gnu.org>
+
+ * configure.ac: Extend FreeBSD test to also handle NetBSD.
+
+ * lib/jit_x86-cpu.c: Correct wrongly defined offset type of
+ ldxi_ui. Problem detected when building on NetBSD.
+
+ * lib/lightning.c: Adjust code to handle NetBSD mremap,
+ where arguments do not match Linux mremap.
+
+2013-08-26 Paulo Andrade <pcpa@gnu.org>
+
+ * lib/jit_ppc.c: Correct C sequence point problem miscalculating
+ the actual function address in a function descriptor. Problem
+ happens with gcc 4.8.1 at least.
+
+2013-08-11 Paulo Andrade <pcpa@gnu.org>
+
+ * lib/jit_s390x-cpu.c: Correct code checking if immediate
+ fits instruction, but using the negated value.
+
+2013-07-28 Paulo Andrade <pcpa@gnu.org>
+
+ * include/lightning/jit_s390x.h, lib/jit_s390x-cpu.c,
+ lib/jit_s390x-fpu.c, lib/jit_s390x.c: New files
+ implementing the new s390x port.
+
+ * configure.ac, include/lightning.h,
+ include/lightning/Makefile.am,
+ include/lightning/jit_private.h,
+ lib/Makefile.am, lib/jit_disasm.c, lib/lightning.c:
+ Minor adaptation for the new s390x backend.
+
+ * check/float.tst: Update for the s390x result of
+ truncating +Inf to integer.
+
+ * check/qalu_mul.tst: Add extra test cases to better test
+ high word of signed multiplication as the result is
+ adjusted from unsigned multiplication on s390x.
+
+2013-07-28 Paulo Andrade <pcpa@gnu.org>
+
+ * check/lightning.c: Do not assume casting a double NaN or
+ Inf to float will produce the expected float NaN or Inf.
+ This is not true at least under s390x.
+
+2013-07-28 Paulo Andrade <pcpa@gnu.org>
+
+ * check/check.arm.sh, check/check.sh, check/check.swf.sh,
+ check/check.x87.sh: Properly check test programs output,
+ not just rely on the test program self testing the results
+ and not crashing.
+
+2013-07-28 Paulo Andrade <pcpa@gnu.org>
+
+ * lib/jit_aarch64.c: Remove unused macros left from cut&paste
+ of jit_arm.c.
+
+2013-07-16 Paulo Andrade <pcpa@gnu.org>
+
+ * include/lightning/jit_aarch64.h, lib/jit_aarch64-cpu.c,
+ lib/jit_aarch64-fpu.c, lib/jit_aarch64.c: New files
+ implementing the new aarch64 port, as a new architecture,
+ not as an expansion of the existing armv[4-7] port.
+
+ * check/lightning.c: Add aarch64 support and a small
+ change to recognize character constants as immediate
+ values.
+
+ * check/float.tst: Add aarch64 preprocessor conditionals
+ to select proper expected value when converting [+-]Inf
+ and NaN to integer.
+
+ * include/lightning/jit_arm.h, lib/jit_arm.c: Minor changes
+ to better match the new aarch64 files.
+
+ * configure.ac, include/lightning.h,
+ include/lightning/Makefile.am, include/lightning/jit_private.h,
+ lib/Makefile.am, lib/lightning.c: Minor adjustments
+ for the aarch64 port.
+
+2013-07-08 Paulo Andrade <pcpa@gnu.org>
+
+ * NEWS, THANKS, configure.ac, doc/version.texi: Update for
+ the 1.99a second alpha release.
+
+2013-06-25 Paulo Andrade <pcpa@gnu.org>
+
+ * lib/jit_mips.c: Correct cut&paste error that caused wrong
+ stack offset calculation for double arguments in stack in
+ the o32 abi.
+ Correct typo in the __LITTLE_ENDIAN macro name, that came
+ from cut&paste error in the original typo in lib/jit_ppc.c.
+
+ * lib/jit_ia64.c, lib/jit_ppc.c: Correct typo in the
+ __LITTLE_ENDIAN macro name.
+
+2013-06-22 Paulo Andrade <pcpa@gnu.org>
+
+ * check/lightning.c, configure.ac, include/lightning.h,
+ lib/lightning.c: Add tests and quirks to build/detect
+ and/or work on Irix.
+
+ * include/lightning/jit_mips.h, lib/jit_mips-cpu.c,
+ lib/jit_mips-fpu.c, lib/jit_mips.c: Adapt code to run
+ in big endian mips, using the n32 abi.
+
+2013-06-18 Paulo Andrade <pcpa@gnu.org>
+
+ * include/lightning.h: Minor extra preprocessor testing
+ to "detect" byte order on x86 solaris, that now builds
+ and pass all test cases.
+
+2013-06-18 Paulo Andrade <pcpa@gnu.org>
+
+ * lib/jit_sparc-cpu.c: Correct compiler warning of value
+ used before assignment. The usage is bogus as the api
+ requires always patching jumps, but the random value used
+ could cause an assertion due to invalid displacement.
+
+ * lib/jit_sparc.c: Always load and store double arguments
+ in stack as 2 float loads or stores, for safety, as unaligned
+ access is not allowed in Sparc Solaris.
+
+2013-06-14 Paulo Andrade <pcpa@gnu.org>
+
+ * configure.ac: Force -mlp64 to CFLAGS on HP-UX ia64 port.
+ It is the only supported mode, and expects gcc as C compiler.
+
+ * include/lightning.h, lib/jit_ia64-cpu.c, lib/jit_ia64.c:
+ Correct ia64 port to work on HP-UX that runs it in big endian
+ mode.
+
+2013-06-10 Paulo Andrade <pcpa@gnu.org>
+
+ * lib/jit_hppa.c: Sanitize the cache synchronization inline
+ assembly code that was doing twice the work and redundantly
+ flushing the end address every loop iteration.
+
+2013-06-09 Paulo Andrade <pcpa@gnu.org>
+
+ * configure.ac, check/Makefile.am, doc/Makefile.am: Do not
+ explicitly link to -ldl, but instead autodetect the library
+ with dlopen, dlsym, etc.
+
+ * check/lightning.c: Add workaround to apparently buggy
+ getopt in HP-UX that sets optind to the wrong index, and
+ use RTLD_NEXT on HP-UX instead of RTLD_DEFAULT to dlsym
+ global symbols.
+
+ * include/lightning.h: Rework definitions of wordsize and
+ byte order to detect proper values on HP-UX.
+
+ * lib/lightning.c: Minor correction to use MAP_ANONYMOUS
+ instead of MAP_ANON on HP-UX.
+
+ * lib/jit_hppa.c: Float arguments must be passed on integer
+ registers on HP-UX, not only for varargs functions.
+ Add code to properly clear instruction cache. This was
+ not required on Debian hppa port, but may have been working
+ by accident.
+
+ * lib/jit_hppa-cpu.c: Follow pattern of HP-UX binaries and
+ use bve,n instead of bv,n to return from functions.
+
+ * lib/jit_hppa-fpu.c: For some reason "fst? frX,rX,(rY)" did
+ not work on the tested computer (HP-UX B.11.23 U 9000/785 HP-UX)
+ so the code was changed, at first for __hpux only to add the
+ base and offset register and use the instruction with an
+ immediate (zero) offset.
+
+2013-06-07 Paulo Andrade <pcpa@gnu.org>
+
+ * check/lightning.c, lib/jit_disasm.c, lib/jit_ppc-cpu.c,
+ lib/jit_ppc-fpu.c, lib/jit_ppc.c, include/lightning.h,
+ include/lightning/jit_ppc.h, include/lightning/jit_private.h:
+ Adapt code to work on 32 bit AIX ppc using gcc. Most changes
+ are basically to adapt the elf64 logic to 32 bit, as it does
+ not use the same convention of 32 bit Darwin ppc.
+
+ * check/stack.tst: Add a fake memcpy function to the test
+ case if running under AIX, as it is not available to dlsym.
+
+ * configure.ac: Check for getopt.h header, not available in
+ AIX.
+
+2013-06-01 Paulo Andrade <pcpa@gnu.org>
+
+ * include/lightning/jit_hppa.h, lib/jit_hppa-cpu.c,
+ lib/jit_hppa-fpu.c, lib/jit_hppa.c: New files implementing
+ the hppa port. Built on Debian Linux PA-RISC 2.0, 32 bit.
+
+ * check/float.tst: Add preprocessor for hppa expected
+ values when converting NaN and +-Inf to an integer.
+
+ * check/ldst.inc: Ensure double load/store tests use an
+ 8 byte aligned address by default.
+
+ * lib/lightning.c: Correct a bug found during tests in
+ the new port, where qmul* and qdiv* were not properly
+ setting one of the result registers as modified in the
+ function, what would be a problem if the only "write"
+ usage were the qmul* or qdiv*.
+
+ * check/varargs.tst, check/varargs.ok: Add one extra
+ interleaved integer/double test to validate proper code
+ generation in the extra case.
+
+ * check/lightning.c, configure.ac, include/lightning.h,
+ include/lightning/Makefile.am,
+ include/lightning/jit_private.h, lib/Makefile.am,
+ lib/jit_disasm.c: Update for the hppa port.
+
+2013-04-27 Paulo Andrade <pcpa@gnu.org>
+
+ * check/varargs.tst: Correct misplaced .align directive
+ that was causing the double buffer to not be aligned at
+ 8 bytes.
+ * lib/jit_ia64-cpu.c:
+ Properly implement abi for excess arguments passed on
+ stack.
+ Simplify load/store with immediate displacement argument
+ with zero value.
+ Simplify some calls to "subi" changing to "addi" with
+ a negative argument.
+ Remove some #if 0'ed code, that could be useful in
+ special conditions, but the most useful one would be
+ to "optimize" "static" jit functions, but for the sake
+ of simplicity, jit functions are implemented in a way
+ that can be passed back to C code as C function pointers.
+ Add an attribute to prototypes of several unused functions.
+ These functions are defined for the sake of implementing all
+ Itanium documented instructions, but a significant amount of
+ them is not used by lightning.
+ * lib/jit_ia64-fpu.c: Simplify load/store with zero immediate
+ displacement and add unused attribute for functions not used
+ by lightning, but required to provide macros implementing all
+ Itanium documented instructions.
+ * lib/jit_ia64.c: Update for the properly implemented abi
+ for stack arguments.
+ * lib/lightning.c: Mark an unused function as such.
+
+2013-04-27 Paulo Andrade <pcpa@gnu.org>
+
+ * lib/jit_ia64-cpu.c:
+ Correct immediate range check of integer comparisons when
+ inverting arguments.
+ Correct gei_u that was not decrementing immediate when
+ inverting arguments.
+ Correct b?add* and b?sub* that were not properly updating
+ the result register.
+
+2013-04-27 Paulo Andrade <pcpa@gnu.org>
+
+ * lib/jit_ia64-cpu.c: Correct wrong mapping of 2 instructions
+ in "M-, stop, M-, stop" translation, that was ignoring the
+ last stop (implemented as a nop I- stop).
+
+ * lib/jit_ia64-fpu.c: Properly implement fnorm.s and fnorm.d,
+ as well as the proper integer to float or double conversion.
+
+2013-04-27 Paulo Andrade <pcpa@gnu.org>
+
+ * lib/jit_ia64-cpu.c: Correct bogus implementation of ldr_T
+ for signed integers, that was using ld1.s, ld2.s and ld4.s.
+ The ".s" stands for speculative load, not sign extend.
+
+ * lib/jit_ia64-fpu.c: Correct bogus implementation of ldxr_T
+ for float and double. The third (actually, second) argument
+ is indeed added to the base register, but the base register
+ is modified. The actual M7 implementation was already correct,
+ just the ldxr_f and ldxr_d implementation that was kept in
+ a prototype state, misinterpreting what M7 does.
+
+2013-04-27 Paulo Andrade <pcpa@gnu.org>
+
+ * lib/jit_ia64-cpu.c: Correct X2 pattern matching by preventing
+ it to attempt to require a stop between the L and the X
+ instruction; that is, check the registers and predicates
+ before emitting the L instruction, not after.
+
+ * lib/jit_ia64-fpu.c: Slightly simplify and correct
+ divr_f and divr_d implementation.
+
+ * check/lightning.c: Add __ia64__ preprocessor define
+ on Itanium.
+
+ * check/alu.inc, check/clobber.tst, check/float.tst: Define
+ several macros conditionally to __ia64__. This is required
+ because __ia64__ jit generation can use way too much memory,
+ due to not implementing instruction reordering to avoid
+ as much as possible "stops", what causes way too many nops
+ to be generated, as well as the fact that division and
+ remainder requires function calls, and float division
+ requires significant code to implement.
+
+2013-04-27 Paulo Andrade <pcpa@gnu.org>
+
+ * include/lightning.h: Add new backend specific movr_w_d,
+ movr_d_w and movi_d_w codes as helpers to ia64 varargs
+ functions arguments.
+
+ * lib/jit_ia64-cpu.c:
+ Correct wrong encoding of A5 small integers.
+ Correct define of "mux" instruction modifiers.
+ Correct ordering of arguments and predicates of cmp_xy
+ implementation with immediate arguments; like most other
+ codes with an immediate, the immediate is the second, not
+ the third argument.
+
+ * lib/jit_ia64-fpu.c: Actual implementation of the code
+ to move to/from gpr to/from fpr, to implement varargs abi.
+
+ * lib/jit_ia64.c: Make fpr argument registers not allocatable
+ as temporaries, no need for the extra checks when there are
+ plenty registers.
+
+ * lib/jit_print.c, lib/lightning.c: Minor updates for the
+ new movr_w_d, movr_d_w and movi_d_w codes.
+
+2013-04-26 Paulo Andrade <pcpa@gnu.org>
+
+ * lib/jit_ia64-cpu.c, lib/jit_ia64-fpu.c: Correct code to
+ also insert a stop to break an instruction group if a
+ register is written more than once in the same group.
+ This may happen if a register is argument and result of
+ some lightning call (not a real instruction). The most
+ common case should be code in the pattern:
+ movl rn=largenum
+ ...
+ mov rn=smallnum
+ where "rn" would end up holding "largenum".
+ But the problem possibly could happen in other circumstances.
+
+2013-04-26 Paulo Andrade <pcpa@gnu.org>
+
+ * include/lightning/jit_ia64.h, lib/jit_ia64-cpu.c,
+ lib/jit_ia64-fpu.c, lib/jit_ia64.c:
+ Relocate JIT_Rn registers to the local registers, as, like
+ float registers, div/rem and sqrt are implemented as function
+ calls, and may overwrite non saved scratch registers.
+ Change patch_at to receive a jit_code_t instead of a
+ jit_node_t, so that it is easier to "inline" patches when
+ some instruction requires complex code to implement, e.g.
+ uneq and ltgt.
+ Correct arguments to FMA and FMA like instructions that,
+ due to a cut&paste error were passing the wrong argument
+ to the related F- implementation function.
+ Rewrite ltgt to return the proper result if one (or both)
+ of the arguments is unordered.
+
+2013-04-26 Paulo Andrade <pcpa@gnu.org>
+
+ * include/lightning/jit_ia64.h, include/lightning/jit_private.h,
+ lib/jit_ia64-cpu.c, lib/jit_ia64-fpu.c, lib/jit_ia64.c,
+ lib/lightning.c: Rework code to detect need of a "stop" to
+ also handle predicates, as if a predicate is written, it
+ cannot be read in the same instruction group.
+ Use a single jit_regset_t variable for all registers when
+ checking need for a stop (increment value by 128 for
+ float registers).
+ Correct wrong "subi" implementation, as the code executed
+ is r0=im-r1, not r0=r1-im.
+ Use standard lightning 6 fpr registers, and rework to
+ use callee save float registers, that may be spill/reloaded
+ in prolog/epilog. This is required because some jit
+ instructions implementations need to call functions; currently
+ integer div/mod and float sqrt, what may change the value of
+ scratch float registers.
+ Rework point of "sync" of branches that need to return a
+ patch'able address, because the need for a "stop" before a
+ predicate read causes all branches to be the instruction
+ in slot 0, as there is no template to "stop" and branch
+ in the same instruction "bundle".
+
+2013-04-25 Paulo Andrade <pcpa@gnu.org>
+
+ * include/lightning/jit_ia64.h, lib/jit_ia64-cpu.c,
+ lib/jit_ia64-fpu.c, lib/jit_ia64.c: New files implementing
+ the basic infrastructure of an Itanium port. The code
+ compiles and can generate jit for basic hello world like
+ functions.
+
+ * check/lightning.c, configure.ac, include/lightning.h,
+ include/lightning/Makefile.am, include/lightning/jit_private.h,
+ lib/Makefile.am, lib/lightning.c: Update for the Itanium
+ port.
+
+ * lib/jit_mips-cpu.c, lib/jit_mips.c: Correct typo and
+ make the jit_carry register local to the jit_state_t.
+ This matches code reviewed in the Itanium port, that
+ should use the same base logic to handle carry/borrow.
+
+2013-04-10 Paulo Andrade <pcpa@gnu.org>
+
+ * include/lightning/jit_private.h, lib/jit_arm.c,
+ lib/jit_mips-cpu.c, lib/jit_mips.c, lib/jit_ppc-cpu.c,
+ lib/jit_ppc.c, lib/jit_print.c, lib/jit_sparc-cpu.c,
+ lib/jit_sparc.c, lib/jit_x86-cpu.c, lib/jit_x86.c,
+ lib/lightning.c: Change all jit_regset macros to take
+ a pointer argument, to avoid structure copies when
+ adding a port to an architecture with more than 64
+ registers.
+
+2013-04-08 Paulo Andrade <pcpa@gnu.org>
+
+ * lib/jit_arm.c, lib/jit_ppc.c: Do not rely on __clear_cache
+ aligning to the next page boundary the end argument. It may
+ actually truncate it.
+
+2013-03-29 Paulo Andrade <pcpa@gnu.org>
+
+ * include/lightning/jit_private.h, lib/jit_arm.c, lib/jit_memory.c,
+ lib/jit_mips.c, lib/jit_ppc.c, lib/jit_sparc.c, lib/jit_x86.c,
+ lib/lightning.c: Do not start over jit generation if it can grow
+ the code buffer with mremap without moving the base pointer.
+
+2013-03-29 Paulo Andrade <pcpa@gnu.org>
+
+ * lib/jit_memory.c: Implement a simple memory allocation wrapper
+ to allow overriding calls to malloc/calloc/realloc/free, as well
+ as ensuring all memory containing pointers is zero or points to
+ allocated memory.
+
+ * include/lightning.h, include/lightning/jit_private.h: Definitions
+ for the memory allocation wrapper.
+
+ * lib/Makefile.am: Update for new jit_memory.c file.
+
+ * lib/jit_arm.c, lib/jit_disasm.c, lib/jit_mips.c, lib/jit_note.c,
+ lib/jit_ppc.c, lib/jit_sparc.c, lib/jit_x86.c, lib/lightning.c:
+ Use the new memory allocation wrapper code.
+
+2013-03-22 Paulo Andrade <pcpa@gnu.org>
+
+ * configure.ac, include/lightning/jit_private.h, lib/lightning.c:
+ Remove dependency on gmp. Only a simple bitmap was required, and
+ that was not enough reason to force linking to gmp and possible
+ complications caused by it.
+
+2013-03-10 Paulo Andrade <pcpa@gnu.org>
+
+ * include/lightning.h: Add check for __powerpc__ defined
+ in Linux, while Darwin defines __ppc__.
+
+ * include/lightning/jit_ppc.h: Adjust register definitions
+ for Darwin 32 bit and Linux 64 bit ppc usage and/or ABI.
+
+ * include/lightning/jit_private.h: Add proper check for
+ Linux __powerpc__ and a data definition for a workaround
+ to properly handle code that starts with a jump to a "main"
+ label.
+
+ * lib/jit_disasm.c: Add extra disassembler initialization
+ for __powerpc64__.
+
+ * lib/jit_ppc-cpu.c: Add extra macros and functions, and
+ correct/adapt previous ones to handle powerpc64.
+
+ * lib/jit_ppc-fpu.c: Adapt for 64 bit wordsize. Basically
+ add conversion from/to int32/int64 and proper handling of
+ load/store offsets too large for 32 bit.
+
+ * lib/jit_ppc.c: Add calls to 64 bit codes and adaptation
+ for the PowerPC 64 bit Linux ABI.
+
+ * lib/jit_arm.c, lib/jit_mips.c, lib/jit_sparc.c, lib/jit_x86.c,
+ lib/lightning.c: Correct off by one error when restarting jit
+ of a function due to finding too late that needs to spill/reload
+ some register. Problem was found by accident on a very special
+ condition during PowerPC 64 code adaptation.
+
+2013-03-08 Paulo Andrade <pcpa@gnu.org>
+
+ * check/lightning.c: Add missing ppc preprocessor definition.
+
+2013-03-06 Paulo Andrade <pcpa@gnu.org>
+
+ * check/float.tst: Comment out the int to negative infinity
+ test in mips for the moment because not all Loongson agrees
+ on the result.
+
+ * lib/jit_disasm.c: Add a test instead of an assertion
+ when loading symbols for disassembly due to a failure with
+ a simple binutils build in Debian mipsel64.
+
+2013-03-06 Paulo Andrade <pcpa@gnu.org>
+
+ * include/lightning/jit_private.h, lib/jit_arm-cpu.c,
+ lib/jit_arm.c, lib/jit_disasm.c, lib/jit_mips-cpu.c,
+ lib/jit_mips.c, lib/jit_note.c, lib/jit_ppc-cpu.c,
+ lib/jit_ppc.c, lib/jit_print.c, lib/jit_sparc-cpu.c,
+ lib/jit_sparc.c, lib/jit_x86-cpu.c, lib/jit_x86.c,
+ lib/lightning.c: Add an extra structure for data storage
+ during jit generation, and release it after generating
+ jit, to reduce a bit memory usage, and also to make it
+ easier to understand what data is available during
+ jit runtime.
+
+2013-03-06 Paulo Andrade <pcpa@gnu.org>
+
+ * lib/lightning.c: Make data and code buffer readonly.
+
+2013-02-20 Paulo Andrade <pcpa@gnu.org>
+
+ * doc/body.texi: Fool proof validate the examples of what
+ an assembly-language programmer would write and correct the
+ wrong sparc example.
+
+2013-02-19 Paulo Andrade <pcpa@gnu.org>
+
+ * doc/body.texi: Add back the SPARC code generation example.
+
+2013-02-19 Paulo Andrade <pcpa@gnu.org>
+
+ * check/lightning.c: Remove state flag to work with partial
+ sparc port, by just disassembling if there was incomplete
+ code generation.
+
+ * lib/jit_sparc-cpu.c: Correct wrong range check for immediate
+ integer constants (off by one bit shift).
+ Correct macro implementing equivalent "rd %y, rd" assembly.
+ Implement qmul* and qdiv*.
+
+ * lib/jit_sparc.c: Update for qmul* and qdiv* and remove logic
+ to handle incomplete code generation during sparc port.
+
+2013-02-18 Paulo Andrade <pcpa@gnu.org>
+
+ * check/float.tst: Add sparc to list of known NaN and +-Inf
+ to integer conversion.
+
+ * check/lightning.c: Define __sparc__ to preprocessor in
+ the sparc backend.
+
+ * include/lightning/jit_private.h: Correct wrong definition
+ of emit_stxi_d, that has lived for a long time, but would
+ cause problems whenever needing to spill/reload a float
+ register.
+
+ * include/lightning/jit_sparc.h: Can only use %g2,%g3,%g4
+ for scratch variables, as other "global" registers are
+ reserved for the system, e.g. libc.
+ Reorder float register naming to make it easier to
+ access odd float registers, so that generating code for
+ pusharg and getarg is easier for the IR.
+
+ * lib/jit_mips-cpu.c, lib/jit_ppc-cpu.c: Update to match
+ new code in jit_sparc-cpu.c. It must call jit_get_reg
+ with jit_class_nospill if using the register to move
+ an unconditional branch address to it, as the reload
+ will not happen (actually could happen in the delay
+ slot...)
+
+ * lib/jit_sparc-cpu.c: Correct wrong macro definition for
+ ldxr_s.
+ Properly implement div* and implement rem. Div* needs
+ to use the y register, and rem* needs to be synthesized.
+ Correct b?sub* macro definitions.
+
+ * lib/jit_sparc-fpu.c: Correct reversed float to/from double
+ conversion.
+ Correct wrong jit_get_reg call asking for a gpr and then
+ using the fpr with that number.
+ Correct wrong branch displacement computation for
+ conditional branches.
+
+ * lib/jit_sparc.c: Correct getarg_d and pushargi_d implementation.
+ Add rem* entries to the switch converting IR to machine code.
+
+ * lib/lightning.c: Correct a problem detected when adding
+ the jit_class_nospill flag to jit_get_reg, that was caused
+ when having a branch to an "epilog" node, what would cause
+ the code to think all registers in unknown state were live,
+ while in truth, all registers in unknown state in the
+ "just after return" point are actually dead.
+
+2013-02-17 Paulo Andrade <pcpa@gnu.org>
+
+ * include/lightning/jit_sparc.h, lib/jit_sparc-cpu.c,
+ lib/jit_sparc-fpu.c, lib/jit_sparc.c: New files implementing
+ the basic framework of the sparc port.
+
+ * configure.ac, include/lightning.h, include/lightning/Makefile.am,
+ include/lightning/jit_private.h, lib/jit_disasm.c: Update
+ for the sparc port framework.
+
+ * lib/jit_mips.c: Correct reversed retr/reti logic.
+
+ * lib/jit_ppc.c: Correct misspelled __LITTLE_ENDIAN.
+
+ * lib/lightning.c: Always do byte hashing in hash_data, because
+ the logic to "compress" strings causes large pointers to not
+ be guaranteed aligned at 4 byte boundaries.
+ Update for the sparc port framework.
+
+2013-02-11 Paulo Andrade <pcpa@gnu.org>
+
+ * lib/jit_arm.c: Correct jit_pushargi_f in the arm hardfp abi.
+ Most of the logic uses even numbered register numbers, so that
+ a float and a double can be used in the same register, but
+ the abi requires packing the float arguments, so jit_pushargi_f
+ needs to allocate a temporary register to modify only the
+ proper register argument (or be very smart to push two
+ immediate arguments if applicable).
+
+2013-02-11 Paulo Andrade <pcpa@gnu.org>
+
+ * include/lightning.h, lib/lightning.c: Implement the new
+ jit_clear_state and jit_destroy_state calls. jit_clear_state
+ releases all memory not required during jit_execution; that
+ is, leaves only the mmap'ed data and code buffers allocated.
+ jit_destroy_state releases the mmap'ed buffers as well as
+ the jit_state_t object itself, that holds pointers to the
+ code and data buffers, as well as annotation pointers (for
+ disassembly or backtrace) in the data buffer.
+
+ * lib/jit_note.c: Correct invalid vector offset access.
+
+ * check/ccall.c, check/lightning.c, doc/ifib.c, doc/incr.c,
+ doc/printf.c, doc/rfib.c, doc/rpn.c: Use the new jit_clear_state
+ and jit_destroy_state calls, to demonstrate the new code to
+ release all jit memory.
+
+ * doc/body.texi: Add basic documentation and usage description
+ of jit_clear_state and jit_destroy_state.
+
+2013-02-11 Paulo Andrade <pcpa@gnu.org>
+
+ * include/lightning/jit_private.h, lib/jit_note.c, lib/lightning.c:
+ Store all annotation information in the mmap'ed area reserved for
+ read only data. This adds code to not allocate memory for jit_note_t
+ objects, and to relocate jit_line_t objects and its contents after
+ calculating annotation information. The jit_line_t objects are
+ relocated because it is not possible to always calculate before
+ hand data layout because note information may be extended or
+ redundant entries removed, as well as allowed to be added in
+ non sequential order.
+ A bug was also corrected in _jit_set_note, that was causing it
+ to allocate new jit_line_t objects when not needed. It was still
+ working correctly, but allocating way more memory than required.
+
+2013-02-05 Paulo Andrade <pcpa@gnu.org>
+
+ * include/lightning.h, lib/lightning.c: Add the new jit_live code
+ to explicitly mark a register as live. It is required to avoid
+ assuming functions always return a value in the gpr and fpr return
+ register, and to avoid the need of some very specialized codes
+ that vary too much from backend to backend, to instruct the
+ optimization code the return register is live.
+
+ * lib/jit_arm.c, lib/jit_mips.c, lib/jit_ppc.c, lib/jit_print.c,
+ lib/jit_x86.c: Update for the new jit_live code.
+
+ * check/ret.ok, check/ret.tst: New files implementing a simple
+ test case that would previously fail at least in ix86/x86_64.
+
+ * check/Makefile.am: Update for new "ret" test case.
+
+2013-02-05 Paulo Andrade <pcpa@gnu.org>
+
+ * lib/jit_ppc-cpu.c, lib/jit_ppc.c: Validate and correct
+ problems in the qmul and qdiv ppc implementation.
+
+2013-02-04 Paulo Andrade <pcpa@gnu.org>
+
+ * include/lightning.h, include/lightning/jit_private.h,
+ lib/jit_arm-cpu.c, lib/jit_arm.c, lib/jit_mips-cpu.c,
+ lib/jit_mips.c, lib/jit_ppc-cpu.c, lib/jit_ppc.c,
+ lib/jit_x86-cpu.c, lib/jit_x86.c, lib/lightning.c:
+ Implement the new qmul and qdiv instructions that return signed
+ and unsigned lo/hi multiplication result and div/rem division result.
+ These should be useful for jit translation of code that needs to
+ know if a multiplication overflows (no branch opcode added) or if
+ a division is exact (easy check if remainder is zero).
+
+ * check/lightning.c, lib/jit_print.c, check/Makefile.am,
+ check/all.tst: Update for the new qmul and qdiv instructions.
+
+ * check/qalu.inc, check/qalu_div.ok, check/qalu_div.tst,
+ check/qalu_mul.ok, check/qalu_mul.tst: New files implementing
+ simple test cases for qmul and qdiv.
+
+2013-01-30 Paulo Andrade <pcpa@gnu.org>
+
+ * doc/body.texi: Correct "jmpi" description that incorrectly
+ told it was possible to pass any address as jump target. The
+ only way to do that is "movi+jmpr".
+
+2013-01-30 Paulo Andrade <pcpa@gnu.org>
+
+ * lib/jit_x86-cpu.c: Correct undefined behavior code.
+ http://gcc.gnu.org/bugzilla/show_bug.cgi?id=56143
+
+2013-01-29 Paulo Andrade <pcpa@gnu.org>
+
+ * configure.ac: Use AC_CONFIG_HEADERS instead of AC_CONFIG_HEADER
+ to have HAVE_CONFIG_H defined with latest aclocal.
+
+ * include/lightning/jit_private.h, lib/lightning.c: Add new
+ abstraction to use a heuristic to calculate the amount of space
+ required for jit generation, and code to reallocate the buffer if
+ it was miscalculated.
+
+ * lib/jit_arm.c, lib/jit_mips.c, lib/jit_ppc.c, lib/jit_x86.c:
+ Update to use new code to estimate and resize of required buffer
+ for jit code.
+
+ * lib/jit_x86-cpu.c: Minor cosmetic change to avoid adding a
+ non required rex prefix when calling a function pointer stored
+ in a register.
+
+2013-01-24 Paulo Andrade <pcpa@gnu.org>
+
+ * check/Makefile.am: "make debug" target should pass only
+ the main test tool program as argument for running gdb
+
+ * configure.ac: Add the --enable-assertions option.
+
+ * doc/Makefile.am, doc/body.texi, doc/lightning.texi:
+ Major rewrite of the documentation to match the current
+ implementation.
+
+ * doc/version.texi: Automatic date update.
+
+ * doc/ifib.c, doc/incr.c, doc/printf.c, doc/rfib.c, doc/rpn.c:
+ Implementation of the documentation examples, that are also
+ compiled during a normal build.
+
+ * doc/p-lightning.texi, doc/porting.texi, doc/toc.texi,
+ doc/u-lightning.texi, doc/using.texi: These files were
+ renamed in the documentation rewrite, as the documentation
+ was significantly trimmed due to full removal of the porting
+ chapters. Better porting documentation should be added but
+ for the moment it was just removed the documentation not
+ matching the implementation.
+
+2013-01-18 Paulo Andrade <pcpa@gnu.org>
+
+ * lib/jit_note.c: Correct bounds check and wrong code keeping
+ a pointer that could be changed after a realloc call.
+
+2013-01-18 Paulo Andrade <pcpa@gnu.org>
+
+ * check/3to2.tst, check/add.tst, check/allocai.tst, check/bp.tst,
+ check/call.tst, check/ccall.c, check/clobber.tst, check/divi.tst,
+ check/fib.tst, check/ldsti.tst, check/ldstr-c.tst, check/ldstr.tst,
+ check/ldstxi-c.tst, check/ldstxi.tst, check/ldstxr-c.tst,
+ check/ldstxr.tst, check/lightning.c, check/rpn.tst, check/stack.tst,
+ check/varargs.tst, include/lightning.h,
+ include/lightning/jit_private.h, lib/jit_arm.c, lib/jit_disasm.c,
+ lib/jit_mips.c, lib/jit_note.c, lib/jit_ppc.c, lib/jit_print.c,
+ lib/jit_x86.c, lib/lightning.c: Extend the "jit_note" abstraction
+ with the new "jit_name" call, that receives a string argument, and
+ should usually be called to mark boundaries of functions of code
+ generating jit (that is, it is not expected that the language
+ generating jit map its functions to jit functions).
+
+2013-01-17 Paulo Andrade <pcpa@gnu.org>
+
+ * check/add.tst, check/allocai.tst, check/bp.tst, check/divi.tst,
+ check/fib.tst, check/lightning.c, include/lightning/jit_arm.h,
+ include/lightning/jit_mips.h, include/lightning/jit_ppc.h,
+ include/lightning/jit_private.h, include/lightning/jit_x86.h:
+ Make JIT_RET, JIT_FRET and JIT_SP private. These should not be
+ used in any operations due to frequently having special
+ constraints (usually JIT_FRET). JIT_FP must be made available
+ because it must be used as the base register to access stack
+ space allocated with jit_allocai.
+
+2013-01-14 Paulo Andrade <pcpa@gnu.org>
+
+ * include/lightning.h, lib/lightning.c: Add an extra align
+ argument to the jit_data call (that should be made private),
+ so that it should not align strings at 8 bytes.
+ Correct the jit_note call to include the null ending byte
+ when adding label/note names to the "jit data section".
+
+2013-01-11 Paulo Andrade <pcpa@gnu.org>
+
+ * lib/jit_note.c: New file implementing a simple string+integer
+ annotation, that should be used to map filename and line number
+ to offsets in the generated jit.
+
+ * include/lightning.h, lib/lightning.c: Update for the new
+ note code.
+ Add an extra mandatory argument to init_jit, that is used
+ as argument to bfd_openr.
+ Change from generic void* to char* the argument to jit_note
+ and add an extra integer argument, to map to filename and
+ line number.
+
+ * check/ccall.c, check/lightning.c, include/lightning/jit_private.h,
+ lib/jit_arm.c, lib/jit_disasm.c, lib/jit_mips.c, lib/jit_ppc.c,
+ lib/jit_print.c, lib/jit_x86.c, lib/Makefile.am: Update for the
+ new annotation code.
+
+ * configure.ac, check/Makefile.am: Update to work with latest
+ automake.
+
+2013-01-09 Paulo Andrade <pcpa@gnu.org>
+
+ * include/lightning.h, lib/jit_arm.c, lib/jit_mips-fpu.c,
+ lib/jit_mips.c, lib/jit_print.c, lib/jit_x86.c, lib/lightning.c:
+ Remove the jit_code_getarg_{f,d} and jit_code_pusharg{i,r}_{f,d}
+ calls, replacing them with the new, internal only, jit_movr_w_f,
+ jit_mov{r,i}_f_w, jit_movr_ww_d, and jit_mov{i,r}_d_ww, that
+ better describe the operation being done, and allow removing
+ the hackish code to detect special conditions for arm when
+ moving from/to vfp from/to a gpr register pair.
+ Rename jit_code_retval_{f,d} to jit_code_x86_retval_{f,d} as
+ it is specific to 32 bit x86, and used to move abi return
+ value in x87 register to a sse register.
+
+2013-01-05 Paulo Andrade <pcpa@gnu.org>
+
+ * check/ccall.c, check/ccall.ok: New test case to validate
+ interleaved calls from/to C code and jit.
+
+ * check/Makefile.am: Update for the new ccall test case.
+
+ * include/lightning.h, lib/lightning.c: Add the new jit_address
+ call that returns the real/final address of a "note" in the
+ generated jit. It requires a jit_node_t as returned by the
+ jit_note call, and is only valid after calling jit_emit.
+ Add an intermediate solution to properly handle arm
+ soft and softfp modes that move a double to an integer register
+ pair. Currently it just adds extra tests for the condition,
+ but the proper solution should be to have extra lightning
+ codes for these conditions, codes which should be only used
+ by the backends that need it, and merged with the existing
+ jit_pusharg*_{f,d}.
+
+ * include/lightning/jit_private.h: Add new jit_state_t flag
+ to know it finished jit_emit, so that calls to jit_address
+ are valid.
+
+ * lib/jit_mips.c: Correct abi implementation so that the
+ new ccall test case passes. Major problem was using
+ _jit->function.self.arg{i,f} as boolean values, but that
+ would cause lightning.c:patch_registers() to incorrectly
+ assume only one register was used as argument when calling
+ jit_regarg_p(); _jit->function.self.arg{i,f} must be the
+ number of registers used as arguments (in all backends).
+
+ * lib/jit_x86.c: Add workaround, by marking %rax as used,
+ to a special condition, when running out of registers and the
+ allocator trying to spill and reload %rax, but %rax was used
+ as a pointer to a function, what would cause the reload to
+ destroy the return value. This condition can be better
+ generalized, but the current solution is good enough.
+
+ * include/lightning/jit_ppc.h, lib/jit_ppc-cpu.c, lib/jit_ppc.c:
+ Rewrite logic to handle arguments, as the original code was
+ written based on a SysV pdf about the generic powerpc ABI,
+ what did "invent" a new abi for the previous test cases, but
+ failed in the new ccall test in Darwin PPC. Now it properly
+ handles 13 float registers for arguments, as well as proper
+ computation of stack offsets when running out of registers
+ for arguments.
+
+2013-01-02 Paulo Andrade <pcpa@gnu.org>
+
+ * check/float.tst: Correct test case to match ppc also
+ converting positive infinity to 0x7fffffff.
+
+ * lib/jit_arm-swf.c: Correct typos with double underscores.
+
+ * lib/lightning.c: Correct remaining wrong reverse jump logic.
+
+2012-12-29 Paulo Andrade <pcpa@gnu.org>
+
+ * lib/lightning.c: Correct both, wrong and confusing logic
+ to compute the reverse of a jump. Now it properly matches
+ C semantics for "eq" (==) and "ne" (!=) and correct computation
+ of reverse of "uneq" as "gt".
+
+ * check/branch.tst: Update "ne" float branch check that
+ previously happened to be wrongly tested with a NaN argument.
+
+2012-12-29 Paulo Andrade <pcpa@gnu.org>
+
+ * check/float.ok, check/float.tst: New test cases implementing
+ extensive validation of float comparison and branch code
+ generation as well as integer conversion, involving NaN and
+ [+-]Inf.
+
+ * lib/jit_arm-swf.c, lib/jit_x86-sse.c, lib/jit_x86-x87.c:
+ Correct bugs found by new float test case.
+
+ * lib/jit_x86.c: Correct cut&paste error added in commit to
+ convert jit_arg* return value to a jit_node_t*, that would
+ cause it to not properly handle double arguments in ix86.
+
+ * check/Makefile.am: Update for the new test case.
+
+2012-12-28 Paulo Andrade <pcpa@gnu.org>
+
+ * check/lightning.c, include/lightning.h, lib/jit_arm.c,
+ lib/jit_mips.c, lib/jit_ppc.c, lib/jit_print.c, lib/jit_x86.c,
+ lib/lightning.c: Change return value of jit_arg{,_f,_d} to
+ a jit_node_t* object, that should be used as argument to
+ jit_getarg_{c,uc,s,us,i,ui,l,f,d}. This just requires changing
+ from jit_int32_t to jit_pointer_t (or jit_node_t*) the "handle"
+ for the getarg calls, with the benefit that it makes it easy
+ to implement patching of the stack address of non register
+ arguments, this way allowing to implement variable size stack
+ frames if applicable; useful if there are too many registers and
+ jit functions use only a few callee save registers.
+
+2012-12-27 Paulo Andrade <pcpa@gnu.org>
+
+ * lib/jit_arm.c, lib/jit_mips-cpu.c, lib/jit_mips.c: Correct
+ regressions when patching jit_calli for a forward function.
+
+ * lib/jit_ppc-cpu.c: Correct wrong arguments to ANDI opcode
+ in jit_getarg_u{c,s} implementation.
+
+2012-12-23 Paulo Andrade <pcpa@gnu.org>
+
+ * check/call.ok, check/call.tst: New test cases to validate
+ simple typed argument and return values in function calls.
+
+ * check/lightning.c: Properly handle jit_movi of labels for
+ backward and forward code labels.
+
+ * check/Makefile.am: Update for new test case.
+
+2012-12-23 Paulo Andrade <pcpa@gnu.org>
+
+ * check/carry.ok, check/carry.tst: New test case to validate
+ carry condition handling.
+
+ * check/Makefile.am: Update for new test case.
+
+2012-12-22 Paulo Andrade <pcpa@gnu.org>
+
+ * lib/jit_ppc-cpu.c, lib/jit_ppc.c: Implement logic for
+ jit_htonr for big endian, so that ppc (big endian) passes the
+ new clobber.tst test case.
+
+2012-12-22 Paulo Andrade <pcpa@gnu.org>
+
+ * lib/jit_arm.c: Correct use of wrong argument offset
+ variable in armv7l or float/double argument for varargs
+ function in armv7hl.
+ Correct jit_getarg* logic in software float mode to
+ match expected behavior in other backends, that is, if
+ a function is not called, it is safe to use a few lightning
+ calls before a next jit_getarg* call, as done in the test
+ case check/stack.tst. The proper solution should be to
+ extend the parser in lib/lightning.c to check if there is
+ some float operation that will call some (libgcc?) function,
+ but software float arm should be a very uncommon backend for
+ lightning, so, just load the already in place arguments
+ saved to stack, assuming the register argument was clobbered
+ (what should not be the case most times...).
+
+2012-12-22 Paulo Andrade <pcpa@gnu.org>
+
+ * check/clobber.ok, check/clobber.tst: New test case doing
+ extensive validation tests to ensure registers not used in
+ an operation are not clobbered.
+
+ * check/Makefile.am: Update for new test case.
+
+2012-12-21 Paulo Andrade <pcpa@gnu.org>
+
+ * lib/lightning.c: Partially rewrite/revert code to compute
+ initial register live state at the start of a basic block.
+ The original logic was corrupted when adding optimizations
+ to do as few computations as possible in jit_update. The
+ reglive field must be always a known set of live registers
+ at the start of a basic block. The value that was incorrect
+ was the regmask field, that must be the set of registers
+ that are in unknown state, because they are not known live,
+ neither set (or possibly not set) in the basic block, and
+ *must* store the state at the start of the basic block.
+
+2012-12-20 Paulo Andrade <pcpa@gnu.org>
+
+ * include/lightning/jit_ppc.h: Correct mismatch of JIT_F{1,5}
+ with enum codes, that were correct, and returned by jit_f().
+
+ * lib/jit_ppc-cpu.c, lib/jit_ppc-fpu.c, lib/jit_ppc.c: Properly
+ implement and better describe values when generating stack
+ frames.
+
+2012-12-18 Paulo Andrade <pcpa@gnu.org>
+
+ * check/stack.ok, check/stack.tst: New files to test data
+ integrity on a deep chain of stack frames.
+
+ * lib/jit_arm.c, lib/jit_arm-cpu.c, lib/jit_mips.c,
+ lib/jit_mips-cpu.c, lib/jit_ppc.c, lib/jit_ppc-cpu.c,
+ lib/jit_x86.c, lib/jit_x86-cpu.c: Calculate _jit->function->stack
+ in the emit stage, otherwise it will calculate it wrong if
+ need to jit_allocai space to spill registers.
+
+ * lib/lightning.c: Correct wrong offset when updating the
+ "current" jit function pointer in the code that may need to
+ allocate stack space to spill registers.
+
+ * check/lightning.c: Correct off by one data space check.
+
+ * check/Makefile.am: Update for new test case.
+
+2012-12-17 Paulo Andrade <pcpa@gnu.org>
+
+ * check/fop_abs.ok, check/fop_abs.tst, check/fop_sqrt.ok,
+ check/fop_sqrt.tst: New files implementing simple test cases
+ for the extra float operations.
+
+ * check/Makefile.am: Update for new test cases.
+
+ * check/alu.inc: Add an extra macro to check for unordered
+ equality on tests where it is expected to use NaN as an
+ argument.
+
+ * check/lightning.c: Minor change for proper/common argument
+ syntax handling omitting arguments to options.
+
+2012-12-17 Paulo Andrade <pcpa@gnu.org>
+
+ * check/Makefile.am: Automatically generate pattern list
+ of tests with alternate jit generation options. This should
+ prevent typos and needing to change multiple places after
+ a change.
+
+2012-12-14 Paulo Andrade <pcpa@gnu.org>
+
+ * check/lightning.c: Remove the ".cpu name value" syntax,
+ as it was not able to do proper changes before the jit
+ internal data structure was initialized. Now it supports
+ several getopt options to force using different jit
+ generation options, effectively replacing the previous
+ syntax.
+
+ * check/run-test: Add simple extra logic to handle differently
+ named test scripts, used to test things like x87 coprocessor
+ in ix86, and arm instruction set or software float in armv7l.
+
+ * configure.ac: Add some AC_RUN_IFELSE calls to figure at
+ compile time if can test different code generation options,
+ and update Makefile generation accordingly.
+
+ * check/Makefile.am, lib/jit_arm.c, lib/jit_x86.c: Update to
+ properly work with the test tool updating the jit_cpu global
+ information.
+
+ * check/check.arm.sh, check/check.swf.sh, check/check.x87.sh:
+ New wrapper files passing -mthumb=0, -mvfp=0 and -mx87=1 to
+ the test tool, if applicable, so that it can validate alternate
+ code generation options on test hosts that support them.
+
+2012-12-14 Paulo Andrade <pcpa@gnu.org>
+
+ * lib/jit_x86-x87.c, lib/jit_x86.c: Correct test cases in ix86
+ when using the x87 coprocessor instead of sse2+.
+
+2012-12-14 Paulo Andrade <pcpa@gnu.org>
+
+ * include/lightning.h, include/lightning/jit_private.h,
+ lib/jit_arm.c, lib/jit_mips.c, lib/jit_ppc.c, lib/jit_x86.c,
+ lib/lightning.c: Make jit_ellipsis implementation not
+ backend specific. It is not intended to handle va_list
+ like objects at runtime, as jit_arg* and jit_getarg*
+ return constant values resolved at parse time, so, effectively
+ it is not possible to create printf like jit functions, as
+ there is no va_start, va_arg, va_end, etc, abstraction. This
+ limitation should be kept for the sake of making new ports
+ easier.
+
+2012-12-14 Paulo Andrade <pcpa@gnu.org>
+
+ * include/lightning.h, lib/lightning.c: Add two extra wrapper
+ functions to avoid need for excess pointer to/from word casts.
+
+ * check/lightning.c: Only need for pointer to/from word cast
+ now is jit_movi, update accordingly.
+
+2012-12-13 Paulo Andrade <pcpa@gnu.org>
+
+ * check/varargs.ok, check/varargs.tst: New test cases implementing
+ simple varargs calls with a large amount of arguments to exercise
+ excess arguments on stack.
+
+ * include/lightning.h: Include config.h if HAVE_CONFIG_H is
+ defined.
+
+ * lib/jit_arm.c: Allocate a fpr register, not a gpr one for
+ temporary when pushing varargs arguments in the stack.
+
+ * lib/jit_arm-swf.c: Correct code changing the wrong offset
+ in jit_absr_d and jit_negr_d in software float.
+
+ * lib/jit_mips.c: Correct calculation of offsets of arguments
+ on stack.
+
+ * lib/jit_ppc.c: Correct bogus logic for "next" offset of arguments
+ on stack and adjust for fixed offset of stack arguments.
+
+2012-12-12 Paulo Andrade <pcpa@gnu.org>
+
+ * include/lightning.h, lib/jit_arm.c, lib/jit_mips.c,
+ lib/jit_ppc.c, lib/jit_x86.c, lib/lightning.c: Change jit_prepare
+ to no longer receive an argument. If receiving an argument, it
+ should be an ABI specifier, not a boolean if varargs or not,
+ and add the new jit_ellipsis call, to specify where the
+ ellipsis is in the C prototype of the function being called.
+ Note that currently it is not supported to define varargs
+ functions and it will be ignored if calling jit_ellipsis not
+ in a prepare/finish* block, but this should be addressed.
+
+ * check/allocai.tst, check/alu_add.tst, check/alu_and.tst,
+ check/alu_com.tst, check/alu_div.tst, check/alu_lsh.tst,
+ check/alu_mul.tst, check/alu_neg.tst, check/alu_or.tst,
+ check/alu_rem.tst, check/alu_rsh.tst, check/alu_sub.tst,
+ check/alu_xor.tst, check/alux_add.tst, check/alux_sub.tst,
+ check/bp.tst, check/branch.tst, check/cvt.tst, check/divi.tst,
+ check/fib.tst, check/ldsti.tst, check/ldstr-c.tst,
+ check/ldstr.tst, check/ldstxi-c.tst, check/ldstxi.tst,
+ check/ldstxr-c.tst, check/ldstxr.tst, check/rpn.tst,
+ check/lightning.c: Update for the change to jit_prepare and
+ addition of jit_ellipsis.
+
+2012-12-11 Paulo Andrade <pcpa@gnu.org>
+
+ * lib/jit_ppc-cpu.c: Make movr a function that checks arguments
+ so that other code can safely assume it is a noop if src and dst
+ are the same register.
+ Implement rem{r,i}{,_u} as a div{,u}/mul/sub.
+ Correct ANDIS, ORIS and XORIS calls to cast the argument to
+ unsigned before the shift to avoid an assertion if the argument
+ had the topmost bit set.
+ Implement lshi, rshi and rshi_u as functions to test for a
+ zero argument, that would otherwise trigger an assertion when
+ computing the shift value.
+ Do a simple implementation of bm{s,c}{r,i} with a temporary,
+ "andr" of arguments and jump based on comparison with zero.
+ Correct typo in ldxi_c.
+
+ * lib/jit_ppc-fpu.c: Correct wrong arguments to FDIV* and STF*.
+
+ * lib/jit_ppc.c: Correct wrong check for 6 instead of 8 integer
+ arguments in registers. If calling a varargs function and
+ passing a float or double argument, also either store the
+ value in the stack or in integer registers, as varargs functions
+ do not fetch it from float registers.
+ Add "case" for new functions and incorrectly missing ones.
+ Call libgcc's __clear_cache, that should know what to do
+ if the hardware needs flushing cache before execution.
+
+ * lib/lightning.c: Do a simple/trivial logic in jit_regset_scan1,
+ that should make it easier for the compiler to optimize it, and
+ that also corrects the previously wrong code for big endian, and
+ that was causing problems in ppc due to not saving all callee save
+ registers as it was not "finding" them in the regset due to the
+ little endian assumption bug.
+
+2012-12-11 Paulo Andrade <pcpa@gnu.org>
+
+ * configure.ac: Only default to using the builtin disassembler
+ if on GNU/Linux. This should be temporary, due to requiring
+ /proc/self/exe.
+ Correctly check $target_cpu for powerpc.
+
+ * include/lightning/jit_ppc.h: Correctly implement jit_v_num.
+
+ * include/lightning/jit_private.h: Declare proper prototype
+ for jit_init_debug and jit_finish_debug.
+
+ * lib/jit_ppc-cpu.c: Remove code to save/restore callee save
+ float registers, as it is not required since those float
+ registers are not usable currently.
+ Change prolog and epilog generation to, at least comparing
+ code, match what gcc generates in "gcc -O0", but it is still
+ failing in Darwin PPC, apparently due to the __clear_cache
+ call not being enough, as frequently it will also fail to
+ execute, and the code buffer is all zeroes.
+
+ * lib/lightning.c: Do not fail in jit_regset_scan1 calls due
+ to passing 64 as argument on computers with 64 registers.
+
+2012-12-10 Paulo Andrade <pcpa@gnu.org>
+
+ * lib/jit_mips-cpu.c: Correct all current test cases.
+ Call the "xori" not the "XORI" macro for jit_xori implementation,
+ as the XORI macro handles only 16 bit unsigned values.
+ Call the "movr" macro, not the "movi" macro in the special
+ case of adding or subtracting zero.
+ Use the proper temporary register in the jit_andr implementation.
+
+2012-12-09 Paulo Andrade <pcpa@gnu.org>
+
+ * check/alu.inc, check/alu_add.ok, check/alu_add.tst,
+ check/alu_and.ok, check/alu_and.tst, check/alu_com.ok,
+ check/alu_com.tst, check/alu_div.ok, check/alu_div.tst,
+ check/alu_lsh.ok, check/alu_lsh.tst, check/alu_mul.ok,
+ check/alu_mul.tst, check/alu_neg.ok, check/alu_neg.tst,
+ check/alu_or.ok, check/alu_or.tst, check/alu_rem.ok,
+ check/alu_rem.tst, check/alu_rsh.ok, check/alu_rsh.tst,
+ check/alu_sub.ok, check/alu_sub.tst, check/alu_xor.ok,
+ check/alu_xor.tst, check/alux_add.ok, check/alux_add.tst,
+ check/alux_sub.ok, check/alux_sub.tst, check/branch.ok,
+ check/branch.tst: New test cases for arithmetic and branch
+ tests.
+
+ * check/Makefile.am: Update for new test cases.
+
+ * include/lightning/jit_private.h: Make the jit_reg_free_p
+ macro shared by all backends. Previously was added for the
+ arm backend, but is useful in the x86_64 backend when checking
+ state of "special purpose register".
+ Also add the new jit_class_named register class, that must be
+ or'ed with the register value if calling jit_get_reg expecting
+ a specific value, because the specific register value may be
+ zero, that previously was treated as no register requested.
+
+ * lib/jit_arm-cpu.c: Correct argument order for T2_MVN.
+
+ * lib/jit_arm-swf.c: Call the proper function for double
+ divide. The "software float" implementation just calls
+ libgcc functions.
+
+ * lib/jit_arm.c: Return float/double values in the float
+ register if using the hard float ABI.
+
+ * lib/jit_x86-cpu.c: Change the can_sign_extend_int_p macro
+ to not include -0x80000000L, because there is code that
+ "abuses" it and thinks it can negate the immediate value
+ after calling that macro.
+ Correct implementation of jit_subi that had a wrong code
+ patch logic doing subtraction with reversed arguments.
+ Correct REX prefix calculation in the jit_muli implementation.
+ Correct logic to get/unget %*ax and %*dx registers in divremr
+ and divremi.
+ Correct divremi that was using the symbolic, unique %*ax
+ value in one place (not using the _REGNO name suffix).
+ Correct cut&paste error causing it to use "xor" instead of
+ "or" in one code path of the jit_ori implementation.
+ Correct several flaws when clobbering registers and/or when
+ one of the arguments was %*cx in the rotshr wrapper function
+ implementing most shift operations.
+
+ * lib/lightning.c: No longer expect that the backend be smart
+ enough to know what to do when asking for a named register
+ if that register is already an argument or is live. It fails
+ if it is an argument, or if register is live, fails if cannot
+ spill.
+ No longer incorrectly assume that eqr_{f,d} and ltgr_{f,d} are
+ safe to inverse value tests in jump thread optimization.
+
+2012-12-05 Paulo Andrade <pcpa@gnu.org>
+
+ * check/Makefile.am, check/cvt.ok, check/cvt.tst: Add new
+ "cvt" test case to test conversion from/to int/float types.
+
+ * check/lightning.c: Only define truncr_{f,d}_l in 64 bit mode.
+
+ * include/lightning.h: Correct typo that caused it to define
+ jit_truncr_{f,d}_l in 32 bit mode.
+
+ * lib/jit_arm-cpu.c: Avoid assertion failure in the signed/unsigned
+ extend opcodes generation as it shares an interface for 3 argument
+ opcode generation.
+
+ * lib/jit_x86-cpu.c: Correct wrong argument passed to
+ jit_unget_reg in the andi implementation and wrong byte
+ unsigned extend code generation.
+
+ * lib/jit_x86-sse.c: Correct conversion from "word" to float or
+ double as is dependent on wordsize.
+
+2012-12-05 Paulo Andrade <pcpa@gnu.org>
+
+ * check/ldstr-c.ok, check/ldstr-c.tst, check/ldstxi-c.ok,
+ check/ldstxi-c.tst, check/ldstxr-c.ok, check/ldstxr-c.tst:
+ New test case files testing load clobbering the base and/or
+ index register.
+
+ * check/ldst.inc: New file with common definition for all the
+ ldst* test cases.
+
+ * check/Makefile.am, check/ldsti.tst, check/ldstr.tst,
+ check/ldstxi.tst, check/ldstxr.tst: Update for new common
+ definitions file and new register clobber ldst tests.
+
+2012-12-05 Paulo Andrade <pcpa@gnu.org>
+
+ * lib/jit_mips-fpu.c: Correct wrong register order in stxr_{f,d}
+ in the mips backend.
+
+2012-12-05 Paulo Andrade <pcpa@gnu.org>
+
+ * lib/jit_arm-vfp.c: Correct regression found in armv7l with
+ latest test cases.
+
+2012-12-05 Paulo Andrade <pcpa@gnu.org>
+
+ * check/ldstxi.tst, check/ldstxr.tst: Correct wrong argument
+ order for 32 bit mode tests.
+
+ * configure.ac: Correct check for ix86 target_cpu.
+
+2012-12-05 Paulo Andrade <pcpa@gnu.org>
+
+ * check/ldstr.ok, check/ldstr.tst, check/ldsti.ok,
+ check/ldsti.tst, check/ldstxr.ok, check/ldstxr.tst,
+ check/ldstxi.ok, check/ldstxi.tst:
+ New test case files exercising a very large amount of
+ register combinations to verify load/store implementation.
+
+ * check/Makefile.am: Update for new test cases.
+
+ * lib/jit_x86-cpu.c: Correct wrong argument order when
+ computing REX prefix for {ld,st}r_T codes.
+
+2012-12-04 Paulo Andrade <pcpa@gnu.org>
+
+ * lib/jit_mips-fpu.c, lib/jit_mips.c: Implement missing mips
+ jit_sqrtr_{f,d} codes.
+
+ * check/all.tst, include/lightning.h, lib/jit_print.c: Change
+ declaration order and call order in all.tst of {add,sub}c and
+ {add,sub}x. *c must be called before to set the carry and *x
+ second to use the carry and keep it set. The wrong call order
+ was causing all.tst to fail in mips, where a register is
+ allocated to keep a global carry state.
+
+2012-12-04 Paulo Andrade <pcpa@gnu.org>
+
+ * include/lightning/jit_mips.h, lib/jit_mips-cpu.c,
+ lib/jit_mips-fpu.c, lib/jit_mips.c: Correct float/double
+ argument handling and make the mips backend pass the initial
+ test cases.
+
+ * include/lightning.h, lib/jit_print.c, lib/lightning.c:
+ Add extra enum values for argument handling functions that
+ could not be abstracted to the current codes, that is, when
+ float values need to move from/to gpr from/to fpr. It would
+ be more tempting to add such primitives, but they would have
+ wordsize limitations, and it is not expected to add codes
+ with one gpr argument for 64 bit and two for 32 bit.
+
+ * lib/jit_ppc.c: Check _jit->function before calling jit_epilog()
+ to avoid a runtime exception.
+
+2012-12-04 Paulo Andrade <pcpa@gnu.org>
+
+ * include/lightning/jit_mips.h, lib/jit_mips.c: Update to
+ make the mips backend compile in a qemu image.
+
+ * lib/jit_ppc.c: Minor adaptations to help in having the
+ ppc backend compilable.
+
+2012-12-03 Paulo Andrade <pcpa@gnu.org>
+
+ * configure.ac, include/lightning/jit_private.h, lib/jit_arm-cpu.c,
+ lib/jit_arm-swf.c, lib/jit_arm.c, check/Makefile.am: Correct
+ implementation of the arm backend port to build and pass the
+ current test cases. Tested on armv7 with softfp abi.
+
+ * lib/jit_disasm.c: Rename and change prototype of static
+ disassemble function as in the arm backend it is required
+ to access state information stored in the jit_state_t object.
+
+ * check/3to2.tst, check/add.tst: Correct test case code assuming
+ JIT_R0 and JIT_RET are the same, and even if they are the same,
+ the logic was incorrect because it must always call jit_retval*
+ to fetch a function call return before any other instruction.
+ The arm backend has a special condition if jit_retval is not
+ called, because "r0" is not JIT_R0, but is JIT_RET and *also*
+ the first argument for a called function, so JIT_RET must be
+ only used as an argument to jit_retval.
+
+2012-12-03 Paulo Andrade <pcpa@gnu.org>
+
+ * check/all.tst, check/lightning.c: Only declare or use 64 bit
+ interfaces on 64 bit builds.
+
+ * check/fib.tst: Use simpler logic to not need preprocessor
+ conditionals for 32 or 64 bit.
+
+ * include/lightning.h: Only declare 64 bit macros on a 64 bit
+ build. Code using lightning must know about wordsize and the
+ jit generation limitations, also, this way it generates a
+ compile time failure, not a runtime assertion.
+
+ * include/lightning/jit_x86.h: Correct typo in macro name.
+
+ * lib/jit_arm.c, lib/jit_arm-cpu.c, lib/jit_mips.c,
+ lib/jit_mips-cpu.c, lib/jit_ppc.c, lib/jit_ppc-cpu.c,
+ lib/jit_x86.c, lib/jit_x86-cpu.c: Correct wrong code to get
+ current jit function pointer.
+
+ * lib/lightning.c: Move call to the simplify() optimization
+ to after register liveness is known. Previous code did work
+ by accident but now with proper test cases the problem was
+ noticed.
+
+ * lib/jit_disasm.c: Always cast bfd_vma to long long when
+ passing it as printf argument.
+
+2012-12-03 Paulo Andrade <pcpa@gnu.org>
+
+ * configure.ac, check/Makefile.am, check/check.sh,
+ doc/Makefile.am, include/lightning/Makefile.am,
+ lib/Makefile.am: Correct make distcheck.
+
+2012-12-02 Paulo Andrade <pcpa@gnu.org>
+
+ * lib/jit_ppc.c: Assign copyright ownership to FSF.
+
+ * lib/jit_x86-cpu.c: Correct integer multiplication that was
+ generating code with reversed register arguments.
+
+ * check/rpn.ok, check/rpn.tst: New test case file.
+
+2012-12-02 Paulo Andrade <pcpa@gnu.org>
+
+ * lib/jit_x86-cpu.c, lib/jit_x86-sse.c, lib/jit_x86-x87.c:
+ Actually change copyright owner to FSF as advertised.
+
+ * lib/jit_arm-cpu.c, lib/jit_arm-swf.c,
+ lib/jit_arm-vfp.c, lib/jit_arm.c,
+ lib/jit_mips-cpu.c, lib/jit_mips-fpu.c, lib/jit_mips.c,
+ lib/jit_ppc-cpu.c, lib/jit_ppc-fpu.c, lib/jit_ppc.c: New
+ files implementing initial code for different jit backends.
+
+ * include/lightning/jit_private.h: Add extra field to the
+ private jit_patch_t type, required by the arm port.
+
+ * lib/Makefile.am: Update for the new backend implementation
+ files.
+
+2012-12-02 Paulo Andrade <pcpa@gnu.org>
+
+ * check/Makefile.am: Add proper "make clean" rule and missing
+ check.sh to EXTRA_DIST.
+
+2012-12-02 Paulo Andrade <pcpa@gnu.org>
+
+ * .gitignore: Update pattern of ignored files.
+
+ * check/Makefile.am: Add rule to build liblightning.la dependency
+ in case of running "make check" before building the library.
+
+2012-12-02 Paulo Andrade <pcpa@gnu.org>
+
+ * lightning/Makefile.am, lightning/asm-common.h,
+ lightning/core-common.h, lightning/fp-common.h,
+ lightning/funcs-common.h, lightning/i386/Makefile.frag,
+ lightning/i386/asm-32.h, lightning/i386/asm-64.h,
+ lightning/i386/asm.h, lightning/i386/core-32.h,
+ lightning/i386/core-64.h, lightning/i386/core.h,
+ lightning/i386/fp-32.h, lightning/i386/fp-64.h,
+ lightning/i386/fp.h, lightning/i386/funcs.h,
+ lightning/ppc/asm.h, lightning/ppc/core.h,
+ lightning/ppc/fp.h, lightning/ppc/funcs.h,
+ lightning/sparc/asm.h, lightning/sparc/core.h,
+ lightning/sparc/fp.h, lightning/sparc/funcs.h:
+ Removed. The core logic is used in the new code, and new mips
+ and arm ports will be added. At first, sparc will not be
+ supported as it has not yet been ported to the new engine.
+
+2012-12-02 Paulo Andrade <pcpa@gnu.org>
+
+ * tests/Makefile.am, tests/3to2.c, tests/3to2.ok, tests/add.c,
+ tests/add.ok, tests/allocai.c, tests/allocai.ok, tests/bp.c,
+ tests/bp.ok, tests/divi.c, tests/divi.ok, tests/fib.c, tests/fib.ok,
+ tests/fibdelay.c, tests/fibdelay.ok, tests/fibit.c, tests/fibit.ok,
+ tests/funcfp.c, tests/funcfp.ok, tests/incr.c, tests/incr.ok,
+ tests/ldst.c, tests/ldst.ok, tests/ldxi.c, tests/ldxi.ok,
+ tests/modi.c, tests/modi.ok, tests/movi.c, tests/movi.ok,
+ tests/printf.c, tests/printf.ok, tests/printf2.c, tests/printf2.ok,
+ tests/ret.c, tests/ret.ok, tests/rpn.c, tests/rpn.ok, tests/rpnfp.c,
+ tests/rpnfp.ok, tests/sete.c, tests/sete.ok, tests/testfp.c,
+ tests/testfp.ok, tests-run-test: Removed previous test suite, in
+ favor of a newer one in the check subdirectory.
+
+ * check/3to2.ok, check/3to2.tst, check/add.ok, check/add.tst,
+ check/allocai.ok, check/allocai.tst, check/bp.ok, check/bp.tst,
+ check/divi.ok, check/divi.tst, check/fib.ok, check/fib.tst:
+ New sample input for the new test program, loosely matching
+ several of the previous test cases.
+
+ * check/Makefile.am: New test suite makefile.
+
+ * check/check.sh, check/run-test: New wrapper files for the
+ new test suite.
+
+ * check/lightning.c: New file. The main driver of the new test
+ suite, that compiles to a parser of a very simple assembly-like
+ language, generates jit and executes it.
+
+ * check/all.tst: New file. A generic debug and sample test file
+ with a directive to prevent it from being executed, and useful to
+ read disassembly of all possible instructions, using a fixed set
+ of registers.
+
+ * include/Makefile.am, include/lightning.h,
+ include/lightning/Makefile.am, include/lightning/jit_arm.h,
+ include/lightning/jit_mips.h, include/lightning/jit_ppc.h,
+ include/lightning/jit_private.h, include/lightning/jit_x86.h,
+ lib/Makefile.am, lib/jit_disasm.c, lib/jit_print.c,
+ lib/jit_x86-cpu.c, lib/jit_x86-sse.c, lib/jit_x86-x87.c,
+ lib/jit_x86.c, lib/lightning.c: New files. These files are
+ written from scratch, only by <pcpa@gnu.org>, and now have
+ copyright assigned to the FSF. This is the core of the new
+ lightning rework. Previously it was integrated in code with
+ a garbage collector and several custom types like vectors and
+ hash tables, so this first code merge with lightning converts
+ that code into a library extracting only the jit bits, and at
+ first only for x86_64 GNU/Linux.
+
+ * lightning.h, m4/lightning.m4: Removed. These are no longer
+ required in the new lightning code.
+
+ * .gitignore, Makefile.am, configure.ac: Update for the new
+ lightning code.
+
+2012-12-02 Paulo Andrade <pcpa@gnu.org>
+
+ * .cvsignore: Removed for extra cleanup.
+
+ * build-aux: Rename directory to m4.
+
+ * m4: Renamed to "default" name and for consistency with merge
+ with code rework to be imported in lightning.
+
+ * .gitignore, configure.ac, Makefile.am, doc/Makefile.am:
+ Update for build-aux to m4 rename.
+
+2012-12-01 Paulo Andrade <pcpa@gnu.org>
+
+ * opcode/Makefile.am, opcode/Makefile.in, opcode/ansidecl.h,
+ opcode/bfd.h, opcode/dis-asm.h, opcode/dis-buf.c, opcode/disass.c,
+ opcode/i386-dis.c, opcode/i386.h, opcode/ppc-dis.c, opcode/ppc-opc.c,
+ opcode/ppc.h, opcode/sparc-dis.c, opcode/sparc-opc.c, opcode/sparc.h,
+ opcode/sysdep.h: Removed. Do not bundle GNU binutils files.
+
+ * aclocal.m4, configure, Makefile.in, config.h.in, doc/Makefile.in,
+ lightning/Makefile.in, tests/Makefile.in: Removed. Do not maintain
+ autogenerated files that also generate too much diff noise when
+ regenerated in git.
+
+ * build-aux/help2man, build-aux/texinfo.tex, build-aux/texi2dvi:
+ Removed. Build environment must have an up to date version from
+ upstream installed.
+
+ * build-aux/config.guess, build-aux/config.sub, build-aux/depcomp,
+ build-aux/install-sh, build-aux/mdate-sh, build-aux/missing: Removed.
+ Do not maintain a copy of automake files in git. Release tarballs
+ must use an up to date version.
+
+ * lightningize.in, doc/lightningize.1: Removed. Do not encourage
+ bundling lightning in other packages. It should use a system package
+ or a proper third party subdirectory.
+
+ * INSTALL: Removed. Autoreconf removes it and creates a symlink
+ when regenerating files, so, avoid conflicts in git and let
+ automake create the symlink.
+
+ * .gitignore: Add INSTALL and autogenerated files.
+
+ * configure.ac, Makefile.am: Update for removal of opcode subdir,
+ auto generated files and lightningize.
+
+ * tests/Makefile.am, tests/3to2.c, tests/add.c, tests/bp.c,
+ tests/fib.c, tests/fibdelay.c, tests/fibit.c, tests/funcfp.c,
+ tests/incr.c, tests/printf.c, tests/rpn.c, tests/rpnfp.c,
+ tests/sete.c, tests/testfp.c: Update for removal of opcode subdir.
+
+ * doc/Makefile.am: Update for removal of lightningize.
+
+ * configure.ac, lightning/ppc/funcs.h, lightning/sparc/funcs.h,
+ lightning/i386/fp.h, lightning/i386/core.h, lightning/i386/asm.h,
+ tests/3to2.c, tests/add.c, tests/bp.c, tests/fib.c, tests/fibdelay.c,
+ tests/fibit.c, tests/funcfp.c, tests/incr.c, tests/printf.c,
+ tests/rpn.c, tests/rpnfp.c, tests/sete.c, tests/testfp.c:
+ Remove LIGHTNING_CROSS, it is half supported and incomplete.
+
+ * tests/3to2.c, tests/funcfp.c, tests/rpnfp.c: Remove preprocessor
+ check on JIT_FPR. If no hardware registers are available, the backend
+ must provide an alternative for software float.
+
+ * lightning/ppc/core.h, lightning/sparc/core.h, tests/Makefile.am:
+ Remove JIT_NEED_PUSH_POP. It is absolutely not trivial to implement
+ properly on some backends due to stack alignment constraints, and
+ whenever it is required, using jit_allocai and using a properly
+ aligned stack vector, or a heap buffer, is better.
+
+ * tests/push-pop.c, tests/push-pop.ok: Removed due to
+ JIT_NEED_PUSH_POP no longer available.
+
+2011-02-28 Paolo Bonzini <bonzini@gnu.org>
+
+ * lightning/i386/core-64.h: Add jit_add{c,x}{i,r}_l, jit_mulr_{l,ul}_,
+ fix jit_mul{i,r}_{l,ul}.
+
+2010-08-20 Paolo Bonzini <bonzini@gnu.org>
+
+ * lightning/i386/fp-64.h: Return patch address from jit_bXYr_{f,d}.
+ Reported by Paulo César Pereira de Andrade.
+ * lightning/ppc/fp.h: Likewise.
+ * lightning/sparc/fp.h: Implement FP branches.
+
+2010-08-18 Paolo Bonzini <bonzini@gnu.org>
+
+ * lightning/i386/fp-64.h: Fix jp in jit_bner_{f,d}.
+
+2010-08-18 Paolo Bonzini <bonzini@gnu.org>
+
+ * lightning/i386/fp-32.h: Fix -D_ASM_SAFETY compilation.
+ Reported by Paulo César Pereira de Andrade.
+
+2010-08-15 Paolo Bonzini <bonzini@gnu.org>
+
+ * tests/ldst.c: Update.
+ * tests/Makefile.am: Use -ffloat-store to compile it.
+
+2010-08-15 Paolo Bonzini <bonzini@gnu.org>
+
+ * lightning/i386/core.h (jit_ldr_c, jit_ldxr_c, jit_ldr_s,
+ jit_ldxr_s): Move...
+ * lightning/i386/core-32.h: ... here.
+ * lightning/i386/core-64.h (jit_ldr_c, jit_ldxr_c, jit_ldr_s,
+ jit_ldxr_s): Use movsbq and movswq.
+
+2010-08-10 Paulo César Pereira de Andrade <pcpa@mandriva.com.br>
+
+ * lightning/i386/core-32.h (jit_replace): Use MOVLrr, not MOVLir.
+ (jit_movbrm): Check index register as well.
+ * lightning/i386/fp-64.h: Add jit_extr_f_d and jit_extr_d_f.
+ * lightning/fp-common.h: Add jit_extr_f_d and jit_extr_d_f.
+
+2010-07-28 Paolo Bonzini <bonzini@gnu.org>
+
+ * tests/Makefile.am: Add ldst test.
+ * tests/Makefile.in: Regenerate.
+ * tests/ldst.c: New.
+ * tests/ldst.ok: New.
+
+2010-07-28 Paolo Bonzini <bonzini@gnu.org>
+
+ * THANKS: Add Paulo Cesar Pereira de Andrade.
+ * doc/porting.texi: Fix ordering of arguments in jit_stxi.
+ * lightning/i386/core-32.h (jit_replace): Remove cmp argument.
+ * lightning/i386/fp-64.h (jit_movi_f): Fix.
+
+2010-07-26 Paolo Bonzini <bonzini@gnu.org>
+
+ * lightning/i386/core-32.h (jit_replace): Move here (removed
+ 2009-03-01).
+
+2010-07-19 Paolo Bonzini <bonzini@gnu.org>
+
+ * build-aux/lightning.m4: Always set and replace lightning_frag.
+ * Makefile.in: Regenerate.
+ * aclocal.m4: Regenerate.
+ * config.h.in: Regenerate.
+ * configure: Regenerate.
+ * doc/Makefile.in: Regenerate.
+ * doc/lightningize.1: Regenerate.
+ * doc/version.texi: Regenerate.
+ * lightning/Makefile.in: Regenerate.
+ * opcode/Makefile.in: Regenerate.
+ * tests/Makefile.in: Regenerate.
+
+2009-03-01 Paolo Bonzini <bonzini@gnu.org>
+
+ * lightning/i386/core-64.h: Use Mike's macros for x86-64 too.
+ * lightning/i386/core.h: Remove jit_replace.
+
+ 2009-02-27 Mike Spivey <mike@comlab.ox.ac.uk>
+
+ * lightning/i386/core.h: Rewrite shift-handling macros.
+ * lightning/fp-common.h: Fix jit_extr_{f_d,d_f}.
+
+2009-02-17 Mike Spivey <mike@comlab.ox.ac.uk>
+
+ * lightning/i386/core.h: Fix blunder in operand order.
+
+2009-02-17 Paolo Bonzini <bonzini@gnu.org>
+
+ * lightning/i386/fp-32.h: Another fix to jit_fp_btest.
+
+2009-02-17 Paolo Bonzini <bonzini@gnu.org>
+
+ * lightning/fp-common.h: Define double branches if missing.
+ * lightning/i386/asm.h: Define JC and JNC mnemonics.
+ * lightning/i386/fp-32.h: Fix jit_fp_btest. All reported
+ by Mike Spivey.
+
+2008-10-09 Paolo Bonzini <bonzini@gnu.org>
+
+ * lightning/ppc/funcs.h (jit_flush_code): Subtract 1 from end.
+ Reported by Eli Barzilay and Matthew Flatt.
+
+2008-08-23 Nix <nix@esperi.org.uk>
+
+ * lightning/i386/Makefile.frag: fp-32.h and fp-64.h are target files.
+
+2008-07-02 Laurent Michel <ldm@engr.uconn.edu>
+
+ * lightning/ppc/funcs.h (jit_flush_code): modified the computation
+ of start/end. The pointer arithmetic was done without casting. It
+ prevented compilation with recent gcc versions.
+ * lightning/ppc/core.h (jit_pushr_i): The offset for the store was
+ incorrect. Should have been 4 bytes below SP (not above).
+ * lightning/ppc/core.h (jit_popr_i): The offset for the load was
+ incorrect. Should have been 0 (not +8).
+
+2008-06-17 Paolo Bonzini <bonzini@gnu.org>
+
+ * lightning/i386/asm-64.h: Forward IMULQir to IMULQirr,
+ fix REXQ order for IMULQirr.
+
+2008-06-17 Paolo Bonzini <bonzini@gnu.org>
+
+ * lightning/i386/core.h: Fix _rN vs. _rR.
+
+2008-06-16 Paolo Bonzini <bonzini@gnu.org>
+
+ * lightning/i386/core.h: Use jit_save in jit_replace. Move JIT_R
+ definition...
+ * lightning/i386/core-32.h: ... here; define jit_save so that
+ the core.h has no effect on the 32-bit backend.
+ * lightning/i386/core-64.h: Place JIT_R1/JIT_R2 in R10/R11,
+ place outgoing arguments in the right spot from the beginning,
+ define jit_save, fix jit_reg8/jit_reg16.
+
+2008-06-15 Paolo Bonzini <bonzini@gnu.org>
+
+ * lightning/i386/core-64.h: Rewrite argument passing to
+ support up to 6 arguments and generate less code.
+
+2008-06-14 Laurent Michel <ldm@thorgal.homelinux.org>
+
+ * lightning/i386/core-64.h (jit_movi_l): When the operand is 0,
+ the XOR should be on a quadword.
+ * lightning/i386/core-64.h (jit_prolog): Keep 16-byte stack
+ alignment.
+ (jit_ret): Always use LEAVE.
+
+2008-06-13 Laurent Michel <ldm@thorgal.homelinux.org>
+
+ * lightning/i386/core-64.h: Add (void) casts for C++ compatibility.
+ * lightning/i386/asm.h: Likewise.
+
+2008-06-12 Paolo Bonzini <bonzini@gnu.org>
+
+ * lightning/i386/core.h: Move JIT_V definition...
+ * lightning/i386/core-32.h: ... here.
+ * lightning/i386/core-64.h: ... and here. Avoid dancing between
+ RSI/RDI and R12/R13, and place JIT_V1/JIT_V2 in R12/R13.
+
+2008-06-11 Paolo Bonzini <bonzini@gnu.org>
+
+ * build-aux/lightning.m4: Adjust LIGHTNING_BACKENDS, don't
+ use suffix support to distinguish i386/x86_64.
+ * lightning/i386/Makefile.frag: Use LIGHTNING_TARGET_FILES
+ to distribute *-32.h and *-64.h files now.
+ * lightning/i386/asm-i386.h: Moved to...
+ * lightning/i386/asm.h: Include the appropriate subtarget file.
+ * lightning/i386/core-i386.h: Moved to...
+ * lightning/i386/core.h: Include the appropriate subtarget file.
+ * lightning/i386/fp.h: New, include the appropriate subtarget file.
+ * lightning/i386/asm-32.h: Do not include asm-i386.h.
+ * lightning/i386/asm-64.h: Likewise.
+ * lightning/i386/core-32.h: Do not include core-i386.h.
+ * lightning/i386/core-64.h: Likewise.
+ * lightning/Makefile.am: Adjust for renamed files.
+
+ * configure.ac: Define LIGHTNING_TARGET here.
+ * opcode/disass.c: Change list of valid LIGHTNING_TARGET values.
+
+ * lightningize.in: Robustify against missing subtarget files.
+
+2008-06-11 Paolo Bonzini <bonzini@gnu.org>
+
+ * lightning/i386/core-32.h: Use MOVLir instead of jit_movi_l
+ to implement jit_movi_p.
+
+2008-06-11 Paolo Bonzini <bonzini@gnu.org>
+
+ * lightning/i386/core-32.h: Use separate __APPLE__ and SysV
+ prolog/ret macros. Subtract 12 bytes in __APPLE__ case to
+ keep stack aligned, and always use LEAVE in the epilog.
+
+2008-06-11 Paolo Bonzini <bonzini@gnu.org>
+
+ * lightning/i386/core-i386.h: Fix C++ incompatibility.
+
+2008-06-10 Laurent Michel <ldm@engr.uconn.edu>
+
+ * lightning/i386/core-i386.h: Fix jit_replace8 for
+ case when one of the operands is _EAX.
+
+2008-05-19 Paolo Bonzini <bonzini@gnu.org>
+
+ * tests/run-test: Avoid CRLF issues on mingw.
+
+2008-03-21 Paolo Bonzini <bonzini@gnu.org>
+
+ * lightning/i386/core-64.h: Fix jit_{ld,st}{,x}i_{i,l}.
+ Remove jit_ld{,x}i_ul.
+ * lightning/core-common.h: Make jit_ld{,x}{i,r}_ul
+ always a synonym of the _l variant.
+ * doc/porting.texi: Document this.
+
+2008-03-19 Paolo Bonzini <bonzini@gnu.org>
+
+ * lightning/i386/core-64.h: Fix uses of jit_qop_.
+
+2008-03-19 Paolo Bonzini <bonzini@gnu.org>
+
+ * lightning/i386/core-64.h: Add boolean operations.
+
+2008-03-19 Paolo Bonzini <bonzini@gnu.org>
+
+ * lightning/i386/asm-64.h: Add LEAQmr.
+
+2008-03-19 Paolo Bonzini <bonzini@gnu.org>
+
+ * lightning/i386/core-64.h: Misc bugfixes.
+
+2008-03-19 Paolo Bonzini <bonzini@gnu.org>
+
+ * lightning/i386/core-i386.h: Remove jit_ldr_i, jit_ldxr_i.
+ * lightning/i386/core-32.h: Add jit_ldr_i, jit_ldxr_i.
+ * lightning/i386/core-64.h: Add jit_ld{r,xr,i,xi}_{ui,l,ul};
+ move jit_ldr_i, jit_ldxr_i, jit_str_l, jit_stxr_l with others.
+
+2008-03-19 Paolo Bonzini <bonzini@gnu.org>
+
+ * lightning/asm-common.h: Add _s32P.
+
+2008-03-19 Paolo Bonzini <bonzini@gnu.org>
+
+ * lightning/i386/core-64.h: Implement long mul/div/mod.
+
+2008-03-19 Paolo Bonzini <bonzini@gnu.org>
+
+ * lightning/i386/asm-i386.h: Cast memory address to long for JCCim.
+
+2008-03-15 Paolo Bonzini <bonzini@gnu.org>
+
+ * lightning/asm-common.h: Add underscores around __unused__
+ attribute.
+
+2008-03-15 Paolo Bonzini <bonzini@gnu.org>
+
+ * lightning/ppc/core.h: Avoid some "value computed is not used"
+ warnings.
+ * lightning/tests/allocai.c: Silence other warnings.
+
+2008-03-14 Paolo Bonzini <bonzini@gnu.org>
+
+ * lightningize.in: Fix some problems (not all).
+
+2008-03-14 Paolo Bonzini <bonzini@gnu.org>
+
+ * lightning/i386/core-32.h: Avoid some "value computed is not used"
+ warnings; reported by Sam Steingold.
+
+2008-03-08 Paolo Bonzini <bonzini@gnu.org>
+
+ * lightning/i386/core-32.h: Fix stxr_c(_EAX, _EBX, _ESI).
+
+2008-02-13 Paolo Bonzini <bonzini@gnu.org>
+
+ * lightning/i386/asm-32.h: Avoid redefinition of _r1, reported by
+ Sam Steingold.
+ * lightning/i386/asm-64.h: Likewise.
+
+2008-02-08 Paolo Bonzini <bonzini@gnu.org>
+
+ * lightning/i386/asm-i386.h: Don't define _VOID, reported
+ by Reini Urban.
+
+2008-02-03 Paolo Bonzini <bonzini@gnu.org>
+
+ * build-aux/lightning.m4: Add --with-lightning-prefix option, suggested
+ by Sam Steingold.
+
+2008-01-14 Paolo Bonzini <bonzini@gnu.org>
+
+ * lightning/i386/core-64.h: Use CALLsr, not CALLLsr.
+
+2008-01-13 Paolo Bonzini <bonzini@gnu.org>
+
+ * lightning/i386/core-i386.h: Move jit_calli and jit_callr...
+ * lightning/i386/core-32.h: ... here.
+ * lightning/i386/core-64.h: Redefine them.
+
+2008-01-05 Paolo Bonzini <bonzini@gnu.org>
+
+ * lightning/i386/fp-32.h: Fix sub(a,0,a).
+ * lightning/tests/3to2.c: Add new testcases.
+ * lightning/tests/3to2.ok: Add new testcases.
+
+2008-01-02 Paolo Bonzini <bonzini@gnu.org>
+
+ * lightning/i386/fp-32.h: Fix sub(a,b,a) with a ~= JIT_FPR0.
+ * lightning/tests/3to2.c: New.
+ * lightning/tests/3to2.ok: New.
+
+2007-11-07 Paolo Bonzini <bonzini@gnu.org>
+
+ * opcode/Makefile.am: Fix AM_CPPFLAGS.
+
+2007-08-12 Paolo Bonzini <bonzini@gnu.org>
+
+ * lightning/i386/core-i386.h: Improve encoding of set* instructions.
+ * lightning/i386/core-64.h: Fix jit_bra_l.
+ * tests/sete.c: New.
+ * tests/sete.ok: New.
+
+2007-06-29 Paolo Bonzini <bonzini@gnu.org>
+
+ * tests/bp.c: Upgrade to GPL/LGPLv3.
+ * lightning/i386/asm-32.h: Upgrade to GPL/LGPLv3.
+ * lightning/i386/asm-64.h: Upgrade to GPL/LGPLv3.
+ * lightning/i386/core-32.h: Upgrade to GPL/LGPLv3.
+ * lightning/i386/core-64.h: Upgrade to GPL/LGPLv3.
+ * lightning/i386/fp-64.h: Upgrade to GPL/LGPLv3.
+ * lightning/sparc/asm.h: Upgrade to GPL/LGPLv3.
+ * lightning/sparc/core.h: Upgrade to GPL/LGPLv3.
+ * lightning/sparc/fp.h: Upgrade to GPL/LGPLv3.
+ * lightning/sparc/funcs.h: Upgrade to GPL/LGPLv3.
+ * lightning/i386/asm-i386.h: Upgrade to GPL/LGPLv3.
+ * lightning/i386/core-i386.h: Upgrade to GPL/LGPLv3.
+ * lightning/i386/fp-32.h: Upgrade to GPL/LGPLv3.
+ * lightning/i386/funcs.h: Upgrade to GPL/LGPLv3.
+ * lightning/ppc/asm.h: Upgrade to GPL/LGPLv3.
+ * lightning/ppc/core.h: Upgrade to GPL/LGPLv3.
+ * lightning/ppc/fp.h: Upgrade to GPL/LGPLv3.
+ * lightning/ppc/funcs.h: Upgrade to GPL/LGPLv3.
+ * lightning.h: Upgrade to GPL/LGPLv3.
+ * tests/add.c: Upgrade to GPL/LGPLv3.
+ * tests/fib.c: Upgrade to GPL/LGPLv3.
+ * tests/testfp.c: Upgrade to GPL/LGPLv3.
+ * tests/fibdelay.c: Upgrade to GPL/LGPLv3.
+ * tests/fibit.c: Upgrade to GPL/LGPLv3.
+ * tests/funcfp.c: Upgrade to GPL/LGPLv3.
+ * tests/incr.c: Upgrade to GPL/LGPLv3.
+ * tests/printf.c: Upgrade to GPL/LGPLv3.
+ * tests/printf2.c: Upgrade to GPL/LGPLv3.
+ * tests/rpn.c: Upgrade to GPL/LGPLv3.
+ * tests/rpnfp.c: Upgrade to GPL/LGPLv3.
+ * lightning/asm-common.h: Upgrade to GPL/LGPLv3.
+ * lightning/core-common.h: Upgrade to GPL/LGPLv3.
+ * lightning/fp-common.h: Upgrade to GPL/LGPLv3.
+ * lightning/funcs-common.h: Upgrade to GPL/LGPLv3.
+ * opcode/dis-buf.c: Upgrade to GPL/LGPLv3.
+ * opcode/disass.c: Upgrade to GPL/LGPLv3.
+ * opcode/i386-dis.c: Upgrade to GPL/LGPLv3.
+ * opcode/sparc-dis.c: Upgrade to GPL/LGPLv3.
+ * opcode/sparc-opc.c: Upgrade to GPL/LGPLv3.
+ * lightningize.in: Upgrade to GPL/LGPLv3.
+ * opcode/bfd.h: Upgrade to GPL/LGPLv3.
+ * opcode/i386.h: Upgrade to GPL/LGPLv3.
+ * opcode/sparc.h: Upgrade to GPL/LGPLv3.
+
+2007-01-26 Thomas Girard <thomas.g.girard@free.fr>
+
+ * lightning/Makefile.am: Add clean-local target.
+
+2006-12-02 Paolo Bonzini <bonzini@gnu.org>
+
+ * lightning/i386/asm-i386.h: Add CVTTS?2SIL.
+ * lightning/i386/asm-64.h: Add CVTTS?2SIQ.
+ * lightning/i386/fp-64.h: Use it.
+
+ * lightning/Makefile.am: Place files in nodist_lightning_HEADERS.
+
+2006-11-23 Paolo Bonzini <bonzini@gnu.org>
+
+ * lightning/core-common.h: Add casts in "*i_p" variants.
+ * lightning/i386/asm-32.h: Add _r1.
+ * lightning/i386/asm-64.h: Likewise, and add SSE instructions.
+ * lightning/i386/asm-i386.h: Merge SSE instructions from Gwenole.
+ Use short form for 16-bit AX instructions. Remove _r1
+ * lightning/i386/core-64.h: Add FP ABI support in its infancy.
+ * lightning/i386/core-i386.h: Move jit_arg_f and jit_arg_d...
+ * lightning/i386/core-32.h: ... and jit_prepare_f and jit_prepare_d...
+ * lightning/i386/fp-32.h: ... here.
+ * lightning/i386/fp-64.h: Write the code.
+ * lightning/sparc/fp.h: Fix jit_extr_{f_d,d_f} register order.
+
+2006-11-22 Paolo Bonzini <bonzini@gnu.org>
+
+ * lightning/i386/asm-i386.h: Move x86-64 instructions...
+ * lightning/i386/asm-64.h: ... here.
+ * lightning/i386/fp-32.h: Fix bugs worked around in froofyJIT.
+ Add JIT_FPRET.
+ * lightning/sparc/fp.h: Likewise.
+ * lightning/ppc/fp.h: Likewise.
+ * lightning/fp-common.h: Adjust for JIT_FPRET.
+ * tests/funcfp.c: Adjust for JIT_FPRET.
+ * tests/rpnfp.c: Adjust for JIT_FPRET.
+
+2006-11-20 Paolo Bonzini <bonzini@gnu.org>
+
+ * lightning/i386/asm-i386.h: Add an underscore to macros without
+ a parameter.
+
+2006-11-20 Paolo Bonzini <bonzini@gnu.org>
+
+ * lightning/i386/core-i386.h: Move jit_movip, jit_check8, jit_reg8,
+ jit_reg16, jit_movbrm...
+ * lightning/i386/core-32.h: ... here.
+ * lightning/i386/core-64.h: Redefine them. Fix other bugs.
+
+ * tests/printf.c: Do not do a varargs call.
+
+2006-11-20 Paolo Bonzini <bonzini@gnu.org>
+
+ * lightning/i386/asm-i386.h: Check in rewrite from Basilisk II.
+ * lightning/i386/asm-32.h: Adjust.
+ * lightning/i386/asm-64.h: Adjust.
+ * lightning/i386/fp-32.h: Adjust.
+
+ * lightning/i386/core-32.h: Adjust. Add jit_{ld,ldx,st,stx}i*.
+ * lightning/i386/core-64.h: Adjust. Add jit_{ld,ldx,st,stx}i*.
+ * lightning/i386/core-i386.h: Adjust. Remove these patterns.
+
+2006-11-20 Paolo Bonzini <bonzini@gnu.org>
+
+ * lightning/i386/asm-i386.h: Merge 64-bit cleanliness changes from
+ mzscheme.
+ Add SSE.
+ * lightning/i386/asm-64.h: Likewise.
+
+2006-11-20 Paolo Bonzini <bonzini@gnu.org>
+ Ludovic Courtes <ludo@chbouib.org>
+
+ * lightning/i386/core-32.h: Disable jit_push and jit_pop if stack not
+ needed.
+ * lightning/i386/core-64.h: Disable jit_push and jit_pop if stack not
+ needed.
+ * lightning/sparc/core.h: Merge final implementation of jit_pushr and
+ jit_popr.
+ * lightning/ppc/core.h: Fix implementation of jit_pushr and jit_popr to
+ work (more or less) across function calls.
+
+ * tests/push-pop.c, tests/push-pop.ok: New test.
+ * tests/Makefile.am: Run it.
+
+2006-11-20 Paolo Bonzini <bonzini@gnu.org>
+
+ * lightning/asm-common.h: Make 64-bit safe.
+ * lightning/i386/funcs.h: Make 64-bit safe.
+
+ * lightning/i386/asm-64.h: More merge from mzscheme.
+ * lightning/i386/asm-i386.h: More merge from mzscheme.
+ * lightning/i386/core-32.h: More merge from mzscheme.
+ * lightning/i386/core-64.h: More merge from mzscheme.
+ * lightning/i386/core-i386.h: More merge from mzscheme.
+
+ * tests/rpnfp.c, tests/testfp.c, tests/funcfp.c: Skip if no
+ floating-point support.
+
+2006-11-04 Paolo Bonzini <bonzini@gnu.org>
+
+ * tests/rpn.c: Remove pushr/popr.
+
+2006-11-04 Paolo Bonzini <bonzini@gnu.org>
+
+ * lightning/ppc/core.h: Implement jit_allocai, define JIT_FP to be R1.
+ * lightning/ppc/funcs.h: Store frame size into _jitl. Store R1 before
+ the STMW, so that the offset is unchanged when we patch the STMW.
+ * lightning/i386/core.h: Define JIT_FP to be EBP.
+ * lightning/i386/core-32.h: Implement jit_allocai, put LEAVE in the
+ epilog if jit_allocai was used.
+ * lightning/i386/core-64.h: Implement jit_allocai, put LEAVE in the
+ epilog if jit_allocai was used.
+
+2006-11-04 Ludovic Courtes <ludo@chbouib.org>
+
+ * lightning/sparc/core.h: Implement jit_allocai.
+ * tests/allocai.c: New.
+ * tests/Makefile.am: Point to new tests.
+
+2006-11-03 Paolo Bonzini <bonzini@gnu.org>
+
+ * lightning/ppc/core.h: Fix jit_bms using BNE rather than BGT.
+ "AND." does signed comparisons.
+
+2006-10-31 Paolo Bonzini <bonzini@gnu.org>
+
+ * doc/porting.texi: Rename JIT_FP to JIT_AP.
+ * lightning/core-common.h: Likewise.
+ * lightning/i386/core-i386.h: Likewise.
+ * lightning/fp-common.h: Provide default versions of jit_getarg_[fd].
+ * lightning/i386/fp-32.h: Don't provide jit_getarg_[fd].
+ * lightning/ppc/fp.h: Likewise.
+
+2006-10-31 Ludovic Courtes <ludo@chbouib.org>
+
+ * doc/using.texi (The instruction set): Clarified the use of `JIT_RET' and
+ documented `jit_retval'.
+ * tests/ret.c (generate_function_proxy): After `jit_finish', use
+ `jit_retval_i' to move FUNC's return value into the correct register.
+
+2006-10-31 Paolo Bonzini <bonzini@gnu.org>
+ Ludovic Courtes <ludo@chbouib.org>
+
+ * tests/divi.c, tests/divi.ok, tests/movi.c, tests/movi.ok: New.
+ * tests/ldxi.c: Ensure large pointer is generated.
+ * tests/Makefile.am: Point to new tests.
+ * lightning.h: Include funcs-common.h before funcs.h.
+ * lightning/sparc/core.h: Fix bugs in modi/divi.
+
+2006-10-30 Paolo Bonzini <bonzini@gnu.org>
+
+ * lightning/Makefile.am: Use "ln -sf".
+ * lightning/core-common.h: Define jit_negr_l if necessary.
+
+2006-10-30 Paolo Bonzini <bonzini@gnu.org>
+
+ * lightning/i386/asm.h (MOVS*, MOVZ*): Use correct _r[124] macros.
+
+2006-10-29 Paolo Bonzini <bonzini@gnu.org>
+
+ * configure.ac: Use lightning.m4 macros.
+ * lightning.m4: Refactor to use common code in configure.ac. Move...
+ * build-aux/lightning.m4: ... here.
+ * lightningize.in: Support suffixes.
+ * opcode/disass.in: Adapt to changes in configure.ac.
+
+ * lightning/ppc/funcs.h: Use __APPLE__ instead of _CALL_DARWIN.
+ * lightning/i386/core-32.h: Likewise.
+
+2006-10-26 Paolo Bonzini <bonzini@gnu.org>
+
+ * configure.ac: Fix compilation test.
+ * lightning/Makefile.am: Symlink LIGHTNING_TARGET_FILES in
+ non-distribution mode.
+ * lightning/i386/Makefile.frag: Use LIGHTNING_TARGET_FILES.
+
+2006-10-26 Paolo Bonzini <bonzini@gnu.org>
+
+ * configure.ac: Subst cpu.
+ * lightning/core-common.h: Make tests pass on i386.
+ * lightning/i386/asm-32.h: Make tests pass on i386.
+ * lightning/i386/asm-64.h: Make tests pass on i386.
+ * lightning/i386/asm-i386.h: Make tests pass on i386.
+ * lightning/i386/core-32.h: Make tests pass on i386.
+ * lightning/i386/core-64.h: Make tests pass on i386.
+ * lightning/i386/core-i386.h: Make tests pass on i386.
+ * tests/Makefile.am: Include files from cpu directory.
+
+2006-10-26 Paolo Bonzini <bonzini@gnu.org>
+
+ * lightning/i386/asm.h: Move to asm-i386.h
+ * lightning/i386/asm-32.h: New, from Matthew Flatt.
+ * lightning/i386/asm-64.h: New, from Matthew Flatt.
+ * lightning/i386/core.h: Move to core-i386.h
+ * lightning/i386/core-32.h: New, from Matthew Flatt.
+ * lightning/i386/core-64.h: New, from Matthew Flatt.
+ * lightning/i386/fp.h: Move to fp-32.h
+ * lightning/i386/fp-64.h: New, dummy.
+ * lightning/i386/Makefile.frag: New.
+ * lightning/Makefile.am: Support per-target Makefile fragments.
+ * configure.ac: Support per-target Makefile fragments and CPU suffixes.
+
+2006-10-16 Paolo Bonzini <bonzini@gnu.org>
+
+ * lightning/i386/i386.h (jit_flush_code): Fix syntax error. :-(
+
+2006-07-06 Paolo Bonzini <bonzini@gnu.org>
+ Ludovic Courtes <ludovic.courtes@laas.fr>
+
+ * doc/using.texi: Clarify "Using autoconf" section
+ and rename it to "Bundling lightning"
+ * lightning.m4: Work also if lightning is not bundled.
+
+2006-07-06 Paolo Bonzini <bonzini@gnu.org>
+ Ludovic Courtes <ludovic.courtes@laas.fr>
+
+ * lightning/ppc/core.h (_jit_mod): Replace with...
+ (_jit_mod_big, _jit_mod_small): ... these.
+ (jit_modi_i, jit_modi_ui): Rewrite.
+ * tests/modi.c, tests/modi.ok: New tests.
+
+2006-05-18 Matthew Flatt <mflatt@cs.utah.edu>
+
+ * lightning/i386/asm.h: Fix test for extending the mprotect area
+ towards lower addresses.
+
+2006-05-16 Bruno Haible <bruno@clisp.org>
+
+ * lightning/asm-common.h: Don't use __func__ nor __FUNCTION__ if
+ not compiling with GNU C.
+
+2006-02-16 Paolo Bonzini <bonzini@gnu.org>
+
+ * lightning/ppc/core.h: Fix jit_ldxi_* with big displacement.
+
+2006-01-23 Paolo Bonzini <bonzini@gnu.org>
+
+ * configure.ac: Fix comments in config.h.in.
+
+2005-11-25 Paolo Bonzini <bonzini@gnu.org>
+
+ * lightning/sparc/fp.h: Fix header comment.
+ * lightning/ppc/fp.h: Fix header comment.
+
+2005-04-27 Paolo Bonzini <bonzini@gnu.org>
+
+ * lightning/i386/asm.h (JCm, JCSm, JNCm, JNCSm): New.
+
+2004-11-26 Paolo Bonzini <bonzini@gnu.org>
+
+ * lightning/ppc/funcs.h (_jit_epilog): Remove unused variable.
+
+2004-11-13 Paolo Bonzini <bonzini@gnu.org>
+
+ * lightning/i386/funcs.h [__linux__]: Include sys/mman.h.
+
+2004-11-09 Paolo Bonzini <bonzini@gnu.org>
+
+ * lightning/sparc/fp.h: Fix fp-to-integer conversions.
+ * lightning/ppc/testfp.c: Test fp-to-integer conversions
+ of integer numbers.
+ * lightning/ppc/testfp.ok: Adjust for the above.
+
+2004-11-08 Paolo Bonzini <bonzini@gnu.org>
+
+ * lightning/ppc/testfp.c: Always flush code before
+ testing it.
+
+2004-11-08 Paolo Bonzini <bonzini@gnu.org>
+
+ * lightning/ppc/fp.h: Do not clobber f31.
+
+2004-11-08 Paolo Bonzini <bonzini@gnu.org>
+
+ * lightning.h: New name of...
+ * lightning-inst.h: ... this file.
+ * lightning.h.in: Removed.
+
+ * opcode/disass.c: Include config.h.
+ * tests/add.c: Include config.h.
+ * tests/bp.c: Include config.h.
+ * tests/fib.c: Include config.h.
+ * tests/fibdelay.c: Include config.h.
+ * tests/fibit.c: Include config.h.
+ * tests/funcfp.c: Include config.h.
+ * tests/incr.c: Include config.h.
+ * tests/printf.c: Include config.h.
+ * tests/printf2.c: Include config.h.
+ * tests/rpn.c: Include config.h.
+ * tests/rpnfp.c: Include config.h.
+ * tests/testfp.c: Include config.h.
+
+2004-10-12 Paolo Bonzini <bonzini@gnu.org>
+
+ * lightning/i386/fp.h: Fix bugs in conditional branches.
+
+2004-10-10 Paolo Bonzini <bonzini@gnu.org>
+
+ * lightning/i386/funcs.h: Fix pasto in jit_flush_code.
+
+2004-10-08 Paolo Bonzini <bonzini@gnu.org>
+
+ * lightning/ppc/fp.h: Optimized conditional branches.
+
+2004-09-20 Paolo Bonzini <bonzini@gnu.org>
+
+ * lightning/ppc/asm.h: Fix more typos.
+
+2004-09-20 Paolo Bonzini <bonzini@gnu.org>
+
+ * lightning/ppc/asm.h: Fix typos, replace `26' with JIT_AUX.
+
+2004-09-20 Paolo Bonzini <bonzini@gnu.org>
+
+ * lightning/ppc/fp.h: Added conditional branches.
+
+2004-09-18 Laurent Michel <ldm@thorgal.homelinux.org>
+
+ * lightning/ppc/fp.h (jit_unler_d, jit_unltr_d, jit_unger_d,
+ jit_ungtr_d, jit_ltgt_d, jit_uneq_d): Implemented missing tests
+ to fully support testfp.
+ (jit_floorr_d_i, jit_ceilr_d_i, jit_roundr_d_i, jit_truncr_d_i):
+ New macros.
+ * lightning/ppc/asm.h: Added missing opcodes FCTIWZ and MTFSFI.
+ * lightning/ppc/funcs.h (_jit_prolog): Fixed minor mistake in
+ the initialization of _jitl.nextarg_geti, relying on the
+ JIT_AUX macro as well to get the register offset.
+
+2004-09-07 Paolo Bonzini <bonzini@gnu.org>
+
+ * lightning/ppc/funcs.h: Fix typo.
+
+2004-09-06 Paolo Bonzini <bonzini@gnu.org>
+
+ * tests/funcfp.c: Use %g. Remove C99 variable declarations.
+ * tests/testfp.c: Don't use __builtin_nan.
+
+ * lightning/ppc/core.h: Add three V registers.
+ * lightning/ppc/funcs.h: Adjust.
+
+ * lightning/sparc/core.h: Some fixes related to FP argument passing.
+ Move R0 to %g2, use %o7 for JIT_BIG2.
+ * lightning/sparc/fp.h: Some fixes related to FP argument passing.
+
+2004-09-02 Paolo Bonzini <bonzini@gnu.org>
+
+ * lightning/sparc/core.h: Add another V register,
+ move R0 to %o7.
+
+2004-07-15 Paolo Bonzini <bonzini@gnu.org>
+
+ * lightning/i386/funcs.h: Implement jit_flush_code,
+ in order to support Fedora's exec-shield.
+
+2004-07-14 Paolo Bonzini <bonzini@gnu.org>
+
+ * lightning/core-common.h: Add more jit_extr_*_* macros.
+ * lightning/doc/using.texi: Be clearer about the order
+ of arguments in jit_extr_*_*.
+ * lightning/doc/porting.texi: Add more jit_extr_*_* macros.
+ * lightning/i386/fp.h: Fix typo in jit_extr_i_d.
+
+2004-07-14 Paolo Bonzini <bonzini@gnu.org>
+
+ * lightning/ppc/funcs.h: Adjust offset of LR into
+ stack frame if running under the Darwin ABI.
+
+2004-07-13 Paolo Bonzini <bonzini@gnu.org>
+
+ * lightning/i386/fp.h: Rename jit_exti_d to jit_extr_i_d.
+
+2004-07-13 Paolo Bonzini <bonzini@gnu.org>
+
+ * lightning/ppc/core.h: Fix thinko.
+
+ * lightning/i386/core.h: Fix jit_lti_ui.
+ * lightning/core-common.h: Add missing macros.
+
+ * lightning/ppc/fp.h: Rename jit_neg_* to jit_negr_*.
+ * lightning/i386/fp.h: Rename jit_neg_* to jit_negr_*.
+ * lightning/sparc/fp.h: Rename jit_neg_* to jit_negr_*.
+ * lightning/fp-common.h: Rename jit_neg_* to jit_negr_*.
+ * doc/porting.texi: Add undocumented macros.
+
+2004-07-12 Paolo Bonzini <bonzini@gnu.org>
+
+ * doc/porting.texi: Add missing macros.
+
+2004-07-12 Paolo Bonzini <bonzini@gnu.org>
+
+ * lightning/ppc/funcs.h: Don't generate trampolines.
+ Separate prolog and epilog generation.
+ * lightning/ppc/core.h: Generate epilog explicitly.
+ Don't reserve r31 anymore.
+ * lightning/core-common.h: Remove call to jit_setup_code.
+
+2004-07-09 Paolo Bonzini <bonzini@gnu.org>
+
+ * lightning/lightning.h.in: Avoid preprocessor warnings.
+ * lightning/lightning-inst.h: Likewise.
+
+ * lightning/i386/core.h: Define JIT_R, JIT_R_NUM, JIT_V,
+ JIT_V_NUM.
+ * lightning/ppc/core.h: Likewise.
+ * lightning/sparc/core.h: Likewise.
+ * lightning/i386/fp.h: Define JIT_FPR, JIT_FPR_NUM.
+ * lightning/ppc/fp.h: Likewise.
+ * lightning/sparc/fp.h: Likewise.
+ * lightning/core-common.h: Define fixed register names.
+ * lightning/fp-common.h: Likewise for FP regs.
+
+2004-07-09 Paolo Bonzini <bonzini@gnu.org>
+
+ * lightning/ppc/funcs.h: Fix location where return address
+ is stored.
+ * lightning/i386/asm.h: Add a trailing _ to opcodes without
+ any parameter.
+ * lightning/i386/core.h: Adjust for the above.
+
+2004-04-15 Paolo Bonzini <bonzini@gnu.org>
+
+ * lightning/i386/fp.h: Change "and" to "_and"
+ to satisfy C++ compilers.
+
+2004-04-14 Paolo Bonzini <bonzini@gnu.org>
+
+ * lightning/sparc/fp.h: Use memcpy to implement jit_movi.
+ * lightning/ppc/fp.h: Use memcpy to implement jit_movi.
+ Move floating-point opcodes...
+ * lightning/ppc/asm.h: ... here.
+
+2004-04-14 Paolo Bonzini <bonzini@gnu.org>
+
+ * lightning/core-common.h: Add jit_finishr.
+ * lightning/ppc/core.h: Add jit_callr and jit_finishr.
+ * lightning/i386/core.h: Add jit_callr.
+ * lightning/sparc/core.h: Add jit_callr. Fix typo.
+
+2004-04-14 Paolo Bonzini <bonzini@gnu.org>
+
+ * lightning/i386/core.h: Fix pasto in jit_b*_ui.
+
+2004-03-30 Laurent Michel
+
+ * lightning/ppc: Implement PowerPC floating point
+ (ChangeLog entry missing).
+
+2004-03-12 Paolo Bonzini <bonzini@gnu.org>
+
+ * lightning/fp-common.h: Load/store macros are not the
+ same for floats and doubles anywhere, but jit_retval may be.
+ * lightning/i386/asm.h: Fix = mistaken for == in ESCrri.
+ * lightning/i386/core.h: Fix typo in jit_prepare_[fd].
+ * lightning/i386/fp.h: Rewritten.
+ * tests/testfp.c: Add tests for unordered comparisons.
+ * tests/testfp.ok: Add results.
+
+2004-03-15 Paolo Bonzini <bonzini@gnu.org>
+
+ Merge changes from Laurent Michel.
+
+ * lightning/asm-common.h: Add _jit_I_noinc.
+ * lightning/core-common.h: Support jit_init,
+ jit_setup_code, jit_patch_at. Return patchable IP from
+ jit_movi_p.
+ * lightning/funcs-common.h: Provide defaults
+ for jit_setup_code, jit_start_pfx, jit_end_pfx
+ * lightning/i386/core.h: Add jit_patch_at, jit_patch_movi.
+ * lightning/ppc/core.h: Likewise.
+ * lightning/sparc/core.h: Likewise.
+ * lightning/ppc/asm.h: Fix generation of branch destination
+ displacements in _FB and _BB
+ * lightning/ppc/core.h: Generate trampolines in the user
+ area.
+ * lightning/ppc/funcs.h: Add a few casts.
+ * tests/bc.c: New testcase.
+
+ * lightning/i386/asm.h: Wrap into #ifndef LIGHTNING_DEBUG.
+ * lightning/ppc/asm.h: Wrap into #ifndef LIGHTNING_DEBUG.
+ * lightning/sparc/asm.h: Wrap into #ifndef LIGHTNING_DEBUG.
+
+
+2004-03-09 Paolo Bonzini <bonzini@gnu.org>
+
+ * lightning/sparc/fp.h: Rewrite. Move macros for
+ FP code generation...
+ * lightning/sparc/asm.h: ... here.
+ * lightning/sparc/core.h: Rename jit_prepare to
+ jit_prepare_i, jit_retval to jit_retval_i.
+ * lightning/ppc/core.h: Rename jit_prepare to
+ jit_prepare_i, jit_retval to jit_retval_i.
+ * lightning/i386/core.h: Rename jit_prepare to
+ jit_prepare_i, jit_retval to jit_retval_i.
+ * lightning/core-common.h: Provide backwards
+ compatible synonyms for the above.
+ * lightning/fp-common.h: Rewrite.
+ * lightning-inst.h: Include fp unconditionally.
+ * lightning.h.in: Include fp unconditionally.
+ * tests/Makefile.am: Enable fp tests.
+ * tests/fib.c: Use jit_retval_i.
+ * tests/fibit.c: Cast codeBuffer to char *.
+ * tests/funcfp.c: Use new fp macros.
+ * tests/printf.c: Use jit_retval_i.
+ * tests/rpnfp.c: Use new fp macros.
+ * tests/testfp.c: Use new fp macros.
+
+2004-03-02 Paolo Bonzini <bonzini@gnu.org>
+
+ * lightning/i386/core.h: generate correct code when
+ doing lt/le/ge/etc. on ESI and EDI. Use MOVZX/MOVSX
+ where possible.
+ * lightning/i386/asm.h: Add macros for MOVZX/MOVSX.
+ Move macros for x87 here, and add many of them.
+ * lightning/i386/fp.h: Use new macros for x87.
+
+2004-02-06 Paolo Bonzini <bonzini@gnu.org>
+
+ * lightning/i386/core.h: avoid generating MOV reg, reg.
+ * lightning/sparc/core.h: fix several bugs.
+ * lightning/ppc/core.h: fix several bugs.
+ * tests/rpn.c: rewritten.
+
+2004-01-08 Paolo Bonzini <bonzini@gnu.org>
+
+ * tests/rpnfp.c: new example, suggested by Basile
+ Starynkevitch.
+ * tests/rpnfp.ok: new example.
+
+2003-12-12 Paolo Bonzini <bonzini@gnu.org>
+
+ * tests/add.c: new test, suggested by Steve Dekorte.
+ * tests/add.c: new test.
+
+2003-11-14 Paolo Bonzini <bonzini@gnu.org>
+ John Redford <eirenik@hotmail.com>
+
+ * lightning/asm-common.h: change the 'pc' field of _jit to
+ be a union of various data types, because ISO C99 doesn't
+ permit using ++ on a = cast. Change the incremented casts of
+ _jit.pc to be _jit.x.uc_pc, _jit.x.us_pc, etc.
+ * all files: change all non-cast instances of _jit.pc to be
+ _jit.x.pc.
+ * lightning/i386/core.h: remove casts from jit_might.
+
+2003-05-25 Paolo Bonzini <bonzini@gnu.org>
+
+ * lightning/i386/core.h: use JITSORRY in jit_replace
+ * lightning/asm-common.h: define JITSORRY
+
+2003-05-14 Paolo Bonzini <bonzini@gnu.org>
+
+ * lightning/i386/core.h: fix missing comma in several
+ load/store macros.
+ * lightning/core-common.h: fix long/unsigned long/pointer
+ jit_pushr/jit_popr.
+ * lightning/ppc/funcs.h: correctly align stack pointer
+
+No changelogs for the assemblers (lightning directory) until 1.0
+
+2003-03-27 Paolo Bonzini <bonzini@gnu.org>
+
+ * tests/printf2.c: new test
+
+2001-05-03 Paolo Bonzini <bonzini@gnu.org>
+
+ * tests/printf.c: made the message platform independent
+
+2001-01-19 Paolo Bonzini <bonzini@gnu.org>
+
+ * configure.in: support cross-assembling
+
+ * disass/bfd.h, disass/dis-asm.h, disass/dis-buf.c,
+ disass/i386-dis.c, disass/i386.h, disass/ppc-dis.c,
+ disass/ppc.h, disass/ppc-opc.c, disass/sparc-dis.c,
+ disass/sparc.h, disass/sparc-opc.c: new files, from GDB
+
+ * disass/disass.c, disass/Makefile.am: new files
+
+ * tests/fib.c, tests/fibit.c, tests/incr.c, tests/printf.c,
+ tests/rpn.c, tests/testfp.c, tests/Makefile.am: support
+ disassembling
diff --git a/deps/lightening/NEWS b/deps/lightening/NEWS
new file mode 100644
index 0000000..f56dd79
--- /dev/null
+++ b/deps/lightening/NEWS
@@ -0,0 +1,199 @@
+NEWS FROM 1.99 TO 1.99a
+
+o Lightning now builds and passes all test cases on AIX 7.1 powerpc,
+ HP-UX 11iv2 hppa, HP-UX 11iv3 ia64, Solaris 10 Sparc, Solaris 11
+ x86_64, and Irix 6.5.30 mips (using n32 abi).
+
+NEWS FROM VERSION 1.3 TO 1.99
+
+o The 1.99 version is a major lightning redesign and an
+ alpha version.
+
+o Except for some special power-user usage, the major
+ difference in the rework is that function calls now push
+ arguments from left to right, which is both more natural for
+ programmers and more natural to implement for architectures
+ that pass arguments in registers and have alignment constraints,
+ usually for 64 bit double arguments.
+
+o Add mips backend, implementing the o32 abi.
+
+o Added arm backend implementing all combinations of software float,
+ vfp, neon, arm and thumb instruction sets, softfp and hardfp abis,
+ armv5, armv6, and armv7.
+
+o Added sse2+ code generation for the 32 bit x86 backend.
+
+o Added sse3 and sse4.x optional code generation for the 64 bit
+ x86 backend, code generation based on detected cpu.
+
+o Reworked and added full lightning instruction set to ppc 32;
+ tested on ppc64 hardware and Darwin 32 operating system.
+
+o Added ppc64 backend, built and tested on Fedora ppc.
+
+o Reworked the sparc backend, built and tested on Debian sparc.
+
+o Added an ia64 backend, built and tested on Debian ia64.
+
+o Added an hppa backend, built and tested on Debian hppa.
+
+---
+
+NEWS FROM VERSION 1.2 TO 1.3
+
+o Initial support for x86-64 back-end (mostly untested).
+
+o lightning is more strict on casts from integer to pointer.
+ Be sure to use the _p variants when your immediates are
+ of pointer type. This was done to ease 64-bit cleanliness
+ tests.
+
+o Many bug fixes.
+
+o JIT_FPRET is used as JIT_RET to move return values.
+ jit_retval_[fd] is used to retrieve return values.
+
+o jit_pushr/jit_popr are deprecated, you need to #define
+ JIT_NEED_PUSH_POP prior to including lightning.h if you
+ want to use them.
+
+o Support for stack-allocated variables. Because of this,
+ backends defining JIT_FP should now rename it to JIT_AP.
+ JIT_FP is now a user-visible register used in ldxi/ldxr
+ to access stack-allocated variables.
+
+
+---
+
+NEWS FROM VERSION 1.1.2 TO 1.2
+
+o Floating-point interface rewritten, uses a register file
+ architecture rather than a stack.
+
+o Many bug fixes.
+
+o jit_prepare and jit_retval are now jit_prepare_i and
+ jit_retval_i.
+
+o Support for Fedora Core 1's exec-shield feature.
+
+o PPC supports both SysV and Darwin ABIs.
+
+o More (and more complete) examples provided.
+
+---
+
+NEWS FROM VERSION 1.1.1 TO 1.1.2
+
+o This release fixes the bugs in PowerPC cache flushing and in
+ SPARC testing.
+
+---
+
+NEWS FROM VERSION 1.1 TO 1.1.1
+
+o Merge changes from Debian
+
+This version was released to have a distributable version of lightning
+after the recent crack of the GNU FTP machines. It does not fix
+outstanding bugs; I apologize for the inconvenience.
+
+---
+
+NEWS FROM VERSION 1.0 TO 1.1
+
+o Several bug fixes
+
+o improved infrastructure for embedding GNU lightning (lightningize
+ script)
+
+---
+
+NEWS FROM VERSION 0.99 TO 1.0
+
+o SPARC backend tested on GNU Smalltalk
+
+
+---
+
+NEWS FROM VERSION 0.98 TO 0.99
+
+o Added floating point function support (thanks to Laurent Michel);
+ unfortunately this broke the PPC and SPARC floating point stuff
+ even more :-(
+
+---
+
+NEWS FROM VERSION 0.97 to 0.98
+
+o PPC backend tested on GNU Smalltalk
+
+o switched to autoconf 2.50
+
+o new (much faster) PPC cache flushing code by John McIntosh
+
+---
+
+NEWS FROM VERSION 0.96 to 0.97
+
+o support for cross-assembling and for disassembling the code that the tests
+ generate
+
+o PPC microtests pass (tested directly by me), SPARC was said to work
+
+---
+
+NEWS FROM VERSION 0.95 to 0.96
+
+o fixed implementation of delay slots to be coherent with the manual
+
+---
+
+NEWS FROM VERSION 0.94 to 0.95
+
+o adc/sbc replaced with addc/addx/subc/subx to allow for more optimization
+ (inspired by the PPC instruction set).
+
+o A few fixes and far fewer warnings from the compiler
+
+o Automake-ized everything
+
+o i386 backend generates smaller code for bms/bmc/or/xor by using byte
+ or word versions if possible
+
+o Moved backends to separate directories
+
+---
+
+NEWS FROM VERSION 0.93 to 0.94
+
+o Manual builds as DVI file.
+
+---
+
+NEWS FROM VERSION 0.92 to 0.93
+
+o Floating-point front-end (began supporting PPC & SPARC).
+
+---
+
+NEWS FROM VERSION 0.91 to 0.92
+
+o Floating-point front-end (only x86 supported).
+
+---
+
+NEWS FROM VERSION 0.9 to 0.91
+
+o Carrying supported in addition/subtraction.
+
+o insn type changed to jit_insn.
+
+o Misc bug fixes.
+
+o Reentrancy supported.
+
+o SPARC run-time assembler rewritten.
+
+o The run-time assembler can be disabled for debugging purposes.
diff --git a/deps/lightening/README.md b/deps/lightening/README.md
new file mode 100644
index 0000000..515c3ee
--- /dev/null
+++ b/deps/lightening/README.md
@@ -0,0 +1,57 @@
+# Lightening
+
+Lightening is a just-in-time code generation library derived from GNU
+Lightning, adapted to the purposes of the GNU Guile project.
+
+## Use
+
+```
+gcc -flto -O2 -g -o lightening.o -c lightening/lightening.c
+gcc -flto -O2 -g -o my-program lightening.o my-program.c
+```
+
+See the GNU Lightning manual for more on how to program against
+Lightening (many of the details are the same).
+
+## What's the difference with GNU Lightning?
+
+This project is called Lightening because it's lighter-weight than GNU
+Lightning. When you go to generate code at run-time with GNU Lightning,
+what happens is that you build up a graph of nodes which GNU Lightning
+"optimizes" before finally emitting machine code. These optimizations
+can improve register allocation around call sites. However they are not
+helpful from a Guile perspective, as they get in the way of register
+allocation that we need to do; and they actually prevent access to all
+the registers that we would like to have.
+
+Guile needs a simple, light-weight code generation library. The GNU
+Lightning architecture-specific backends provide the bulk of this
+functionality, and Lightening wraps it all in a lightweight API.
+
+## Supported targets
+
+Lightening can generate code for the x86-64, i686, ARMv7, and AArch64
+architectures. It supports the calling conventions of MS Windows,
+GNU/Linux, and Mac OS.
+
+On i686, Lightening requires SSE support. On ARMv7, we require hardware
+floating-point support (the VFP instructions), as well as the UDIV/SDIV
+instructions.
+
+Lightening is automatically tested using GitLab's continuous integration
+on the supported architectures, for GNU/Linux; for a list of
+recent jobs, see [the CI
+page](https://gitlab.com/wingo/lightening/-/jobs).
+
+## Future targets
+
+Lightening has some inherited code from GNU Lightning for MIPS, PPC64,
+and s390. Patches to adapt this code to the Lightening code structure
+are quite welcome.
+
+RISC-V support would be fun too.
+
+## Status
+
+Lightening is used in GNU Guile since version 2.9.2 and seems to work
+well.
diff --git a/deps/lightening/THANKS b/deps/lightening/THANKS
new file mode 100644
index 0000000..42bbfc6
--- /dev/null
+++ b/deps/lightening/THANKS
@@ -0,0 +1,19 @@
+Thanks to all the following people for their help in
+improving GNU lightning:
+
+Paolo Bonzini <bonzini@gnu.org>
+Eli Barzilay <eli@barzilay.org>
+Ludovic Courtes <ludo@chbouib.org>
+Matthew Flatt <mflatt@cs.utah.edu>
+Laurent Michel <ldm@thorgal.homelinux.org>
+Paulo Cesar Pereira de Andrade <pcpa@gnu.org>
+Mike Spivey <mike@comlab.ox.ac.uk>
+Basile Starynkevitch <basile@starynkevitch.net>
+Sam Steingold <sds@gnu.org>
+Jens Troeger <savage@light-speed.de>
+Tom Tromey <tromey@redhat.com>
+Trent Nelson <trent@snakebite.org>
+Vitaly Magerya <vmagerya@gmail.com>
+Brandon Invergo <brandon@gnu.org>
+Holger Hans Peter Freyther <holger@moiji-mobile.com>
+Jon Arintok <jon.arintok@gmail.com>
diff --git a/deps/lightening/lightening.am b/deps/lightening/lightening.am
new file mode 100644
index 0000000..2c9089e
--- /dev/null
+++ b/deps/lightening/lightening.am
@@ -0,0 +1,58 @@
+# Copyright 2019 Free Software Foundation, Inc.
+#
+# This file is part of Lightening.
+#
+# Lightening is free software; you can redistribute it and/or modify it
+# under the terms of the GNU Lesser General Public License as published
+# by the Free Software Foundation; either version 3, or (at your option)
+# any later version.
+#
+# Lightening is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+# License for more details.
+#
+
+lightening = $(srcdir)/lightening
+
+lightening_c_files = \
+ $(lightening)/lightening/lightening.c
+
+# Non-compiled files that belong to Lightening: top-level documentation,
+# the public header, and the per-architecture sources.  Every backend
+# that lightening.h can include is listed, s390 included, so that none
+# of them is missing from this list.
+lightening_extra_files = \
+ $(lightening)/AUTHORS \
+ $(lightening)/ChangeLog \
+ $(lightening)/ChangeLog.lightning \
+ $(lightening)/COPYING \
+ $(lightening)/COPYING.DOC \
+ $(lightening)/COPYING.LESSER \
+ $(lightening)/lightening.am \
+ $(lightening)/lightning.texi \
+ $(lightening)/NEWS \
+ $(lightening)/README.md \
+ $(lightening)/THANKS \
+ \
+ $(lightening)/lightening.h \
+ \
+ $(lightening)/lightening/endian.h \
+ \
+ $(lightening)/lightening/aarch64.h \
+ $(lightening)/lightening/arm.h \
+ $(lightening)/lightening/mips.h \
+ $(lightening)/lightening/ppc.h \
+ $(lightening)/lightening/s390.h \
+ $(lightening)/lightening/x86.h \
+ \
+ $(lightening)/lightening/aarch64.c \
+ $(lightening)/lightening/aarch64-cpu.c \
+ $(lightening)/lightening/aarch64-fpu.c \
+ $(lightening)/lightening/arm.c \
+ $(lightening)/lightening/arm-cpu.c \
+ $(lightening)/lightening/arm-vfp.c \
+ $(lightening)/lightening/mips.c \
+ $(lightening)/lightening/mips-cpu.c \
+ $(lightening)/lightening/mips-fpu.c \
+ $(lightening)/lightening/ppc.c \
+ $(lightening)/lightening/ppc-cpu.c \
+ $(lightening)/lightening/ppc-fpu.c \
+ $(lightening)/lightening/s390.c \
+ $(lightening)/lightening/s390-cpu.c \
+ $(lightening)/lightening/s390-fpu.c \
+ $(lightening)/lightening/x86.c \
+ $(lightening)/lightening/x86-cpu.c \
+ $(lightening)/lightening/x86-sse.c
diff --git a/deps/lightening/lightening.h b/deps/lightening/lightening.h
new file mode 100644
index 0000000..309e350
--- /dev/null
+++ b/deps/lightening/lightening.h
@@ -0,0 +1,715 @@
+/*
+ * Copyright (C) 2012-2020 Free Software Foundation, Inc.
+ *
+ * This file is part of GNU lightning.
+ *
+ * GNU lightning is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ *
+ * GNU lightning is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+ * License for more details.
+ *
+ * Authors:
+ * Paulo Cesar Pereira de Andrade
+ * Andy Wingo
+ */
+
+#ifndef _jit_h
+#define _jit_h
+
+#include <unistd.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <string.h>
+#include <stddef.h>
+
+#include "lightening/endian.h"
+
+/* Signed word type: int32_t or int64_t, whichever CHOOSE_32_64 selects
+   (the macro is defined outside this header; presumably it keys on the
+   target word size -- confirm in lightening/endian.h).  */
+CHOOSE_32_64(typedef int32_t jit_word_t,
+ typedef int64_t jit_word_t);
+/* Unsigned counterpart of jit_word_t.  */
+CHOOSE_32_64(typedef uint32_t jit_uword_t,
+ typedef uint64_t jit_uword_t);
+/* Scalar aliases; the names feed the jit_##type##_t pasting done by the
+   JIT_PROTO_* macros further down.  */
+typedef float jit_float32_t;
+typedef double jit_float64_t;
+typedef void* jit_pointer_t;
+typedef int jit_bool_t;
+
+/* Address, offset and immediate types used in instruction signatures.  */
+typedef void* jit_addr_t;
+typedef ptrdiff_t jit_off_t;
+typedef intptr_t jit_imm_t;
+typedef uintptr_t jit_uimm_t;
+
+/* General-purpose and floating-point register handles.  Each wraps a
+   register number in its own struct type, so the two kinds are distinct
+   types and cannot be passed in place of one another.  */
+typedef struct jit_gpr { uint8_t regno; } jit_gpr_t;
+typedef struct jit_fpr { uint8_t regno; } jit_fpr_t;
+
+// Precondition: regno between 0 and 63, inclusive.
+// Build a register handle from a register number (compound literal).
+#define JIT_GPR(regno) ((jit_gpr_t) { regno })
+#define JIT_FPR(regno) ((jit_fpr_t) { regno })
+
+/* Accessors returning the register number stored in a handle.  */
+static inline uint8_t jit_gpr_regno (jit_gpr_t reg) { return reg.regno; }
+static inline uint8_t jit_fpr_regno (jit_fpr_t reg) { return reg.regno; }
+
+/* Return nonzero when A and B carry the same general-purpose register
+   number, zero otherwise.  */
+static inline jit_bool_t
+jit_same_gprs (jit_gpr_t a, jit_gpr_t b)
+{
+  uint8_t lhs = a.regno;
+  uint8_t rhs = b.regno;
+
+  return lhs == rhs;
+}
+
+/* Return nonzero when A and B carry the same floating-point register
+   number, zero otherwise.  */
+static inline jit_bool_t
+jit_same_fprs (jit_fpr_t a, jit_fpr_t b)
+{
+  uint8_t lhs = a.regno;
+  uint8_t rhs = b.regno;
+
+  return lhs == rhs;
+}
+
+#if defined(__i386__) || defined(__x86_64__)
+# include "lightening/x86.h"
+#elif defined(__mips__)
+# include "lightening/mips.h"
+#elif defined(__arm__)
+# include "lightening/arm.h"
+#elif defined(__ppc__) || defined(__powerpc__)
+# include "lightening/ppc.h"
+#elif defined(__aarch64__)
+# include "lightening/aarch64.h"
+#elif defined(__s390__) || defined(__s390x__)
+# include "lightening/s390.h"
+#endif
+
+/* Fallback values for tunables that the architecture header included
+   above may already have defined; each one is set only if still
+   undefined at this point.  The meanings suggested below are inferred
+   from the names only -- confirm against the backends.  */
+
+/* Presumably extra space a backend wants reserved; none by default.  */
+#ifndef JIT_EXTRA_SPACE
+#define JIT_EXTRA_SPACE 0
+#endif
+
+/* Presumably an upper bound on the size of a jump; default 4 bytes.  */
+#ifndef JIT_JMP_MAX_SIZE
+#define JIT_JMP_MAX_SIZE sizeof(uint32_t)
+#endif
+
+/* Presumably an upper bound on the size of one literal; default is two
+   pointer-sized words.  */
+#ifndef JIT_LITERAL_MAX_SIZE
+#define JIT_LITERAL_MAX_SIZE (sizeof(uintptr_t) * 2)
+#endif
+
+/* Presumably an upper bound on the size of one instruction; default
+   4 bytes.  */
+#ifndef JIT_INST_MAX_SIZE
+#define JIT_INST_MAX_SIZE sizeof(uint32_t)
+#endif
+
+/* Presumably the call-time stack alignment, in words; default 1.  */
+#ifndef JIT_CALL_STACK_ALIGN_WORD
+#define JIT_CALL_STACK_ALIGN_WORD 1
+#endif
+
+/* Kinds of relocation.  The numeric values of the enumerators depend on
+   which of JIT_NEEDS_LITERAL_POOL and JIT_USE_IMMEDIATE_RELOC are
+   defined, since the conditional entries shift the ones after them.  */
+enum jit_reloc_kind
+{
+ JIT_RELOC_ABSOLUTE,
+ JIT_RELOC_REL8,
+ JIT_RELOC_REL16,
+ JIT_RELOC_REL32,
+ JIT_RELOC_REL64,
+#ifdef JIT_NEEDS_LITERAL_POOL
+ /* Only available on targets that define JIT_NEEDS_LITERAL_POOL.  */
+ JIT_RELOC_JMP_WITH_VENEER,
+ JIT_RELOC_JCC_WITH_VENEER,
+ JIT_RELOC_LOAD_FROM_POOL,
+#endif
+#ifdef JIT_USE_IMMEDIATE_RELOC
+ /* Only available on targets that define JIT_USE_IMMEDIATE_RELOC.  */
+ JIT_RELOC_IMMEDIATE,
+#endif
+ /* NOTE(review): MASK (0x0f) and FLAG_0 (0x10) look like a bit layout
+    for a kind field -- low four bits for the kind, bit 4 as a flag;
+    confirm against the users of jit_reloc_t.kind.  */
+ JIT_RELOC_MASK = 15,
+ JIT_RELOC_FLAG_0 = 16,
+};
+
+/* A relocation record, returned by value from the branch and
+   mov_addr style instructions and consumed by jit_patch_here and
+   jit_patch_there.  Field meanings below are inferred from the names;
+   confirm against lightening.c.  */
+typedef struct jit_reloc
+{
+ uint8_t kind; /* presumably an enum jit_reloc_kind value, plus flags */
+ uint8_t inst_start_offset; /* presumably relative to `offset' */
+ uint8_t pc_base_offset; /* presumably relative to `offset' */
+ uint8_t rsh; /* presumably a right-shift applied when patching */
+ uint32_t offset; /* presumably a byte offset into the code buffer */
+} jit_reloc_t;
+
+/* Linkage marker for the public functions declared in this header.  On
+   GCC 4 or newer (and compilers that define __GNUC__ compatibly) the
+   symbols get hidden ELF visibility; elsewhere the marker is plain
+   `extern'.  */
+#if defined(__GNUC__) && (__GNUC__ >= 4)
+# define JIT_API extern __attribute__ ((__visibility__("hidden")))
+#else
+# define JIT_API extern
+#endif
+
+typedef struct jit_state jit_state_t;
+
+/* The C-level type of an operand, stored in jit_operand_t.abi.  */
+enum jit_operand_abi
+{
+ JIT_OPERAND_ABI_UINT8,
+ JIT_OPERAND_ABI_INT8,
+ JIT_OPERAND_ABI_UINT16,
+ JIT_OPERAND_ABI_INT16,
+ JIT_OPERAND_ABI_UINT32,
+ JIT_OPERAND_ABI_INT32,
+ JIT_OPERAND_ABI_UINT64,
+ JIT_OPERAND_ABI_INT64,
+ JIT_OPERAND_ABI_POINTER,
+ JIT_OPERAND_ABI_FLOAT,
+ JIT_OPERAND_ABI_DOUBLE,
+ /* Alias, not a new value: the signed 32- or 64-bit entry, whichever
+    CHOOSE_32_64 selects (mirrors the jit_word_t typedef).  */
+ JIT_OPERAND_ABI_WORD = CHOOSE_32_64(JIT_OPERAND_ABI_INT32,
+ JIT_OPERAND_ABI_INT64)
+};
+
+/* Where an operand lives; selects the active member of
+   jit_operand_t.loc.  */
+enum jit_operand_kind
+{
+ JIT_OPERAND_KIND_IMM, /* constant, in loc.imm */
+ JIT_OPERAND_KIND_GPR, /* general-purpose register plus addend, in loc.gpr */
+ JIT_OPERAND_KIND_FPR, /* floating-point register, in loc.fpr */
+ JIT_OPERAND_KIND_MEM, /* base register + offset, plus addend, in loc.mem */
+#ifdef JIT_PASS_DOUBLES_IN_GPR_PAIRS
+ JIT_OPERAND_KIND_GPR_PAIR, /* two general-purpose registers, in loc.gpr_pair */
+#endif
+};
+
+/* An operand of a call or argument shuffle: its C-level type (abi), the
+   kind of location it lives in (kind), and the location itself (loc).
+   `kind' tells which member of `loc' is meaningful.  Member order is
+   part of the interface: the jit_operand_* constructors initialise this
+   struct positionally.  */
+typedef struct jit_operand
+{
+  enum jit_operand_abi abi;
+  enum jit_operand_kind kind;
+  union
+  {
+    /* JIT_OPERAND_KIND_IMM: the constant itself.  */
+    intptr_t imm;
+    /* JIT_OPERAND_KIND_GPR: a register and an addend.  */
+    struct { jit_gpr_t gpr; ptrdiff_t addend; } gpr;
+    /* JIT_OPERAND_KIND_FPR: a floating-point register, plus a
+       general-purpose register on targets that set
+       JIT_PASS_FLOATS_IN_GPRS.  */
+    struct { jit_fpr_t fpr;
+#if JIT_PASS_FLOATS_IN_GPRS
+      jit_gpr_t gpr;
+#endif
+    } fpr;
+    /* JIT_OPERAND_KIND_MEM: base register, offset, and an addend.  */
+    struct { jit_gpr_t base; ptrdiff_t offset; ptrdiff_t addend; } mem;
+    /* JIT_OPERAND_KIND_GPR_PAIR.  Guarded with #ifdef, exactly like the
+       enumerator and jit_operand_gpr_pair, so the three always appear
+       or disappear together whatever value the macro is defined to.  */
+#ifdef JIT_PASS_DOUBLES_IN_GPR_PAIRS
+    struct { jit_gpr_t l; jit_gpr_t h; } gpr_pair;
+#endif
+  } loc;
+} jit_operand_t;
+
+/* Build an immediate operand of type ABI holding the constant IMM.  */
+static inline jit_operand_t
+jit_operand_imm (enum jit_operand_abi abi, jit_imm_t imm)
+{
+  jit_operand_t op = {
+    .abi = abi,
+    .kind = JIT_OPERAND_KIND_IMM,
+    .loc = { .imm = imm },
+  };
+
+  return op;
+}
+
+/* Build a register operand of type ABI that names GPR and carries
+   ADDEND alongside it.  */
+static inline jit_operand_t
+jit_operand_gpr_with_addend (enum jit_operand_abi abi, jit_gpr_t gpr,
+                             ptrdiff_t addend)
+{
+  jit_operand_t op = {
+    .abi = abi,
+    .kind = JIT_OPERAND_KIND_GPR,
+    .loc = { .gpr = { .gpr = gpr, .addend = addend } },
+  };
+
+  return op;
+}
+
+/* Build a register operand of type ABI that names GPR, with a zero
+   addend.  */
+static inline jit_operand_t
+jit_operand_gpr (enum jit_operand_abi abi, jit_gpr_t gpr)
+{
+  const ptrdiff_t no_addend = 0;
+
+  return jit_operand_gpr_with_addend (abi, gpr, no_addend);
+}
+
+/* Build a floating-point register operand of type ABI that names FPR.
+   Any other member of the fpr location is left zero-initialised.  */
+static inline jit_operand_t
+jit_operand_fpr (enum jit_operand_abi abi, jit_fpr_t fpr)
+{
+  jit_operand_t op = {
+    .abi = abi,
+    .kind = JIT_OPERAND_KIND_FPR,
+    .loc = { .fpr = { .fpr = fpr } },
+  };
+
+  return op;
+}
+
+/* Build a memory operand of type ABI located at BASE + OFFSET and
+   carrying ADDEND alongside it.  */
+static inline jit_operand_t
+jit_operand_mem_with_addend (enum jit_operand_abi abi, jit_gpr_t base,
+                             ptrdiff_t offset, ptrdiff_t addend)
+{
+  jit_operand_t op = {
+    .abi = abi,
+    .kind = JIT_OPERAND_KIND_MEM,
+    .loc = { .mem = { .base = base, .offset = offset, .addend = addend } },
+  };
+
+  return op;
+}
+
+/* Build a memory operand of type ABI located at BASE + OFFSET, with a
+   zero addend.  */
+static inline jit_operand_t
+jit_operand_mem (enum jit_operand_abi abi, jit_gpr_t base, ptrdiff_t offset)
+{
+  const ptrdiff_t no_addend = 0;
+
+  return jit_operand_mem_with_addend (abi, base, offset, no_addend);
+}
+
+#ifdef JIT_PASS_DOUBLES_IN_GPR_PAIRS
+/* Build an operand of type ABI held in the register pair L, H.  Only
+   available when JIT_PASS_DOUBLES_IN_GPR_PAIRS is defined.  */
+static inline jit_operand_t
+jit_operand_gpr_pair(enum jit_operand_abi abi, jit_gpr_t l, jit_gpr_t h)
+{
+  jit_operand_t op = {
+    .abi = abi,
+    .kind = JIT_OPERAND_KIND_GPR_PAIR,
+    .loc = { .gpr_pair = { .l = l, .h = h } },
+  };
+
+  return op;
+}
+#endif
+
+/* Return a copy of OP whose addend has been increased by ADDEND.  Only
+   GPR and MEM operands carry an addend; any other kind aborts.  */
+static inline jit_operand_t
+jit_operand_addi (jit_operand_t op, ptrdiff_t addend)
+{
+  if (op.kind == JIT_OPERAND_KIND_GPR)
+    {
+      ptrdiff_t total = op.loc.gpr.addend + addend;
+      return jit_operand_gpr_with_addend (op.abi, op.loc.gpr.gpr, total);
+    }
+
+  if (op.kind == JIT_OPERAND_KIND_MEM)
+    {
+      ptrdiff_t total = op.loc.mem.addend + addend;
+      return jit_operand_mem_with_addend (op.abi, op.loc.mem.base,
+                                          op.loc.mem.offset, total);
+    }
+
+  /* No addend slot exists for this operand kind.  */
+  abort ();
+}
+
+JIT_API jit_bool_t init_jit(void);
+
+JIT_API jit_state_t *jit_new_state(void* (*alloc_fn)(size_t),
+ void (*free_fn)(void*));
+JIT_API void jit_destroy_state(jit_state_t*);
+
+JIT_API void jit_begin(jit_state_t*, uint8_t*, size_t);
+JIT_API jit_bool_t jit_has_overflow(jit_state_t*);
+JIT_API void jit_reset(jit_state_t*);
+JIT_API void* jit_end(jit_state_t*, size_t*);
+
+JIT_API void jit_align(jit_state_t*, unsigned);
+
+JIT_API jit_pointer_t jit_address(jit_state_t*);
+typedef void (*jit_function_pointer_t)();
+JIT_API jit_function_pointer_t jit_address_to_function_pointer(jit_pointer_t);
+JIT_API void jit_patch_here(jit_state_t*, jit_reloc_t);
+JIT_API void jit_patch_there(jit_state_t*, jit_reloc_t, jit_pointer_t);
+
+JIT_API void jit_move_operands (jit_state_t *_jit, jit_operand_t *dst,
+ jit_operand_t *src, size_t argc);
+
+JIT_API size_t jit_align_stack (jit_state_t *_jit, size_t expand);
+JIT_API void jit_shrink_stack (jit_state_t *_jit, size_t diff);
+
+JIT_API size_t jit_enter_jit_abi (jit_state_t *_jit,
+ size_t v, size_t vf, size_t frame_size);
+JIT_API void jit_leave_jit_abi (jit_state_t *_jit,
+ size_t v, size_t vf, size_t frame_size);
+
+/* Note that all functions that take jit_operand_t args[] use the args
+ as scratch space while shuffling values into position. */
+JIT_API void jit_calli(jit_state_t *, jit_pointer_t f,
+ size_t argc, jit_operand_t args[]);
+JIT_API void jit_callr(jit_state_t *, jit_gpr_t f,
+ size_t argc, jit_operand_t args[]);
+JIT_API void jit_locate_args(jit_state_t*, size_t argc, jit_operand_t args[]);
+JIT_API void jit_load_args(jit_state_t*, size_t argc, jit_operand_t dst[]);
+
+/* Forward to jit_calli with target F and an empty argument list.  */
+static inline void
+jit_calli_0(jit_state_t *_jit, jit_pointer_t f)
+{
+  /* Plain call: ISO C does not allow `return expr;' in a void function,
+     even when expr itself has type void.  */
+  jit_calli(_jit, f, 0, NULL);
+}
+
+/* Forward to jit_calli with target F and the single operand ARG.  The
+   operand is copied into a local array, so jit_calli is free to use
+   that array as scratch space.  */
+static inline void
+jit_calli_1(jit_state_t *_jit, jit_pointer_t f, jit_operand_t arg)
+{
+  jit_operand_t args[] = { arg };
+  /* Plain call: ISO C does not allow `return expr;' in a void function.  */
+  jit_calli(_jit, f, 1, args);
+}
+
+/* Forward to jit_calli with target F and the operands A, B, in that
+   order.  The operands are copied into a local array, so jit_calli is
+   free to use that array as scratch space.  */
+static inline void
+jit_calli_2(jit_state_t *_jit, jit_pointer_t f, jit_operand_t a,
+            jit_operand_t b)
+{
+  jit_operand_t args[] = { a, b };
+  /* Plain call: ISO C does not allow `return expr;' in a void function.  */
+  jit_calli(_jit, f, 2, args);
+}
+
+/* Forward to jit_calli with target F and the operands A, B, C, in that
+   order.  The operands are copied into a local array, so jit_calli is
+   free to use that array as scratch space.  */
+static inline void
+jit_calli_3(jit_state_t *_jit, jit_pointer_t f, jit_operand_t a,
+            jit_operand_t b, jit_operand_t c)
+{
+  jit_operand_t args[] = { a, b, c };
+  /* Plain call: ISO C does not allow `return expr;' in a void function.  */
+  jit_calli(_jit, f, 3, args);
+}
+
+/* Forward to jit_callr with the target in register F and an empty
+   argument list.  */
+static inline void
+jit_callr_0(jit_state_t *_jit, jit_gpr_t f)
+{
+  /* Plain call: ISO C does not allow `return expr;' in a void function,
+     even when expr itself has type void.  */
+  jit_callr(_jit, f, 0, NULL);
+}
+
+/* Forward to jit_callr with the target in register F and the single
+   operand ARG.  The operand is copied into a local array, so jit_callr
+   is free to use that array as scratch space.  */
+static inline void
+jit_callr_1(jit_state_t *_jit, jit_gpr_t f, jit_operand_t arg)
+{
+  jit_operand_t args[] = { arg };
+  /* Plain call: ISO C does not allow `return expr;' in a void function.  */
+  jit_callr(_jit, f, 1, args);
+}
+
+/* Forward to jit_callr with the target in register F and the operands
+   A, B, in that order.  The operands are copied into a local array, so
+   jit_callr is free to use that array as scratch space.  */
+static inline void
+jit_callr_2(jit_state_t *_jit, jit_gpr_t f, jit_operand_t a, jit_operand_t b)
+{
+  jit_operand_t args[] = { a, b };
+  /* Plain call: ISO C does not allow `return expr;' in a void function.  */
+  jit_callr(_jit, f, 2, args);
+}
+
+/* Forward to jit_callr with the target in register F and the operands
+   A, B, C, in that order.  The operands are copied into a local array,
+   so jit_callr is free to use that array as scratch space.  */
+static inline void
+jit_callr_3(jit_state_t *_jit, jit_gpr_t f, jit_operand_t a, jit_operand_t b,
+            jit_operand_t c)
+{
+  jit_operand_t args[] = { a, b, c };
+  /* Plain call: ISO C does not allow `return expr;' in a void function.  */
+  jit_callr(_jit, f, 3, args);
+}
+
+/* Forward to jit_load_args with the single destination operand A.  The
+   operand is copied into a local array, so jit_load_args is free to
+   use that array as scratch space.  */
+static inline void
+jit_load_args_1(jit_state_t *_jit, jit_operand_t a)
+{
+  jit_operand_t args[] = { a };
+  /* Plain call: ISO C does not allow `return expr;' in a void function.  */
+  jit_load_args(_jit, 1, args);
+}
+
+/* Forward to jit_load_args with the destination operands A, B, in that
+   order.  The operands are copied into a local array, so jit_load_args
+   is free to use that array as scratch space.  */
+static inline void
+jit_load_args_2(jit_state_t *_jit, jit_operand_t a, jit_operand_t b)
+{
+  jit_operand_t args[] = { a, b };
+  /* Plain call: ISO C does not allow `return expr;' in a void function.  */
+  jit_load_args(_jit, 2, args);
+}
+
+/* Forward to jit_load_args with the destination operands A, B, C, in
+   that order.  The operands are copied into a local array, so
+   jit_load_args is free to use that array as scratch space.  */
+static inline void
+jit_load_args_3(jit_state_t *_jit, jit_operand_t a, jit_operand_t b,
+                jit_operand_t c)
+{
+  jit_operand_t args[] = { a, b, c };
+  /* Plain call: ISO C does not allow `return expr;' in a void function.  */
+  jit_load_args(_jit, 3, args);
+}
+
+/* Prototype builders.  JIT_PROTO_<n>(stem, ret, types...) expands to the
+   prototype of jit_<stem>: return type RET, a leading jit_state_t*
+   parameter named _jit, then N parameters a, b, c, d whose types are
+   jit_<type>_t.  */
+#define JIT_PROTO_0(stem, ret) \
+ ret jit_##stem (jit_state_t* _jit)
+#define JIT_PROTO_1(stem, ret, ta) \
+ ret jit_##stem (jit_state_t* _jit, jit_##ta##_t a)
+#define JIT_PROTO_2(stem, ret, ta, tb) \
+ ret jit_##stem (jit_state_t* _jit, jit_##ta##_t a, jit_##tb##_t b)
+#define JIT_PROTO_3(stem, ret, ta, tb, tc) \
+ ret jit_##stem (jit_state_t* _jit, jit_##ta##_t a, jit_##tb##_t b, jit_##tc##_t c)
+#define JIT_PROTO_4(stem, ret, ta, tb, tc, td) \
+ ret jit_##stem (jit_state_t* _jit, jit_##ta##_t a, jit_##tb##_t b, jit_##tc##_t c, jit_##td##_t d)
+
+/* Signature shorthands, named JIT_PROTO_ followed by a five-character
+   code.  The first character is the return type: `R' for jit_reloc_t,
+   `_' for void.  The other four are the parameters in order, padded
+   with `_':
+     G jit_gpr_t      F jit_fpr_t      i jit_imm_t      u jit_uimm_t
+     o jit_off_t      p jit_pointer_t  d jit_float64_t  f jit_float32_t
+   FOR_EACH_INSTRUCTION pairs each instruction name with one of these
+   codes.  */
+#define JIT_PROTO_RFF__(stem) JIT_PROTO_2(stem, jit_reloc_t, fpr, fpr)
+#define JIT_PROTO_RGG__(stem) JIT_PROTO_2(stem, jit_reloc_t, gpr, gpr)
+#define JIT_PROTO_RG___(stem) JIT_PROTO_1(stem, jit_reloc_t, gpr)
+#define JIT_PROTO_RGi__(stem) JIT_PROTO_2(stem, jit_reloc_t, gpr, imm)
+#define JIT_PROTO_RGu__(stem) JIT_PROTO_2(stem, jit_reloc_t, gpr, uimm)
+#define JIT_PROTO_R____(stem) JIT_PROTO_0(stem, jit_reloc_t)
+#define JIT_PROTO__FFF_(stem) JIT_PROTO_3(stem, void, fpr, fpr, fpr)
+#define JIT_PROTO__FF__(stem) JIT_PROTO_2(stem, void, fpr, fpr)
+#define JIT_PROTO__FGG_(stem) JIT_PROTO_3(stem, void, fpr, gpr, gpr)
+#define JIT_PROTO__FG__(stem) JIT_PROTO_2(stem, void, fpr, gpr)
+#define JIT_PROTO__FGo_(stem) JIT_PROTO_3(stem, void, fpr, gpr, off)
+#define JIT_PROTO__F___(stem) JIT_PROTO_1(stem, void, fpr)
+#define JIT_PROTO__Fd__(stem) JIT_PROTO_2(stem, void, fpr, float64)
+#define JIT_PROTO__Ff__(stem) JIT_PROTO_2(stem, void, fpr, float32)
+#define JIT_PROTO__Fp__(stem) JIT_PROTO_2(stem, void, fpr, pointer)
+#define JIT_PROTO__GF__(stem) JIT_PROTO_2(stem, void, gpr, fpr)
+#define JIT_PROTO__GGF_(stem) JIT_PROTO_3(stem, void, gpr, gpr, fpr)
+#define JIT_PROTO__GGGG(stem) JIT_PROTO_4(stem, void, gpr, gpr, gpr, gpr)
+#define JIT_PROTO__GGG_(stem) JIT_PROTO_3(stem, void, gpr, gpr, gpr)
+#define JIT_PROTO__GGGi(stem) JIT_PROTO_4(stem, void, gpr, gpr, gpr, imm)
+#define JIT_PROTO__GGGu(stem) JIT_PROTO_4(stem, void, gpr, gpr, gpr, uimm)
+#define JIT_PROTO__GG__(stem) JIT_PROTO_2(stem, void, gpr, gpr)
+#define JIT_PROTO__GGi_(stem) JIT_PROTO_3(stem, void, gpr, gpr, imm)
+#define JIT_PROTO__GGo_(stem) JIT_PROTO_3(stem, void, gpr, gpr, off)
+#define JIT_PROTO__GGu_(stem) JIT_PROTO_3(stem, void, gpr, gpr, uimm)
+#define JIT_PROTO__G___(stem) JIT_PROTO_1(stem, void, gpr)
+#define JIT_PROTO__Gi__(stem) JIT_PROTO_2(stem, void, gpr, imm)
+#define JIT_PROTO__Gp__(stem) JIT_PROTO_2(stem, void, gpr, pointer)
+#define JIT_PROTO______(stem) JIT_PROTO_0(stem, void)
+#define JIT_PROTO__i___(stem) JIT_PROTO_1(stem, void, imm)
+#define JIT_PROTO__oGF_(stem) JIT_PROTO_3(stem, void, off, gpr, fpr)
+#define JIT_PROTO__oGG_(stem) JIT_PROTO_3(stem, void, off, gpr, gpr)
+#define JIT_PROTO__pF__(stem) JIT_PROTO_2(stem, void, pointer, fpr)
+#define JIT_PROTO__pG__(stem) JIT_PROTO_2(stem, void, pointer, gpr)
+#define JIT_PROTO__p___(stem) JIT_PROTO_1(stem, void, pointer)
+
+#define FOR_EACH_INSTRUCTION(M) \
+ M(_GGG_, addr) \
+ M(_FFF_, addr_f) \
+ M(_FFF_, addr_d) \
+ M(_GGi_, addi) \
+ M(_GGG_, addcr) \
+ M(_GGi_, addci) \
+ M(_GGG_, addxr) \
+ M(_GGi_, addxi) \
+ M(_GGG_, subr) \
+ M(_FFF_, subr_f) \
+ M(_FFF_, subr_d) \
+ M(_GGi_, subi) \
+ M(_GGG_, subcr) \
+ M(_GGi_, subci) \
+ M(_GGG_, subxr) \
+ M(_GGi_, subxi) \
+ M(_GGG_, mulr) \
+ M(_FFF_, mulr_f) \
+ M(_FFF_, mulr_d) \
+ M(_GGi_, muli) \
+ M(_GGGG, qmulr) \
+ M(_GGGi, qmuli) \
+ M(_GGGG, qmulr_u) \
+ M(_GGGu, qmuli_u) \
+ M(_GGG_, divr) \
+ M(_FFF_, divr_f) \
+ M(_FFF_, divr_d) \
+ M(_GGi_, divi) \
+ M(_GGG_, divr_u) \
+ M(_GGu_, divi_u) \
+ M(_GGGG, qdivr) \
+ M(_GGGi, qdivi) \
+ M(_GGGG, qdivr_u) \
+ M(_GGGu, qdivi_u) \
+ M(_GGG_, remr) \
+ M(_GGi_, remi) \
+ M(_GGG_, remr_u) \
+ M(_GGu_, remi_u) \
+ \
+ M(_GGG_, andr) \
+ M(_GGu_, andi) \
+ M(_GGG_, orr) \
+ M(_GGu_, ori) \
+ M(_GGG_, xorr) \
+ M(_GGu_, xori) \
+ \
+ M(_GGG_, lshr) \
+ M(_GGu_, lshi) \
+ M(_GGG_, rshr) \
+ M(_GGu_, rshi) \
+ M(_GGG_, rshr_u) \
+ M(_GGu_, rshi_u) \
+ \
+ M(_GG__, negr) \
+ M(_GG__, comr) \
+ \
+ M(_GG__, movr) \
+ M(_Gi__, movi) \
+ M(RG___, mov_addr) \
+ M(_GG__, extr_c) \
+ M(_GG__, extr_uc) \
+ M(_GG__, extr_s) \
+ M(_GG__, extr_us) \
+ WHEN_64(M(_GG__, extr_i)) \
+ WHEN_64(M(_GG__, extr_ui)) \
+ \
+ M(_GG__, bswapr_us) \
+ M(_GG__, bswapr_ui) \
+ WHEN_64(M(_GG__, bswapr_ul)) \
+ \
+ M(_GG__, ldr_c) \
+ M(_Gp__, ldi_c) \
+ M(_GG__, ldr_uc) \
+ M(_Gp__, ldi_uc) \
+ M(_GG__, ldr_s) \
+ M(_Gp__, ldi_s) \
+ M(_GG__, ldr_us) \
+ M(_Gp__, ldi_us) \
+ M(_GG__, ldr_i) \
+ M(_Gp__, ldi_i) \
+ WHEN_64(M(_GG__, ldr_ui)) \
+ WHEN_64(M(_Gp__, ldi_ui)) \
+ WHEN_64(M(_GG__, ldr_l)) \
+ WHEN_64(M(_Gp__, ldi_l)) \
+ M(_FG__, ldr_f) \
+ M(_Fp__, ldi_f) \
+ M(_FG__, ldr_d) \
+ M(_Fp__, ldi_d) \
+ \
+ M(_GGG_, ldxr_c) \
+ M(_GGo_, ldxi_c) \
+ M(_GGG_, ldxr_uc) \
+ M(_GGo_, ldxi_uc) \
+ M(_GGG_, ldxr_s) \
+ M(_GGo_, ldxi_s) \
+ M(_GGG_, ldxr_us) \
+ M(_GGo_, ldxi_us) \
+ M(_GGG_, ldxr_i) \
+ M(_GGo_, ldxi_i) \
+ WHEN_64(M(_GGG_, ldxr_ui)) \
+ WHEN_64(M(_GGo_, ldxi_ui)) \
+ WHEN_64(M(_GGG_, ldxr_l)) \
+ WHEN_64(M(_GGo_, ldxi_l)) \
+ M(_FGG_, ldxr_f) \
+ M(_FGo_, ldxi_f) \
+ M(_FGG_, ldxr_d) \
+ M(_FGo_, ldxi_d) \
+ \
+ M(_GG__, ldr_atomic) \
+ M(_GG__, str_atomic) \
+ M(_GGG_, swap_atomic) \
+ M(_GGGG, cas_atomic) \
+ \
+ M(_GG__, str_c) \
+ M(_pG__, sti_c) \
+ M(_GG__, str_s) \
+ M(_pG__, sti_s) \
+ M(_GG__, str_i) \
+ M(_pG__, sti_i) \
+ WHEN_64(M(_GG__, str_l)) \
+ WHEN_64(M(_pG__, sti_l)) \
+ M(_GF__, str_f) \
+ M(_pF__, sti_f) \
+ M(_GF__, str_d) \
+ M(_pF__, sti_d) \
+ \
+ M(_GGG_, stxr_c) \
+ M(_oGG_, stxi_c) \
+ M(_GGG_, stxr_s) \
+ M(_oGG_, stxi_s) \
+ M(_GGG_, stxr_i) \
+ M(_oGG_, stxi_i) \
+ WHEN_64(M(_GGG_, stxr_l)) \
+ WHEN_64(M(_oGG_, stxi_l)) \
+ M(_GGF_, stxr_f) \
+ M(_oGF_, stxi_f) \
+ M(_GGF_, stxr_d) \
+ M(_oGF_, stxi_d) \
+ \
+ M(RGG__, bltr) \
+ M(RFF__, bltr_f) \
+ M(RFF__, bltr_d) \
+ M(RGi__, blti) \
+ M(RGG__, bltr_u) \
+ M(RGu__, blti_u) \
+ M(RGG__, bler) \
+ M(RFF__, bler_f) \
+ M(RFF__, bler_d) \
+ M(RGi__, blei) \
+ M(RGG__, bler_u) \
+ M(RGu__, blei_u) \
+ M(RGG__, beqr) \
+ M(RFF__, beqr_f) \
+ M(RFF__, beqr_d) \
+ M(RGi__, beqi) \
+ M(RGG__, bger) \
+ M(RFF__, bger_f) \
+ M(RFF__, bger_d) \
+ M(RGi__, bgei) \
+ M(RGG__, bger_u) \
+ M(RGu__, bgei_u) \
+ M(RGG__, bgtr) \
+ M(RFF__, bgtr_f) \
+ M(RFF__, bgtr_d) \
+ M(RGi__, bgti) \
+ M(RGG__, bgtr_u) \
+ M(RGu__, bgti_u) \
+ M(RGG__, bner) \
+ M(RFF__, bner_f) \
+ M(RFF__, bner_d) \
+ M(RGi__, bnei) \
+ \
+ M(RFF__, bunltr_f) \
+ M(RFF__, bunltr_d) \
+ M(RFF__, bunler_f) \
+ M(RFF__, bunler_d) \
+ M(RFF__, buneqr_f) \
+ M(RFF__, buneqr_d) \
+ M(RFF__, bunger_f) \
+ M(RFF__, bunger_d) \
+ M(RFF__, bungtr_f) \
+ M(RFF__, bungtr_d) \
+ M(RFF__, bltgtr_f) \
+ M(RFF__, bltgtr_d) \
+ M(RFF__, bordr_f) \
+ M(RFF__, bordr_d) \
+ M(RFF__, bunordr_f) \
+ M(RFF__, bunordr_d) \
+ \
+ M(RGG__, bmsr) \
+ M(RGu__, bmsi) \
+ M(RGG__, bmcr) \
+ M(RGu__, bmci) \
+ \
+ M(RGG__, boaddr) \
+ M(RGi__, boaddi) \
+ M(RGG__, boaddr_u) \
+ M(RGu__, boaddi_u) \
+ M(RGG__, bxaddr) \
+ M(RGi__, bxaddi) \
+ M(RGG__, bxaddr_u) \
+ M(RGu__, bxaddi_u) \
+ M(RGG__, bosubr) \
+ M(RGi__, bosubi) \
+ M(RGG__, bosubr_u) \
+ M(RGu__, bosubi_u) \
+ M(RGG__, bxsubr) \
+ M(RGi__, bxsubi) \
+ M(RGG__, bxsubr_u) \
+ M(RGu__, bxsubi_u) \
+ \
+ M(_G___, jmpr) \
+ M(_p___, jmpi) \
+ M(R____, jmp) \
+ \
+ M(_p___, jmpi_with_link) \
+ M(_____, pop_link_register) \
+ M(_____, push_link_register) \
+ \
+ M(_____, ret) \
+ M(_G___, retr) \
+ M(_F___, retr_f) \
+ M(_F___, retr_d) \
+ M(_i___, reti) \
+ M(_G___, retval_c) \
+ M(_G___, retval_uc) \
+ M(_G___, retval_s) \
+ M(_G___, retval_us) \
+ M(_G___, retval_i) \
+ WHEN_64(M(_G___, retval_ui)) \
+ WHEN_64(M(_G___, retval_l)) \
+ M(_F___, retval_f) \
+ M(_F___, retval_d) \
+ \
+ M(_____, breakpoint) \
+ \
+ M(_FF__, negr_f) \
+ M(_FF__, negr_d) \
+ M(_FF__, absr_f) \
+ M(_FF__, absr_d) \
+ M(_FF__, sqrtr_f) \
+ M(_FF__, sqrtr_d) \
+ \
+ M(_GF__, truncr_f_i) \
+ M(_FG__, extr_f) \
+ M(_FG__, extr_d) \
+ M(_FF__, extr_d_f) \
+ M(_FF__, extr_f_d) \
+ M(_FF__, movr_f) \
+ M(_FF__, movr_d) \
+ M(_Ff__, movi_f) \
+ M(_Fd__, movi_d) \
+ M(_GF__, truncr_d_i) \
+ WHEN_64(M(_GF__, truncr_f_l)) \
+ WHEN_64(M(_GF__, truncr_d_l)) \
+ /* EOL */
+
+/* Expand FOR_EACH_INSTRUCTION into one JIT_API declaration per entry:
+ * each entry's kind selects a JIT_PROTO_<kind> macro, which is applied
+ * to the entry's stem.  The helper macro is undefined again afterwards. */
+#define DECLARE_INSTRUCTION(kind, stem) JIT_API JIT_PROTO_##kind(stem);
+FOR_EACH_INSTRUCTION(DECLARE_INSTRUCTION)
+#undef DECLARE_INSTRUCTION
+
+/* Unsuffixed aliases for the word-sized operations: they forward to the
+ * _i/_ui variants when __WORDSIZE is 32 and to the _l/_ul variants
+ * otherwise. */
+#if __WORDSIZE == 32
+# define jit_ldr(j,u,v) jit_ldr_i(j,u,v)
+# define jit_ldi(j,u,v) jit_ldi_i(j,u,v)
+# define jit_ldxr(j,u,v,w) jit_ldxr_i(j,u,v,w)
+# define jit_ldxi(j,u,v,w) jit_ldxi_i(j,u,v,w)
+# define jit_str(j,u,v) jit_str_i(j,u,v)
+# define jit_sti(j,u,v) jit_sti_i(j,u,v)
+# define jit_stxr(j,u,v,w) jit_stxr_i(j,u,v,w)
+# define jit_stxi(j,u,v,w) jit_stxi_i(j,u,v,w)
+# define jit_retval(j,u) jit_retval_i(j,u)
+# define jit_bswapr(j,u,v) jit_bswapr_ui(j,u,v)
+# define jit_truncr_d(j,u,v) jit_truncr_d_i(j,u,v)
+# define jit_truncr_f(j,u,v) jit_truncr_f_i(j,u,v)
+#else
+# define jit_ldr(j,u,v) jit_ldr_l(j,u,v)
+# define jit_ldi(j,u,v) jit_ldi_l(j,u,v)
+# define jit_ldxr(j,u,v,w) jit_ldxr_l(j,u,v,w)
+# define jit_ldxi(j,u,v,w) jit_ldxi_l(j,u,v,w)
+# define jit_str(j,u,v) jit_str_l(j,u,v)
+# define jit_sti(j,u,v) jit_sti_l(j,u,v)
+# define jit_stxr(j,u,v,w) jit_stxr_l(j,u,v,w)
+# define jit_stxi(j,u,v,w) jit_stxi_l(j,u,v,w)
+# define jit_retval(j,u) jit_retval_l(j,u)
+# define jit_bswapr(j,u,v) jit_bswapr_ul(j,u,v)
+# define jit_truncr_d(j,u,v) jit_truncr_d_l(j,u,v)
+# define jit_truncr_f(j,u,v) jit_truncr_f_l(j,u,v)
+#endif
+
+void jit_begin_data(jit_state_t *, size_t max_size_or_zero);
+void jit_end_data(jit_state_t *);
+void jit_emit_u8(jit_state_t *, uint8_t);
+void jit_emit_u16(jit_state_t *, uint16_t);
+void jit_emit_u32(jit_state_t *, uint32_t);
+void jit_emit_u64(jit_state_t *, uint64_t);
+void jit_emit_ptr(jit_state_t *, void *);
+jit_reloc_t jit_emit_addr(jit_state_t *);
+
+#endif /* _jit_h */
diff --git a/deps/lightening/lightening/aarch64-cpu.c b/deps/lightening/lightening/aarch64-cpu.c
new file mode 100644
index 0000000..2094e35
--- /dev/null
+++ b/deps/lightening/lightening/aarch64-cpu.c
@@ -0,0 +1,2584 @@
+/*
+ * Copyright (C) 2013-2017, 2019 Free Software Foundation, Inc.
+ *
+ * This file is part of GNU lightning.
+ *
+ * GNU lightning is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ *
+ * GNU lightning is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+ * License for more details.
+ *
+ * Authors:
+ * Paulo Cesar Pereira de Andrade
+ */
+
+#if __BYTE_ORDER != __LITTLE_ENDIAN
+#error AArch64 requires little-endian host
+#endif
+
+/* Look up the 12-bit immr:imms field for a handful of common constants.
+ * Returns the field value to pass as Imm12 to the logical-immediate
+ * wrappers, or -1 when imm is not in the table. */
+static int32_t
+logical_immediate(jit_word_t imm)
+{
+  /* There are 5334 possible immediate values, but to avoid the
+   * need of either too complex code or large lookup tables,
+   * only check for (simply) encodable common/small values */
+  static const struct {
+    int value;
+    int32_t field;
+  } known[] = {
+    { -16, 0xf3b }, { -15, 0xf3c }, { -13, 0xf3d }, {  -9, 0xf3e },
+    {  -8, 0xf7c }, {  -7, 0xf7d }, {  -5, 0xf7e }, {  -4, 0xfbd },
+    {  -3, 0xfbe }, {  -2, 0xffe }, {   1, 0x000 }, {   2, 0xfc0 },
+    {   3, 0x001 }, {   4, 0xf80 }, {   6, 0xfc1 }, {   7, 0x002 },
+    {   8, 0xf40 }, {  12, 0xf81 }, {  14, 0xfc2 }, {  15, 0x003 },
+    {  16, 0xf00 },
+  };
+  unsigned idx;
+
+  for (idx = 0; idx < sizeof known / sizeof known[0]; idx++) {
+    if (imm == known[idx].value)
+      return known[idx].field;
+  }
+  return -1;
+}
+
+/* Emit Op with the Rd, Rn and Rm register fields filled in. */
+static void
+oxxx(jit_state_t *_jit, int32_t Op, int32_t Rd, int32_t Rn, int32_t Rm)
+{
+  const uint32_t base = (uint32_t) Op;
+  const uint32_t with_rd = write_Rd_bitfield(base, Rd);
+  const uint32_t with_rn = write_Rn_bitfield(with_rd, Rn);
+  const uint32_t insn = write_Rm_bitfield(with_rn, Rm);
+
+  emit_u32_with_pool(_jit, insn);
+}
+
+/* Emit Op with the Rd and Rn register fields and the imm12 field filled in. */
+static void
+oxxi(jit_state_t *_jit, int32_t Op, int32_t Rd, int32_t Rn, int32_t Imm12)
+{
+  const uint32_t base = (uint32_t) Op;
+  const uint32_t with_rd = write_Rd_bitfield(base, Rd);
+  const uint32_t with_rn = write_Rn_bitfield(with_rd, Rn);
+  const uint32_t insn = write_imm12_bitfield(with_rn, Imm12);
+
+  emit_u32_with_pool(_jit, insn);
+}
+
+/* Emit Op with the Rd and Rn register fields and the simm9 field filled in. */
+static void
+oxx9(jit_state_t *_jit, int32_t Op, int32_t Rd, int32_t Rn, int32_t Simm9)
+{
+  const uint32_t base = (uint32_t) Op;
+  const uint32_t with_rd = write_Rd_bitfield(base, Rd);
+  const uint32_t with_rn = write_Rn_bitfield(with_rd, Rn);
+  const uint32_t insn = write_simm9_bitfield(with_rn, Simm9);
+
+  emit_u32_with_pool(_jit, insn);
+}
+
+/* Build (without emitting) Op with the Rd field set; the remaining
+ * fields are left as they are in Op. */
+static uint32_t
+encode_ox19(int32_t Op, int32_t Rd)
+{
+  const uint32_t base = (uint32_t) Op;
+
+  return write_Rd_bitfield(base, Rd);
+}
+
+/* Build (without emitting) Op with the cond2 field set to Cc. */
+static uint32_t
+encode_oc19(int32_t Op, int32_t Cc)
+{
+  const uint32_t base = (uint32_t) Op;
+
+  return write_cond2_bitfield(base, Cc);
+}
+
+/* Return Op unchanged as an unsigned instruction word. */
+static uint32_t
+encode_o26(int32_t Op)
+{
+  return (uint32_t) Op;
+}
+
+/* Emit Op with the Rd and Rm register fields filled in. */
+static void
+ox_x(jit_state_t *_jit, int32_t Op, int32_t Rd, int32_t Rm)
+{
+  const uint32_t with_rd = write_Rd_bitfield((uint32_t) Op, Rd);
+  const uint32_t insn = write_Rm_bitfield(with_rd, Rm);
+
+  emit_u32_with_pool(_jit, insn);
+}
+
+/* Emit Op with the Rd and Rn register fields filled in. */
+static void
+o_xx(jit_state_t *_jit, int32_t Op, int32_t Rd, int32_t Rn)
+{
+  const uint32_t with_rd = write_Rd_bitfield((uint32_t) Op, Rd);
+  const uint32_t insn = write_Rn_bitfield(with_rd, Rn);
+
+  emit_u32_with_pool(_jit, insn);
+}
+
+/* Emit Op with the Rn and Rm register fields filled in. */
+static void
+oxx_(jit_state_t *_jit, int32_t Op, int32_t Rn, int32_t Rm)
+{
+  const uint32_t with_rn = write_Rn_bitfield((uint32_t) Op, Rn);
+  const uint32_t insn = write_Rm_bitfield(with_rn, Rm);
+
+  emit_u32_with_pool(_jit, insn);
+}
+
+/* Emit Op with only the Rn register field filled in. */
+static void
+o_x_(jit_state_t *_jit, int32_t Op, int32_t Rn)
+{
+  const uint32_t insn = write_Rn_bitfield((uint32_t) Op, Rn);
+
+  emit_u32_with_pool(_jit, insn);
+}
+
+/* Emit Op with the Rd register field and the imm16 field filled in. */
+static void
+ox_h(jit_state_t *_jit, int32_t Op, int32_t Rd, int32_t Imm16)
+{
+  const uint32_t with_rd = write_Rd_bitfield((uint32_t) Op, Rd);
+  const uint32_t insn = write_imm16_bitfield(with_rd, Imm16);
+
+  emit_u32_with_pool(_jit, insn);
+}
+
+/* Emit Op with Rd, Rn and the immr (R) and imms (S) fields filled in. */
+static void
+oxxrs(jit_state_t *_jit, int32_t Op,
+      int32_t Rd, int32_t Rn, int32_t R, int32_t S)
+{
+  const uint32_t with_rd = write_Rd_bitfield((uint32_t) Op, Rd);
+  const uint32_t with_rn = write_Rn_bitfield(with_rd, Rn);
+  const uint32_t with_immr = write_immr_bitfield(with_rn, R);
+  const uint32_t insn = write_imms_bitfield(with_immr, S);
+
+  emit_u32_with_pool(_jit, insn);
+}
+
+#define XZR_REGNO 0x1f
+#define WZR_REGNO XZR_REGNO
+#define LSL_12 0x00400000
+#define MOVI_LSL_16 0x00200000
+#define MOVI_LSL_32 0x00400000
+#define MOVI_LSL_48 0x00600000
+#define XS 0x80000000 /* Wn -> Xn */
+#define BCC_EQ 0x0
+#define BCC_NE 0x1
+#define BCC_CS 0x2
+#define BCC_HS BCC_CS
+#define BCC_CC 0x3
+#define BCC_LO BCC_CC
+#define BCC_MI 0x4
+#define BCC_PL 0x5
+#define BCC_VS 0x6
+#define BCC_VC 0x7
+#define BCC_HI 0x8
+#define BCC_LS 0x9
+#define BCC_GE 0xa
+#define BCC_LT 0xb
+#define BCC_GT 0xc
+#define BCC_LE 0xd
+#define BCC_AL 0xe
+#define BCC_NV 0xf
+/* adapted and cut down to only tested and required by lightening,
+ * from data in binutils/aarch64-tbl.h */
+#define A64_ADCS 0x3a000000
+#define A64_SBCS 0x7a000000
+#define A64_ADDI 0x11000000
+/* NOTE: unlike A64_ADDI, A64_SUBI and A64_SUBSI, this value already has
+ * bit 31 (the XS bit) set; the ADDSI/CMNI wrappers OR XS in regardless. */
+#define A64_ADDSI 0xb1000000
+#define A64_SUBI 0x51000000
+#define A64_SUBSI 0x71000000
+#define A64_ADD 0x0b000000
+#define A64_ADDS 0x2b000000
+#define A64_SUB 0x4b000000
+#define A64_NEG 0x4b0003e0
+#define A64_SUBS 0x6b000000
+#define A64_CMP 0x6b00001f
+#define A64_SBFM 0x93400000
+#define A64_UBFM 0x53400000
+#define A64_UBFX 0x53000000
+#define A64_B 0x14000000
+#define A64_BL 0x94000000
+#define A64_BR 0xd61f0000
+#define A64_BLR 0xd63f0000
+#define A64_RET 0xd65f0000
+#define A64_CBZ 0x34000000
+#define A64_CBNZ 0x35000000
+#define A64_B_C 0x54000000
+#define A64_REV 0xdac00c00
+#define A64_UDIV 0x1ac00800
+#define A64_SDIV 0x1ac00c00
+#define A64_LSL 0x1ac02000
+#define A64_LSR 0x1ac02400
+#define A64_ASR 0x1ac02800
+#define A64_MUL 0x1b007c00
+#define A64_SMULH 0x9b407c00
+#define A64_UMULH 0x9bc07c00
+#define A64_LDAR 0xc8dffc00
+#define A64_STLR 0xc89ffc00
+#define A64_LDAXR 0xc85ffc00
+#define A64_STLXR 0xc800fc00
+#define A64_STRBI 0x39000000
+#define A64_LDRBI 0x39400000
+#define A64_LDRSBI 0x39800000
+#define A64_STRI 0xf9000000
+#define A64_LDRI 0xf9400000
+#define A64_LDRI_LITERAL 0x58000000
+#define A64_STRHI 0x79000000
+#define A64_LDRHI 0x79400000
+#define A64_LDRSHI 0x79800000
+#define A64_STRWI 0xb9000000
+#define A64_LDRWI 0xb9400000
+#define A64_LDRSWI 0xb9800000
+#define A64_STRB 0x38206800
+#define A64_LDRB 0x38606800
+#define A64_LDRSB 0x38e06800
+#define A64_STR 0xf8206800
+#define A64_LDR 0xf8606800
+#define A64_STRH 0x78206800
+#define A64_LDRH 0x78606800
+#define A64_LDRSH 0x78a06800
+#define A64_STRW 0xb8206800
+#define A64_LDRW 0xb8606800
+#define A64_LDRSW 0xb8a06800
+#define A64_STURB 0x38000000
+#define A64_LDURB 0x38400000
+#define A64_LDURSB 0x38800000
+#define A64_STUR 0xf8000000
+#define A64_LDUR 0xf8400000
+#define A64_STURH 0x78000000
+#define A64_LDURH 0x78400000
+#define A64_LDURSH 0x78800000
+#define A64_STURW 0xb8000000
+#define A64_LDURW 0xb8400000
+#define A64_LDURSW 0xb8800000
+#define A64_ANDI 0x12400000
+#define A64_ORRI 0x32400000
+#define A64_EORI 0x52400000
+#define A64_ANDSI 0x72000000
+#define A64_AND 0x0a000000
+#define A64_ORR 0x2a000000
+#define A64_MOV 0x2a0003e0 /* AKA orr Rd,xzr,Rm */
+#define A64_MVN 0x2a2003e0
+#define A64_UXTW 0x2a0003e0 /* AKA MOV */
+#define A64_EOR 0x4a000000
+#define A64_ANDS 0x6a000000
+#define A64_MOVN 0x12800000
+#define A64_MOVZ 0x52800000
+#define A64_MOVK 0x72800000
+#define A64_BRK 0xd4200000
+
+/* Emit SBFM (signed bitfield move) with the XS bit set, using ImmR/ImmS
+ * as the immr/imms fields. */
+static void
+SBFM(jit_state_t *_jit, int32_t Rd, int32_t Rn, int32_t ImmR, int32_t ImmS)
+{
+  oxxrs(_jit, A64_SBFM|XS, Rd, Rn, ImmR, ImmS);
+}
+
+/* Emit UBFM (unsigned bitfield move) with the XS bit set. */
+static void
+UBFM(jit_state_t *_jit, int32_t Rd, int32_t Rn, int32_t ImmR, int32_t ImmS)
+{
+  oxxrs(_jit, A64_UBFM|XS, Rd, Rn, ImmR, ImmS);
+}
+
+/* Emit the A64_UBFX opcode as defined; note that XS is not OR-ed in here. */
+static void
+UBFX(jit_state_t *_jit, int32_t Rd, int32_t Rn, int32_t ImmR, int32_t ImmS)
+{
+  oxxrs(_jit, A64_UBFX, Rd, Rn, ImmR, ImmS);
+}
+
+/* Compare Rn with Rm.  A64_CMP already has the destination field set to
+ * 0x1f, so only Rn and Rm are written. */
+static void
+CMP(jit_state_t *_jit, int32_t Rn, int32_t Rm)
+{
+  oxx_(_jit, A64_CMP|XS, Rn, Rm);
+}
+
+/* Compare Rn with a 12-bit immediate: SUBS (immediate) targeting XZR. */
+static void
+CMPI(jit_state_t *_jit, int32_t Rn, int32_t Imm12)
+{
+  oxxi(_jit, A64_SUBSI|XS, XZR_REGNO, Rn, Imm12);
+}
+
+/* As CMPI, with the LSL_12 flag set on the immediate. */
+static void
+CMPI_12(jit_state_t *_jit, int32_t Rn, int32_t Imm12)
+{
+  oxxi(_jit, A64_SUBSI|XS|LSL_12, XZR_REGNO, Rn, Imm12);
+}
+
+/* Compare-negative of Rn with a 12-bit immediate: ADDS (immediate)
+ * targeting XZR. */
+static void
+CMNI(jit_state_t *_jit, int32_t Rn, int32_t Imm12)
+{
+  oxxi(_jit, A64_ADDSI|XS, XZR_REGNO, Rn, Imm12);
+}
+
+/* As CMNI, with the LSL_12 flag set on the immediate. */
+static void
+CMNI_12(jit_state_t *_jit, int32_t Rn, int32_t Imm12)
+{
+  oxxi(_jit, A64_ADDSI|XS|LSL_12, XZR_REGNO, Rn, Imm12);
+}
+
+/* Test bits: ANDS (register) of Rn and Rm with the result sent to XZR. */
+static void
+TST(jit_state_t *_jit, int32_t Rn, int32_t Rm)
+{
+  oxxx(_jit, A64_ANDS|XS, XZR_REGNO, Rn, Rm);
+}
+
+/* actually should use oxxrs but logical_immediate returns proper encoding.
+ *
+ * Test bits against a logical immediate: ANDS (immediate) with the result
+ * sent to XZR.  Imm12 is an immr:imms value as returned by
+ * logical_immediate, whose table describes 64-bit patterns that are only
+ * valid with the N bit (bit 22) set.  A64_ANDSI is the bare 32-bit, N=0
+ * opcode, so XS and N are OR-ed in here, matching what A64_ANDI,
+ * A64_ORRI and A64_EORI (which already contain N) get in their wrappers. */
+static void
+TSTI(jit_state_t *_jit, int32_t Rn, int32_t Imm12)
+{
+  oxxi(_jit, A64_ANDSI|XS|0x00400000 /* N */, XZR_REGNO, Rn, Imm12);
+}
+
+/* Register move Rd = Rm (A64_MOV is ORR with XZR as first operand). */
+static void
+MOV(jit_state_t *_jit, int32_t Rd, int32_t Rm)
+{
+  ox_x(_jit, A64_MOV|XS, Rd, Rm);
+}
+
+/* Emit MVN Rd, Rm with the XS bit set. */
+static void
+MVN(jit_state_t *_jit, int32_t Rd, int32_t Rm)
+{
+  ox_x(_jit, A64_MVN|XS, Rd, Rm);
+}
+
+/* Emit NEG Rd, Rm with the XS bit set. */
+static void
+NEG(jit_state_t *_jit, int32_t Rd, int32_t Rm)
+{
+  ox_x(_jit, A64_NEG|XS, Rd, Rm);
+}
+
+/* MOVN family: emit MOVN with Imm16 in the imm16 field.  The _16, _32
+ * and _48 variants add the matching MOVI_LSL_* shift flag. */
+static void
+MOVN(jit_state_t *_jit, int32_t Rd, int32_t Imm16)
+{
+  ox_h(_jit, A64_MOVN|XS, Rd, Imm16);
+}
+
+static void
+MOVN_16(jit_state_t *_jit, int32_t Rd, int32_t Imm16)
+{
+  ox_h(_jit, A64_MOVN|XS|MOVI_LSL_16, Rd, Imm16);
+}
+
+static void
+MOVN_32(jit_state_t *_jit, int32_t Rd, int32_t Imm16)
+{
+  ox_h(_jit, A64_MOVN|XS|MOVI_LSL_32, Rd, Imm16);
+}
+
+static void
+MOVN_48(jit_state_t *_jit, int32_t Rd, int32_t Imm16)
+{
+  ox_h(_jit, A64_MOVN|XS|MOVI_LSL_48, Rd, Imm16);
+}
+
+/* MOVZ family: emit MOVZ with Imm16 in the imm16 field.  The _16, _32
+ * and _48 variants add the matching MOVI_LSL_* shift flag. */
+static void
+MOVZ(jit_state_t *_jit, int32_t Rd, int32_t Imm16)
+{
+  ox_h(_jit, A64_MOVZ|XS, Rd, Imm16);
+}
+
+static void
+MOVZ_16(jit_state_t *_jit, int32_t Rd, int32_t Imm16)
+{
+  ox_h(_jit, A64_MOVZ|XS|MOVI_LSL_16, Rd, Imm16);
+}
+
+static void
+MOVZ_32(jit_state_t *_jit, int32_t Rd, int32_t Imm16)
+{
+  ox_h(_jit, A64_MOVZ|XS|MOVI_LSL_32, Rd, Imm16);
+}
+
+static void
+MOVZ_48(jit_state_t *_jit, int32_t Rd, int32_t Imm16)
+{
+  ox_h(_jit, A64_MOVZ|XS|MOVI_LSL_48, Rd, Imm16);
+}
+
+/* MOVK family: emit MOVK with Imm16 in the imm16 field and the matching
+ * MOVI_LSL_* shift flag. */
+static void
+MOVK_16(jit_state_t *_jit, int32_t Rd, int32_t Imm16)
+{
+  ox_h(_jit, A64_MOVK|XS|MOVI_LSL_16, Rd, Imm16);
+}
+
+static void
+MOVK_32(jit_state_t *_jit, int32_t Rd, int32_t Imm16)
+{
+  ox_h(_jit, A64_MOVK|XS|MOVI_LSL_32, Rd, Imm16);
+}
+
+static void
+MOVK_48(jit_state_t *_jit, int32_t Rd, int32_t Imm16)
+{
+  ox_h(_jit, A64_MOVK|XS|MOVI_LSL_48, Rd, Imm16);
+}
+
+/* Addition wrappers; all set the XS bit.  The *S forms use the ADDS/ADCS
+ * opcodes, the *I forms take a 12-bit immediate, and the *_12 forms add
+ * the LSL_12 flag to that immediate. */
+static void
+ADD(jit_state_t *_jit, int32_t Rd, int32_t Rn, int32_t Rm)
+{
+  oxxx(_jit, A64_ADD|XS, Rd, Rn, Rm);
+}
+
+static void
+ADDI(jit_state_t *_jit, int32_t Rd, int32_t Rn, int32_t Imm12)
+{
+  oxxi(_jit, A64_ADDI|XS, Rd, Rn, Imm12);
+}
+
+static void
+ADDI_12(jit_state_t *_jit, int32_t Rd, int32_t Rn, int32_t Imm12)
+{
+  oxxi(_jit, A64_ADDI|XS|LSL_12, Rd, Rn, Imm12);
+}
+
+static void
+ADDS(jit_state_t *_jit, int32_t Rd, int32_t Rn, int32_t Rm)
+{
+  oxxx(_jit, A64_ADDS|XS, Rd, Rn, Rm);
+}
+
+static void
+ADDSI(jit_state_t *_jit, int32_t Rd, int32_t Rn, int32_t Imm12)
+{
+  oxxi(_jit, A64_ADDSI|XS, Rd, Rn, Imm12);
+}
+
+static void
+ADDSI_12(jit_state_t *_jit, int32_t Rd, int32_t Rn, int32_t Imm12)
+{
+  oxxi(_jit, A64_ADDSI|XS|LSL_12, Rd, Rn, Imm12);
+}
+
+static void
+ADCS(jit_state_t *_jit, int32_t Rd, int32_t Rn, int32_t Rm)
+{
+  oxxx(_jit, A64_ADCS|XS, Rd, Rn, Rm);
+}
+
+/* Subtraction wrappers; all set the XS bit.  The *S forms use the
+ * SUBS/SBCS opcodes, the *I forms take a 12-bit immediate, and the *_12
+ * forms add the LSL_12 flag to that immediate. */
+static void
+SUB(jit_state_t *_jit, int32_t Rd, int32_t Rn, int32_t Rm)
+{
+  oxxx(_jit, A64_SUB|XS, Rd, Rn, Rm);
+}
+
+static void
+SUBI(jit_state_t *_jit, int32_t Rd, int32_t Rn, int32_t Imm12)
+{
+  oxxi(_jit, A64_SUBI|XS, Rd, Rn, Imm12);
+}
+
+static void
+SUBI_12(jit_state_t *_jit, int32_t Rd, int32_t Rn, int32_t Imm12)
+{
+  oxxi(_jit, A64_SUBI|XS|LSL_12, Rd, Rn, Imm12);
+}
+
+static void
+SUBS(jit_state_t *_jit, int32_t Rd, int32_t Rn, int32_t Rm)
+{
+  oxxx(_jit, A64_SUBS|XS, Rd, Rn, Rm);
+}
+
+static void
+SUBSI(jit_state_t *_jit, int32_t Rd, int32_t Rn, int32_t Imm12)
+{
+  oxxi(_jit, A64_SUBSI|XS, Rd, Rn, Imm12);
+}
+
+static void
+SUBSI_12(jit_state_t *_jit, int32_t Rd, int32_t Rn, int32_t Imm12)
+{
+  oxxi(_jit, A64_SUBSI|XS|LSL_12, Rd, Rn, Imm12);
+}
+
+static void
+SBCS(jit_state_t *_jit, int32_t Rd, int32_t Rn, int32_t Rm)
+{
+  oxxx(_jit, A64_SBCS|XS, Rd, Rn, Rm);
+}
+
+/* Multiply and divide wrappers (three-register form).  MUL, SDIV and
+ * UDIV OR in XS; SMULH and UMULH use their opcode constants as defined. */
+static void
+MUL(jit_state_t *_jit, int32_t Rd, int32_t Rn, int32_t Rm)
+{
+  oxxx(_jit, A64_MUL|XS, Rd, Rn, Rm);
+}
+
+static void
+SMULH(jit_state_t *_jit, int32_t Rd, int32_t Rn, int32_t Rm)
+{
+  oxxx(_jit, A64_SMULH, Rd, Rn, Rm);
+}
+
+static void
+UMULH(jit_state_t *_jit, int32_t Rd, int32_t Rn, int32_t Rm)
+{
+  oxxx(_jit, A64_UMULH, Rd, Rn, Rm);
+}
+
+static void
+SDIV(jit_state_t *_jit, int32_t Rd, int32_t Rn, int32_t Rm)
+{
+  oxxx(_jit, A64_SDIV|XS, Rd, Rn, Rm);
+}
+
+static void
+UDIV(jit_state_t *_jit, int32_t Rd, int32_t Rn, int32_t Rm)
+{
+  oxxx(_jit, A64_UDIV|XS, Rd, Rn, Rm);
+}
+
+/* Shift wrappers.  The register forms emit LSL/ASR/LSR with XS; the
+ * immediate forms (suffix I) are expressed through UBFM/SBFM. */
+static void
+LSL(jit_state_t *_jit, int32_t Rd, int32_t Rn, int32_t Rm)
+{
+  oxxx(_jit, A64_LSL|XS, Rd, Rn, Rm);
+}
+
+/* Left shift by constant i0: UBFM with immr=(64-i0)&63 and imms=63-i0. */
+static void
+LSLI(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t i0)
+{
+  UBFM(_jit, r0, r1, (64-i0)&63, 63-i0);
+}
+
+static void
+ASR(jit_state_t *_jit, int32_t Rd, int32_t Rn, int32_t Rm)
+{
+  oxxx(_jit, A64_ASR|XS, Rd, Rn, Rm);
+}
+
+/* Arithmetic right shift by constant i0: SBFM with immr=i0, imms=63. */
+static void
+ASRI(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t i0)
+{
+  SBFM(_jit, r0, r1, i0, 63);
+}
+
+static void
+LSR(jit_state_t *_jit, int32_t Rd, int32_t Rn, int32_t Rm)
+{
+  oxxx(_jit, A64_LSR|XS, Rd, Rn, Rm);
+}
+
+/* Logical right shift by constant i0: UBFM with immr=i0, imms=63. */
+static void
+LSRI(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t i0)
+{
+  UBFM(_jit, r0, r1, i0, 63);
+}
+
+/* Bitwise AND of two registers, with XS. */
+static void
+AND(jit_state_t *_jit, int32_t Rd, int32_t Rn, int32_t Rm)
+{
+  oxxx(_jit, A64_AND|XS, Rd, Rn, Rm);
+}
+
+/* actually should use oxxrs but logical_immediate returns proper encoding */
+static void
+ANDI(jit_state_t *_jit, int32_t Rd, int32_t Rn, int32_t Imm12)
+{
+  oxxi(_jit, A64_ANDI|XS, Rd, Rn, Imm12);
+}
+
+/* Bitwise OR of two registers, with XS. */
+static void
+ORR(jit_state_t *_jit, int32_t Rd, int32_t Rn, int32_t Rm)
+{
+  oxxx(_jit, A64_ORR|XS, Rd, Rn, Rm);
+}
+
+/* actually should use oxxrs but logical_immediate returns proper encoding */
+static void
+ORRI(jit_state_t *_jit, int32_t Rd, int32_t Rn, int32_t Imm12)
+{
+  oxxi(_jit, A64_ORRI|XS, Rd, Rn, Imm12);
+}
+
+/* Bitwise exclusive OR of two registers, with XS. */
+static void
+EOR(jit_state_t *_jit, int32_t Rd, int32_t Rn, int32_t Rm)
+{
+  oxxx(_jit, A64_EOR|XS, Rd, Rn, Rm);
+}
+
+/* actually should use oxxrs but logical_immediate returns proper encoding */
+static void
+EORI(jit_state_t *_jit, int32_t Rd, int32_t Rn, int32_t Imm12)
+{
+  oxxi(_jit, A64_EORI|XS, Rd, Rn, Imm12);
+}
+
+/* Sign extensions are SBFM with immr=0 and imms = width-1 (7, 15, 31). */
+static void
+SXTB(jit_state_t *_jit, int32_t Rd, int32_t Rn)
+{
+  SBFM(_jit, Rd, Rn, 0, 7);
+}
+
+static void
+SXTH(jit_state_t *_jit, int32_t Rd, int32_t Rn)
+{
+  SBFM(_jit, Rd, Rn, 0, 15);
+}
+
+static void
+SXTW(jit_state_t *_jit, int32_t Rd, int32_t Rn)
+{
+  SBFM(_jit, Rd, Rn, 0, 31);
+}
+
+/* Byte and halfword zero extensions go through UBFX with immr=0 and
+ * imms = width-1 (7, 15). */
+static void
+UXTB(jit_state_t *_jit, int32_t Rd, int32_t Rn)
+{
+  UBFX(_jit, Rd, Rn, 0, 7);
+}
+
+static void
+UXTH(jit_state_t *_jit, int32_t Rd, int32_t Rn)
+{
+  UBFX(_jit, Rd, Rn, 0, 15);
+}
+
+/* Word zero extension: A64_UXTW is the MOV opcode emitted without XS. */
+static void
+UXTW(jit_state_t *_jit, int32_t Rd, int32_t Rm)
+{
+  ox_x(_jit, A64_UXTW, Rd, Rm);
+}
+
+/* Emit REV Rd, Rn using the A64_REV opcode as defined. */
+static void
+REV(jit_state_t *_jit, int32_t Rd, int32_t Rn)
+{
+  o_xx(_jit, A64_REV, Rd, Rn);
+}
+
+/* Emit LDAR: Rt goes in the Rd field, the base Rn in the Rn field. */
+static void
+LDAR(jit_state_t *_jit, int32_t Rt, int32_t Rn)
+{
+  o_xx(_jit, A64_LDAR, Rt, Rn);
+}
+
+/* Emit STLR with Rt and base register Rn. */
+static void
+STLR(jit_state_t *_jit, int32_t Rt, int32_t Rn)
+{
+  o_xx(_jit, A64_STLR, Rt, Rn);
+}
+
+/* Emit LDAXR with Rt and base register Rn. */
+static void
+LDAXR(jit_state_t *_jit, int32_t Rt, int32_t Rn)
+{
+  o_xx(_jit, A64_LDAXR, Rt, Rn);
+}
+
+/* Emit STLXR; the third register is written to the Rm field. */
+static void
+STLXR(jit_state_t *_jit, int32_t Rt, int32_t Rn, int32_t Rm)
+{
+  oxxx(_jit, A64_STLXR, Rt, Rn, Rm);
+}
+
+/* Byte loads into Rt from base Rn.  Three addressing forms each for the
+ * signed (LDRSB*) and unsigned (LDRB*) opcodes: register offset Rm,
+ * 12-bit immediate (suffix I), and 9-bit signed immediate (LDUR*). */
+static void
+LDRSB(jit_state_t *_jit, int32_t Rt, int32_t Rn, int32_t Rm)
+{
+  oxxx(_jit, A64_LDRSB, Rt, Rn, Rm);
+}
+
+static void
+LDRSBI(jit_state_t *_jit, int32_t Rt, int32_t Rn, int32_t Imm12)
+{
+  oxxi(_jit, A64_LDRSBI, Rt, Rn, Imm12);
+}
+
+static void
+LDURSB(jit_state_t *_jit, int32_t Rt, int32_t Rn, int32_t Imm9)
+{
+  oxx9(_jit, A64_LDURSB, Rt, Rn, Imm9);
+}
+
+static void
+LDRB(jit_state_t *_jit, int32_t Rt, int32_t Rn, int32_t Rm)
+{
+  oxxx(_jit, A64_LDRB, Rt, Rn, Rm);
+}
+
+static void
+LDRBI(jit_state_t *_jit, int32_t Rt, int32_t Rn, int32_t Imm12)
+{
+  oxxi(_jit, A64_LDRBI, Rt, Rn, Imm12);
+}
+
+static void
+LDURB(jit_state_t *_jit, int32_t Rt, int32_t Rn, int32_t Imm9)
+{
+  oxx9(_jit, A64_LDURB, Rt, Rn, Imm9);
+}
+
+/* Halfword loads into Rt from base Rn.  Three addressing forms each for
+ * the signed (LDRSH*) and unsigned (LDRH*) opcodes: register offset Rm,
+ * 12-bit immediate (suffix I), and 9-bit signed immediate (LDUR*). */
+static void
+LDRSH(jit_state_t *_jit, int32_t Rt, int32_t Rn, int32_t Rm)
+{
+  oxxx(_jit, A64_LDRSH, Rt, Rn, Rm);
+}
+
+static void
+LDRSHI(jit_state_t *_jit, int32_t Rt, int32_t Rn, int32_t Imm12)
+{
+  oxxi(_jit, A64_LDRSHI, Rt, Rn, Imm12);
+}
+
+static void
+LDURSH(jit_state_t *_jit, int32_t Rt, int32_t Rn, int32_t Imm9)
+{
+  oxx9(_jit, A64_LDURSH, Rt, Rn, Imm9);
+}
+
+static void
+LDRH(jit_state_t *_jit, int32_t Rt, int32_t Rn, int32_t Rm)
+{
+  oxxx(_jit, A64_LDRH, Rt, Rn, Rm);
+}
+
+static void
+LDRHI(jit_state_t *_jit, int32_t Rt, int32_t Rn, int32_t Imm12)
+{
+  oxxi(_jit, A64_LDRHI, Rt, Rn, Imm12);
+}
+
+static void
+LDURH(jit_state_t *_jit, int32_t Rt, int32_t Rn, int32_t Imm9)
+{
+  oxx9(_jit, A64_LDURH, Rt, Rn, Imm9);
+}
+
+/* Word loads into Rt from base Rn.  Three addressing forms each for the
+ * signed (LDRSW*) and unsigned (LDRW*) opcodes: register offset Rm,
+ * 12-bit immediate (suffix I), and 9-bit signed immediate (LDUR*). */
+static void
+LDRSW(jit_state_t *_jit, int32_t Rt, int32_t Rn, int32_t Rm)
+{
+  oxxx(_jit, A64_LDRSW, Rt, Rn, Rm);
+}
+
+static void
+LDRSWI(jit_state_t *_jit, int32_t Rt, int32_t Rn, int32_t Imm12)
+{
+  oxxi(_jit, A64_LDRSWI, Rt, Rn, Imm12);
+}
+
+static void
+LDURSW(jit_state_t *_jit, int32_t Rt, int32_t Rn, int32_t Imm9)
+{
+  oxx9(_jit, A64_LDURSW, Rt, Rn, Imm9);
+}
+
+static void
+LDRW(jit_state_t *_jit, int32_t Rt, int32_t Rn, int32_t Rm)
+{
+  oxxx(_jit, A64_LDRW, Rt, Rn, Rm);
+}
+
+static void
+LDRWI(jit_state_t *_jit, int32_t Rt, int32_t Rn, int32_t Imm12)
+{
+  oxxi(_jit, A64_LDRWI, Rt, Rn, Imm12);
+}
+
+static void
+LDURW(jit_state_t *_jit, int32_t Rt, int32_t Rn, int32_t Imm9)
+{
+  oxx9(_jit, A64_LDURW, Rt, Rn, Imm9);
+}
+
+/* Loads using the LDR/LDRI/LDUR opcodes, into Rt from base Rn: register
+ * offset Rm, 12-bit immediate, and 9-bit signed immediate forms. */
+static void
+LDR(jit_state_t *_jit, int32_t Rt, int32_t Rn, int32_t Rm)
+{
+  oxxx(_jit, A64_LDR, Rt, Rn, Rm);
+}
+
+static void
+LDRI(jit_state_t *_jit, int32_t Rt, int32_t Rn, int32_t Imm12)
+{
+  oxxi(_jit, A64_LDRI, Rt, Rn, Imm12);
+}
+
+static void
+LDUR(jit_state_t *_jit, int32_t Rt, int32_t Rn, int32_t Imm9)
+{
+  oxx9(_jit, A64_LDUR, Rt, Rn, Imm9);
+}
+
+/* Byte stores of Rt to base Rn: register offset Rm, 12-bit immediate,
+ * and 9-bit signed immediate forms. */
+static void
+STRB(jit_state_t *_jit, int32_t Rt, int32_t Rn, int32_t Rm)
+{
+  oxxx(_jit, A64_STRB, Rt, Rn, Rm);
+}
+
+static void
+STRBI(jit_state_t *_jit, int32_t Rt, int32_t Rn, int32_t Imm12)
+{
+  oxxi(_jit, A64_STRBI, Rt, Rn, Imm12);
+}
+
+static void
+STURB(jit_state_t *_jit, int32_t Rt, int32_t Rn, int32_t Imm9)
+{
+  oxx9(_jit, A64_STURB, Rt, Rn, Imm9);
+}
+
+/* Halfword stores of Rt to base Rn: register offset Rm, 12-bit
+ * immediate, and 9-bit signed immediate forms. */
+static void
+STRH(jit_state_t *_jit, int32_t Rt, int32_t Rn, int32_t Rm)
+{
+  oxxx(_jit, A64_STRH, Rt, Rn, Rm);
+}
+
+static void
+STRHI(jit_state_t *_jit, int32_t Rt, int32_t Rn, int32_t Imm12)
+{
+  oxxi(_jit, A64_STRHI, Rt, Rn, Imm12);
+}
+
+static void
+STURH(jit_state_t *_jit, int32_t Rt, int32_t Rn, int32_t Imm9)
+{
+  oxx9(_jit, A64_STURH, Rt, Rn, Imm9);
+}
+
+/* Word stores of Rt to base Rn: register offset Rm, 12-bit immediate,
+ * and 9-bit signed immediate forms. */
+static void
+STRW(jit_state_t *_jit, int32_t Rt, int32_t Rn, int32_t Rm)
+{
+  oxxx(_jit, A64_STRW, Rt, Rn, Rm);
+}
+
+static void
+STRWI(jit_state_t *_jit, int32_t Rt, int32_t Rn, int32_t Imm12)
+{
+  oxxi(_jit, A64_STRWI, Rt, Rn, Imm12);
+}
+
+static void
+STURW(jit_state_t *_jit, int32_t Rt, int32_t Rn, int32_t Imm9)
+{
+  oxx9(_jit, A64_STURW, Rt, Rn, Imm9);
+}
+
+/* Stores using the STR/STRI/STUR opcodes, of Rt to base Rn: register
+ * offset Rm, 12-bit immediate, and 9-bit signed immediate forms. */
+static void
+STR(jit_state_t *_jit, int32_t Rt, int32_t Rn, int32_t Rm)
+{
+  oxxx(_jit, A64_STR, Rt, Rn, Rm);
+}
+
+static void
+STRI(jit_state_t *_jit, int32_t Rt, int32_t Rn, int32_t Imm12)
+{
+  oxxi(_jit, A64_STRI, Rt, Rn, Imm12);
+}
+
+static void
+STUR(jit_state_t *_jit, int32_t Rt, int32_t Rn, int32_t Imm9)
+{
+  oxx9(_jit, A64_STUR, Rt, Rn, Imm9);
+}
+
+/* Emit a B instruction via emit_jmp and hand back the relocation it
+ * returns. */
+static jit_reloc_t
+B(jit_state_t *_jit)
+{
+  const uint32_t insn = encode_o26(A64_B);
+
+  return emit_jmp(_jit, insn);
+}
+
+/* Emit a BL instruction via emit_jmp and hand back the relocation it
+ * returns. */
+static jit_reloc_t
+BL(jit_state_t *_jit)
+{
+  const uint32_t insn = encode_o26(A64_BL);
+
+  return emit_jmp(_jit, insn);
+}
+
+/* Emit BR with target register Rn. */
+static void
+BR(jit_state_t *_jit, int32_t Rn)
+{
+  o_x_(_jit, A64_BR, Rn);
+}
+
+/* Emit BLR with target register Rn. */
+static void
+BLR(jit_state_t *_jit, int32_t Rn)
+{
+  o_x_(_jit, A64_BLR, Rn);
+}
+
+/* Emit RET using the link register (_LR) as the return address register. */
+static void
+RET(jit_state_t *_jit)
+{
+  o_x_(_jit, A64_RET, jit_gpr_regno(_LR));
+}
+
+/* Emit a conditional branch with condition code Cc via emit_jcc and
+ * return its relocation. */
+static jit_reloc_t
+B_C(jit_state_t *_jit, int32_t Cc)
+{
+  const uint32_t insn = encode_oc19(A64_B_C, Cc);
+
+  return emit_jcc(_jit, insn);
+}
+
+/* Emit CBZ (with XS) on register Rd via emit_jcc and return its
+ * relocation. */
+static jit_reloc_t
+CBZ(jit_state_t *_jit, int32_t Rd)
+{
+  const uint32_t insn = encode_ox19(A64_CBZ|XS, Rd);
+
+  return emit_jcc(_jit, insn);
+}
+
+/* Emit CBNZ (with XS) on register Rd via emit_jcc and return its
+ * relocation. */
+static jit_reloc_t
+CBNZ(jit_state_t *_jit, int32_t Rd)
+{
+  const uint32_t insn = encode_ox19(A64_CBNZ|XS, Rd);
+
+  return emit_jcc(_jit, insn);
+}
+
+/* Emit the fixed instruction word 0xd503201f (NOP). */
+static void
+NOP(jit_state_t *_jit)
+{
+  emit_u32_with_pool(_jit, 0xd503201f);
+}
+
+/* Emit the A64_BRK opcode with no further fields set. */
+static void
+BRK(jit_state_t *_jit)
+{
+  emit_u32_with_pool(_jit, A64_BRK);
+}
+
+/* Emit an LDR (literal) into Rt through emit_load_from_pool and return
+ * the relocation it produces. */
+static jit_reloc_t
+movi_from_pool(jit_state_t *_jit, int32_t Rt)
+{
+  const uint32_t insn = encode_ox19(A64_LDRI_LITERAL, Rt);
+
+  return emit_load_from_pool(_jit, insn);
+}
+
+/* Emit a veneer that jumps to `target`: an LDR (literal) into a temporary
+ * register, a BR through that register, and then the 64-bit target
+ * address itself as inline data for the load to read. */
+static void
+emit_veneer(jit_state_t *_jit, jit_pointer_t target)
+{
+ jit_gpr_t tmp = get_temp_gpr(_jit);
+ uint32_t ldr = encode_ox19(A64_LDRI_LITERAL, jit_gpr_regno(tmp));
+ uint32_t br = write_Rn_bitfield(A64_BR, jit_gpr_regno(tmp));
+ /* Remember where the load is emitted so it can be patched below. */
+ uint32_t *loc = _jit->pc.ui;
+ emit_u32(_jit, ldr);
+ emit_u32(_jit, br);
+ unget_temp_gpr(_jit);
+ /* Nothing may be patched if the emitter has flagged an overflow. */
+ if (_jit->overflow)
+ return;
+ // Patch load to here, divided by 4.
+ /* The pointer difference below is counted in uint32_t units. */
+ patch_load_from_pool_offset(loc, _jit->pc.ui - loc);
+ emit_u64(_jit, (uint64_t) target);
+}
+
+/* Copy r1 into r0; emits nothing when both are the same register. */
+static void
+movr(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  if (r0 == r1)
+    return;
+
+  // Stack pointer requires special handling: when either operand is
+  // register _X31 the copy is emitted as ADDI with a zero immediate
+  // instead of MOV.
+  if (r1 == jit_gpr_regno(_X31) || r0 == jit_gpr_regno(_X31)) {
+    ADDI(_jit, r0, r1, 0);
+    return;
+  }
+
+  MOV(_jit, r0, r1);
+}
+
+/* Three-register arithmetic operations, r0 = r1 <op> r2, each mapped
+ * onto a single instruction wrapper. */
+
+/* Addition via ADD. */
+static void
+addr(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+{
+  ADD(_jit, r0, r1, r2);
+}
+
+/* Addition via ADDS. */
+static void
+addcr(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+{
+  ADDS(_jit, r0, r1, r2);
+}
+
+/* Addition via ADCS. */
+static void
+addxr(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+{
+  ADCS(_jit, r0, r1, r2);
+}
+
+/* Subtraction via SUB. */
+static void
+subr(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+{
+  SUB(_jit, r0, r1, r2);
+}
+
+/* Subtraction via SUBS. */
+static void
+subcr(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+{
+  SUBS(_jit, r0, r1, r2);
+}
+
+/* Subtraction via SBCS. */
+static void
+subxr(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+{
+  SBCS(_jit, r0, r1, r2);
+}
+
+/* Multiplication via MUL. */
+static void
+mulr(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+{
+  MUL(_jit, r0, r1, r2);
+}
+
+/* Signed division via SDIV. */
+static void
+divr(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+{
+  SDIV(_jit, r0, r1, r2);
+}
+
+/* Unsigned division via UDIV. */
+static void
+divr_u(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+{
+  UDIV(_jit, r0, r1, r2);
+}
+
+/* Combined divide: quotient of r2 / r3 into r0 and remainder into r1.
+ * `sign` selects signed (divr) or unsigned (divr_u) division.  The
+ * remainder is computed as r2 - r3 * quotient.  When an output register
+ * is also one of the inputs, the result is built in a temporary first so
+ * the inputs stay intact until both values are computed. */
+static void
+iqdivr(jit_state_t *_jit, jit_bool_t sign,
+ int32_t r0, int32_t r1, int32_t r2, int32_t r3)
+{
+ int32_t rg0, rg1;
+ /* Quotient destination: use a temporary if r0 aliases an input. */
+ if (r0 == r2 || r0 == r3) {
+ rg0 = jit_gpr_regno(get_temp_gpr(_jit));
+ } else {
+ rg0 = r0;
+ }
+ /* Remainder destination: use a temporary if r1 aliases an input. */
+ if (r1 == r2 || r1 == r3) {
+ rg1 = jit_gpr_regno(get_temp_gpr(_jit));
+ } else {
+ rg1 = r1;
+ }
+ if (sign)
+ divr(_jit, rg0, r2, r3);
+ else
+ divr_u(_jit, rg0, r2, r3);
+ /* rg1 = r2 - r3 * rg0 */
+ mulr(_jit, rg1, r3, rg0);
+ subr(_jit, rg1, r2, rg1);
+ /* Copy results out of any temporaries and release them. */
+ if (rg0 != r0) {
+ movr(_jit, r0, rg0);
+ unget_temp_gpr(_jit);
+ }
+ if (rg1 != r1) {
+ movr(_jit, r1, rg1);
+ unget_temp_gpr(_jit);
+ }
+}
+
+/* Signed quotient/remainder: forwards to iqdivr with sign = 1. */
+static void
+qdivr(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2, int32_t r3)
+{
+  iqdivr(_jit, 1, r0, r1, r2, r3);
+}
+
+/* Unsigned quotient/remainder: forwards to iqdivr with sign = 0. */
+static void
+qdivr_u(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2, int32_t r3)
+{
+  iqdivr(_jit, 0, r0, r1, r2, r3);
+}
+
+/* Register-operand shift, unary and bitwise operations, each mapped
+ * onto a single instruction wrapper. */
+
+/* Left shift via LSL. */
+static void
+lshr(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+{
+  LSL(_jit, r0, r1, r2);
+}
+
+/* Right shift via ASR. */
+static void
+rshr(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+{
+  ASR(_jit, r0, r1, r2);
+}
+
+/* Unsigned right shift via LSR. */
+static void
+rshr_u(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+{
+  LSR(_jit, r0, r1, r2);
+}
+
+/* Negation via NEG. */
+static void
+negr(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  NEG(_jit, r0, r1);
+}
+
+/* Complement via MVN. */
+static void
+comr(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  MVN(_jit, r0, r1);
+}
+
+/* Bitwise AND via AND. */
+static void
+andr(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+{
+  AND(_jit, r0, r1, r2);
+}
+
+/* Bitwise OR via ORR. */
+static void
+orr(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+{
+  ORR(_jit, r0, r1, r2);
+}
+
+/* Bitwise exclusive OR via EOR. */
+static void
+xorr(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+{
+  EOR(_jit, r0, r1, r2);
+}
+
+/* Loads into r0.  The ldr_* forms read from the address in r1 using the
+ * immediate-offset instruction with offset 0; the ldxr_* forms use the
+ * register-offset instruction with base r1 and offset r2. */
+
+/* Signed byte load (LDRSBI). */
+static void
+ldr_c(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  LDRSBI(_jit, r0, r1, 0);
+}
+
+/* Signed halfword load (LDRSHI). */
+static void
+ldr_s(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  LDRSHI(_jit, r0, r1, 0);
+}
+
+/* Signed word load (LDRSWI). */
+static void
+ldr_i(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  LDRSWI(_jit, r0, r1, 0);
+}
+
+/* Signed halfword indexed load (LDRSH). */
+static void
+ldxr_s(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+{
+  LDRSH(_jit, r0, r1, r2);
+}
+
+/* Signed word indexed load (LDRSW). */
+static void
+ldxr_i(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+{
+  LDRSW(_jit, r0, r1, r2);
+}
+
+/* Indexed load via LDR. */
+static void
+ldxr_l(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+{
+  LDR(_jit, r0, r1, r2);
+}
+
+/* Stores.  Note the operand order: in the str_* forms r0 is the address
+ * and r1 the value, so they are swapped when passed to the (Rt, Rn)
+ * instruction wrappers.  In the stxr_* forms r2 is the value, r1 is
+ * passed as the base (Rn) and r0 as the register offset (Rm). */
+
+/* Byte store (STRBI, offset 0). */
+static void
+str_c(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  STRBI(_jit, r1, r0, 0);
+}
+
+/* Halfword store (STRHI, offset 0). */
+static void
+str_s(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  STRHI(_jit, r1, r0, 0);
+}
+
+/* Word store (STRWI, offset 0). */
+static void
+str_i(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  STRWI(_jit, r1, r0, 0);
+}
+
+/* Store via STRI, offset 0. */
+static void
+str_l(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  STRI(_jit, r1, r0, 0);
+}
+
+/* Byte indexed store (STRB). */
+static void
+stxr_c(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+{
+  STRB(_jit, r2, r1, r0);
+}
+
+/* Halfword indexed store (STRH). */
+static void
+stxr_s(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+{
+  STRH(_jit, r2, r1, r0);
+}
+
+/* Word indexed store (STRW). */
+static void
+stxr_i(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+{
+  STRW(_jit, r2, r1, r0);
+}
+
+/* Indexed store via STR. */
+static void
+stxr_l(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+{
+  STR(_jit, r2, r1, r0);
+}
+
+/* Byte swap of r1 into r0 via REV. */
+static void
+bswapr_ul(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  REV(_jit, r0, r1);
+}
+
+/* Sign-extend the low byte of r1 into r0 (SXTB). */
+static void
+extr_c(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  SXTB(_jit, r0, r1);
+}
+
+/* Zero-extend the low byte of r1 into r0 (UXTB). */
+static void
+extr_uc(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  UXTB(_jit, r0, r1);
+}
+
+/* Sign-extend the low halfword of r1 into r0 (SXTH). */
+static void
+extr_s(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  SXTH(_jit, r0, r1);
+}
+
+/* Zero-extend the low halfword of r1 into r0 (UXTH). */
+static void
+extr_us(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  UXTH(_jit, r0, r1);
+}
+
+/* Sign-extend the low word of r1 into r0 (SXTW). */
+static void
+extr_i(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  SXTW(_jit, r0, r1);
+}
+
+/* Zero-extend the low word of r1 into r0 (UXTW). */
+static void
+extr_ui(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  UXTW(_jit, r0, r1);
+}
+
+/* Load the constant i0 into r0 using MOVZ/MOVK/MOVN.
+ *
+ * The value is viewed as four 16-bit chunks.  Bit k of ibit is set when
+ * chunk k of i0 is non-zero; bit k of nbit is set when chunk k of ~i0 is
+ * non-zero (i.e. chunk k of i0 is not 0xffff).  The switch on ibit emits
+ * one MOVZ for the lowest non-zero chunk followed by a MOVK for every
+ * other non-zero chunk.  When three or four chunks are non-zero and at
+ * most one chunk of ~i0 is non-zero, a single MOVN on that chunk is
+ * emitted instead. */
+static void
+movi(jit_state_t *_jit, int32_t r0, jit_word_t i0)
+{
+  jit_word_t n0 = ~i0, ibit = 0, nbit = 0;
+  if (i0 & 0x000000000000ffffL) ibit |= 1;
+  if (i0 & 0x00000000ffff0000L) ibit |= 2;
+  if (i0 & 0x0000ffff00000000L) ibit |= 4;
+  if (i0 & 0xffff000000000000L) ibit |= 8;
+  if (n0 & 0x000000000000ffffL) nbit |= 1;
+  if (n0 & 0x00000000ffff0000L) nbit |= 2;
+  if (n0 & 0x0000ffff00000000L) nbit |= 4;
+  if (n0 & 0xffff000000000000L) nbit |= 8;
+  switch (ibit) {
+  case 0:
+    MOVZ (_jit, r0, 0);
+    break;
+  case 1:
+    MOVZ (_jit, r0, i0 & 0xffff);
+    break;
+  case 2:
+    MOVZ_16(_jit, r0, (i0 >> 16) & 0xffff);
+    break;
+  case 3:
+    MOVZ (_jit, r0, i0 & 0xffff);
+    MOVK_16(_jit, r0, (i0 >> 16) & 0xffff);
+    break;
+  case 4:
+    MOVZ_32(_jit, r0, (i0 >> 32) & 0xffff);
+    break;
+  case 5:
+    MOVZ (_jit, r0, i0 & 0xffff);
+    MOVK_32(_jit, r0, (i0 >> 32) & 0xffff);
+    break;
+  case 6:
+    MOVZ_16(_jit, r0, (i0 >> 16) & 0xffff);
+    MOVK_32(_jit, r0, (i0 >> 32) & 0xffff);
+    break;
+  case 7:
+    if (nbit == 8) {
+      MOVN_48(_jit, r0, (n0 >> 48) & 0xffff);
+    } else {
+      MOVZ (_jit, r0, i0 & 0xffff);
+      MOVK_16(_jit, r0, (i0 >> 16) & 0xffff);
+      MOVK_32(_jit, r0, (i0 >> 32) & 0xffff);
+    }
+    break;
+  case 8:
+    MOVZ_48(_jit, r0, (i0 >> 48) & 0xffff);
+    break;
+  case 9:
+    MOVZ (_jit, r0, i0 & 0xffff);
+    MOVK_48(_jit, r0, (i0 >> 48) & 0xffff);
+    break;
+  case 10:
+    MOVZ_16(_jit, r0, (i0 >> 16) & 0xffff);
+    MOVK_48(_jit, r0, (i0 >> 48) & 0xffff);
+    break;
+  case 11:
+    if (nbit == 4) {
+      MOVN_32(_jit, r0, (n0 >> 32) & 0xffff);
+    } else {
+      MOVZ (_jit, r0, i0 & 0xffff);
+      MOVK_16(_jit, r0, (i0 >> 16) & 0xffff);
+      MOVK_48(_jit, r0, (i0 >> 48) & 0xffff);
+    }
+    break;
+  case 12:
+    MOVZ_32(_jit, r0, (i0 >> 32) & 0xffff);
+    MOVK_48(_jit, r0, (i0 >> 48) & 0xffff);
+    break;
+  case 13:
+    if (nbit == 2) {
+      MOVN_16(_jit, r0, (n0 >> 16) & 0xffff);
+    } else {
+      MOVZ (_jit, r0, i0 & 0xffff);
+      MOVK_32(_jit, r0, (i0 >> 32) & 0xffff);
+      MOVK_48(_jit, r0, (i0 >> 48) & 0xffff);
+    }
+    break;
+  case 14:
+    if (nbit == 1) {
+      MOVN (_jit, r0, (n0) & 0xffff);
+    } else {
+      MOVZ_16(_jit, r0, (i0 >> 16) & 0xffff);
+      MOVK_32(_jit, r0, (i0 >> 32) & 0xffff);
+      MOVK_48(_jit, r0, (i0 >> 48) & 0xffff);
+    }
+    break;
+  case 15:
+    /* All four chunks are non-zero.  If at most one of them differs
+     * from 0xffff, one MOVN on that chunk produces the value. */
+    if (nbit == 0) {
+      MOVN (_jit, r0, 0);
+    } else if (nbit == 1) {
+      MOVN (_jit, r0, n0 & 0xffff);
+    } else if (nbit == 2) {
+      MOVN_16(_jit, r0, (n0 >> 16) & 0xffff);
+    } else if (nbit == 4) {
+      MOVN_32(_jit, r0, (n0 >> 32) & 0xffff);
+    } else if (nbit == 8) {
+      MOVN_48(_jit, r0, (n0 >> 48) & 0xffff);
+    } else {
+      MOVZ (_jit, r0, i0 & 0xffff);
+      MOVK_16(_jit, r0, (i0 >> 16) & 0xffff);
+      MOVK_32(_jit, r0, (i0 >> 32) & 0xffff);
+      MOVK_48(_jit, r0, (i0 >> 48) & 0xffff);
+    }
+    break;
+  default:
+    abort();
+  }
+}
+
+/* Emit CMP r0, r1 followed by a conditional branch on cc.  The
+   relocation produced by B_C is returned for the caller to patch.  */
+static jit_reloc_t
+bccr(jit_state_t *_jit, int32_t cc, int32_t r0, int32_t r1)
+{
+  jit_reloc_t branch;
+
+  CMP(_jit, r0, r1);
+  branch = B_C(_jit, cc);
+  return branch;
+}
+
+/* Compare r0 with the immediate i1 and emit a conditional branch on cc;
+   returns the relocation from B_C.
+
+   The comparison uses the cheapest available form:
+     - CMPI     when i1 fits in 12 bits;
+     - CMPI_12  when i1 is a 12-bit value shifted left by 12;
+     - CMNI     when -i1 fits in 12 bits;
+     - CMNI_12  when -i1 is a 12-bit value shifted left by 12;
+     - otherwise i1 is loaded into a temporary and compared with CMP.
+
+   Range checks are done before the left shifts so that a negative
+   value is never shifted, and the CMNI_12 test compares against `in'
+   (the negated immediate), which is the value that form encodes.  */
+static jit_reloc_t
+bcci(jit_state_t *_jit, int32_t cc, int32_t r0, jit_word_t i1)
+{
+  jit_word_t is = i1 >> 12;
+  jit_word_t in = -i1;
+  jit_word_t iS = in >> 12;
+  if (i1 >= 0 && i1 <= 0xfff) {
+    CMPI   (_jit, r0, i1);
+  } else if (is >= 0 && is <= 0xfff && (is << 12) == i1) {
+    CMPI_12(_jit, r0, is);
+  } else if (in >= 0 && in <= 0xfff) {
+    CMNI   (_jit, r0, in);
+  } else if (iS >= 0 && iS <= 0xfff && (iS << 12) == in) {
+    CMNI_12(_jit, r0, iS);
+  } else {
+    jit_gpr_t reg = get_temp_gpr(_jit);
+    movi(_jit, jit_gpr_regno(reg), i1);
+    CMP(_jit, r0, jit_gpr_regno(reg));
+    unget_temp_gpr(_jit);
+  }
+  return B_C(_jit, cc);
+}
+
+/* Branch if r0 < r1 (signed): compare-and-branch on BCC_LT.  */
+static jit_reloc_t
+bltr(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  jit_reloc_t branch = bccr(_jit, BCC_LT, r0, r1);
+  return branch;
+}
+
+/* Branch if r0 < i1 (signed): immediate compare-and-branch on BCC_LT.  */
+static jit_reloc_t
+blti(jit_state_t *_jit, int32_t r0, jit_word_t i1)
+{
+  jit_reloc_t branch = bcci(_jit, BCC_LT, r0, i1);
+  return branch;
+}
+
+/* Branch if r0 < r1 (unsigned): compare-and-branch on BCC_CC.  */
+static jit_reloc_t
+bltr_u(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  jit_reloc_t branch = bccr(_jit, BCC_CC, r0, r1);
+  return branch;
+}
+
+/* Branch if r0 < i1 (unsigned): immediate compare-and-branch on
+   BCC_CC.  */
+static jit_reloc_t
+blti_u(jit_state_t *_jit, int32_t r0, jit_word_t i1)
+{
+  jit_reloc_t branch = bcci(_jit, BCC_CC, r0, i1);
+  return branch;
+}
+
+/* Branch if r0 <= r1 (signed): compare-and-branch on BCC_LE.  */
+static jit_reloc_t
+bler(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  jit_reloc_t branch = bccr(_jit, BCC_LE, r0, r1);
+  return branch;
+}
+
+/* Branch if r0 <= i1 (signed): immediate compare-and-branch on
+   BCC_LE.  */
+static jit_reloc_t
+blei(jit_state_t *_jit, int32_t r0, jit_word_t i1)
+{
+  jit_reloc_t branch = bcci(_jit, BCC_LE, r0, i1);
+  return branch;
+}
+
+/* Branch if r0 <= r1 (unsigned): compare-and-branch on BCC_LS.  */
+static jit_reloc_t
+bler_u(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  jit_reloc_t branch = bccr(_jit, BCC_LS, r0, r1);
+  return branch;
+}
+
+/* Branch if r0 <= i1 (unsigned): immediate compare-and-branch on
+   BCC_LS.  */
+static jit_reloc_t
+blei_u(jit_state_t *_jit, int32_t r0, jit_word_t i1)
+{
+  jit_reloc_t branch = bcci(_jit, BCC_LS, r0, i1);
+  return branch;
+}
+
+/* Branch if r0 == r1: compare-and-branch on BCC_EQ.  */
+static jit_reloc_t
+beqr(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  jit_reloc_t branch = bccr(_jit, BCC_EQ, r0, r1);
+  return branch;
+}
+
+/* Branch if r0 >= r1 (signed): compare-and-branch on BCC_GE.  */
+static jit_reloc_t
+bger(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  jit_reloc_t branch = bccr(_jit, BCC_GE, r0, r1);
+  return branch;
+}
+
+/* Branch if r0 >= i1 (signed): immediate compare-and-branch on
+   BCC_GE.  */
+static jit_reloc_t
+bgei(jit_state_t *_jit, int32_t r0, jit_word_t i1)
+{
+  jit_reloc_t branch = bcci(_jit, BCC_GE, r0, i1);
+  return branch;
+}
+
+/* Branch if r0 >= r1 (unsigned): compare-and-branch on BCC_CS.  */
+static jit_reloc_t
+bger_u(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  jit_reloc_t branch = bccr(_jit, BCC_CS, r0, r1);
+  return branch;
+}
+
+/* Branch if r0 >= i1 (unsigned): immediate compare-and-branch on
+   BCC_CS.  */
+static jit_reloc_t
+bgei_u(jit_state_t *_jit, int32_t r0, jit_word_t i1)
+{
+  jit_reloc_t branch = bcci(_jit, BCC_CS, r0, i1);
+  return branch;
+}
+
+/* Branch if r0 > r1 (signed): compare-and-branch on BCC_GT.  */
+static jit_reloc_t
+bgtr(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  jit_reloc_t branch = bccr(_jit, BCC_GT, r0, r1);
+  return branch;
+}
+
+/* Branch if r0 > i1 (signed): immediate compare-and-branch on
+   BCC_GT.  */
+static jit_reloc_t
+bgti(jit_state_t *_jit, int32_t r0, jit_word_t i1)
+{
+  jit_reloc_t branch = bcci(_jit, BCC_GT, r0, i1);
+  return branch;
+}
+
+/* Branch if r0 > r1 (unsigned): compare-and-branch on BCC_HI.  */
+static jit_reloc_t
+bgtr_u(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  jit_reloc_t branch = bccr(_jit, BCC_HI, r0, r1);
+  return branch;
+}
+
+/* Branch if r0 > i1 (unsigned): immediate compare-and-branch on
+   BCC_HI.  */
+static jit_reloc_t
+bgti_u(jit_state_t *_jit, int32_t r0, jit_word_t i1)
+{
+  jit_reloc_t branch = bcci(_jit, BCC_HI, r0, i1);
+  return branch;
+}
+
+/* Branch if r0 != r1: compare-and-branch on BCC_NE.  */
+static jit_reloc_t
+bner(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  jit_reloc_t branch = bccr(_jit, BCC_NE, r0, r1);
+  return branch;
+}
+
+/* r0 = r1 + i0.
+
+   Uses a single instruction whenever the immediate allows it:
+     - ADDI     when i0 fits in 12 bits;
+     - ADDI_12  when i0 is a 12-bit value shifted left by 12;
+     - SUBI     when -i0 fits in 12 bits;
+     - SUBI_12  when -i0 is a 12-bit value shifted left by 12.
+   Otherwise i0 is loaded with movi and added with addr.  The constant
+   goes into r0 itself unless r0 aliases r1, in which case a temporary
+   register is borrowed so that r1 is not clobbered before it is read.
+
+   Range checks are done before the left shifts so that a negative
+   value is never shifted, and the SUBI_12 test compares against `in'
+   (the negated immediate), which is the value that form encodes.  */
+static void
+addi(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0)
+{
+  jit_word_t is = i0 >> 12;
+  jit_word_t in = -i0;
+  jit_word_t iS = in >> 12;
+  if (i0 >= 0 && i0 <= 0xfff) {
+    ADDI   (_jit, r0, r1, i0);
+  } else if (is >= 0 && is <= 0xfff && (is << 12) == i0) {
+    ADDI_12(_jit, r0, r1, is);
+  } else if (in >= 0 && in <= 0xfff) {
+    SUBI   (_jit, r0, r1, in);
+  } else if (iS >= 0 && iS <= 0xfff && (iS << 12) == in) {
+    SUBI_12(_jit, r0, r1, iS);
+  } else {
+    int32_t r2 = (r0 == r1) ? jit_gpr_regno(get_temp_gpr(_jit)) : r0;
+    movi(_jit, r2, i0);
+    addr(_jit, r0, r1, r2);
+    if (r0 == r1)
+      unget_temp_gpr(_jit);
+  }
+}
+
+/* r0 = r1 + i0 through the ADDSI/SUBSI encoders and addcr.  baddi
+   branches on the condition flags right after calling this function, so
+   these are the flag-setting forms.
+
+   Immediate selection mirrors the plain add:
+     - ADDSI     when i0 fits in 12 bits;
+     - ADDSI_12  when i0 is a 12-bit value shifted left by 12;
+     - SUBSI     when -i0 fits in 12 bits;
+     - SUBSI_12  when -i0 is a 12-bit value shifted left by 12.
+   Otherwise i0 is loaded with movi (into a temporary when r0 aliases
+   r1) and added with addcr.
+
+   Range checks are done before the left shifts so that a negative
+   value is never shifted, and the SUBSI_12 test compares against `in'
+   (the negated immediate), which is the value that form encodes.  */
+static void
+addci(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0)
+{
+  jit_word_t is = i0 >> 12;
+  jit_word_t in = -i0;
+  jit_word_t iS = in >> 12;
+  if (i0 >= 0 && i0 <= 0xfff) {
+    ADDSI   (_jit, r0, r1, i0);
+  } else if (is >= 0 && is <= 0xfff && (is << 12) == i0) {
+    ADDSI_12(_jit, r0, r1, is);
+  } else if (in >= 0 && in <= 0xfff) {
+    SUBSI   (_jit, r0, r1, in);
+  } else if (iS >= 0 && iS <= 0xfff && (iS << 12) == in) {
+    SUBSI_12(_jit, r0, r1, iS);
+  } else {
+    int32_t r2 = (r0 == r1) ? jit_gpr_regno(get_temp_gpr(_jit)) : r0;
+    movi(_jit, r2, i0);
+    addcr(_jit, r0, r1, r2);
+    if (r0 == r1)
+      unget_temp_gpr(_jit);
+  }
+}
+
+/* Immediate form of addxr: load i0 into a register, then call
+   addxr(r0, r1, <that register>).  r0 holds the constant unless it
+   aliases r1, in which case a temporary is borrowed.  */
+static void
+addxi(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0)
+{
+  const int aliased = (r0 == r1);
+  int32_t scratch = r0;
+
+  if (aliased)
+    scratch = jit_gpr_regno(get_temp_gpr(_jit));
+  movi(_jit, scratch, i0);
+  addxr(_jit, r0, r1, scratch);
+  if (aliased)
+    unget_temp_gpr(_jit);
+}
+
+/* r0 = r1 - i0.  SUBI is used when i0 fits in 12 bits, SUBI_12 when it
+   is a 12-bit value shifted left by 12; otherwise the constant is
+   loaded with movi (into a temporary when r0 aliases r1) and
+   subtracted with subr.  */
+static void
+subi(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0)
+{
+  jit_word_t hi = i0 >> 12;
+
+  if (i0 >= 0 && i0 <= 0xfff) {
+    SUBI(_jit, r0, r1, i0);
+    return;
+  }
+  if ((hi << 12) == i0 && hi >= 0 && hi <= 0xfff) {
+    SUBI_12(_jit, r0, r1, hi);
+    return;
+  }
+
+  const int aliased = (r0 == r1);
+  int32_t scratch = aliased ? jit_gpr_regno(get_temp_gpr(_jit)) : r0;
+  movi(_jit, scratch, i0);
+  subr(_jit, r0, r1, scratch);
+  if (aliased)
+    unget_temp_gpr(_jit);
+}
+
+/* r0 = r1 - i0 through the SUBSI encoders and subcr.  SUBSI is used
+   when i0 fits in 12 bits, SUBSI_12 when it is a 12-bit value shifted
+   left by 12; otherwise the constant is loaded with movi (into a
+   temporary when r0 aliases r1) and subtracted with subcr.  */
+static void
+subci(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0)
+{
+  jit_word_t hi = i0 >> 12;
+
+  if (i0 >= 0 && i0 <= 0xfff) {
+    SUBSI(_jit, r0, r1, i0);
+    return;
+  }
+  if ((hi << 12) == i0 && hi >= 0 && hi <= 0xfff) {
+    SUBSI_12(_jit, r0, r1, hi);
+    return;
+  }
+
+  const int aliased = (r0 == r1);
+  int32_t scratch = aliased ? jit_gpr_regno(get_temp_gpr(_jit)) : r0;
+  movi(_jit, scratch, i0);
+  subcr(_jit, r0, r1, scratch);
+  if (aliased)
+    unget_temp_gpr(_jit);
+}
+
+/* Immediate form of subxr: load i0 into a register, then call
+   subxr(r0, r1, <that register>).  r0 holds the constant unless it
+   aliases r1, in which case a temporary is borrowed.  */
+static void
+subxi(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0)
+{
+  const int aliased = (r0 == r1);
+  int32_t scratch = r0;
+
+  if (aliased)
+    scratch = jit_gpr_regno(get_temp_gpr(_jit));
+  movi(_jit, scratch, i0);
+  subxr(_jit, r0, r1, scratch);
+  if (aliased)
+    unget_temp_gpr(_jit);
+}
+
+/* Add r1 to r0 in place with addcr, then emit a conditional branch on
+   cc.  Returns the relocation from B_C.  */
+static jit_reloc_t
+baddr(jit_state_t *_jit, int32_t cc, int32_t r0, int32_t r1)
+{
+  jit_reloc_t branch;
+
+  addcr(_jit, r0, r0, r1);
+  branch = B_C(_jit, cc);
+  return branch;
+}
+
+/* Add the immediate i1 to r0 in place with addci, then emit a
+   conditional branch on cc.  Returns the relocation from B_C.  */
+static jit_reloc_t
+baddi(jit_state_t *_jit, int32_t cc, int32_t r0, jit_word_t i1)
+{
+  jit_reloc_t branch;
+
+  addci(_jit, r0, r0, i1);
+  branch = B_C(_jit, cc);
+  return branch;
+}
+
+/* r0 += r1, branching on BCC_VS (signed overflow set).  */
+static jit_reloc_t
+boaddr(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  jit_reloc_t branch = baddr(_jit, BCC_VS, r0, r1);
+  return branch;
+}
+
+/* r0 += i1, branching on BCC_VS (signed overflow set).  */
+static jit_reloc_t
+boaddi(jit_state_t *_jit, int32_t r0, jit_word_t i1)
+{
+  jit_reloc_t branch = baddi(_jit, BCC_VS, r0, i1);
+  return branch;
+}
+
+/* r0 += r1, branching on BCC_HS (carry set, i.e. unsigned overflow of
+   the addition).  */
+static jit_reloc_t
+boaddr_u(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  jit_reloc_t branch = baddr(_jit, BCC_HS, r0, r1);
+  return branch;
+}
+
+/* r0 += i1, branching on BCC_HS (carry set, i.e. unsigned overflow of
+   the addition).  */
+static jit_reloc_t
+boaddi_u(jit_state_t *_jit, int32_t r0, jit_word_t i1)
+{
+  jit_reloc_t branch = baddi(_jit, BCC_HS, r0, i1);
+  return branch;
+}
+
+/* r0 += r1, branching on BCC_VC (signed overflow clear).  */
+static jit_reloc_t
+bxaddr(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  jit_reloc_t branch = baddr(_jit, BCC_VC, r0, r1);
+  return branch;
+}
+
+/* r0 += i1, branching on BCC_VC (signed overflow clear).  */
+static jit_reloc_t
+bxaddi(jit_state_t *_jit, int32_t r0, jit_word_t i1)
+{
+  jit_reloc_t branch = baddi(_jit, BCC_VC, r0, i1);
+  return branch;
+}
+
+/* r0 += r1, branching on BCC_LO (carry clear, i.e. no unsigned
+   overflow of the addition).  */
+static jit_reloc_t
+bxaddr_u(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  jit_reloc_t branch = baddr(_jit, BCC_LO, r0, r1);
+  return branch;
+}
+
+/* r0 += i1, branching on BCC_LO (carry clear, i.e. no unsigned
+   overflow of the addition).  */
+static jit_reloc_t
+bxaddi_u(jit_state_t *_jit, int32_t r0, jit_word_t i1)
+{
+  jit_reloc_t branch = baddi(_jit, BCC_LO, r0, i1);
+  return branch;
+}
+
+/* Subtract r1 from r0 in place with subcr, then emit a conditional
+   branch on cc.  Returns the relocation from B_C.  */
+static jit_reloc_t
+bsubr(jit_state_t *_jit, int32_t cc, int32_t r0, int32_t r1)
+{
+  jit_reloc_t branch;
+
+  subcr(_jit, r0, r0, r1);
+  branch = B_C(_jit, cc);
+  return branch;
+}
+
+/* Subtract the immediate i1 from r0 in place with subci, then emit a
+   conditional branch on cc.  Returns the relocation from B_C.  */
+static jit_reloc_t
+bsubi(jit_state_t *_jit, int32_t cc, int32_t r0, jit_word_t i1)
+{
+  jit_reloc_t branch;
+
+  subci(_jit, r0, r0, i1);
+  branch = B_C(_jit, cc);
+  return branch;
+}
+
+/* r0 -= r1, branching on BCC_VS (signed overflow set).  */
+static jit_reloc_t
+bosubr(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  jit_reloc_t branch = bsubr(_jit, BCC_VS, r0, r1);
+  return branch;
+}
+
+/* r0 -= i1, branching on BCC_VS (signed overflow set).  */
+static jit_reloc_t
+bosubi(jit_state_t *_jit, int32_t r0, jit_word_t i1)
+{
+  jit_reloc_t branch = bsubi(_jit, BCC_VS, r0, i1);
+  return branch;
+}
+
+/* r0 -= r1, branching on BCC_LO (carry clear, i.e. the unsigned
+   subtraction borrowed).  */
+static jit_reloc_t
+bosubr_u(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  jit_reloc_t branch = bsubr(_jit, BCC_LO, r0, r1);
+  return branch;
+}
+
+/* r0 -= i1, branching on BCC_LO (carry clear, i.e. the unsigned
+   subtraction borrowed).  */
+static jit_reloc_t
+bosubi_u(jit_state_t *_jit, int32_t r0, jit_word_t i1)
+{
+  jit_reloc_t branch = bsubi(_jit, BCC_LO, r0, i1);
+  return branch;
+}
+
+/* r0 -= r1, branching on BCC_VC (signed overflow clear).  */
+static jit_reloc_t
+bxsubr(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  jit_reloc_t branch = bsubr(_jit, BCC_VC, r0, r1);
+  return branch;
+}
+
+/* r0 -= i1, branching on BCC_VC (signed overflow clear).  */
+static jit_reloc_t
+bxsubi(jit_state_t *_jit, int32_t r0, jit_word_t i1)
+{
+  jit_reloc_t branch = bsubi(_jit, BCC_VC, r0, i1);
+  return branch;
+}
+
+/* r0 -= r1, branching on BCC_HS (carry set, i.e. the unsigned
+   subtraction did not borrow).  */
+static jit_reloc_t
+bxsubr_u(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  jit_reloc_t branch = bsubr(_jit, BCC_HS, r0, r1);
+  return branch;
+}
+
+/* r0 -= i1, branching on BCC_HS (carry set, i.e. the unsigned
+   subtraction did not borrow).  */
+static jit_reloc_t
+bxsubi_u(jit_state_t *_jit, int32_t r0, jit_word_t i1)
+{
+  jit_reloc_t branch = bsubi(_jit, BCC_HS, r0, i1);
+  return branch;
+}
+
+/* Emit TST r0, r1 followed by a conditional branch on cc.  Returns the
+   relocation from B_C.  */
+static jit_reloc_t
+bmxr(jit_state_t *_jit, int32_t cc, int32_t r0, int32_t r1)
+{
+  jit_reloc_t branch;
+
+  TST(_jit, r0, r1);
+  branch = B_C(_jit, cc);
+  return branch;
+}
+
+/* Test r0 against the mask i1 and emit a conditional branch on cc.
+   When logical_immediate() can encode the mask (result other than -1)
+   a TSTI is used; otherwise the mask is loaded into a temporary
+   register and tested with TST.  Returns the relocation from B_C.  */
+static jit_reloc_t
+bmxi(jit_state_t *_jit, int32_t cc, int32_t r0, jit_word_t i1)
+{
+  int32_t encoded = logical_immediate(i1);
+
+  if (encoded == -1) {
+    jit_gpr_t mask = get_temp_gpr(_jit);
+    movi(_jit, jit_gpr_regno(mask), i1);
+    TST(_jit, r0, jit_gpr_regno(mask));
+    unget_temp_gpr(_jit);
+  } else {
+    TSTI(_jit, r0, encoded);
+  }
+  return B_C(_jit, cc);
+}
+
+/* Branch if (r0 & r1) is non-zero: test-and-branch on BCC_NE.  */
+static jit_reloc_t
+bmsr(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  jit_reloc_t branch = bmxr(_jit, BCC_NE, r0, r1);
+  return branch;
+}
+
+/* Branch if (r0 & i1) is non-zero: test-and-branch on BCC_NE.  */
+static jit_reloc_t
+bmsi(jit_state_t *_jit, int32_t r0, jit_word_t i1)
+{
+  jit_reloc_t branch = bmxi(_jit, BCC_NE, r0, i1);
+  return branch;
+}
+
+/* Branch if (r0 & r1) is zero: test-and-branch on BCC_EQ.  */
+static jit_reloc_t
+bmcr(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  jit_reloc_t branch = bmxr(_jit, BCC_EQ, r0, r1);
+  return branch;
+}
+
+/* Branch if (r0 & i1) is zero: test-and-branch on BCC_EQ.  */
+static jit_reloc_t
+bmci(jit_state_t *_jit, int32_t r0, jit_word_t i1)
+{
+  jit_reloc_t branch = bmxi(_jit, BCC_EQ, r0, i1);
+  return branch;
+}
+
+/* Indirect jump to the address in r0 (emits BR).  */
+static void
+jmpr(jit_state_t *_jit, int32_t r0)
+{
+  BR(_jit, r0);
+}
+
+/* Indirect call to the address in r0 (emits BLR).  */
+static void
+callr(jit_state_t *_jit, int32_t r0)
+{
+  BLR(_jit, r0);
+}
+
+/* Emit i0 bytes of padding as NOP instructions, one per 4 bytes.
+   i0 is expected to be a multiple of 4; this is asserted after the
+   loop.  */
+static void
+nop(jit_state_t *_jit, int32_t i0)
+{
+  while (i0 > 0) {
+    NOP(_jit);
+    i0 -= 4;
+  }
+  ASSERT(i0 == 0);
+}
+
+/* r0 = r1 * i0.  The constant is loaded with movi into r0, or into a
+   temporary when r0 aliases r1, and multiplied with mulr.  */
+static void
+muli(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0)
+{
+  const int aliased = (r0 == r1);
+  int32_t scratch = r0;
+
+  if (aliased)
+    scratch = jit_gpr_regno(get_temp_gpr(_jit));
+  movi(_jit, scratch, i0);
+  mulr(_jit, r0, r1, scratch);
+  if (aliased)
+    unget_temp_gpr(_jit);
+}
+
+/* Double-width signed multiply of r2 by r3: the mulr result goes to r0
+   and the SMULH result to r1.
+
+   When r0 aliases one of the inputs, the mulr result is first computed
+   into a temporary so that both inputs are still intact for SMULH, and
+   moved into r0 afterwards.  */
+static void
+qmulr(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2, int32_t r3)
+{
+  const int aliased = (r0 == r2 || r0 == r3);
+  int32_t low = aliased ? jit_gpr_regno(get_temp_gpr(_jit)) : r0;
+
+  mulr(_jit, low, r2, r3);
+  SMULH(_jit, r1, r2, r3);
+  if (aliased) {
+    movr(_jit, r0, low);
+    unget_temp_gpr(_jit);
+  }
+}
+
+/* Immediate form of qmulr: i0 is loaded into a temporary register,
+   which is passed to qmulr as the second multiplicand.  */
+static void
+qmuli(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2, jit_word_t i0)
+{
+  int32_t imm = jit_gpr_regno(get_temp_gpr(_jit));
+
+  movi(_jit, imm, i0);
+  qmulr(_jit, r0, r1, r2, imm);
+  unget_temp_gpr(_jit);
+}
+
+/* Double-width unsigned multiply of r2 by r3: the mulr result goes to
+   r0 and the UMULH result to r1.
+
+   When r0 aliases one of the inputs, the mulr result is first computed
+   into a temporary so that both inputs are still intact for UMULH, and
+   moved into r0 afterwards.  */
+static void
+qmulr_u(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2, int32_t r3)
+{
+  const int aliased = (r0 == r2 || r0 == r3);
+  int32_t low = aliased ? jit_gpr_regno(get_temp_gpr(_jit)) : r0;
+
+  mulr(_jit, low, r2, r3);
+  UMULH(_jit, r1, r2, r3);
+  if (aliased) {
+    movr(_jit, r0, low);
+    unget_temp_gpr(_jit);
+  }
+}
+
+/* Immediate form of qmulr_u: i0 is loaded into a temporary register,
+   which is passed to qmulr_u as the second multiplicand.  */
+static void
+qmuli_u(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2, jit_word_t i0)
+{
+  int32_t imm = jit_gpr_regno(get_temp_gpr(_jit));
+
+  movi(_jit, imm, i0);
+  qmulr_u(_jit, r0, r1, r2, imm);
+  unget_temp_gpr(_jit);
+}
+
+/* r0 = r1 / i0 via divr.  The divisor is loaded with movi into r0, or
+   into a temporary when r0 aliases r1.  */
+static void
+divi(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0)
+{
+  const int aliased = (r0 == r1);
+  int32_t divisor = r0;
+
+  if (aliased)
+    divisor = jit_gpr_regno(get_temp_gpr(_jit));
+  movi(_jit, divisor, i0);
+  divr(_jit, r0, r1, divisor);
+  if (aliased)
+    unget_temp_gpr(_jit);
+}
+
+/* r0 = r1 / i0 via divr_u.  The divisor is loaded with movi into r0,
+   or into a temporary when r0 aliases r1.  */
+static void
+divi_u(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0)
+{
+  const int aliased = (r0 == r1);
+  int32_t divisor = r0;
+
+  if (aliased)
+    divisor = jit_gpr_regno(get_temp_gpr(_jit));
+  movi(_jit, divisor, i0);
+  divr_u(_jit, r0, r1, divisor);
+  if (aliased)
+    unget_temp_gpr(_jit);
+}
+
+/* Immediate form of qdivr: i0 is loaded into a temporary register,
+   which is passed to qdivr as its last operand.  */
+static void
+qdivi(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2, jit_word_t i0)
+{
+  int32_t imm = jit_gpr_regno(get_temp_gpr(_jit));
+
+  movi(_jit, imm, i0);
+  qdivr(_jit, r0, r1, r2, imm);
+  unget_temp_gpr(_jit);
+}
+
+/* Immediate form of qdivr_u: i0 is loaded into a temporary register,
+   which is passed to qdivr_u as its last operand.  */
+static void
+qdivi_u(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2, jit_word_t i0)
+{
+  int32_t imm = jit_gpr_regno(get_temp_gpr(_jit));
+
+  movi(_jit, imm, i0);
+  qdivr_u(_jit, r0, r1, r2, imm);
+  unget_temp_gpr(_jit);
+}
+
+/* r0 = r1 % r2, computed as r1 - r2 * (r1 / r2) using divr, mulr and
+   subr.  The intermediate quotient lives in r0 when r0 is distinct
+   from both inputs; otherwise a temporary register holds it so the
+   inputs stay readable until the final subtraction.  */
+static void
+remr(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+{
+  if (r0 != r1 && r0 != r2) {
+    divr(_jit, r0, r1, r2);
+    mulr(_jit, r0, r2, r0);
+    subr(_jit, r0, r1, r0);
+  } else {
+    int32_t quot = jit_gpr_regno(get_temp_gpr(_jit));
+    divr(_jit, quot, r1, r2);
+    mulr(_jit, quot, r2, quot);
+    subr(_jit, r0, r1, quot);
+    unget_temp_gpr(_jit);
+  }
+}
+
+/* r0 = r1 % i0: the divisor is loaded into a temporary register and
+   the work is delegated to remr.  */
+static void
+remi(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0)
+{
+  int32_t divisor = jit_gpr_regno(get_temp_gpr(_jit));
+
+  movi(_jit, divisor, i0);
+  remr(_jit, r0, r1, divisor);
+  unget_temp_gpr(_jit);
+}
+
+/* Unsigned r0 = r1 % r2, computed as r1 - r2 * (r1 / r2) using divr_u,
+   mulr and subr.  The intermediate quotient lives in r0 when r0 is
+   distinct from both inputs; otherwise a temporary register holds it
+   so the inputs stay readable until the final subtraction.  */
+static void
+remr_u(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+{
+  if (r0 != r1 && r0 != r2) {
+    divr_u(_jit, r0, r1, r2);
+    mulr(_jit, r0, r2, r0);
+    subr(_jit, r0, r1, r0);
+  } else {
+    int32_t quot = jit_gpr_regno(get_temp_gpr(_jit));
+    divr_u(_jit, quot, r1, r2);
+    mulr(_jit, quot, r2, quot);
+    subr(_jit, r0, r1, quot);
+    unget_temp_gpr(_jit);
+  }
+}
+
+/* Unsigned r0 = r1 % i0: the divisor is loaded into a temporary
+   register and the work is delegated to remr_u.  */
+static void
+remi_u(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0)
+{
+  int32_t divisor = jit_gpr_regno(get_temp_gpr(_jit));
+
+  movi(_jit, divisor, i0);
+  remr_u(_jit, r0, r1, divisor);
+  unget_temp_gpr(_jit);
+}
+
+/* r0 = r1 << i0.  A zero shift degenerates to a register move; any
+   other count must be in 1..63 (asserted) and is emitted as LSLI.  */
+static void
+lshi(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0)
+{
+  if (i0 == 0) {
+    movr(_jit, r0, r1);
+    return;
+  }
+  ASSERT(i0 > 0 && i0 < 64);
+  LSLI(_jit, r0, r1, i0);
+}
+
+/* r0 = r1 >> i0 using ASRI (arithmetic shift).  A zero shift
+   degenerates to a register move; any other count must be in 1..63
+   (asserted).  */
+static void
+rshi(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0)
+{
+  if (i0 == 0) {
+    movr(_jit, r0, r1);
+    return;
+  }
+  ASSERT(i0 > 0 && i0 < 64);
+  ASRI(_jit, r0, r1, i0);
+}
+
+/* r0 = r1 >> i0 using LSRI (logical shift).  A zero shift degenerates
+   to a register move; any other count must be in 1..63 (asserted).  */
+static void
+rshi_u(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0)
+{
+  if (i0 == 0) {
+    movr(_jit, r0, r1);
+    return;
+  }
+  ASSERT(i0 > 0 && i0 < 64);
+  LSRI(_jit, r0, r1, i0);
+}
+
+/* r0 = r1 & i0.
+   - i0 == 0  : result is the constant 0;
+   - i0 == -1 : result is r1 unchanged;
+   - mask encodable by logical_immediate(): single ANDI;
+   - otherwise the mask is loaded with movi (into a temporary when r0
+     aliases r1) and combined with andr.  */
+static void
+andi(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0)
+{
+  if (i0 == 0) {
+    movi(_jit, r0, 0);
+    return;
+  }
+  if (i0 == -1) {
+    movr(_jit, r0, r1);
+    return;
+  }
+
+  int32_t encoded = logical_immediate(i0);
+  if (encoded != -1) {
+    ANDI(_jit, r0, r1, encoded);
+    return;
+  }
+
+  const int aliased = (r0 == r1);
+  int32_t mask = aliased ? jit_gpr_regno(get_temp_gpr(_jit)) : r0;
+  movi(_jit, mask, i0);
+  andr(_jit, r0, r1, mask);
+  if (aliased)
+    unget_temp_gpr(_jit);
+}
+
+/* r0 = r1 | i0.
+   - i0 == 0  : result is r1 unchanged;
+   - i0 == -1 : result is the constant -1;
+   - mask encodable by logical_immediate(): single ORRI;
+   - otherwise the mask is loaded with movi (into a temporary when r0
+     aliases r1) and combined with orr.  */
+static void
+ori(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0)
+{
+  if (i0 == 0) {
+    movr(_jit, r0, r1);
+    return;
+  }
+  if (i0 == -1) {
+    movi(_jit, r0, -1);
+    return;
+  }
+
+  int32_t encoded = logical_immediate(i0);
+  if (encoded != -1) {
+    ORRI(_jit, r0, r1, encoded);
+    return;
+  }
+
+  const int aliased = (r0 == r1);
+  int32_t mask = aliased ? jit_gpr_regno(get_temp_gpr(_jit)) : r0;
+  movi(_jit, mask, i0);
+  orr(_jit, r0, r1, mask);
+  if (aliased)
+    unget_temp_gpr(_jit);
+}
+
+/* r0 = r1 ^ i0.
+   - i0 == 0  : result is r1 unchanged;
+   - i0 == -1 : delegated to comr(r0, r1);
+   - mask encodable by logical_immediate(): single EORI;
+   - otherwise the mask is loaded with movi (into a temporary when r0
+     aliases r1) and combined with xorr.  */
+static void
+xori(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0)
+{
+  if (i0 == 0) {
+    movr(_jit, r0, r1);
+    return;
+  }
+  if (i0 == -1) {
+    comr(_jit, r0, r1);
+    return;
+  }
+
+  int32_t encoded = logical_immediate(i0);
+  if (encoded != -1) {
+    EORI(_jit, r0, r1, encoded);
+    return;
+  }
+
+  const int aliased = (r0 == r1);
+  int32_t mask = aliased ? jit_gpr_regno(get_temp_gpr(_jit)) : r0;
+  movi(_jit, mask, i0);
+  xorr(_jit, r0, r1, mask);
+  if (aliased)
+    unget_temp_gpr(_jit);
+}
+
+/* Byte-swap the low 16 bits of r1 into r0: do the full-register swap
+   with bswapr_ul, then shift the result right (unsigned) by 48 so the
+   swapped halfword lands in the low bits.  */
+static void
+bswapr_us(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  const jit_word_t drop_bits = 48;
+
+  bswapr_ul(_jit, r0, r1);
+  rshi_u(_jit, r0, r0, drop_bits);
+}
+
+/* Byte-swap the low 32 bits of r1 into r0: do the full-register swap
+   with bswapr_ul, then shift the result right (unsigned) by 32 so the
+   swapped word lands in the low bits.  */
+static void
+bswapr_ui(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  const jit_word_t drop_bits = 32;
+
+  bswapr_ul(_jit, r0, r1);
+  rshi_u(_jit, r0, r0, drop_bits);
+}
+
+/* Load through ldr_c from the absolute address i0.  r0 first receives
+   the address and is then overwritten by the loaded value.  */
+static void
+ldi_c(jit_state_t *_jit, int32_t r0, jit_word_t i0)
+{
+  const int32_t addr = r0;   /* destination doubles as address register */
+
+  movi(_jit, addr, i0);
+  ldr_c(_jit, r0, addr);
+}
+
+/* Load a byte from the address in r1 into r0 with LDRBI at offset 0.
+   No separate extr_uc is emitted afterwards (the original kept that
+   call compiled out).  */
+static void
+ldr_uc(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  const jit_word_t no_offset = 0;
+
+  LDRBI(_jit, r0, r1, no_offset);
+}
+
+/* Load through ldr_uc from the absolute address i0.  r0 first receives
+   the address and is then overwritten by the loaded value.  */
+static void
+ldi_uc(jit_state_t *_jit, int32_t r0, jit_word_t i0)
+{
+  const int32_t addr = r0;   /* destination doubles as address register */
+
+  movi(_jit, addr, i0);
+  ldr_uc(_jit, r0, addr);
+}
+
+/* Load through ldr_s from the absolute address i0.  r0 first receives
+   the address and is then overwritten by the loaded value.  */
+static void
+ldi_s(jit_state_t *_jit, int32_t r0, jit_word_t i0)
+{
+  const int32_t addr = r0;   /* destination doubles as address register */
+
+  movi(_jit, addr, i0);
+  ldr_s(_jit, r0, addr);
+}
+
+/* Load a halfword from the address in r1 into r0 with LDRHI at offset
+   0.  No separate extr_us is emitted afterwards (the original kept
+   that call compiled out).  */
+static void
+ldr_us(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  const jit_word_t no_offset = 0;
+
+  LDRHI(_jit, r0, r1, no_offset);
+}
+
+/* Load through ldr_us from the absolute address i0.  r0 first receives
+   the address and is then overwritten by the loaded value.  */
+static void
+ldi_us(jit_state_t *_jit, int32_t r0, jit_word_t i0)
+{
+  const int32_t addr = r0;   /* destination doubles as address register */
+
+  movi(_jit, addr, i0);
+  ldr_us(_jit, r0, addr);
+}
+
+/* Load through ldr_i from the absolute address i0.  r0 first receives
+   the address and is then overwritten by the loaded value.  */
+static void
+ldi_i(jit_state_t *_jit, int32_t r0, jit_word_t i0)
+{
+  const int32_t addr = r0;   /* destination doubles as address register */
+
+  movi(_jit, addr, i0);
+  ldr_i(_jit, r0, addr);
+}
+
+/* Load a 32-bit word from the address in r1 into r0 with LDRWI at
+   offset 0.  No separate extr_ui is emitted afterwards (the original
+   kept that call compiled out).  */
+static void
+ldr_ui(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  const jit_word_t no_offset = 0;
+
+  LDRWI(_jit, r0, r1, no_offset);
+}
+
+/* Load through ldr_ui from the absolute address i0.  r0 first receives
+   the address and is then overwritten by the loaded value.  */
+static void
+ldi_ui(jit_state_t *_jit, int32_t r0, jit_word_t i0)
+{
+  const int32_t addr = r0;   /* destination doubles as address register */
+
+  movi(_jit, addr, i0);
+  ldr_ui(_jit, r0, addr);
+}
+
+/* Load from the address in r1 into r0: emits LDRI with a zero
+   immediate offset.  */
+static void
+ldr_l(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+ LDRI(_jit, r0, r1, 0);
+}
+
+/* Load through ldr_l from the absolute address i0.  r0 first receives
+   the address and is then overwritten by the loaded value.  */
+static void
+ldi_l(jit_state_t *_jit, int32_t r0, jit_word_t i0)
+{
+  const int32_t addr = r0;   /* destination doubles as address register */
+
+  movi(_jit, addr, i0);
+  ldr_l(_jit, r0, addr);
+}
+
+/* Register-indexed signed byte load: LDRSB into r0 using r1 and r2 as
+   the address registers, followed by an explicit extr_c on r0.
+   NOTE(review): whether the trailing extr_c is required depends on the
+   LDRSB encoding defined elsewhere in this file -- confirm before
+   removing it.  */
+static void
+ldxr_c(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+{
+  const int32_t dst = r0;
+
+  LDRSB(_jit, dst, r1, r2);
+  extr_c(_jit, dst, dst);
+}
+
+/* Signed byte load from r1 + i0 into r0.
+   - 0 <= i0 <= 4095 : LDRSBI with the offset as immediate;
+   - -256 < i0 < 0   : LDURSB with the offset as immediate;
+   - otherwise the offset is loaded with movi (into a temporary when
+     r0 aliases r1, so the base survives) and LDRSB is used.
+   In every case an extr_c on r0 follows.  */
+static void
+ldxi_c(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0)
+{
+  if (0 <= i0 && i0 <= 4095) {
+    LDRSBI(_jit, r0, r1, i0);
+  } else if (-256 < i0 && i0 < 0) {
+    LDURSB(_jit, r0, r1, i0);
+  } else {
+    const int aliased = (r0 == r1);
+    int32_t index = aliased ? jit_gpr_regno(get_temp_gpr(_jit)) : r0;
+    movi(_jit, index, i0);
+    LDRSB(_jit, r0, r1, index);
+    if (aliased)
+      unget_temp_gpr(_jit);
+  }
+  extr_c(_jit, r0, r0);
+}
+
+/* Register-indexed byte load: LDRB into r0 using r1 and r2 as the
+   address registers.  No separate extr_uc is emitted afterwards (the
+   original kept that call compiled out).  */
+static void
+ldxr_uc(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+{
+  const int32_t dst = r0;
+
+  LDRB(_jit, dst, r1, r2);
+}
+
+/* Byte load from r1 + i0 into r0.
+   - 0 <= i0 <= 4095 : LDRBI with the offset as immediate;
+   - -256 < i0 < 0   : LDURB with the offset as immediate;
+   - otherwise the full address is formed with addi (in a temporary
+     when r0 aliases r1) and loaded through ldr_uc.
+   No separate extr_uc is emitted afterwards (the original kept that
+   call compiled out).  */
+static void
+ldxi_uc(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0)
+{
+  if (0 <= i0 && i0 <= 4095) {
+    LDRBI(_jit, r0, r1, i0);
+    return;
+  }
+  if (-256 < i0 && i0 < 0) {
+    LDURB(_jit, r0, r1, i0);
+    return;
+  }
+
+  const int aliased = (r0 == r1);
+  int32_t addr = aliased ? jit_gpr_regno(get_temp_gpr(_jit)) : r0;
+  addi(_jit, addr, r1, i0);
+  ldr_uc(_jit, r0, addr);
+  if (aliased)
+    unget_temp_gpr(_jit);
+}
+
+/* Signed halfword load from r1 + i0 into r0.  i0 must be even
+   (asserted).
+   - 0 <= i0 <= 8191 : LDRSHI with the offset divided by 2;
+   - -256 < i0 < 0   : LDURSH with the byte offset;
+   - otherwise the offset is loaded with movi (into a temporary when
+     r0 aliases r1) and LDRSH is used.  */
+static void
+ldxi_s(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0)
+{
+  ASSERT(!(i0 & 1));
+  if (0 <= i0 && i0 <= 8191) {
+    LDRSHI(_jit, r0, r1, i0 >> 1);
+    return;
+  }
+  if (-256 < i0 && i0 < 0) {
+    LDURSH(_jit, r0, r1, i0);
+    return;
+  }
+
+  const int aliased = (r0 == r1);
+  int32_t index = aliased ? jit_gpr_regno(get_temp_gpr(_jit)) : r0;
+  movi(_jit, index, i0);
+  LDRSH(_jit, r0, r1, index);
+  if (aliased)
+    unget_temp_gpr(_jit);
+}
+
+/* Register-indexed halfword load: LDRH into r0 using r1 and r2 as the
+   address registers.  No separate extr_us is emitted afterwards (the
+   original kept that call compiled out).  */
+static void
+ldxr_us(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+{
+  const int32_t dst = r0;
+
+  LDRH(_jit, dst, r1, r2);
+}
+
+/* Halfword load from r1 + i0 into r0.  i0 must be even (asserted).
+   - 0 <= i0 <= 8191 : LDRHI with the offset divided by 2;
+   - -256 < i0 < 0   : LDURH with the byte offset;
+   - otherwise the offset is loaded with movi (into a temporary when
+     r0 aliases r1) and LDRH is used.
+   No separate extr_us is emitted afterwards (the original kept that
+   call compiled out).  */
+static void
+ldxi_us(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0)
+{
+  ASSERT(!(i0 & 1));
+  if (0 <= i0 && i0 <= 8191) {
+    LDRHI(_jit, r0, r1, i0 >> 1);
+    return;
+  }
+  if (-256 < i0 && i0 < 0) {
+    LDURH(_jit, r0, r1, i0);
+    return;
+  }
+
+  const int aliased = (r0 == r1);
+  int32_t index = aliased ? jit_gpr_regno(get_temp_gpr(_jit)) : r0;
+  movi(_jit, index, i0);
+  LDRH(_jit, r0, r1, index);
+  if (aliased)
+    unget_temp_gpr(_jit);
+}
+
+/* Signed 32-bit load from r1 + i0 into r0.  i0 must be a multiple of 4
+   (asserted).
+   - 0 <= i0 <= 16383 : LDRSWI with the offset divided by 4;
+   - -256 < i0 < 0    : LDURSW with the byte offset;
+   - otherwise the full address is formed with addi (in a temporary
+     when r0 aliases r1) and loaded through ldr_i.  */
+static void
+ldxi_i(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0)
+{
+  ASSERT(!(i0 & 3));
+  if (0 <= i0 && i0 <= 16383) {
+    LDRSWI(_jit, r0, r1, i0 >> 2);
+    return;
+  }
+  if (-256 < i0 && i0 < 0) {
+    LDURSW(_jit, r0, r1, i0);
+    return;
+  }
+
+  const int aliased = (r0 == r1);
+  int32_t addr = aliased ? jit_gpr_regno(get_temp_gpr(_jit)) : r0;
+  addi(_jit, addr, r1, i0);
+  ldr_i(_jit, r0, addr);
+  if (aliased)
+    unget_temp_gpr(_jit);
+}
+
+/* Register-indexed 32-bit load: LDRW into r0 using r1 and r2 as the
+   address registers.  No separate extr_ui is emitted afterwards (the
+   original kept that call compiled out).  */
+static void
+ldxr_ui(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+{
+  const int32_t dst = r0;
+
+  LDRW(_jit, dst, r1, r2);
+}
+
+/* 32-bit load from r1 + i0 into r0.  i0 must be a multiple of 4
+   (asserted).
+   - 0 <= i0 <= 16383 : LDRWI with the offset divided by 4;
+   - -256 < i0 < 0    : LDURW with the byte offset;
+   - otherwise the offset is loaded with movi (into a temporary when
+     r0 aliases r1) and LDRW is used.
+   No separate extr_ui is emitted afterwards (the original kept that
+   call compiled out).  */
+static void
+ldxi_ui(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0)
+{
+  ASSERT(!(i0 & 3));
+  if (0 <= i0 && i0 <= 16383) {
+    LDRWI(_jit, r0, r1, i0 >> 2);
+    return;
+  }
+  if (-256 < i0 && i0 < 0) {
+    LDURW(_jit, r0, r1, i0);
+    return;
+  }
+
+  const int aliased = (r0 == r1);
+  int32_t index = aliased ? jit_gpr_regno(get_temp_gpr(_jit)) : r0;
+  movi(_jit, index, i0);
+  LDRW(_jit, r0, r1, index);
+  if (aliased)
+    unget_temp_gpr(_jit);
+}
+
+/* Load from r1 + i0 into r0 through the LDRI/LDUR encoders.  i0 must
+   be a multiple of 8 (asserted).
+   - 0 <= i0 <= 32767 : LDRI with the offset divided by 8;
+   - -256 < i0 < 0    : LDUR with the byte offset;
+   - otherwise the full address is formed with addi (in a temporary
+     when r0 aliases r1) and loaded through ldr_l.  */
+static void
+ldxi_l(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0)
+{
+  ASSERT(!(i0 & 7));
+  if (0 <= i0 && i0 <= 32767) {
+    LDRI(_jit, r0, r1, i0 >> 3);
+    return;
+  }
+  if (-256 < i0 && i0 < 0) {
+    LDUR(_jit, r0, r1, i0);
+    return;
+  }
+
+  const int aliased = (r0 == r1);
+  int32_t addr = aliased ? jit_gpr_regno(get_temp_gpr(_jit)) : r0;
+  addi(_jit, addr, r1, i0);
+  ldr_l(_jit, r0, addr);
+  if (aliased)
+    unget_temp_gpr(_jit);
+}
+
+/* Store r0 through str_c at the absolute address i0.  The address is
+   loaded into a temporary register for the duration of the store.  */
+static void
+sti_c(jit_state_t *_jit, jit_word_t i0, int32_t r0)
+{
+  int32_t addr = jit_gpr_regno(get_temp_gpr(_jit));
+
+  movi(_jit, addr, i0);
+  str_c(_jit, addr, r0);
+  unget_temp_gpr(_jit);
+}
+
+/* Store r0 through str_s at the absolute address i0.  The address is
+   loaded into a temporary register for the duration of the store.  */
+static void
+sti_s(jit_state_t *_jit, jit_word_t i0, int32_t r0)
+{
+  int32_t addr = jit_gpr_regno(get_temp_gpr(_jit));
+
+  movi(_jit, addr, i0);
+  str_s(_jit, addr, r0);
+  unget_temp_gpr(_jit);
+}
+
+/* Store r0 through str_i at the absolute address i0.  The address is
+   loaded into a temporary register for the duration of the store.  */
+static void
+sti_i(jit_state_t *_jit, jit_word_t i0, int32_t r0)
+{
+  int32_t addr = jit_gpr_regno(get_temp_gpr(_jit));
+
+  movi(_jit, addr, i0);
+  str_i(_jit, addr, r0);
+  unget_temp_gpr(_jit);
+}
+
+/* Store r0 through str_l at the absolute address i0.  The address is
+   loaded into a temporary register for the duration of the store.  */
+static void
+sti_l(jit_state_t *_jit, jit_word_t i0, int32_t r0)
+{
+  int32_t addr = jit_gpr_regno(get_temp_gpr(_jit));
+
+  movi(_jit, addr, i0);
+  str_l(_jit, addr, r0);
+  unget_temp_gpr(_jit);
+}
+
+/* Byte store of r1 at r0 + i0.
+   - 0 <= i0 <= 4095 : STRBI with the offset as immediate;
+   - -256 < i0 < 0   : STURB with the offset as immediate;
+   - otherwise the address is formed with addi in a temporary register
+     and the store goes through str_c.  */
+static void
+stxi_c(jit_state_t *_jit, jit_word_t i0, int32_t r0, int32_t r1)
+{
+  if (0 <= i0 && i0 <= 4095) {
+    STRBI(_jit, r1, r0, i0);
+    return;
+  }
+  if (-256 < i0 && i0 < 0) {
+    STURB(_jit, r1, r0, i0);
+    return;
+  }
+
+  int32_t addr = jit_gpr_regno(get_temp_gpr(_jit));
+  addi(_jit, addr, r0, i0);
+  str_c(_jit, addr, r1);
+  unget_temp_gpr(_jit);
+}
+
+/* Halfword store of r1 at r0 + i0.  i0 must be even (asserted).
+   - 0 <= i0 <= 8191 : STRHI with the offset divided by 2;
+   - -256 < i0 < 0   : STURH with the byte offset;
+   - otherwise the address is formed with addi in a temporary register
+     and the store goes through str_s.  */
+static void
+stxi_s(jit_state_t *_jit, jit_word_t i0, int32_t r0, int32_t r1)
+{
+  ASSERT(!(i0 & 1));
+  if (0 <= i0 && i0 <= 8191) {
+    STRHI(_jit, r1, r0, i0 >> 1);
+    return;
+  }
+  if (-256 < i0 && i0 < 0) {
+    STURH(_jit, r1, r0, i0);
+    return;
+  }
+
+  int32_t addr = jit_gpr_regno(get_temp_gpr(_jit));
+  addi(_jit, addr, r0, i0);
+  str_s(_jit, addr, r1);
+  unget_temp_gpr(_jit);
+}
+
+/* 32-bit store of r1 at r0 + i0.  i0 must be a multiple of 4
+   (asserted).
+   - 0 <= i0 <= 16383 : STRWI with the offset divided by 4;
+   - -256 < i0 < 0    : STURW with the byte offset;
+   - otherwise the address is formed with addi in a temporary register
+     and the store goes through str_i.  */
+static void
+stxi_i(jit_state_t *_jit, jit_word_t i0, int32_t r0, int32_t r1)
+{
+  ASSERT(!(i0 & 3));
+  if (0 <= i0 && i0 <= 16383) {
+    STRWI(_jit, r1, r0, i0 >> 2);
+    return;
+  }
+  if (-256 < i0 && i0 < 0) {
+    STURW(_jit, r1, r0, i0);
+    return;
+  }
+
+  int32_t addr = jit_gpr_regno(get_temp_gpr(_jit));
+  addi(_jit, addr, r0, i0);
+  str_i(_jit, addr, r1);
+  unget_temp_gpr(_jit);
+}
+
+/* Store of r1 at r0 + i0 through the STRI/STUR encoders.  i0 must be a
+   multiple of 8 (asserted).
+   - 0 <= i0 <= 32767 : STRI with the offset divided by 8;
+   - -256 < i0 < 0    : STUR with the byte offset;
+   - otherwise the address is formed with addi in a temporary register
+     and the store goes through str_l.  */
+static void
+stxi_l(jit_state_t *_jit, jit_word_t i0, int32_t r0, int32_t r1)
+{
+  ASSERT(!(i0 & 7));
+  if (0 <= i0 && i0 <= 32767) {
+    STRI(_jit, r1, r0, i0 >> 3);
+    return;
+  }
+  if (-256 < i0 && i0 < 0) {
+    STUR(_jit, r1, r0, i0);
+    return;
+  }
+
+  int32_t addr = jit_gpr_regno(get_temp_gpr(_jit));
+  addi(_jit, addr, r0, i0);
+  str_l(_jit, addr, r1);
+  unget_temp_gpr(_jit);
+}
+
+/* Emit a patchable load of an address into r0 by delegating to
+   movi_from_pool; its relocation is returned to the caller.  */
+static jit_reloc_t
+mov_addr(jit_state_t *_jit, int32_t r0)
+{
+  jit_reloc_t reloc = movi_from_pool(_jit, r0);
+  return reloc;
+}
+
+/* Branch if r0 == i1.  A comparison against zero uses the dedicated
+   CBZ form; everything else goes through bcci with BCC_EQ.  */
+static jit_reloc_t
+beqi(jit_state_t *_jit, int32_t r0, jit_word_t i1)
+{
+  if (i1 != 0)
+    return bcci(_jit, BCC_EQ, r0, i1);
+  return CBZ(_jit, r0);
+}
+
+/* Branch if r0 != i1.  A comparison against zero uses the dedicated
+   CBNZ form; everything else goes through bcci with BCC_NE.  */
+static jit_reloc_t
+bnei(jit_state_t *_jit, int32_t r0, jit_word_t i1)
+{
+  if (i1 != 0)
+    return bcci(_jit, BCC_NE, r0, i1);
+  return CBNZ(_jit, r0);
+}
+
+/* Emit an unconditional branch (B) and return its relocation so the
+   target can be patched in later.  */
+static jit_reloc_t
+jmp(jit_state_t *_jit)
+{
+  jit_reloc_t reloc = B(_jit);
+  return reloc;
+}
+
+/* Unconditional jump to the absolute address i0: emit the branch with
+   jmp() and immediately patch it to point at i0.  */
+static void
+jmpi(jit_state_t *_jit, jit_word_t i0)
+{
+  jit_reloc_t reloc = jmp(_jit);
+
+  jit_patch_there(_jit, reloc, (void*)i0);
+}
+
+/* Emit a branch-with-link (BL) and return its relocation so the target
+   can be patched in later.  */
+static jit_reloc_t
+call(jit_state_t *_jit)
+{
+  jit_reloc_t reloc = BL(_jit);
+  return reloc;
+}
+
+/* Call the absolute address i0: emit the branch-with-link with call()
+   and immediately patch it to point at i0.  */
+static void
+calli(jit_state_t *_jit, jit_word_t i0)
+{
+  jit_reloc_t reloc = call(_jit);
+
+  jit_patch_there(_jit, reloc, (void*)i0);
+}
+
+/* Jump to i0 while recording a return address; on this backend that is
+   exactly what calli() emits.  */
+static void
+jmpi_with_link(jit_state_t *_jit, jit_word_t i0)
+{
+  calli(_jit, i0);
+}
+
+/* Intentionally emits nothing on this backend.
+   NOTE(review): presumably a hook for targets that must save the
+   return address explicitly -- confirm against the other backends.  */
+static void
+push_link_register(jit_state_t *_jit)
+{
+}
+
+/* Intentionally emits nothing on this backend; counterpart of
+   push_link_register, which is also empty here.  */
+static void
+pop_link_register(jit_state_t *_jit)
+{
+}
+
+/* Emit a RET instruction.  */
+static void
+ret(jit_state_t *_jit)
+{
+ RET(_jit);
+}
+
+/* Return the value held in register r: copy it into _X0 and emit the
+   return.  */
+static void
+retr(jit_state_t *_jit, int32_t r)
+{
+  const int32_t result_reg = jit_gpr_regno(_X0);
+
+  movr(_jit, result_reg, r);
+  ret(_jit);
+}
+
+/* Return the constant i: load it into _X0 and emit the return.  Note
+   that the parameter is a 32-bit int, so only values representable in
+   int32_t can be returned through this helper.  */
+static void
+reti(jit_state_t *_jit, int32_t i)
+{
+  const int32_t result_reg = jit_gpr_regno(_X0);
+
+  movi(_jit, result_reg, i);
+  ret(_jit);
+}
+
+/* Fetch a call's result from _X0 into r0, narrowing it with extr_c.  */
+static void
+retval_c(jit_state_t *_jit, int32_t r0)
+{
+  const int32_t result_reg = jit_gpr_regno(_X0);
+
+  extr_c(_jit, r0, result_reg);
+}
+
+/* Fetch a call's result from _X0 into r0, narrowing it with extr_uc.  */
+static void
+retval_uc(jit_state_t *_jit, int32_t r0)
+{
+  const int32_t result_reg = jit_gpr_regno(_X0);
+
+  extr_uc(_jit, r0, result_reg);
+}
+
+/* Fetch a call's result from _X0 into r0, narrowing it with extr_s.  */
+static void
+retval_s(jit_state_t *_jit, int32_t r0)
+{
+  const int32_t result_reg = jit_gpr_regno(_X0);
+
+  extr_s(_jit, r0, result_reg);
+}
+
+/* Fetch a call's result from _X0 into r0, narrowing it with extr_us.  */
+static void
+retval_us(jit_state_t *_jit, int32_t r0)
+{
+  const int32_t result_reg = jit_gpr_regno(_X0);
+
+  extr_us(_jit, r0, result_reg);
+}
+
+/* Fetch a call's result from _X0 into r0, narrowing it with extr_i.  */
+static void
+retval_i(jit_state_t *_jit, int32_t r0)
+{
+  const int32_t result_reg = jit_gpr_regno(_X0);
+
+  extr_i(_jit, r0, result_reg);
+}
+
+/* Fetch a call's result from _X0 into r0, narrowing it with extr_ui.  */
+static void
+retval_ui(jit_state_t *_jit, int32_t r0)
+{
+  const int32_t result_reg = jit_gpr_regno(_X0);
+
+  extr_ui(_jit, r0, result_reg);
+}
+
+/* Fetch a call's result from _X0 into r0 with a plain register move.  */
+static void
+retval_l(jit_state_t *_jit, int32_t r0)
+{
+  const int32_t result_reg = jit_gpr_regno(_X0);
+
+  movr(_jit, r0, result_reg);
+}
+
+/* Emit a bare unconditional branch (A64_B with an empty 26-bit offset
+   field) directly with emit_u32, and return the address of the emitted
+   instruction word so patch_jmp_without_veneer() can fill in the
+   offset later.  */
+static uint32_t*
+jmp_without_veneer(jit_state_t *_jit)
+{
+  uint32_t *const insn = _jit->pc.ui;
+
+  emit_u32(_jit, encode_o26(A64_B));
+  return insn;
+}
+
+/* Resolve a branch emitted by jmp_without_veneer(): patch the
+   instruction at `loc' so that it targets the current emission point.
+   The offset handed to patch_jmp_offset is the distance from `loc' to
+   the current pc, counted in 32-bit instruction words.  */
+static void
+patch_jmp_without_veneer(jit_state_t *_jit, uint32_t *loc)
+{
+ patch_jmp_offset(loc, _jit->pc.ui - loc);
+}
+
+/* Atomic load: emits LDAR reading from the address in `loc' into
+   `dst'.  */
+static void
+ldr_atomic(jit_state_t *_jit, int32_t dst, int32_t loc)
+{
+ LDAR(_jit, dst, loc);
+}
+
+/* Atomic store: emits STLR writing `val' to the address in `loc'.  */
+static void
+str_atomic(jit_state_t *_jit, int32_t loc, int32_t val)
+{
+ STLR(_jit, val, loc);
+}
+
+/* Atomic exchange: store `val' at the address in `loc' and leave the
+   previous contents in `dst'.
+
+   Emits an LDAXR/STLXR retry loop: the STLXR status is written to a
+   temporary register and the loop branches back to the LDAXR while that
+   status is non-zero.  If `dst' aliases `val' or `loc', the old value
+   is read into a second temporary so the inputs stay intact across
+   retries, and is moved into `dst' once the store has succeeded.  */
+static void
+swap_atomic(jit_state_t *_jit, int32_t dst, int32_t loc, int32_t val)
+{
+  /* Status register: acquired first, released last.  */
+  int32_t status = jit_gpr_regno(get_temp_gpr(_jit));
+  const int aliased = (dst == val || dst == loc);
+  int32_t old = aliased ? jit_gpr_regno(get_temp_gpr(_jit)) : dst;
+
+  void *retry = jit_address(_jit);
+  LDAXR(_jit, old, loc);
+  STLXR(_jit, val, loc, status);
+  jit_patch_there(_jit, bnei(_jit, status, 0), retry);
+  movr(_jit, dst, old);
+
+  if (aliased)
+    unget_temp_gpr(_jit);
+  unget_temp_gpr(_jit);
+}
+
+/* Atomic compare-and-swap on the address in `loc': if the current
+   contents equal `expected', store `desired'.  Either way `dst'
+   receives the value that was read.
+
+   Emitted sequence:
+     retry:  LDAXR  seen, [loc]
+             branch to done if seen != expected
+             STLXR  status, desired, [loc]
+             branch to retry if status != 0
+     done:   dst = seen
+
+   `seen' is a temporary when `dst' aliases any input, so that the
+   inputs are still valid if the loop has to retry.  The status
+   temporary is only held around the STLXR and its retry branch.  */
+static void
+cas_atomic(jit_state_t *_jit, int32_t dst, int32_t loc, int32_t expected,
+           int32_t desired)
+{
+  const int aliased = (dst == loc || dst == expected || dst == desired);
+  int32_t seen = aliased ? jit_gpr_regno(get_temp_gpr(_jit)) : dst;
+
+  void *retry = jit_address(_jit);
+  LDAXR(_jit, seen, loc);
+  jit_reloc_t mismatch = bner(_jit, seen, expected);
+
+  int status = jit_gpr_regno(get_temp_gpr(_jit));
+  STLXR(_jit, desired, loc, status);
+  jit_patch_there(_jit, bnei(_jit, status, 0), retry);
+  unget_temp_gpr(_jit);
+
+  jit_patch_here(_jit, mismatch);
+  movr(_jit, dst, seen);
+
+  if (aliased)
+    unget_temp_gpr(_jit);
+}
+
+/* Emit a BRK instruction.  */
+static void
+breakpoint(jit_state_t *_jit)
+{
+ BRK(_jit);
+}
diff --git a/deps/lightening/lightening/aarch64-fpu.c b/deps/lightening/lightening/aarch64-fpu.c
new file mode 100644
index 0000000..6297342
--- /dev/null
+++ b/deps/lightening/lightening/aarch64-fpu.c
@@ -0,0 +1,810 @@
+/*
+ * Copyright (C) 2013-2019 Free Software Foundation, Inc.
+ *
+ * This file is part of GNU lightning.
+ *
+ * GNU lightning is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ *
+ * GNU lightning is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+ * License for more details.
+ *
+ * Authors:
+ * Paulo Cesar Pereira de Andrade
+ */
+
+/* Encode and emit a three-register FP instruction: opcode OP combined with
+   the size, Rd, Rn and Rm fields. */
+static void
+osvvv(jit_state_t *_jit, int32_t Op, int32_t Sz, int32_t Rd, int32_t Rn,
+      int32_t Rm)
+{
+  uint32_t word = write_size_bitfield((uint32_t)Op, Sz);
+  word = write_Rd_bitfield(word, Rd);
+  word = write_Rn_bitfield(word, Rn);
+  word = write_Rm_bitfield(word, Rm);
+  emit_u32_with_pool(_jit, word);
+}
+
+/* Encode and emit a two-register FP instruction: opcode OP combined with
+   the size, Rd and Rn fields (the Rm field is left as in OP). */
+static void
+osvv_(jit_state_t *_jit, int32_t Op, int32_t Sz, int32_t Rd, int32_t Rn)
+{
+  uint32_t word = write_size_bitfield((uint32_t)Op, Sz);
+  word = write_Rd_bitfield(word, Rd);
+  word = write_Rn_bitfield(word, Rn);
+  emit_u32_with_pool(_jit, word);
+}
+
+/* Encode and emit an FP instruction with no destination register: opcode
+   OP combined with the size, Rn and Rm fields (used for comparisons). */
+static void
+os_vv(jit_state_t *_jit, int32_t Op, int32_t Sz, int32_t Rn, int32_t Rm)
+{
+  uint32_t word = write_size_bitfield((uint32_t)Op, Sz);
+  word = write_Rn_bitfield(word, Rn);
+  word = write_Rm_bitfield(word, Rm);
+  emit_u32_with_pool(_jit, word);
+}
+
+/* Base opcode words for the scalar floating-point instructions emitted
+   below.  The precision ("size") and register fields are zero here and
+   are filled in by osvvv()/osvv_()/os_vv(). */
+#define A64_SCVTF 0x1e220000
+#define A64_FMOVWV 0x1e260000
+#define A64_FMOVVW 0x1e270000
+#define A64_FMOVXV 0x9e260000
+#define A64_FMOVVX 0x9e270000
+#define A64_FCVTZS 0x1e380000
+#define A64_FCMPE 0x1e202010
+#define A64_FMOV 0x1e204000
+#define A64_FABS 0x1e20c000
+#define A64_FNEG 0x1e214000
+#define A64_FSQRT 0x1e21c000
+#define A64_FCVTS 0x1e224000
+#define A64_FCVTD 0x1e22c000
+#define A64_FMUL 0x1e200800
+#define A64_FDIV 0x1e201800
+#define A64_FADD 0x1e202800
+#define A64_FSUB 0x1e203800
+
+/* Thin per-instruction emitters.  Each passes a base opcode and a size
+   value (0 in the ...S variants, 1 in the ...D variants; in the AArch64
+   encoding this field selects single vs. double precision) to one of the
+   generic encoders above. */
+
+/* FCMPE, single precision: compare Rn with Rm and set the flags. */
+static void
+FCMPES(jit_state_t *_jit, int32_t Rn, int32_t Rm)
+{
+ os_vv(_jit, A64_FCMPE, 0, Rn, Rm);
+}
+
+/* FCMPE, double precision. */
+static void
+FCMPED(jit_state_t *_jit, int32_t Rn, int32_t Rm)
+{
+ os_vv(_jit, A64_FCMPE, 1, Rn, Rm);
+}
+
+/* FMOV between FP registers, single precision. */
+static void
+FMOVS(jit_state_t *_jit, int32_t Rd, int32_t Rn)
+{
+ osvv_(_jit, A64_FMOV, 0, Rd, Rn);
+}
+
+/* FMOV between FP registers, double precision. */
+static void
+FMOVD(jit_state_t *_jit, int32_t Rd, int32_t Rn)
+{
+ osvv_(_jit, A64_FMOV, 1, Rd, Rn);
+}
+
+/* FMOV to a 32-bit general register (Rd) from an FP register (Rn). */
+static void
+FMOVWS(jit_state_t *_jit, int32_t Rd, int32_t Rn)
+{
+ osvv_(_jit, A64_FMOVWV, 0, Rd, Rn);
+}
+
+/* FMOV to an FP register (Rd) from a 32-bit general register (Rn). */
+static void
+FMOVSW(jit_state_t *_jit, int32_t Rd, int32_t Rn)
+{
+ osvv_(_jit, A64_FMOVVW, 0, Rd, Rn);
+}
+
+/* FMOV to a 64-bit general register (Rd) from an FP register (Rn). */
+static void
+FMOVXD(jit_state_t *_jit, int32_t Rd, int32_t Rn)
+{
+ osvv_(_jit, A64_FMOVXV, 1, Rd, Rn);
+}
+
+/* FMOV to an FP register (Rd) from a 64-bit general register (Rn). */
+static void
+FMOVDX(jit_state_t *_jit, int32_t Rd, int32_t Rn)
+{
+ osvv_(_jit, A64_FMOVVX, 1, Rd, Rn);
+}
+
+/* FCVT producing single precision from a double-precision source
+   (size = 1 describes the source operand). */
+static void
+FCVT_SD(jit_state_t *_jit, int32_t Rd, int32_t Rn)
+{
+ osvv_(_jit, A64_FCVTS, 1, Rd, Rn);
+}
+
+/* FCVT producing double precision from a single-precision source. */
+static void
+FCVT_DS(jit_state_t *_jit, int32_t Rd, int32_t Rn)
+{
+ osvv_(_jit, A64_FCVTD, 0, Rd, Rn);
+}
+
+/* SCVTF (signed integer to FP), single-precision result.  The XS flag is
+   OR-ed into the opcode; it is defined outside this file -- presumably it
+   selects the 64-bit general-register form (confirm in aarch64-cpu.c). */
+static void
+SCVTFS(jit_state_t *_jit, int32_t Rd, int32_t Rn)
+{
+ osvv_(_jit, A64_SCVTF|XS, 0, Rd, Rn);
+}
+
+/* SCVTF with XS, double-precision result. */
+static void
+SCVTFD(jit_state_t *_jit, int32_t Rd, int32_t Rn)
+{
+ osvv_(_jit, A64_SCVTF|XS, 1, Rd, Rn);
+}
+
+/* FCVTZS (FP to signed integer, round toward zero), single-precision
+   source, without the XS flag. */
+static void
+FCVTSZ_WS(jit_state_t *_jit, int32_t Rd, int32_t Rn)
+{
+ osvv_(_jit, A64_FCVTZS, 0, Rd, Rn);
+}
+
+/* FCVTZS, double-precision source, without the XS flag. */
+static void
+FCVTSZ_WD(jit_state_t *_jit, int32_t Rd, int32_t Rn)
+{
+ osvv_(_jit, A64_FCVTZS, 1, Rd, Rn);
+}
+
+/* FCVTZS, single-precision source, with the XS flag. */
+static void
+FCVTSZ_XS(jit_state_t *_jit, int32_t Rd, int32_t Rn)
+{
+ osvv_(_jit, A64_FCVTZS|XS, 0, Rd, Rn);
+}
+
+/* FCVTZS, double-precision source, with the XS flag. */
+static void
+FCVTSZ_XD(jit_state_t *_jit, int32_t Rd, int32_t Rn)
+{
+ osvv_(_jit, A64_FCVTZS|XS, 1, Rd, Rn);
+}
+
+/* FABS, single precision. */
+static void
+FABSS(jit_state_t *_jit, int32_t Rd, int32_t Rn)
+{
+ osvv_(_jit, A64_FABS, 0, Rd, Rn);
+}
+
+/* FABS, double precision. */
+static void
+FABSD(jit_state_t *_jit, int32_t Rd, int32_t Rn)
+{
+ osvv_(_jit, A64_FABS, 1, Rd, Rn);
+}
+
+/* FNEG, single precision. */
+static void
+FNEGS(jit_state_t *_jit, int32_t Rd, int32_t Rn)
+{
+ osvv_(_jit, A64_FNEG, 0, Rd, Rn);
+}
+
+/* FNEG, double precision. */
+static void
+FNEGD(jit_state_t *_jit, int32_t Rd, int32_t Rn)
+{
+ osvv_(_jit, A64_FNEG, 1, Rd, Rn);
+}
+
+/* FSQRT, single precision. */
+static void
+FSQRTS(jit_state_t *_jit, int32_t Rd, int32_t Rn)
+{
+ osvv_(_jit, A64_FSQRT, 0, Rd, Rn);
+}
+
+/* FSQRT, double precision. */
+static void
+FSQRTD(jit_state_t *_jit, int32_t Rd, int32_t Rn)
+{
+ osvv_(_jit, A64_FSQRT, 1, Rd, Rn);
+}
+
+/* FADD Rd = Rn + Rm, single precision. */
+static void
+FADDS(jit_state_t *_jit, int32_t Rd, int32_t Rn, int32_t Rm)
+{
+ osvvv(_jit, A64_FADD, 0, Rd, Rn, Rm);
+}
+
+/* FADD, double precision. */
+static void
+FADDD(jit_state_t *_jit, int32_t Rd, int32_t Rn, int32_t Rm)
+{
+ osvvv(_jit, A64_FADD, 1, Rd, Rn, Rm);
+}
+
+/* FSUB Rd = Rn - Rm, single precision. */
+static void
+FSUBS(jit_state_t *_jit, int32_t Rd, int32_t Rn, int32_t Rm)
+{
+ osvvv(_jit, A64_FSUB, 0, Rd, Rn, Rm);
+}
+
+/* FSUB, double precision. */
+static void
+FSUBD(jit_state_t *_jit, int32_t Rd, int32_t Rn, int32_t Rm)
+{
+ osvvv(_jit, A64_FSUB, 1, Rd, Rn, Rm);
+}
+
+/* FMUL Rd = Rn * Rm, single precision. */
+static void
+FMULS(jit_state_t *_jit, int32_t Rd, int32_t Rn, int32_t Rm)
+{
+ osvvv(_jit, A64_FMUL, 0, Rd, Rn, Rm);
+}
+
+/* FMUL, double precision. */
+static void
+FMULD(jit_state_t *_jit, int32_t Rd, int32_t Rn, int32_t Rm)
+{
+ osvvv(_jit, A64_FMUL, 1, Rd, Rn, Rm);
+}
+
+/* FDIV Rd = Rn / Rm, single precision. */
+static void
+FDIVS(jit_state_t *_jit, int32_t Rd, int32_t Rn, int32_t Rm)
+{
+ osvvv(_jit, A64_FDIV, 0, Rd, Rn, Rm);
+}
+
+/* FDIV, double precision. */
+static void
+FDIVD(jit_state_t *_jit, int32_t Rd, int32_t Rn, int32_t Rm)
+{
+ osvvv(_jit, A64_FDIV, 1, Rd, Rn, Rm);
+}
+
+/* Truncate the float in FPR r1 to an integer in GPR r0, using the
+   XS-flagged FCVTZS form. */
+static void
+truncr_f_l(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+ FCVTSZ_XS(_jit, r0, r1);
+}
+
+/* Truncate the double in FPR r1 to an integer in GPR r0, using the
+   XS-flagged FCVTZS form. */
+static void
+truncr_d_l(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+ FCVTSZ_XD(_jit, r0, r1);
+}
+
+/* r0 = r1 + r2, single precision. */
+static void
+addr_f(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+{
+ FADDS(_jit, r0, r1, r2);
+}
+
+/* r0 = r1 - r2, single precision. */
+static void
+subr_f(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+{
+ FSUBS(_jit, r0, r1, r2);
+}
+
+/* r0 = r1 * r2, single precision. */
+static void
+mulr_f(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+{
+ FMULS(_jit, r0, r1, r2);
+}
+
+/* r0 = r1 / r2, single precision. */
+static void
+divr_f(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+{
+ FDIVS(_jit, r0, r1, r2);
+}
+
+/* r0 = |r1|, single precision. */
+static void
+absr_f(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+ FABSS(_jit, r0, r1);
+}
+
+/* r0 = -r1, single precision. */
+static void
+negr_f(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+ FNEGS(_jit, r0, r1);
+}
+
+/* r0 = sqrt(r1), single precision. */
+static void
+sqrtr_f(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+ FSQRTS(_jit, r0, r1);
+}
+
+/* Convert the signed integer in GPR r1 to a float in FPR r0. */
+static void
+extr_f(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+ SCVTFS(_jit, r0, r1);
+}
+
+/* Convert the double in FPR r1 to a float in FPR r0. */
+static void
+extr_d_f(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+ FCVT_SD(_jit, r0, r1);
+}
+
+/* Single-precision compare-and-branch: emit FCMPE r0, r1 followed by a
+   conditional branch on CC; returns the relocation for that branch. */
+static jit_reloc_t
+fbccr(jit_state_t *_jit, int32_t cc, int32_t r0, int32_t r1)
+{
+ FCMPES(_jit, r0, r1);
+ return B_C(_jit, cc);
+}
+
+/* The wrappers below only choose the condition code.  The "ordered" and
+   "unordered" notes follow the AArch64 FP-compare flag results, where an
+   unordered comparison (a NaN operand) sets C and V and clears N and Z. */
+
+/* Branch if r0 < r1 (MI: not taken when unordered). */
+static jit_reloc_t
+bltr_f(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+ return fbccr(_jit, BCC_MI,r0, r1);
+}
+
+/* Branch if r0 <= r1 (LS: not taken when unordered). */
+static jit_reloc_t
+bler_f(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+ return fbccr(_jit, BCC_LS,r0, r1);
+}
+
+/* Branch if r0 == r1 (EQ). */
+static jit_reloc_t
+beqr_f(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+ return fbccr(_jit, BCC_EQ,r0, r1);
+}
+
+/* Branch if r0 >= r1 (GE: not taken when unordered). */
+static jit_reloc_t
+bger_f(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+ return fbccr(_jit, BCC_GE,r0, r1);
+}
+
+/* Branch if r0 > r1 (GT: not taken when unordered). */
+static jit_reloc_t
+bgtr_f(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+ return fbccr(_jit, BCC_GT,r0, r1);
+}
+
+/* Branch if r0 != r1 (NE: also taken when unordered). */
+static jit_reloc_t
+bner_f(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+ return fbccr(_jit, BCC_NE,r0, r1);
+}
+
+/* Branch if unordered or r0 < r1 (LT). */
+static jit_reloc_t
+bunltr_f(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+ return fbccr(_jit, BCC_LT,r0, r1);
+}
+
+/* Branch if unordered or r0 <= r1 (LE). */
+static jit_reloc_t
+bunler_f(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+ return fbccr(_jit, BCC_LE,r0, r1);
+}
+
+/* Branch if unordered or r0 >= r1 (PL). */
+static jit_reloc_t
+bunger_f(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+ return fbccr(_jit, BCC_PL,r0, r1);
+}
+
+/* Branch if unordered or r0 > r1 (HI). */
+static jit_reloc_t
+bungtr_f(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+ return fbccr(_jit, BCC_HI,r0, r1);
+}
+
+/* Branch if the operands are ordered (VC). */
+static jit_reloc_t
+bordr_f(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+ return fbccr(_jit, BCC_VC,r0, r1);
+}
+
+/* Branch if the operands are unordered (VS). */
+static jit_reloc_t
+bunordr_f(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+ return fbccr(_jit, BCC_VS, r0, r1);
+}
+
+/* r0 = r1 + r2, double precision. */
+static void
+addr_d(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+{
+ FADDD(_jit, r0, r1, r2);
+}
+
+/* r0 = r1 - r2, double precision. */
+static void
+subr_d(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+{
+ FSUBD(_jit, r0, r1, r2);
+}
+
+/* r0 = r1 * r2, double precision. */
+static void
+mulr_d(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+{
+ FMULD(_jit, r0, r1, r2);
+}
+
+/* r0 = r1 / r2, double precision. */
+static void
+divr_d(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+{
+ FDIVD(_jit, r0, r1, r2);
+}
+
+/* r0 = |r1|, double precision. */
+static void
+absr_d(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+ FABSD(_jit, r0, r1);
+}
+
+/* r0 = -r1, double precision. */
+static void
+negr_d(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+ FNEGD(_jit, r0, r1);
+}
+
+/* r0 = sqrt(r1), double precision. */
+static void
+sqrtr_d(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+ FSQRTD(_jit, r0, r1);
+}
+
+/* Convert the signed integer in GPR r1 to a double in FPR r0. */
+static void
+extr_d(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+ SCVTFD(_jit, r0, r1);
+}
+
+/* Convert the float in FPR r1 to a double in FPR r0. */
+static void
+extr_f_d(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+ FCVT_DS(_jit, r0, r1);
+}
+
+/* Double-precision compare-and-branch: emit FCMPE r0, r1 followed by a
+   conditional branch on CC; returns the relocation for that branch. */
+static jit_reloc_t
+dbccr(jit_state_t *_jit, int32_t cc, int32_t r0, int32_t r1)
+{
+ FCMPED(_jit, r0, r1);
+ return B_C(_jit, cc);
+}
+
+/* The wrappers below only choose the condition code.  The "ordered" and
+   "unordered" notes follow the AArch64 FP-compare flag results, where an
+   unordered comparison (a NaN operand) sets C and V and clears N and Z. */
+
+/* Branch if r0 < r1 (MI: not taken when unordered). */
+static jit_reloc_t
+bltr_d(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+ return dbccr(_jit, BCC_MI, r0, r1);
+}
+
+/* Branch if r0 <= r1 (LS: not taken when unordered). */
+static jit_reloc_t
+bler_d(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+ return dbccr(_jit, BCC_LS, r0, r1);
+}
+
+/* Branch if r0 == r1 (EQ). */
+static jit_reloc_t
+beqr_d(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+ return dbccr(_jit, BCC_EQ, r0, r1);
+}
+
+/* Branch if r0 >= r1 (GE: not taken when unordered). */
+static jit_reloc_t
+bger_d(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+ return dbccr(_jit, BCC_GE, r0, r1);
+}
+
+/* Branch if r0 > r1 (GT: not taken when unordered). */
+static jit_reloc_t
+bgtr_d(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+ return dbccr(_jit, BCC_GT, r0, r1);
+}
+
+/* Branch if r0 != r1 (NE: also taken when unordered). */
+static jit_reloc_t
+bner_d(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+ return dbccr(_jit, BCC_NE, r0, r1);
+}
+
+/* Branch if unordered or r0 < r1 (LT). */
+static jit_reloc_t
+bunltr_d(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+ return dbccr(_jit, BCC_LT, r0, r1);
+}
+
+/* Branch if unordered or r0 <= r1 (LE). */
+static jit_reloc_t
+bunler_d(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+ return dbccr(_jit, BCC_LE, r0, r1);
+}
+
+/* Branch if unordered or r0 >= r1 (PL). */
+static jit_reloc_t
+bunger_d(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+ return dbccr(_jit, BCC_PL, r0, r1);
+}
+
+/* Branch if unordered or r0 > r1 (HI). */
+static jit_reloc_t
+bungtr_d(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+ return dbccr(_jit, BCC_HI, r0, r1);
+}
+
+/* Branch if the operands are ordered (VC). */
+static jit_reloc_t
+bordr_d(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+ return dbccr(_jit, BCC_VC, r0, r1);
+}
+
+/* Branch if the operands are unordered (VS). */
+static jit_reloc_t
+bunordr_d(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+ return dbccr(_jit, BCC_VS, r0, r1);
+}
+
+
+/* Truncate the float in FPR r1 to an integer in GPR r0 using the FCVTZS
+   form without the XS flag, then apply extr_i() to r0 in place. */
+static void
+truncr_f_i(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+ FCVTSZ_WS(_jit, r0, r1);
+ extr_i(_jit, r0, r0);
+}
+
+/* Truncate the double in FPR r1 to an integer in GPR r0 using the FCVTZS
+   form without the XS flag, then apply extr_i() to r0 in place. */
+static void
+truncr_d_i(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+ FCVTSZ_WD(_jit, r0, r1);
+ extr_i(_jit, r0, r0);
+}
+
+/* Single-precision loads and stores.  All of them route the 32-bit value
+   through a scratch general register: loads use the integer load and then
+   FMOV into the FPR; stores FMOV out of the FPR and use the integer
+   store. */
+
+/* Load a float into FPR r0 from the address in GPR r1. */
+static void
+ldr_f(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  const int32_t tmp = jit_gpr_regno(get_temp_gpr(_jit));
+  ldr_i(_jit, tmp, r1);
+  FMOVSW(_jit, r0, tmp);
+  unget_temp_gpr(_jit);
+}
+
+/* Load a float into FPR r0 from the immediate address i0. */
+static void
+ldi_f(jit_state_t *_jit, int32_t r0, jit_word_t i0)
+{
+  const int32_t tmp = jit_gpr_regno(get_temp_gpr(_jit));
+  ldi_i(_jit, tmp, i0);
+  FMOVSW(_jit, r0, tmp);
+  unget_temp_gpr(_jit);
+}
+
+/* Load a float into FPR r0 using the register pair r1, r2 as address. */
+static void
+ldxr_f(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+{
+  const int32_t tmp = jit_gpr_regno(get_temp_gpr(_jit));
+  ldxr_i(_jit, tmp, r1, r2);
+  FMOVSW(_jit, r0, tmp);
+  unget_temp_gpr(_jit);
+}
+
+/* Load a float into FPR r0 using GPR r1 and immediate i0 as address. */
+static void
+ldxi_f(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0)
+{
+  const int32_t tmp = jit_gpr_regno(get_temp_gpr(_jit));
+  ldxi_i(_jit, tmp, r1, i0);
+  FMOVSW(_jit, r0, tmp);
+  unget_temp_gpr(_jit);
+}
+
+/* Store the float in FPR r1 to the address in GPR r0. */
+static void
+str_f(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  const int32_t tmp = jit_gpr_regno(get_temp_gpr(_jit));
+  FMOVWS(_jit, tmp, r1);
+  str_i(_jit, r0, tmp);
+  unget_temp_gpr(_jit);
+}
+
+/* Store the float in FPR r0 to the immediate address i0. */
+static void
+sti_f(jit_state_t *_jit, jit_word_t i0, int32_t r0)
+{
+  const int32_t tmp = jit_gpr_regno(get_temp_gpr(_jit));
+  FMOVWS(_jit, tmp, r0);
+  sti_i(_jit, i0, tmp);
+  unget_temp_gpr(_jit);
+}
+
+/* Store the float in FPR r2 using the register pair r0, r1 as address. */
+static void
+stxr_f(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+{
+  const int32_t tmp = jit_gpr_regno(get_temp_gpr(_jit));
+  FMOVWS(_jit, tmp, r2);
+  stxr_i(_jit, r0, r1, tmp);
+  unget_temp_gpr(_jit);
+}
+
+/* Store the float in FPR r1 using immediate i0 and GPR r0 as address. */
+static void
+stxi_f(jit_state_t *_jit, jit_word_t i0, int32_t r0, int32_t r1)
+{
+  const int32_t tmp = jit_gpr_regno(get_temp_gpr(_jit));
+  FMOVWS(_jit, tmp, r1);
+  stxi_i(_jit, i0, r0, tmp);
+  unget_temp_gpr(_jit);
+}
+
+/* Copy single-precision FPR r1 into r0; emits nothing when both are the
+   same register. */
+static void
+movr_f(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  if (r0 == r1)
+    return;
+  FMOVS(_jit, r0, r1);
+}
+
+/* Load the float constant i0 into FPR r0 by moving its bit pattern in from
+   a general register. */
+static void
+movi_f(jit_state_t *_jit, int32_t r0, float i0)
+{
+  union {
+    int32_t i;
+    float f;
+  } bits;
+  bits.f = i0;
+
+  /* An all-zero pattern can be moved straight from the zero register. */
+  if (bits.i == 0) {
+    FMOVSW(_jit, r0, WZR_REGNO);
+    return;
+  }
+
+  const int32_t tmp = jit_gpr_regno(get_temp_gpr(_jit));
+  /* Mask to 32 bits so a negative pattern is not sign-extended into the
+     upper half of the immediate. */
+  movi(_jit, tmp, ((jit_word_t)bits.i) & 0xffffffff);
+  FMOVSW(_jit, r0, tmp);
+  unget_temp_gpr(_jit);
+}
+
+/* Branch if r0 and r1 are unordered or equal (single precision).  Returns
+   the relocation of the unconditional branch that is reached in exactly
+   those cases; all other cases skip over it. */
+static jit_reloc_t
+buneqr_f(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+ FCMPES(_jit, r0, r1);
+ jit_reloc_t unordered = B_C(_jit, BCC_VS); /* unordered satisfies condition */
+ jit_reloc_t neq = B_C(_jit, BCC_NE); /* ordered and not equal: condition fails, skip the jump */
+ jit_patch_here(_jit, unordered);
+ jit_reloc_t ret = B(_jit);
+ jit_patch_here(_jit, neq);
+ return ret;
+}
+
+/* Branch if r0 and r1 are ordered and not equal (single precision).
+   Returns the relocation of the unconditional branch; the unordered and
+   equal cases jump past it. */
+static jit_reloc_t
+bltgtr_f(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+ FCMPES(_jit, r0, r1);
+ jit_reloc_t unordered = B_C(_jit, BCC_VS); /* jump over if unordered */
+ jit_reloc_t eq = B_C(_jit, BCC_EQ); /* jump over if equal */
+ jit_reloc_t ret = B(_jit);
+ jit_patch_here(_jit, unordered);
+ jit_patch_here(_jit, eq);
+ return ret;
+}
+
+/* Double-precision loads and stores.  All of them route the 64-bit value
+   through a scratch general register: loads use the integer load and then
+   FMOV into the FPR; stores FMOV out of the FPR and use the integer
+   store. */
+
+/* Load a double into FPR r0 from the address in GPR r1. */
+static void
+ldr_d(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  const int32_t tmp = jit_gpr_regno(get_temp_gpr(_jit));
+  ldr_l(_jit, tmp, r1);
+  FMOVDX(_jit, r0, tmp);
+  unget_temp_gpr(_jit);
+}
+
+/* Load a double into FPR r0 from the immediate address i0. */
+static void
+ldi_d(jit_state_t *_jit, int32_t r0, jit_word_t i0)
+{
+  const int32_t tmp = jit_gpr_regno(get_temp_gpr(_jit));
+  ldi_l(_jit, tmp, i0);
+  FMOVDX(_jit, r0, tmp);
+  unget_temp_gpr(_jit);
+}
+
+/* Load a double into FPR r0 using the register pair r1, r2 as address. */
+static void
+ldxr_d(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+{
+  const int32_t tmp = jit_gpr_regno(get_temp_gpr(_jit));
+  ldxr_l(_jit, tmp, r1, r2);
+  FMOVDX(_jit, r0, tmp);
+  unget_temp_gpr(_jit);
+}
+
+/* Load a double into FPR r0 using GPR r1 and immediate i0 as address. */
+static void
+ldxi_d(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0)
+{
+  const int32_t tmp = jit_gpr_regno(get_temp_gpr(_jit));
+  ldxi_l(_jit, tmp, r1, i0);
+  FMOVDX(_jit, r0, tmp);
+  unget_temp_gpr(_jit);
+}
+
+/* Store the double in FPR r1 to the address in GPR r0. */
+static void
+str_d(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  const int32_t tmp = jit_gpr_regno(get_temp_gpr(_jit));
+  FMOVXD(_jit, tmp, r1);
+  str_l(_jit, r0, tmp);
+  unget_temp_gpr(_jit);
+}
+
+/* Store the double in FPR r0 to the immediate address i0. */
+static void
+sti_d(jit_state_t *_jit, jit_word_t i0, int32_t r0)
+{
+  const int32_t tmp = jit_gpr_regno(get_temp_gpr(_jit));
+  FMOVXD(_jit, tmp, r0);
+  sti_l(_jit, i0, tmp);
+  unget_temp_gpr(_jit);
+}
+
+/* Store the double in FPR r2 using the register pair r0, r1 as address. */
+static void
+stxr_d(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+{
+  const int32_t tmp = jit_gpr_regno(get_temp_gpr(_jit));
+  FMOVXD(_jit, tmp, r2);
+  stxr_l(_jit, r0, r1, tmp);
+  unget_temp_gpr(_jit);
+}
+
+/* Store the double in FPR r1 using immediate i0 and GPR r0 as address. */
+static void
+stxi_d(jit_state_t *_jit, jit_word_t i0, int32_t r0, int32_t r1)
+{
+  const int32_t tmp = jit_gpr_regno(get_temp_gpr(_jit));
+  FMOVXD(_jit, tmp, r1);
+  stxi_l(_jit, i0, r0, tmp);
+  unget_temp_gpr(_jit);
+}
+
+/* Copy double-precision FPR r1 into r0; emits nothing when both are the
+   same register. */
+static void
+movr_d(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  if (r0 == r1)
+    return;
+  FMOVD(_jit, r0, r1);
+}
+
+/* Load the double constant i0 into FPR r0 by moving its bit pattern in
+   from a general register. */
+static void
+movi_d(jit_state_t *_jit, int32_t r0, double i0)
+{
+  union {
+    int64_t l;
+    double d;
+  } bits;
+  bits.d = i0;
+
+  /* An all-zero pattern can be moved straight from the zero register. */
+  if (bits.l == 0) {
+    FMOVDX(_jit, r0, XZR_REGNO);
+    return;
+  }
+
+  const int32_t tmp = jit_gpr_regno(get_temp_gpr(_jit));
+  movi(_jit, tmp, bits.l);
+  FMOVDX(_jit, r0, tmp);
+  unget_temp_gpr(_jit);
+}
+
+/* Branch if r0 and r1 are unordered or equal (double precision).  Returns
+   the relocation of the unconditional branch that is reached in exactly
+   those cases; all other cases skip over it. */
+static jit_reloc_t
+buneqr_d(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+ FCMPED(_jit, r0, r1);
+ jit_reloc_t unordered = B_C(_jit, BCC_VS); /* unordered satisfies condition */
+ jit_reloc_t neq = B_C(_jit, BCC_NE); /* ordered and not equal: condition fails, skip the jump */
+ jit_patch_here(_jit, unordered);
+ jit_reloc_t ret = B(_jit);
+ jit_patch_here(_jit, neq);
+ return ret;
+}
+
+/* Branch if r0 and r1 are ordered and not equal (double precision).
+   Returns the relocation of the unconditional branch; the unordered and
+   equal cases jump past it. */
+static jit_reloc_t
+bltgtr_d(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+ FCMPED(_jit, r0, r1);
+ jit_reloc_t unordered = B_C(_jit, BCC_VS); /* jump over if unordered */
+ jit_reloc_t eq = B_C(_jit, BCC_EQ); /* jump over if equal */
+ jit_reloc_t ret = B(_jit);
+ jit_patch_here(_jit, unordered);
+ jit_patch_here(_jit, eq);
+ return ret;
+}
+
+/* Return the double in FPR r: move it to _D0, then emit ret(). */
+static void
+retr_d(jit_state_t *_jit, int32_t r)
+{
+ movr_d(_jit, jit_fpr_regno(_D0), r);
+ ret(_jit);
+}
+
+/* Return the float in FPR r: move it to _D0 (the same register number is
+   used for single precision), then emit ret(). */
+static void
+retr_f(jit_state_t *_jit, int32_t r)
+{
+ movr_f(_jit, jit_fpr_regno(_D0), r);
+ ret(_jit);
+}
+
+/* Fetch a float return value: copy _D0 into FPR r0. */
+static void
+retval_f(jit_state_t *_jit, int32_t r0)
+{
+ movr_f(_jit, r0, jit_fpr_regno(_D0));
+}
+
+/* Fetch a double return value: copy _D0 into FPR r0. */
+static void
+retval_d(jit_state_t *_jit, int32_t r0)
+{
+ movr_d(_jit, r0, jit_fpr_regno(_D0));
+}
diff --git a/deps/lightening/lightening/aarch64.c b/deps/lightening/lightening/aarch64.c
new file mode 100644
index 0000000..1fe523a
--- /dev/null
+++ b/deps/lightening/lightening/aarch64.c
@@ -0,0 +1,235 @@
+/*
+ * Copyright (C) 2013-2020 Free Software Foundation, Inc.
+ *
+ * This file is part of GNU lightning.
+ *
+ * GNU lightning is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ *
+ * GNU lightning is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+ * License for more details.
+ *
+ * Authors:
+ * Paulo Cesar Pereira de Andrade
+ */
+
+/* libgcc */
+extern void __clear_cache(void *, void *);
+
+
+/* Extract the WIDTH-bit field that starts at bit SHIFT of WORD and
+   sign-extend it to 32 bits.
+
+   Requires 1 <= width and width + shift <= 32.
+
+   The field is first moved to the top of the word with an unsigned shift;
+   left-shifting the signed value instead would be undefined behaviour
+   whenever a set bit reaches or passes the sign bit.  The arithmetic right
+   shift of the converted value then performs the sign extension. */
+static inline int32_t
+read_signed_bitfield(uint32_t word, uint8_t width, uint8_t shift)
+{
+  uint32_t top_aligned = word << (32 - width - shift);
+  return ((int32_t)top_aligned) >> (32 - width);
+}
+
+/* Extract the WIDTH-bit field that starts at bit SHIFT of WORD as an
+   unsigned value: shift the field to the top of the word, then back down
+   so the bits above it are discarded. */
+static inline uint32_t
+read_unsigned_bitfield(uint32_t word, uint8_t width, uint8_t shift)
+{
+ return word << (32 - width - shift) >> (32 - width);
+}
+
+/* Return non-zero when DIFF is representable as a BITS-bit two's-complement
+   value, i.e. -2^(bits-1) <= diff < 2^(bits-1).
+
+   The bound is computed in ptrdiff_t from a positive constant: the previous
+   form left-shifted -1 (undefined behaviour) and used an int shift, which
+   cannot express widths of 32 bits or more. */
+static inline int
+in_signed_range(ptrdiff_t diff, uint8_t bits)
+{
+  const ptrdiff_t limit = (ptrdiff_t)1 << (bits - 1);
+  return -limit <= diff && diff < limit;
+}
+
+/* Return non-zero when VAL fits in BITS unsigned bits, i.e. val < 2^bits.
+
+   The comparison is done in 64-bit unsigned arithmetic: the assertion
+   admits any width below the word size, but an int shift `1 << bits` is
+   undefined once bits reaches 31. */
+static inline int
+in_unsigned_range(uint32_t val, uint8_t bits)
+{
+  ASSERT(bits < __WORDSIZE);
+  return (uint64_t)val < ((uint64_t)1 << bits);
+}
+
+/* OR the unsigned value VAL into the WIDTH-bit field at bit SHIFT of WORD
+   and return the result.  Asserts that the field is currently zero and
+   that VAL fits in WIDTH bits. */
+static inline uint32_t
+write_unsigned_bitfield(uint32_t word, uint32_t val, uint8_t width, uint8_t shift)
+{
+ ASSERT(read_unsigned_bitfield(word, width, shift) == 0);
+ ASSERT(in_unsigned_range(val, width));
+ return word | (val << shift);
+}
+
+/* OR the signed value VAL, truncated to WIDTH bits, into the field at bit
+   SHIFT of WORD and return the result.  Asserts that the field is
+   currently zero and that VAL fits in WIDTH signed bits.
+   NOTE(review): the return type is int32_t while the unsigned variant and
+   the DEFINE_ENCODER wrappers use uint32_t -- confirm this is intended. */
+static inline int32_t
+write_signed_bitfield(uint32_t word, ptrdiff_t val, uint8_t width, uint8_t shift)
+{
+ ASSERT(read_signed_bitfield(word, width, shift) == 0);
+ ASSERT(in_signed_range(val, width));
+ return word | ((val & ((1 << width) - 1)) << shift);
+}
+
+/* DEFINE_ENCODER(name, width, shift, kind, val_t) declares, for one
+   instruction field:
+     - name##_width and name##_shift constants, and
+     - write_##name##_bitfield(word, val), which forwards to
+       write_signed_bitfield() or write_unsigned_bitfield() according to
+       KIND with that width and shift. */
+#define DEFINE_ENCODER(name, width, shift, kind, val_t) \
+ static const uint8_t name##_width = width; \
+ static const uint8_t name##_shift = shift; \
+ static uint32_t \
+ write_##name##_bitfield(uint32_t word, val_t val) \
+ { \
+ return write_##kind##_bitfield(word, val, name##_width, name##_shift); \
+ }
+
+/* Field layouts used by the AArch64 encoders: (width, starting bit). */
+DEFINE_ENCODER(Rd, 5, 0, unsigned, uint32_t)
+DEFINE_ENCODER(Rm, 5, 16, unsigned, uint32_t)
+DEFINE_ENCODER(Rn, 5, 5, unsigned, uint32_t)
+DEFINE_ENCODER(cond2, 4, 0, unsigned, uint32_t)
+DEFINE_ENCODER(simm9, 9, 12, signed, ptrdiff_t)
+DEFINE_ENCODER(imm12, 12, 10, unsigned, uint32_t)
+DEFINE_ENCODER(imm16, 16, 5, unsigned, uint32_t)
+DEFINE_ENCODER(simm19, 19, 5, signed, ptrdiff_t)
+DEFINE_ENCODER(simm26, 26, 0, signed, ptrdiff_t)
+DEFINE_ENCODER(immr, 6, 16, unsigned, uint32_t)
+DEFINE_ENCODER(imms, 6, 10, unsigned, uint32_t)
+DEFINE_ENCODER(size, 2, 22, unsigned, uint32_t)
+
+/* DEFINE_PATCHABLE_INSTRUCTION(name, kind, RELOC, rsh) generates the
+   helpers for an instruction whose signed offset lives in field KIND:
+     - read_##name##_offset(loc): read the sign-extended offset field;
+     - offset_in_##name##_range(diff, flags): whether DIFF fits the field
+       (FLAGS is not used);
+     - patch_##name##_offset(loc, diff): write DIFF into the field at LOC;
+     - emit_##name(_jit, inst): create a JIT_RELOC_##RELOC relocation at the
+       current pc, register it with add_pending_literal(), and emit INST
+       once that call succeeds.  The loop retries when it fails --
+       presumably the failed call makes room by flushing the literal pool
+       (confirm in lightening.c). */
+#define DEFINE_PATCHABLE_INSTRUCTION(name, kind, RELOC, rsh) \
+ static inline int32_t \
+ read_##name##_offset(uint32_t *loc) \
+ { \
+ return read_signed_bitfield(*loc, kind##_width, kind##_shift); \
+ } \
+ static inline int \
+ offset_in_##name##_range(ptrdiff_t diff, int flags); \
+ static inline int \
+ offset_in_##name##_range(ptrdiff_t diff, int flags) \
+ { \
+ return in_signed_range(diff, kind##_width); \
+ } \
+ static inline void \
+ patch_##name##_offset(uint32_t *loc, ptrdiff_t diff) \
+ { \
+ *loc = write_##kind##_bitfield(*loc, diff); \
+ } \
+ static inline jit_reloc_t \
+ emit_##name(jit_state_t *_jit, uint32_t inst) \
+ { \
+ while (1) { \
+ jit_reloc_t ret = jit_reloc (_jit, JIT_RELOC_##RELOC, 0, \
+ _jit->pc.uc, _jit->pc.uc, rsh); \
+ if (add_pending_literal(_jit, ret, kind##_width - 1)) { \
+ emit_u32(_jit, inst); \
+ return ret; \
+ } \
+ } \
+ }
+
+/* Instantiate the helpers twice: once under NAME and once under
+   veneer_NAME, with otherwise identical parameters. */
+#define DEFINE_PATCHABLE_INSTRUCTIONS(name, kind, RELOC, rsh) \
+ DEFINE_PATCHABLE_INSTRUCTION(name, kind, RELOC, rsh); \
+ DEFINE_PATCHABLE_INSTRUCTION(veneer_##name, kind, RELOC, rsh);
+
+/* jmp uses the 26-bit offset field, jcc and load_from_pool the 19-bit
+   one. */
+DEFINE_PATCHABLE_INSTRUCTIONS(jmp, simm26, JMP_WITH_VENEER, 2);
+DEFINE_PATCHABLE_INSTRUCTIONS(jcc, simm19, JCC_WITH_VENEER, 2);
+DEFINE_PATCHABLE_INSTRUCTION(load_from_pool, simm19, LOAD_FROM_POOL, 2);
+
+/* Layout of a veneer: two instruction words followed by a 64-bit address.
+   Presumably the ldr word loads ADDR into a scratch register and the br
+   word jumps through it -- the code that fills in ldr/br is not in this
+   file section. */
+struct veneer
+{
+ uint32_t ldr;
+ uint32_t br;
+ uint64_t addr;
+};
+
+/* Set the target address stored in the veneer located at LOC. */
+static void
+patch_veneer(uint32_t *loc, jit_pointer_t addr)
+{
+ struct veneer *v = (struct veneer*) loc;
+ v->addr = (uint64_t) addr;
+}
+
+#include "aarch64-cpu.c"
+#include "aarch64-fpu.c"
+
+/* General registers that carry arguments, in assignment order. */
+static const jit_gpr_t abi_gpr_args[] = {
+ _X0, _X1, _X2, _X3, _X4, _X5, _X6, _X7
+};
+
+/* Floating-point registers that carry arguments, in assignment order. */
+static const jit_fpr_t abi_fpr_args[] = {
+ _D0, _D1, _D2, _D3, _D4, _D5, _D6, _D7
+};
+
+static const int abi_gpr_arg_count = sizeof(abi_gpr_args) / sizeof(abi_gpr_args[0]);
+static const int abi_fpr_arg_count = sizeof(abi_fpr_args) / sizeof(abi_fpr_args[0]);
+
+/* Cursor over an argument list, tracking how many general registers,
+   FP registers and bytes of stack have been handed out so far. */
+struct abi_arg_iterator
+{
+ const jit_operand_t *args;
+ size_t argc;
+
+ size_t arg_idx;
+ size_t gpr_idx;
+ size_t fpr_idx;
+ size_t stack_size;
+ size_t stack_padding;
+};
+
+static size_t page_size;
+
+/* One-time CPU/platform probe.  Records the system page size, which
+   jit_flush() uses to round the flushed range to page boundaries.
+
+   Returns 1 on success and 0 if the page size cannot be determined:
+   sysconf() reports failure as -1, which must not be stored into the
+   unsigned page_size or the page mask derived from it would be wrong. */
+jit_bool_t
+jit_get_cpu(void)
+{
+  long size = sysconf(_SC_PAGE_SIZE);
+  if (size <= 0)
+    return 0;
+  page_size = (size_t)size;
+  return 1;
+}
+
+/* Per-state initialisation hook; nothing is needed on this backend, so it
+   always returns 1. */
+jit_bool_t
+jit_init(jit_state_t *_jit)
+{
+ return 1;
+}
+
+/* Initial frame size for this backend: always 0. */
+static size_t
+jit_initial_frame_size (void)
+{
+ return 0;
+}
+
+/* Initialise ITER to walk the ARGC operands in ARGS from the beginning:
+   every counter is zeroed before the argument list is attached. */
+static void
+reset_abi_arg_iterator(struct abi_arg_iterator *iter, size_t argc,
+ const jit_operand_t *args)
+{
+ memset(iter, 0, sizeof *iter);
+ iter->argc = argc;
+ iter->args = args;
+}
+
+/* Assign a location to the next argument of ITER and store it in *ARG.
+   General and FP arguments take the next free register of their class
+   while any remain; everything else gets an 8-byte stack slot addressed
+   from JIT_SP. */
+static void
+next_abi_arg(struct abi_arg_iterator *iter, jit_operand_t *arg)
+{
+  ASSERT(iter->arg_idx < iter->argc);
+  const enum jit_operand_abi abi = iter->args[iter->arg_idx++].abi;
+
+  if (is_gpr_arg(abi) && iter->gpr_idx < abi_gpr_arg_count) {
+    *arg = jit_operand_gpr (abi, abi_gpr_args[iter->gpr_idx++]);
+    return;
+  }
+
+  if (is_fpr_arg(abi) && iter->fpr_idx < abi_fpr_arg_count) {
+    *arg = jit_operand_fpr (abi, abi_fpr_args[iter->fpr_idx++]);
+    return;
+  }
+
+  /* Out of registers for this class (or not a register argument). */
+  *arg = jit_operand_mem (abi, JIT_SP, iter->stack_size);
+  iter->stack_size += 8;
+}
+
+/* Flush the instruction cache for the code in [fptr, tptr).  The range is
+   widened to whole pages -- start rounded down, end rounded up -- before
+   being handed to __clear_cache().  Relies on page_size having been set
+   by jit_get_cpu(). */
+static void
+jit_flush(void *fptr, void *tptr)
+{
+  const size_t page_mask = -page_size;
+  jit_word_t first = (jit_word_t)fptr & page_mask;
+  jit_word_t last = (((jit_word_t)tptr) + page_size - 1) & page_mask;
+  __clear_cache((void *)first, (void *)last);
+}
+
+/* Required stack alignment in bytes. */
+static inline size_t
+jit_stack_alignment(void)
+{
+ return 16;
+}
+
+/* Hook for shortening a relocation once its target is known; this
+   backend does nothing here. */
+static void
+jit_try_shorten(jit_state_t *_jit, jit_reloc_t reloc, jit_pointer_t addr)
+{
+}
+
+/* Convert a code address into a callable function pointer; on this
+   backend the pointer is returned unchanged. */
+static void*
+bless_function_pointer(void *ptr)
+{
+ return ptr;
+}
diff --git a/deps/lightening/lightening/aarch64.h b/deps/lightening/lightening/aarch64.h
new file mode 100644
index 0000000..219c615
--- /dev/null
+++ b/deps/lightening/lightening/aarch64.h
@@ -0,0 +1,170 @@
+/*
+ * Copyright (C) 2013-2017, 2019 Free Software Foundation, Inc.
+ *
+ * This file is part of GNU lightning.
+ *
+ * GNU lightning is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ *
+ * GNU lightning is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+ * License for more details.
+ *
+ * Authors:
+ * Paulo Cesar Pereira de Andrade
+ */
+
+#ifndef _jit_aarch64_h
+#define _jit_aarch64_h
+
+
+/* This backend uses a literal pool (see the load_from_pool and veneer
+   helpers in aarch64.c). */
+#define JIT_NEEDS_LITERAL_POOL 1
+
+/* Raw general-purpose register names, numbered 0..31. */
+#define _X0 JIT_GPR(0)
+#define _X1 JIT_GPR(1)
+#define _X2 JIT_GPR(2)
+#define _X3 JIT_GPR(3)
+#define _X4 JIT_GPR(4)
+#define _X5 JIT_GPR(5)
+#define _X6 JIT_GPR(6)
+#define _X7 JIT_GPR(7)
+#define _X8 JIT_GPR(8)
+#define _X9 JIT_GPR(9)
+#define _X10 JIT_GPR(10)
+#define _X11 JIT_GPR(11)
+#define _X12 JIT_GPR(12)
+#define _X13 JIT_GPR(13)
+#define _X14 JIT_GPR(14)
+#define _X15 JIT_GPR(15)
+#define _X16 JIT_GPR(16)
+#define _X17 JIT_GPR(17)
+#define _X18 JIT_GPR(18)
+#define _X19 JIT_GPR(19)
+#define _X20 JIT_GPR(20)
+#define _X21 JIT_GPR(21)
+#define _X22 JIT_GPR(22)
+#define _X23 JIT_GPR(23)
+#define _X24 JIT_GPR(24)
+#define _X25 JIT_GPR(25)
+#define _X26 JIT_GPR(26)
+#define _X27 JIT_GPR(27)
+#define _X28 JIT_GPR(28)
+#define _X29 JIT_GPR(29)
+#define _X30 JIT_GPR(30)
+#define _X31 JIT_GPR(31)
+
+/* Raw floating-point register names, numbered 0..31. */
+#define _D0 JIT_FPR(0)
+#define _D1 JIT_FPR(1)
+#define _D2 JIT_FPR(2)
+#define _D3 JIT_FPR(3)
+#define _D4 JIT_FPR(4)
+#define _D5 JIT_FPR(5)
+#define _D6 JIT_FPR(6)
+#define _D7 JIT_FPR(7)
+#define _D8 JIT_FPR(8)
+#define _D9 JIT_FPR(9)
+#define _D10 JIT_FPR(10)
+#define _D11 JIT_FPR(11)
+#define _D12 JIT_FPR(12)
+#define _D13 JIT_FPR(13)
+#define _D14 JIT_FPR(14)
+#define _D15 JIT_FPR(15)
+#define _D16 JIT_FPR(16)
+#define _D17 JIT_FPR(17)
+#define _D18 JIT_FPR(18)
+#define _D19 JIT_FPR(19)
+#define _D20 JIT_FPR(20)
+#define _D21 JIT_FPR(21)
+#define _D22 JIT_FPR(22)
+#define _D23 JIT_FPR(23)
+#define _D24 JIT_FPR(24)
+#define _D25 JIT_FPR(25)
+#define _D26 JIT_FPR(26)
+#define _D27 JIT_FPR(27)
+#define _D28 JIT_FPR(28)
+#define _D29 JIT_FPR(29)
+#define _D30 JIT_FPR(30)
+#define _D31 JIT_FPR(31)
+
+/* Portable general-register names mapped onto x0..x15.  By the JIT_R /
+   JIT_V naming used across lightening backends these are presumably the
+   caller-save set -- confirm against lightening.h. */
+#define JIT_R0 _X0
+#define JIT_R1 _X1
+#define JIT_R2 _X2
+#define JIT_R3 _X3
+#define JIT_R4 _X4
+#define JIT_R5 _X5
+#define JIT_R6 _X6
+#define JIT_R7 _X7
+#define JIT_R8 _X8
+#define JIT_R9 _X9
+#define JIT_R10 _X10
+#define JIT_R11 _X11
+#define JIT_R12 _X12
+#define JIT_R13 _X13
+#define JIT_R14 _X14
+#define JIT_R15 _X15
+/* Scratch registers reserved for the code generator itself. */
+#define JIT_TMP0 _X16
+#define JIT_TMP1 _X17
+// x18 is reserved by the platform.
+/* Portable general-register names mapped onto x19..x28 (presumably the
+   callee-save set). */
+#define JIT_V0 _X19
+#define JIT_V1 _X20
+#define JIT_V2 _X21
+#define JIT_V3 _X22
+#define JIT_V4 _X23
+#define JIT_V5 _X24
+#define JIT_V6 _X25
+#define JIT_V7 _X26
+#define JIT_V8 _X27
+#define JIT_V9 _X28
+
+// x29 is frame pointer; x30 is link register.
+#define JIT_PLATFORM_CALLEE_SAVE_GPRS _X29, _X30
+
+// x31 is stack pointer.
+#define JIT_LR _X30
+#define JIT_SP _X31
+
+/* Portable FP register names.  JIT_F0..JIT_F7 map to d0..d7 and
+   JIT_F8..JIT_F22 continue at d16..d30, skipping d8..d15, which are
+   exposed separately as JIT_VF0..JIT_VF7 below.  d31 is kept as the FP
+   scratch register. */
+#define JIT_F0 _D0
+#define JIT_F1 _D1
+#define JIT_F2 _D2
+#define JIT_F3 _D3
+#define JIT_F4 _D4
+#define JIT_F5 _D5
+#define JIT_F6 _D6
+#define JIT_F7 _D7
+#define JIT_F8 _D16
+#define JIT_F9 _D17
+#define JIT_F10 _D18
+#define JIT_F11 _D19
+#define JIT_F12 _D20
+#define JIT_F13 _D21
+#define JIT_F14 _D22
+#define JIT_F15 _D23
+#define JIT_F16 _D24
+#define JIT_F17 _D25
+#define JIT_F18 _D26
+#define JIT_F19 _D27
+#define JIT_F20 _D28
+#define JIT_F21 _D29
+#define JIT_F22 _D30
+#define JIT_FTMP _D31
+
+/* FP registers d8..d15 (presumably the callee-save set, by analogy with
+   JIT_V* -- confirm against lightening.h). */
+#define JIT_VF0 _D8
+#define JIT_VF1 _D9
+#define JIT_VF2 _D10
+#define JIT_VF3 _D11
+#define JIT_VF4 _D12
+#define JIT_VF5 _D13
+#define JIT_VF6 _D14
+#define JIT_VF7 _D15
+
+/* Short aliases for the frame pointer, link register and stack pointer. */
+#define _FP _X29
+#define _LR _X30
+#define _SP _X31
+
+/* No additional platform-specific callee-save FP registers. */
+#define JIT_PLATFORM_CALLEE_SAVE_FPRS
+
+
+#endif /* _jit_aarch64_h */
diff --git a/deps/lightening/lightening/arm-cpu.c b/deps/lightening/lightening/arm-cpu.c
new file mode 100644
index 0000000..6618400
--- /dev/null
+++ b/deps/lightening/lightening/arm-cpu.c
@@ -0,0 +1,3116 @@
+/*
+ * Copyright (C) 2012-2017,2019-2020 Free Software Foundation, Inc.
+ *
+ * This file is part of GNU lightning.
+ *
+ * GNU lightning is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ *
+ * GNU lightning is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+ * License for more details.
+ *
+ * Authors:
+ * Paulo Cesar Pereira de Andrade
+ */
+
+/* Range predicates: true when d fits in a signed 20-bit / 24-bit field.
+   Every use of the argument is parenthesized so that an expression
+   argument such as `x & mask` is compared as a whole. */
+#define _s20P(d) ((d) >= -(int)0x80000 && (d) <= 0x7ffff)
+#define _s24P(d) ((d) >= -(int)0x800000 && (d) <= 0x7fffff)
+/* Field masks: keep only the low N bits of v. */
+#define _u3(v) ((v) & 0x7)
+#define _u4(v) ((v) & 0xf)
+#define _u5(v) ((v) & 0x1f)
+#define _u8(v) ((v) & 0xff)
+#define _u12(v) ((v) & 0xfff)
+#define _u13(v) ((v) & 0x1fff)
+#define _u16(v) ((v) & 0xffff)
+#define _u24(v) ((v) & 0xffffff)
+
+/* Condition codes, stored in the top four bits (28-31); tcb() shifts
+   them right by 6 before merging them into THUMB2_CC_B. */
+#define ARM_CC_EQ 0x00000000 /* Z=1 */
+#define ARM_CC_NE 0x10000000 /* Z=0 */
+#define ARM_CC_HS 0x20000000 /* C=1 */
+#define ARM_CC_LO 0x30000000 /* C=0 */
+#define ARM_CC_MI 0x40000000 /* N=1 */
+#define ARM_CC_VS 0x60000000 /* V=1 */
+#define ARM_CC_VC 0x70000000 /* V=0 */
+#define ARM_CC_HI 0x80000000 /* C=1 && Z=0 */
+#define ARM_CC_LS 0x90000000 /* C=0 || Z=1 */
+#define ARM_CC_GE 0xa0000000 /* N=V */
+#define ARM_CC_LT 0xb0000000 /* N!=V */
+#define ARM_CC_GT 0xc0000000 /* Z=0 && N=V */
+#define ARM_CC_LE 0xd0000000 /* Z=1 || N!=V */
+#define ARM_CC_AL 0xe0000000 /* always */
+#define ARM_CC_NV 0xf0000000 /* reserved */
+/* Opcode templates with all operand fields zero.  Values that fit in 16
+   bits are emitted with emit_u16_with_pool(); wider values are emitted as
+   two halfwords by emit_wide_thumb().  Note that THUMB_MLS, THUMB_DMB,
+   THUMB_LDREX and THUMB_STREX are wide despite the THUMB_ prefix. */
+#define THUMB_MOV 0x4600
+#define THUMB_MOVI 0x2000
+#define THUMB2_MOVI 0xf0400000
+#define THUMB2_MOVWI 0xf2400000
+#define THUMB2_MOVTI 0xf2c00000
+#define THUMB_MVN 0x43c0
+#define THUMB2_MVN 0xea600000
+#define THUMB2_MVNI 0xf0600000
+/* OR-ed into wide opcodes by the ...S / ...SI wrappers. */
+#define ARM_S 0x00100000 /* set flags */
+#define THUMB_ADD 0x1800
+#define THUMB_ADDX 0x4400
+#define THUMB2_ADD 0xeb000000
+#define THUMB_ADDI3 0x1c00
+#define THUMB_ADDI8 0x3000
+#define THUMB2_ADDI 0xf1000000
+#define THUMB2_ADDWI 0xf2000000
+#define THUMB_ADC 0x4140
+#define THUMB2_ADC 0xeb400000
+#define THUMB2_ADCI 0xf1400000
+#define THUMB_SUB 0x1a00
+#define THUMB2_SUB 0xeba00000
+#define THUMB_SUBI3 0x1e00
+#define THUMB_SUBI8 0x3800
+#define THUMB2_SUBI 0xf1a00000
+#define THUMB2_SUBWI 0xf2a00000
+#define THUMB_SBC 0x4180
+#define THUMB2_SBC 0xeb600000
+#define THUMB2_SBCI 0xf1600000
+#define THUMB_RSBI 0x4240
+#define THUMB2_RSBI 0xf1c00000
+#define THUMB_MUL 0x4340
+#define THUMB2_MUL 0xfb00f000
+#define THUMB2_UMULL 0xfba00000
+#define THUMB2_SMULL 0xfb800000
+#define THUMB_MLS 0xfb000010
+#define THUMB2_SDIV 0xfb90f0f0
+#define THUMB2_UDIV 0xfbb0f0f0
+#define THUMB_AND 0x4000
+#define THUMB2_AND 0xea000000
+#define THUMB2_ANDI 0xf0000000
+#define THUMB2_BIC 0xea200000
+#define THUMB2_BICI 0xf0200000
+#define THUMB_ORR 0x4300
+#define THUMB2_ORR 0xea400000
+#define THUMB2_ORRI 0xf0400000
+#define THUMB_EOR 0x4040
+#define THUMB2_EOR 0xea800000
+#define THUMB2_EORI 0xf0800000
+#define THUMB_REV 0xba00
+#define THUMB2_REV 0xfa90f080
+#define THUMB_SXTB 0xb240
+#define THUMB2_SXTB 0xfa40f080
+#define THUMB_UXTB 0xb2c0
+#define THUMB2_UXTB 0xfa50f080
+#define THUMB_SXTH 0xb200
+#define THUMB2_SXTH 0xfa00f080
+#define THUMB_UXTH 0xb280
+#define THUMB2_UXTH 0xfa10f080
+#define ARM_LSL 0x00000000
+#define THUMB_LSL 0x4080
+#define THUMB2_LSL 0xfa00f000
+#define THUMB_LSLI 0x0000
+#define THUMB2_LSLI 0xea4f0000
+#define ARM_LSR 0x00000020
+#define THUMB_LSR 0x40c0
+#define THUMB2_LSR 0xfa20f000
+#define THUMB_LSRI 0x0800
+#define THUMB2_LSRI 0xea4f0010
+#define ARM_ASR 0x00000040
+#define THUMB_ASR 0x4100
+#define THUMB2_ASR 0xfa40f000
+#define THUMB_ASRI 0x1000
+#define THUMB2_ASRI 0xea4f0020
+#define THUMB_CMP 0x4280
+#define THUMB_CMPX 0x4500
+#define THUMB2_CMP 0xebb00000
+#define THUMB_CMPI 0x2800
+#define THUMB2_CMPI 0xf1b00000
+#define THUMB2_CMN 0xeb100000
+#define THUMB2_CMNI 0xf1100000
+#define THUMB_TST 0x4200
+#define THUMB2_TST 0xea100000
+#define THUMB2_TSTI 0xf0100000
+#define THUMB_BLX 0x4780
+#define THUMB_BX 0x4700
+#define THUMB_CC_B 0xd000
+#define THUMB_B 0xe000
+#define THUMB2_CC_B 0xf0008000
+#define THUMB2_B 0xf0009000
+#define THUMB2_BLI 0xf000d000
+#define THUMB2_BLXI 0xf000c000
+/* Modifier bits for the 8-bit-immediate load/store opcodes.  The ...I
+   wrappers OR in THUMB2_U and the ...IN wrappers leave it clear
+   (presumably add vs. subtract the offset -- TODO confirm). */
+#define THUMB2_P 0x00000400
+#define THUMB2_U 0x00000200
+#define THUMB_LDRSB 0x5600
+#define THUMB2_LDRSB 0xf9100000
+#define THUMB2_LDRSBI 0xf9100c00
+#define THUMB2_LDRSBWI 0xf9900000
+#define THUMB_LDRB 0x5c00
+#define THUMB2_LDRB 0xf8100000
+#define THUMB_LDRBI 0x7800
+#define THUMB2_LDRBI 0xf8100c00
+#define THUMB2_LDRBWI 0xf8900000
+#define THUMB_LDRSH 0x5e00
+#define THUMB2_LDRSH 0xf9300000
+#define THUMB2_LDRSHI 0xf9300c00
+#define THUMB2_LDRSHWI 0xf9b00000
+#define THUMB_LDRH 0x5a00
+#define THUMB2_LDRH 0xf8300000
+#define THUMB_LDRHI 0x8800
+#define THUMB2_LDRHI 0xf8300c00
+#define THUMB2_LDRHWI 0xf8b00000
+#define THUMB_LDR 0x5800
+#define THUMB2_LDR 0xf8500000
+#define THUMB2_LDRP 0xf85f0000
+#define THUMB_LDRI 0x6800
+#define THUMB_LDRISP 0x9800
+#define THUMB2_LDRI 0xf8500c00
+#define THUMB2_LDRWI 0xf8d00000
+#define THUMB_STRB 0x5400
+#define THUMB2_STRB 0xf8000000
+#define THUMB_STRBI 0x7000
+#define THUMB2_STRBI 0xf8000c00
+#define THUMB2_STRBWI 0xf8800000
+#define THUMB_STRH 0x5200
+#define THUMB2_STRH 0xf8200000
+#define THUMB_STRHI 0x8000
+#define THUMB2_STRHI 0xf8200c00
+#define THUMB2_STRHWI 0xf8a00000
+#define THUMB_STR 0x5000
+#define THUMB2_STR 0xf8400000
+#define THUMB_STRI 0x6000
+#define THUMB2_STRWI 0xf8c00000
+#define THUMB_STRISP 0x9000
+#define THUMB2_STRI 0xf8400c00
+#define THUMB2_LDM_W 0x00200000
+#define THUMB2_PUSH 0xe92d0000
+#define THUMB_DMB 0xf3bf8f50
+#define THUMB_LDREX 0xe8500f00
+#define THUMB_STREX 0xe8400000
+#define THUMB_BRK 0xbe00
+
+/* Register number of _PC; the wrappers pass it for a register field
+   that the instruction being encoded does not use. */
+#define _NOREG (jit_gpr_regno(_PC))
+
+/* Relocation flag that emit_thumb_jump() adds for THUMB2_B jumps. */
+#define JIT_RELOC_B JIT_RELOC_FLAG_0
+
+/* Emit a 32-bit Thumb instruction as two halfwords, upper half first.
+   Only the second halfword goes through emit_u16_with_pool(). */
+static void
+emit_wide_thumb(jit_state_t *_jit, uint32_t inst)
+{
+  uint32_t upper = inst >> 16;
+  uint32_t lower = inst & 0xffff;
+
+  emit_u16(_jit, upper);
+  emit_u16_with_pool(_jit, lower);
+}
+
+/* Rotate the 32-bit value v left by n bits.  n == 0 is handled apart so
+   that the complementary shift is never a shift by 32. */
+static uint32_t
+rotate_left(uint32_t v, uint32_t n)
+{
+  if (n != 0) {
+    ASSERT(n < 32);
+    return (v << n) | (v >> (32 - n));
+  }
+  return v;
+}
+
+/* Search the even rotations 0, 2, ..., 30 for one that brings v into
+   eight bits.  Returns that 8-bit value OR-ed with (rotation << 7), or
+   -1 when no rotation fits. */
+static int
+encode_arm_immediate(unsigned int v)
+{
+  unsigned int rot = 0;
+
+  while (rot < 32) {
+    unsigned int imm8 = rotate_left(v, rot);
+    if (imm8 <= 0xff)
+      return (imm8 | (rot << 7));
+    rot += 2;
+  }
+  return (-1);
+}
+
+/* Try to express v as a Thumb-2 modified immediate.  Returns the
+   immediate field bits (as consumed by torri()) or -1 when v has none
+   of the supported shapes.  Assumes a 32-bit unsigned int. */
+static int
+encode_thumb_immediate(unsigned int v)
+{
+  unsigned int b0 = v & 0xff;
+  unsigned int b1 = (v >> 8) & 0xff;
+  unsigned int b2 = (v >> 16) & 0xff;
+  unsigned int b3 = (v >> 24) & 0xff;
+  unsigned int rot;
+
+  /* 00000000 00000000 00000000 abcdefgh */
+  if (v == b0)
+    return (v);
+  /* 00000000 abcdefgh 00000000 abcdefgh */
+  if (b1 == 0 && b3 == 0 && b2 == b0)
+    return (b0 | (1 << 12));
+  /* abcdefgh 00000000 abcdefgh 00000000 */
+  if (b0 == 0 && b2 == 0 && b3 == b1)
+    return (b1 | (2 << 12));
+  /* abcdefgh abcdefgh abcdefgh abcdefgh */
+  if (b0 == b1 && b1 == b2 && b2 == b3)
+    return (b0 | (3 << 12));
+  /* 1bcdefgh placed with its top bit at bit 31 (rot == 8) down to
+     bit 17 (rot == 22). */
+  for (rot = 8; rot < 23; rot++) {
+    unsigned int window = 0xff000000 >> (rot - 8);
+    unsigned int top = 0x80000000 >> (rot - 8);
+    unsigned int imm8, field;
+
+    if ((v & window) != v || !(v & top))
+      continue;
+    imm8 = v >> (32 - rot);
+    /* The lowest rotation bit is stored in bit 7 of the result. */
+    if (!(rot & 1))
+      imm8 &= 0x7f;
+    field = rot >> 1;
+    return (((field & 7) << 12) | ((field & 8) << 23) | imm8);
+  }
+  return (-1);
+}
+
+/* Scatter a 12-bit value into the split fields used by toriw()-style
+   encodings: bit 11 to bit 26, bits 8-10 to bits 12-14, and the low
+   byte in place.  Returns -1 if v does not fit in 12 bits. */
+static int
+encode_thumb_word_immediate(unsigned int v)
+{
+  unsigned int top, mid, low;
+
+  if (v & 0xfffff000)
+    return (-1);
+  top = (v & 0x800) << 15;
+  mid = (v & 0x700) << 4;
+  low = v & 0xff;
+  return (top | mid | low);
+}
+
+/* Read a 32-bit Thumb instruction stored as two consecutive halfwords,
+   upper half first.  The first halfword is widened to uint32_t before
+   shifting: a uint16_t promotes to int, and shifting a set bit 15 into
+   the sign bit of an int is undefined. */
+static uint32_t
+read_wide_thumb(uint32_t *loc)
+{
+  const uint16_t *halves = (const uint16_t *)loc;
+  return ((uint32_t)halves[0] << 16) | halves[1];
+}
+
+/* Store the 32-bit instruction v at loc as two halfwords, upper half
+   first (the layout read_wide_thumb() expects). */
+static void
+write_wide_thumb(uint32_t *loc, uint32_t v)
+{
+  uint16_t *halves = (uint16_t *)loc;
+  uint16_t upper = v >> 16;
+  uint16_t lower = v & 0xffff;
+
+  halves[0] = upper;
+  halves[1] = lower;
+}
+
+/* Return nonzero if a wide Thumb jump can reach `offset` directly.
+
+   An even offset (bit 0 clear) is only reachable by rewriting the
+   instruction to THUMB2_BLXI, which patch_thumb_jump() permits for
+   BL/BLX only; a jump carrying JIT_RELOC_B therefore cannot take one.
+   The flag must be tested with `&`: `flags | JIT_RELOC_B` is nonzero
+   for every value of flags and rejected all even offsets. */
+static int
+offset_in_jmp_range(int32_t offset, int flags)
+{
+  if (!(offset & 1) && (flags & JIT_RELOC_B))
+    return 0;
+  return -0x1000000 <= offset && offset <= 0xffffff;
+}
+
+/* Extract the signed byte offset from a wide Thumb jump instruction.
+
+   The offset is S:I1:I2:imm10:imm11:0, where I1 and I2 are stored in
+   the instruction as J1 and J2, inverted whenever S is clear.  The
+   value is assembled in unsigned arithmetic and sign-extended by hand,
+   so no negative signed value is ever shifted. */
+static int32_t
+decode_thumb_jump(uint32_t v)
+{
+  uint32_t s = (v >> 26) & 1;
+  uint32_t j1 = (v >> 13) & 1;
+  uint32_t j2 = (v >> 11) & 1;
+  uint32_t i1 = s ? j1 : !j1;
+  uint32_t i2 = s ? j2 : !j2;
+  uint32_t hi = (v >> 16) & 0x3ff;
+  uint32_t lo = v & 0x7ff;
+
+  uint32_t imm = (i1 << 22) | (i2 << 21) | (hi << 11) | lo;
+  if (s)
+    imm |= 0xff800000u;   /* replicate the sign into bits 23..31 */
+  return (int32_t)(imm << 1);
+}
+
+/* Opcode bits of a wide Thumb jump; every bit outside this mask belongs
+   to the offset fields produced by encode_thumb_jump(). */
+static const uint32_t thumb_jump_mask = 0xf800d000;
+
+/* Encode the byte offset v into the offset fields of a wide Thumb jump
+   (opcode bits left zero).  Bit 0 of v is dropped. */
+static uint32_t
+encode_thumb_jump(int32_t v)
+{
+  ASSERT(offset_in_jmp_range(v, 0));
+
+  int32_t half = v >> 1;
+  uint32_t s = (half & 0x800000) ? 1 : 0;
+  uint32_t i1 = (half & 0x400000) ? 1 : 0;
+  uint32_t i2 = (half & 0x200000) ? 1 : 0;
+  /* J1/J2 hold I1/I2 directly when S is set and inverted otherwise. */
+  uint32_t j1 = s ? i1 : !i1;
+  uint32_t j2 = s ? i2 : !i2;
+  uint32_t ret = (s << 26)
+    | ((half & 0x1ff800) << 5)
+    | (j1 << 13)
+    | (j2 << 11)
+    | (half & 0x7ff);
+
+  ASSERT(decode_thumb_jump(ret) == half << 1);
+  ASSERT((ret & thumb_jump_mask) == 0);
+  return ret;
+}
+
+/* Return inst with its offset fields replaced by the encoding of v.
+   An even v is only accepted for THUMB2_BLI/THUMB2_BLXI; the opcode then
+   becomes THUMB2_BLXI and v is rounded up to a multiple of 4. */
+static uint32_t
+patch_thumb_jump(uint32_t inst, int32_t v)
+{
+  uint32_t opcode = inst & thumb_jump_mask;
+
+  if ((v & 1) == 0) {
+    ASSERT(opcode == THUMB2_BLI || opcode == THUMB2_BLXI);
+    opcode = THUMB2_BLXI;
+    v = (v + 2) & ~2;
+  }
+  return opcode | encode_thumb_jump(v);
+}
+
+/* Decode the byte offset of the wide Thumb jump stored at loc. */
+static int32_t
+read_jmp_offset(uint32_t *loc)
+{
+  uint32_t inst = read_wide_thumb(loc);
+  return decode_thumb_jump(inst);
+}
+
+/* Rewrite the wide Thumb jump stored at loc so that it encodes offset v. */
+static void
+patch_jmp_offset(uint32_t *loc, int32_t v)
+{
+  uint32_t old_inst = read_wide_thumb(loc);
+  uint32_t new_inst = patch_thumb_jump(old_inst, v);
+  write_wide_thumb(loc, new_inst);
+}
+
+/* Patch the jump at loc to reach a veneer at even offset v; bit 0 is
+   set before patching so the jump opcode is kept as is. */
+static void
+patch_veneer_jmp_offset(uint32_t *loc, int32_t v)
+{
+  ASSERT((v & 1) == 0);
+  int32_t thumb_off = v | 1;
+  patch_jmp_offset(loc, thumb_off);
+}
+
+/* Emit the wide jump `inst` with a placeholder offset and return a
+   relocation for it.  The relocation is registered as a pending literal
+   first; if add_pending_literal() refuses, everything is recomputed from
+   the current pc and tried again. */
+static jit_reloc_t
+emit_thumb_jump(jit_state_t *_jit, uint32_t inst)
+{
+  const uint8_t thumb_jump_width = 24;
+
+  for (;;) {
+    uint8_t *pc_base = _jit->pc.uc + 4;
+    int32_t off = (uint8_t*)jit_address(_jit) - pc_base;
+    enum jit_reloc_kind kind = JIT_RELOC_JMP_WITH_VENEER;
+    /* Plain B jumps are tagged for offset_in_jmp_range(). */
+    if (inst == THUMB2_B)
+      kind |= JIT_RELOC_B;
+    jit_reloc_t ret = jit_reloc (_jit, kind, 0, _jit->pc.uc, pc_base, 0);
+    if (!add_pending_literal(_jit, ret, thumb_jump_width - 1))
+      continue;
+    emit_wide_thumb(_jit, patch_thumb_jump(inst, off));
+    return ret;
+  }
+}
+
+/* Return 1 if a wide conditional Thumb jump can reach offset v: bit 0
+   must be set and v must lie in [-0x100000, 0xfffff].  `flags` is
+   accepted for signature parity and not consulted. */
+static int
+offset_in_jcc_range(int32_t v, int flags)
+{
+  int has_low_bit = (v & 1) != 0;
+
+  return has_low_bit && v >= -0x100000 && v <= 0xfffff;
+}
+
+/* Extract the signed byte offset from a wide conditional Thumb jump.
+
+   The offset is S:J2:J1:imm6:imm11:0.  It is assembled in unsigned
+   arithmetic and sign-extended by hand, so no negative signed value is
+   ever shifted. */
+static int32_t
+decode_thumb_cc_jump(uint32_t v)
+{
+  uint32_t s = (v >> 26) & 1;
+  uint32_t j1 = (v >> 13) & 1;
+  uint32_t j2 = (v >> 11) & 1;
+  uint32_t hi = (v >> 16) & 0x3f;
+  uint32_t lo = v & 0x7ff;
+
+  uint32_t imm = (j2 << 18) | (j1 << 17) | (hi << 11) | lo;
+  if (s)
+    imm |= 0xfff80000u;   /* replicate the sign into bits 19..31 */
+  return (int32_t)(imm << 1);
+}
+
+/* Opcode and condition bits of a wide conditional Thumb jump; every bit
+   outside this mask belongs to the offset fields. */
+static const uint32_t thumb_cc_jump_mask = 0xfbc0d000;
+
+/* Encode the byte offset v into the offset fields of a wide conditional
+   Thumb jump (opcode and condition bits left zero).  Bit 0 of v is
+   dropped. */
+static uint32_t
+encode_thumb_cc_jump(int32_t v)
+{
+  ASSERT(offset_in_jcc_range(v, 0));
+
+  int32_t half = v >> 1;
+  uint32_t s = (half & 0x80000) ? 1 : 0;
+  uint32_t j2 = (half & 0x40000) ? 1 : 0;
+  uint32_t j1 = (half & 0x20000) ? 1 : 0;
+  uint32_t hi = (half >> 11) & 0x3f;
+  uint32_t lo = half & 0x7ff;
+  uint32_t ret = (s << 26) | (hi << 16) | (j1 << 13) | (j2 << 11) | lo;
+
+  ASSERT(decode_thumb_cc_jump(ret) == half << 1);
+  ASSERT((ret & thumb_cc_jump_mask) == 0);
+  return ret;
+}
+
+/* Return inst with its offset fields replaced by the encoding of v;
+   the opcode and condition bits of inst are kept. */
+static uint32_t
+patch_thumb_cc_jump(uint32_t inst, int32_t v)
+{
+  uint32_t kept = inst & thumb_cc_jump_mask;
+  return kept | encode_thumb_cc_jump(v);
+}
+
+/* Decode the byte offset of the conditional jump stored at loc. */
+static int32_t
+read_jcc_offset(uint32_t *loc)
+{
+  uint32_t inst = read_wide_thumb(loc);
+  return decode_thumb_cc_jump(inst);
+}
+
+/* Rewrite the conditional jump stored at loc so it encodes offset v. */
+static void
+patch_jcc_offset(uint32_t *loc, int32_t v)
+{
+  uint32_t old_inst = read_wide_thumb(loc);
+  uint32_t new_inst = patch_thumb_cc_jump(old_inst, v);
+  write_wide_thumb(loc, new_inst);
+}
+
+/* Patch the conditional jump at loc to reach a veneer at even offset v;
+   bit 0 is set because offset_in_jcc_range() requires it. */
+static void
+patch_veneer_jcc_offset(uint32_t *loc, int32_t v)
+{
+  ASSERT((v & 1) == 0);
+  int32_t thumb_off = v | 1;
+  patch_jcc_offset(loc, thumb_off);
+}
+
+/* Emit the conditional wide jump `inst` with a placeholder offset and
+   return a relocation for it.  The relocation is registered as a pending
+   literal first; if add_pending_literal() refuses, everything is
+   recomputed from the current pc and tried again. */
+static jit_reloc_t
+emit_thumb_cc_jump(jit_state_t *_jit, uint32_t inst)
+{
+  const uint8_t thumb_cc_jump_width = 20;
+
+  for (;;) {
+    uint8_t *pc_base = _jit->pc.uc + 4;
+    int32_t off = (uint8_t*)jit_address(_jit) - pc_base;
+    jit_reloc_t ret =
+      jit_reloc (_jit, JIT_RELOC_JCC_WITH_VENEER, 0, _jit->pc.uc, pc_base, 0);
+    if (!add_pending_literal(_jit, ret, thumb_cc_jump_width - 1))
+      continue;
+    emit_wide_thumb(_jit, patch_thumb_cc_jump(inst, off));
+    return ret;
+  }
+}
+
+/* Emit wide opcode o with rn in bits 16-19, rd in bits 8-11 and rm in
+   bits 0-3.  o must leave those fields clear. */
+static void
+torrr(jit_state_t *_jit, int o, int rn, int rd, int rm)
+{
+  ASSERT(!(o & 0xf0f0f));
+  uint32_t inst = o | (_u4(rn) << 16) | (_u4(rd) << 8) | _u4(rm);
+  emit_wide_thumb(_jit, inst);
+}
+
+/* Emit wide opcode o with rn in bits 16-19, rt in bits 12-15 and rm in
+   bits 0-3. */
+static void
+torxr(jit_state_t *_jit, int o, int rn, int rt, int rm)
+{
+  ASSERT(!(o & 0xf0f0f));
+  uint32_t inst = o | (_u4(rn) << 16) | (_u4(rt) << 12) | _u4(rm);
+  emit_wide_thumb(_jit, inst);
+}
+
+/* Emit wide opcode o with four registers: rn in bits 16-19, rl in bits
+   12-15, rh in bits 8-11 and rm in bits 0-3. */
+static void
+torrrr(jit_state_t *_jit, int o, int rn, int rl, int rh, int rm)
+{
+  ASSERT(!(o & 0x000fff0f));
+  uint32_t inst =
+    o | (_u4(rn) << 16) | (_u4(rl) << 12) | (_u4(rh) << 8) | _u4(rm);
+  emit_wide_thumb(_jit, inst);
+}
+
+/* Emit wide opcode o with rn in bits 16-19, rd in bits 8-11 and an
+   already-encoded immediate im, which may only occupy bit 26, bits
+   12-14 and bits 0-7. */
+static void
+torri(jit_state_t *_jit, int o, int rn, int rd, int im)
+{
+  ASSERT(!(o & 0x0c0f7fff));
+  ASSERT(!(im & 0xfbff8f00));
+  uint32_t inst = o | (_u4(rn) << 16) | (_u4(rd) << 8) | im;
+  emit_wide_thumb(_jit, inst);
+}
+
+/* Emit wide opcode o with rn in bits 16-19, rt in bits 12-15 and an
+   8-bit immediate in bits 0-7. */
+static void
+torri8(jit_state_t *_jit, int o, int rn, int rt, int im)
+{
+  ASSERT(!(o & 0x000ff0ff));
+  ASSERT(!(im & 0xffffff00));
+  uint32_t inst = o | (_u4(rn) << 16) | (_u4(rt) << 12) | im;
+  emit_wide_thumb(_jit, inst);
+}
+
+/* Emit wide opcode o with rn in bits 16-19, rt in bits 12-15 and a
+   12-bit immediate in bits 0-11. */
+static void
+torri12(jit_state_t *_jit, int o, int rn, int rt, int im)
+{
+  ASSERT(!(o & 0x000fffff));
+  ASSERT(!(im & 0xfffff000));
+  uint32_t inst = o | (_u4(rn) << 16) | (_u4(rt) << 12) | im;
+  emit_wide_thumb(_jit, inst);
+}
+
+/* Emit a wide shift-by-immediate.  The 5-bit amount im is split: bits
+   2-4 go to instruction bits 12-14 and bits 0-1 to instruction bits 6-7. */
+static void
+tshift(jit_state_t *_jit, int o, int rd, int rm, int im)
+{
+  ASSERT(!(o & 0x7fcf));
+  ASSERT(im >= 0 && im < 32);
+  int amount_hi = (im & 0x1c) << 10;
+  int amount_lo = (im & 3) << 6;
+  emit_wide_thumb(_jit, o | amount_hi | (_u4(rd) << 8) | amount_lo | _u4(rm));
+}
+
+/* Emit wide opcode o with rd in bits 8-11 and a 16-bit immediate
+   scattered as: bits 12-15 -> 16-19, bit 11 -> 26, bits 8-10 -> 12-14,
+   and the low byte in place. */
+static void
+toriw(jit_state_t *_jit, int o, int rd, int im)
+{
+  ASSERT(!(im & 0xffff0000));
+  int imm4 = (im & 0xf000) << 4;
+  int imm1 = (im & 0x800) << 15;
+  int imm3 = (im & 0x700) << 4;
+  int imm8 = im & 0xff;
+  emit_wide_thumb(_jit, o | imm4 | imm1 | imm3 | (_u4(rd) << 8) | imm8);
+}
+
+/* Emit a wide conditional jump for condition cc (one of the ARM_CC_*
+   values other than AL and NV).  The condition moves from bits 28-31 of
+   cc to bits 22-25 of the instruction. */
+static jit_reloc_t
+tcb(jit_state_t *_jit, int cc)
+{
+  ASSERT(!(cc & 0xfffffff));
+  ASSERT(cc != ARM_CC_AL && cc != ARM_CC_NV);
+  uint32_t cond = ((uint32_t)cc) >> 6;
+  return emit_thumb_cc_jump(_jit, THUMB2_CC_B | cond);
+}
+
+/* Emit the wide unconditional jump opcode o, which must have all of its
+   offset fields clear, and return its relocation. */
+static jit_reloc_t
+tb(jit_state_t *_jit, int o)
+{
+  ASSERT((o & 0x07ff2fff) == 0);
+  jit_reloc_t reloc = emit_thumb_jump(_jit, o);
+  return reloc;
+}
+
+/* ORR encoders.  T1_* emits a 16-bit instruction whose register fields
+   are truncated to 3 bits; T2_* emits a 32-bit instruction.  The T2_*
+   wrappers call their void helper as a plain statement, since ISO C
+   does not allow `return expr;` in a function returning void. */
+static void
+T1_ORR(jit_state_t *_jit, int32_t rdn, int32_t rm)
+{
+  emit_u16_with_pool(_jit, THUMB_ORR | (_u3(rm) << 3) | _u3(rdn));
+}
+
+static void
+T2_ORR(jit_state_t *_jit, int32_t rd, int32_t rn, int32_t rm)
+{
+  torrr(_jit, THUMB2_ORR, rn, rd, rm);
+}
+
+/* im must already be encoded in the form torri() accepts. */
+static void
+T2_ORRI(jit_state_t *_jit, int32_t rd, int32_t rn, int32_t im)
+{
+  torri(_jit, THUMB2_ORRI, rn, rd, im);
+}
+
+/* EOR encoders.  T1_* emits a 16-bit instruction whose register fields
+   are truncated to 3 bits; T2_* emits a 32-bit instruction.  The T2_*
+   wrappers call their void helper as a plain statement, since ISO C
+   does not allow `return expr;` in a function returning void. */
+static void
+T1_EOR(jit_state_t *_jit, int32_t rdn, int32_t rm)
+{
+  emit_u16_with_pool(_jit, THUMB_EOR | (_u3(rm) << 3) | _u3(rdn));
+}
+
+static void
+T2_EOR(jit_state_t *_jit, int32_t rd, int32_t rn, int32_t rm)
+{
+  torrr(_jit, THUMB2_EOR, rn, rd, rm);
+}
+
+/* im must already be encoded in the form torri() accepts. */
+static void
+T2_EORI(jit_state_t *_jit, int32_t rd, int32_t rn, int32_t im)
+{
+  torri(_jit, THUMB2_EORI, rn, rd, im);
+}
+
+/* MOV encoders.  The T2_* wrappers call their void helper as a plain
+   statement, since ISO C does not allow `return expr;` in a function
+   returning void. */
+
+/* 16-bit register move; rd and rm may each be any 4-bit register number
+   (bit 3 of rd is stored separately in bit 7). */
+static void
+T1_MOV(jit_state_t *_jit, int32_t rd, int32_t rm)
+{
+  emit_u16_with_pool(_jit,
+                     THUMB_MOV | ((_u4(rd) & 8) << 4) | (_u4(rm) << 3) | (rd & 7));
+}
+
+/* 16-bit move of an 8-bit immediate into a 3-bit register number. */
+static void
+T1_MOVI(jit_state_t *_jit, int32_t rd, int32_t im)
+{
+  emit_u16_with_pool(_jit, THUMB_MOVI | (_u3(rd) << 8) | _u8(im));
+}
+
+/* 32-bit move of an already-encoded immediate; the rn field is _NOREG. */
+static void
+T2_MOVI(jit_state_t *_jit, int32_t rd, int32_t im)
+{
+  torri(_jit, THUMB2_MOVI, _NOREG, rd, im);
+}
+
+/* 32-bit THUMB2_MOVWI with a raw 16-bit immediate (see toriw()). */
+static void
+T2_MOVWI(jit_state_t *_jit, int32_t rd, int32_t im)
+{
+  toriw(_jit, THUMB2_MOVWI, rd, im);
+}
+
+/* 32-bit THUMB2_MOVTI with a raw 16-bit immediate (see toriw()). */
+static void
+T2_MOVTI(jit_state_t *_jit, int32_t rd, int32_t im)
+{
+  toriw(_jit, THUMB2_MOVTI, rd, im);
+}
+
+/* MVN encoders, their NOT aliases, and the 16-bit NOP.  Wrappers around
+   void helpers call them as plain statements, since ISO C does not allow
+   `return expr;` in a function returning void. */
+static void
+T1_MVN(jit_state_t *_jit, int32_t rd, int32_t rm)
+{
+  emit_u16_with_pool(_jit, THUMB_MVN | (_u3(rm) << 3) | _u3(rd));
+}
+
+/* The unused rn field is filled with _NOREG. */
+static void
+T2_MVN(jit_state_t *_jit, int32_t rd, int32_t rm)
+{
+  torrr(_jit, THUMB2_MVN, _NOREG, rd, rm);
+}
+
+/* im must already be encoded in the form torri() accepts. */
+static void
+T2_MVNI(jit_state_t *_jit, int32_t rd, int32_t im)
+{
+  torri(_jit, THUMB2_MVNI, _NOREG, rd, im);
+}
+
+/* Alias for T1_MVN(). */
+static void
+T1_NOT(jit_state_t *_jit, int32_t rd, int32_t rm)
+{
+  T1_MVN(_jit, rd, rm);
+}
+
+/* Alias for T2_MVN(). */
+static void
+T2_NOT(jit_state_t *_jit, int32_t rd, int32_t rm)
+{
+  T2_MVN(_jit, rd, rm);
+}
+
+/* Emit the fixed 16-bit halfword 0xbf00. */
+static void
+T1_NOP(jit_state_t *_jit)
+{
+  emit_u16_with_pool(_jit, 0xbf00);
+}
+
+/* ADD encoders.  T1_* emits a 16-bit instruction, T2_* a 32-bit one; the
+   ...S / ...SI variants OR ARM_S into the opcode.  The T2_* wrappers
+   call their void helper as a plain statement, since ISO C does not
+   allow `return expr;` in a function returning void. */
+static void
+T1_ADD(jit_state_t *_jit, int32_t rd, int32_t rn, int32_t rm)
+{
+  emit_u16_with_pool(_jit,
+                     THUMB_ADD | (_u3(rm) << 6) | (_u3(rn) << 3) | _u3(rd));
+}
+
+/* 16-bit form taking 4-bit register numbers (bit 3 of rdn is stored
+   separately in bit 7). */
+static void
+T1_ADDX(jit_state_t *_jit, int32_t rdn, int32_t rm)
+{
+  emit_u16_with_pool(_jit,
+                     THUMB_ADDX | ((_u4(rdn) & 8) << 4) | (_u4(rm) << 3) | (rdn & 7));
+}
+
+static void
+T2_ADD(jit_state_t *_jit, int32_t rd, int32_t rn, int32_t rm)
+{
+  torrr(_jit, THUMB2_ADD, rn, rd, rm);
+}
+
+/* 3-bit immediate. */
+static void
+T1_ADDI3(jit_state_t *_jit, int32_t rd, int32_t rn, int32_t im)
+{
+  emit_u16_with_pool(_jit,
+                     THUMB_ADDI3 | (_u3(im) << 6) | (_u3(rn) << 3) | _u3(rd));
+}
+
+/* 8-bit immediate added to rdn in place. */
+static void
+T1_ADDI8(jit_state_t *_jit, int32_t rdn, int32_t im)
+{
+  emit_u16_with_pool(_jit, THUMB_ADDI8 | (_u3(rdn) << 8) | _u8(im));
+}
+
+/* im must already be encoded in the form torri() accepts. */
+static void
+T2_ADDI(jit_state_t *_jit, int32_t rd, int32_t rn, int32_t im)
+{
+  torri(_jit, THUMB2_ADDI, rn, rd, im);
+}
+
+/* im must already be encoded in the form torri() accepts. */
+static void
+T2_ADDWI(jit_state_t *_jit, int32_t rd, int32_t rn, int32_t im)
+{
+  torri(_jit, THUMB2_ADDWI, rn, rd, im);
+}
+
+static void
+T2_ADDS(jit_state_t *_jit, int32_t rd, int32_t rn, int32_t rm)
+{
+  torrr(_jit, THUMB2_ADD | ARM_S, rn, rd, rm);
+}
+
+static void
+T2_ADDSI(jit_state_t *_jit, int32_t rd, int32_t rn, int32_t im)
+{
+  torri(_jit, THUMB2_ADDI | ARM_S, rn, rd, im);
+}
+
+/* ADC encoders.  The T2_* variants OR ARM_S into the opcode and call
+   their void helper as a plain statement, since ISO C does not allow
+   `return expr;` in a function returning void. */
+static void
+T1_ADC(jit_state_t *_jit, int32_t rdn, int32_t rm)
+{
+  emit_u16_with_pool(_jit, THUMB_ADC | (_u3(rm) << 3) | _u3(rdn));
+}
+
+static void
+T2_ADCS(jit_state_t *_jit, int32_t rd, int32_t rn, int32_t rm)
+{
+  torrr(_jit, THUMB2_ADC | ARM_S, rn, rd, rm);
+}
+
+/* im must already be encoded in the form torri() accepts. */
+static void
+T2_ADCSI(jit_state_t *_jit, int32_t rd, int32_t rn, int32_t im)
+{
+  torri(_jit, THUMB2_ADCI | ARM_S, rn, rd, im);
+}
+
+/* SUB encoders.  T1_* emits a 16-bit instruction, T2_* a 32-bit one; the
+   ...S / ...SI variants OR ARM_S into the opcode.  The T2_* wrappers
+   call their void helper as a plain statement, since ISO C does not
+   allow `return expr;` in a function returning void. */
+static void
+T1_SUB(jit_state_t *_jit, int32_t rd, int32_t rn, int32_t rm)
+{
+  emit_u16_with_pool(_jit,
+                     THUMB_SUB | (_u3(rm) << 6) | (_u3(rn) << 3) | _u3(rd));
+}
+
+static void
+T2_SUB(jit_state_t *_jit, int32_t rd, int32_t rn, int32_t rm)
+{
+  torrr(_jit, THUMB2_SUB, rn, rd, rm);
+}
+
+/* 3-bit immediate. */
+static void
+T1_SUBI3(jit_state_t *_jit, int32_t rd, int32_t rn, int32_t im)
+{
+  emit_u16_with_pool(_jit,
+                     THUMB_SUBI3 | (_u3(im) << 6) | (_u3(rn) << 3) | _u3(rd));
+}
+
+/* 8-bit immediate subtracted from rdn in place. */
+static void
+T1_SUBI8(jit_state_t *_jit, int32_t rdn, int32_t im)
+{
+  emit_u16_with_pool(_jit, THUMB_SUBI8 | (_u3(rdn) << 8) | _u8(im));
+}
+
+/* im must already be encoded in the form torri() accepts. */
+static void
+T2_SUBI(jit_state_t *_jit, int32_t rd, int32_t rn, int32_t im)
+{
+  torri(_jit, THUMB2_SUBI, rn, rd, im);
+}
+
+/* im must already be encoded in the form torri() accepts. */
+static void
+T2_SUBWI(jit_state_t *_jit, int32_t rd, int32_t rn, int32_t im)
+{
+  torri(_jit, THUMB2_SUBWI, rn, rd, im);
+}
+
+static void
+T2_SUBS(jit_state_t *_jit, int32_t rd, int32_t rn, int32_t rm)
+{
+  torrr(_jit, THUMB2_SUB | ARM_S, rn, rd, rm);
+}
+
+static void
+T2_SUBSI(jit_state_t *_jit, int32_t rd, int32_t rn, int32_t im)
+{
+  torri(_jit, THUMB2_SUBI | ARM_S, rn, rd, im);
+}
+
+/* SBC encoders.  The T2_* variants OR ARM_S into the opcode and call
+   their void helper as a plain statement, since ISO C does not allow
+   `return expr;` in a function returning void. */
+static void
+T1_SBC(jit_state_t *_jit, int32_t rdn, int32_t rm)
+{
+  emit_u16_with_pool(_jit, THUMB_SBC | (_u3(rm) << 3) | _u3(rdn));
+}
+
+static void
+T2_SBCS(jit_state_t *_jit, int32_t rd, int32_t rn, int32_t rm)
+{
+  torrr(_jit, THUMB2_SBC | ARM_S, rn, rd, rm);
+}
+
+/* im must already be encoded in the form torri() accepts. */
+static void
+T2_SBCSI(jit_state_t *_jit, int32_t rd, int32_t rn, int32_t im)
+{
+  torri(_jit, THUMB2_SBCI | ARM_S, rn, rd, im);
+}
+
+/* RSB encoders.  The 16-bit form takes no immediate operand; the 32-bit
+   form takes an immediate already encoded for torri().  T2_RSBI calls
+   its void helper as a plain statement, since ISO C does not allow
+   `return expr;` in a function returning void. */
+static void
+T1_RSBI(jit_state_t *_jit, int32_t rd, int32_t rn)
+{
+  emit_u16_with_pool(_jit, THUMB_RSBI | (_u3(rn) << 3) | _u3(rd));
+}
+
+static void
+T2_RSBI(jit_state_t *_jit, int32_t rd, int32_t rn, int32_t im)
+{
+  torri(_jit, THUMB2_RSBI, rn, rd, im);
+}
+
+/* Multiply / divide encoders.  Wrappers around void helpers call them
+   as plain statements, since ISO C does not allow `return expr;` in a
+   function returning void. */
+static void
+T1_MUL(jit_state_t *_jit, int32_t rdm, int32_t rn)
+{
+  emit_u16_with_pool(_jit, THUMB_MUL | (_u3(rn) << 3) | _u3(rdm));
+}
+
+static void
+T2_MUL(jit_state_t *_jit, int32_t rd, int32_t rn, int32_t rm)
+{
+  torrr(_jit, THUMB2_MUL, rn, rd, rm);
+}
+
+/* Two result registers: rl goes in bits 12-15 and rh in bits 8-11. */
+static void
+T2_SMULL(jit_state_t *_jit, int32_t rl, int32_t rh, int32_t rn, int32_t rm)
+{
+  torrrr(_jit, THUMB2_SMULL, rn, rl, rh, rm);
+}
+
+/* Two result registers: rl goes in bits 12-15 and rh in bits 8-11. */
+static void
+T2_UMULL(jit_state_t *_jit, int32_t rl, int32_t rh, int32_t rn, int32_t rm)
+{
+  torrrr(_jit, THUMB2_UMULL, rn, rl, rh, rm);
+}
+
+static void
+T2_SDIV(jit_state_t *_jit, int32_t rd, int32_t rn, int32_t rm)
+{
+  torrr(_jit, THUMB2_SDIV, rn, rd, rm);
+}
+
+static void
+T2_UDIV(jit_state_t *_jit, int32_t rd, int32_t rn, int32_t rm)
+{
+  torrr(_jit, THUMB2_UDIV, rn, rd, rm);
+}
+
+/* Despite the T1_ prefix this is a 32-bit encoding; ra goes in bits
+   12-15 and rd in bits 8-11. */
+static void
+T1_MLS(jit_state_t *_jit, int32_t rd, int32_t rn, int32_t rm, int32_t ra)
+{
+  torrrr(_jit, THUMB_MLS, rn, ra, rd, rm);
+}
+
+/* AND / BIC encoders.  The T2_* wrappers call their void helper as a
+   plain statement, since ISO C does not allow `return expr;` in a
+   function returning void. */
+static void
+T1_AND(jit_state_t *_jit, int32_t rdn, int32_t rm)
+{
+  emit_u16_with_pool(_jit, THUMB_AND | (_u3(rm) << 3) | _u3(rdn));
+}
+
+static void
+T2_AND(jit_state_t *_jit, int32_t rd, int32_t rn, int32_t rm)
+{
+  torrr(_jit, THUMB2_AND, rn, rd, rm);
+}
+
+/* im must already be encoded in the form torri() accepts. */
+static void
+T2_ANDI(jit_state_t *_jit, int32_t rd, int32_t rn, int32_t im)
+{
+  torri(_jit, THUMB2_ANDI, rn, rd, im);
+}
+
+/* im must already be encoded in the form torri() accepts. */
+static void
+T2_BICI(jit_state_t *_jit, int32_t rd, int32_t rn, int32_t im)
+{
+  torri(_jit, THUMB2_BICI, rn, rd, im);
+}
+
+/* REV encoders.  T2_REV calls its void helper as a plain statement,
+   since ISO C does not allow `return expr;` in a function returning
+   void. */
+static void
+T1_REV(jit_state_t *_jit, int32_t rd, int32_t rm)
+{
+  emit_u16_with_pool(_jit, THUMB_REV | (_u3(rm) << 3) | _u3(rd));
+}
+
+/* rm is deliberately passed for both the rn and rm fields. */
+static void
+T2_REV(jit_state_t *_jit, int32_t rd, int32_t rm)
+{
+  torrr(_jit, THUMB2_REV, rm, rd, rm);
+}
+
+/* SXTB / UXTB / SXTH / UXTH encoders.  The 32-bit forms fill the unused
+   rn field with _NOREG and call their void helper as a plain statement,
+   since ISO C does not allow `return expr;` in a function returning
+   void. */
+static void
+T1_SXTB(jit_state_t *_jit, int32_t rd, int32_t rm)
+{
+  emit_u16_with_pool(_jit, THUMB_SXTB | (_u3(rm) << 3) | _u3(rd));
+}
+
+static void
+T2_SXTB(jit_state_t *_jit, int32_t rd, int32_t rm)
+{
+  torrr(_jit, THUMB2_SXTB, _NOREG, rd, rm);
+}
+
+static void
+T1_UXTB(jit_state_t *_jit, int32_t rd, int32_t rm)
+{
+  emit_u16_with_pool(_jit, THUMB_UXTB | (_u3(rm) << 3) | _u3(rd));
+}
+
+static void
+T2_UXTB(jit_state_t *_jit, int32_t rd, int32_t rm)
+{
+  torrr(_jit, THUMB2_UXTB, _NOREG, rd, rm);
+}
+
+static void
+T1_SXTH(jit_state_t *_jit, int32_t rd, int32_t rm)
+{
+  emit_u16_with_pool(_jit, THUMB_SXTH | (_u3(rm) << 3) | _u3(rd));
+}
+
+static void
+T2_SXTH(jit_state_t *_jit, int32_t rd, int32_t rm)
+{
+  torrr(_jit, THUMB2_SXTH, _NOREG, rd, rm);
+}
+
+static void
+T1_UXTH(jit_state_t *_jit, int32_t rd, int32_t rm)
+{
+  emit_u16_with_pool(_jit, THUMB_UXTH | (_u3(rm) << 3) | _u3(rd));
+}
+
+static void
+T2_UXTH(jit_state_t *_jit, int32_t rd, int32_t rm)
+{
+  torrr(_jit, THUMB2_UXTH, _NOREG, rd, rm);
+}
+
+/* LSL encoders: by register (T1_LSL, T2_LSL) and by immediate (T1_LSLI,
+   T2_LSLI).  The T2_* wrappers call their void helper as a plain
+   statement, since ISO C does not allow `return expr;` in a function
+   returning void. */
+static void
+T1_LSL(jit_state_t *_jit, int32_t rdn, int32_t rm)
+{
+  emit_u16_with_pool(_jit, THUMB_LSL | (_u3(rm) << 3) | _u3(rdn));
+}
+
+static void
+T2_LSL(jit_state_t *_jit, int32_t rd, int32_t rn, int32_t rm)
+{
+  torrr(_jit, THUMB2_LSL, rn, rd, rm);
+}
+
+/* im is truncated to 5 bits. */
+static void
+T1_LSLI(jit_state_t *_jit, int32_t rd, int32_t rm, int32_t im)
+{
+  emit_u16_with_pool(_jit,
+                     THUMB_LSLI | (_u5(im) << 6) | (_u3(rm) << 3) | _u3(rd));
+}
+
+/* tshift() asserts 0 <= im < 32. */
+static void
+T2_LSLI(jit_state_t *_jit, int32_t rd, int32_t rm, int32_t im)
+{
+  tshift(_jit, THUMB2_LSLI, rd, rm, im);
+}
+
+/* LSR encoders: by register (T1_LSR, T2_LSR) and by immediate (T1_LSRI,
+   T2_LSRI).  The T2_* wrappers call their void helper as a plain
+   statement, since ISO C does not allow `return expr;` in a function
+   returning void. */
+static void
+T1_LSR(jit_state_t *_jit, int32_t rdn, int32_t rm)
+{
+  emit_u16_with_pool(_jit, THUMB_LSR | (_u3(rm) << 3) | _u3(rdn));
+}
+
+static void
+T2_LSR(jit_state_t *_jit, int32_t rd, int32_t rn, int32_t rm)
+{
+  torrr(_jit, THUMB2_LSR, rn, rd, rm);
+}
+
+/* im is truncated to 5 bits. */
+static void
+T1_LSRI(jit_state_t *_jit, int32_t rd, int32_t rm, int32_t im)
+{
+  emit_u16_with_pool(_jit,
+                     THUMB_LSRI | (_u5(im) << 6) | (_u3(rm) << 3) | _u3(rd));
+}
+
+/* tshift() asserts 0 <= im < 32. */
+static void
+T2_LSRI(jit_state_t *_jit, int32_t rd, int32_t rm, int32_t im)
+{
+  tshift(_jit, THUMB2_LSRI, rd, rm, im);
+}
+
+/* ASR encoders: by register (T1_ASR, T2_ASR) and by immediate (T1_ASRI,
+   T2_ASRI).  The T2_* wrappers call their void helper as a plain
+   statement, since ISO C does not allow `return expr;` in a function
+   returning void. */
+static void
+T1_ASR(jit_state_t *_jit, int32_t rdn, int32_t rm)
+{
+  emit_u16_with_pool(_jit, THUMB_ASR | (_u3(rm) << 3) | _u3(rdn));
+}
+
+static void
+T2_ASR(jit_state_t *_jit, int32_t rd, int32_t rn, int32_t rm)
+{
+  torrr(_jit, THUMB2_ASR, rn, rd, rm);
+}
+
+/* im is truncated to 5 bits. */
+static void
+T1_ASRI(jit_state_t *_jit, int32_t rd, int32_t rm, int32_t im)
+{
+  emit_u16_with_pool(_jit,
+                     THUMB_ASRI | (_u5(im) << 6) | (_u3(rm) << 3) | _u3(rd));
+}
+
+/* tshift() asserts 0 <= im < 32. */
+static void
+T2_ASRI(jit_state_t *_jit, int32_t rd, int32_t rm, int32_t im)
+{
+  tshift(_jit, THUMB2_ASRI, rd, rm, im);
+}
+
+/* CMP / CMN encoders.  The 32-bit forms fill the unused rd field with
+   _NOREG and call their void helper as a plain statement, since ISO C
+   does not allow `return expr;` in a function returning void. */
+static void
+T1_CMP(jit_state_t *_jit, int32_t rn, int32_t rm)
+{
+  emit_u16_with_pool(_jit, THUMB_CMP | (_u3(rm) << 3) | _u3(rn));
+}
+
+/* 16-bit form taking 4-bit register numbers (bit 3 of rn is stored
+   separately in bit 7). */
+static void
+T1_CMPX(jit_state_t *_jit, int32_t rn, int32_t rm)
+{
+  emit_u16_with_pool(_jit,
+                     THUMB_CMPX | ((_u4(rn) & 8) << 4) | (_u4(rm) << 3) | (rn & 7));
+}
+
+static void
+T2_CMP(jit_state_t *_jit, int32_t rn, int32_t rm)
+{
+  torrr(_jit, THUMB2_CMP, rn, _NOREG, rm);
+}
+
+/* 8-bit immediate. */
+static void
+T1_CMPI(jit_state_t *_jit, int32_t rn, int32_t im)
+{
+  emit_u16_with_pool(_jit, THUMB_CMPI | (_u3(rn) << 8) | _u8(im));
+}
+
+/* im must already be encoded in the form torri() accepts. */
+static void
+T2_CMPI(jit_state_t *_jit, int32_t rn, int32_t im)
+{
+  torri(_jit, THUMB2_CMPI, rn, _NOREG, im);
+}
+
+/* im must already be encoded in the form torri() accepts. */
+static void
+T2_CMNI(jit_state_t *_jit, int32_t rn, int32_t im)
+{
+  torri(_jit, THUMB2_CMNI, rn, _NOREG, im);
+}
+
+/* TST encoders.  The 32-bit forms fill the unused rd field with _NOREG
+   and call their void helper as a plain statement, since ISO C does
+   not allow `return expr;` in a function returning void. */
+static void
+T1_TST(jit_state_t *_jit, int32_t rn, int32_t rm)
+{
+  emit_u16_with_pool(_jit, THUMB_TST | (_u3(rm) << 3) | _u3(rn));
+}
+
+static void
+T2_TST(jit_state_t *_jit, int32_t rn, int32_t rm)
+{
+  torrr(_jit, THUMB2_TST, rn, _NOREG, rm);
+}
+
+/* im must already be encoded in the form torri() accepts. */
+static void
+T2_TSTI(jit_state_t *_jit, int32_t rn, int32_t im)
+{
+  torri(_jit, THUMB2_TSTI, rn, _NOREG, im);
+}
+
+/* 16-bit THUMB_BLX through register r0 (4-bit register number). */
+static void
+T1_BLX(jit_state_t *_jit, int32_t r0)
+{
+  uint16_t inst = THUMB_BLX | (_u4(r0) << 3);
+  emit_u16_with_pool(_jit, inst);
+}
+
+/* 16-bit THUMB_BX through register r0 (4-bit register number). */
+static void
+T1_BX(jit_state_t *_jit, int32_t r0)
+{
+  uint16_t inst = THUMB_BX | (_u4(r0) << 3);
+  emit_u16_with_pool(_jit, inst);
+}
+
+/* Wide conditional jump on cc; returns the relocation made by tcb(). */
+static jit_reloc_t
+T2_CC_B(jit_state_t *_jit, uint32_t cc)
+{
+  jit_reloc_t reloc = tcb(_jit, cc);
+  return reloc;
+}
+
+/* Wide THUMB2_B jump; returns the relocation made by tb(). */
+static jit_reloc_t
+T2_B(jit_state_t *_jit)
+{
+  jit_reloc_t reloc = tb(_jit, THUMB2_B);
+  return reloc;
+}
+
+/* Wide THUMB2_BLI jump; returns the relocation made by tb(). */
+static jit_reloc_t
+T2_BLI(jit_state_t *_jit)
+{
+  jit_reloc_t reloc = tb(_jit, THUMB2_BLI);
+  return reloc;
+}
+
+/* Option values accepted by T1_DMB(); only DMB_ISH is defined. */
+enum dmb_option { DMB_ISH = 0xb };
+
+/* Emit the wide THUMB_DMB opcode with `option` in its low four bits. */
+static void
+T1_DMB(jit_state_t *_jit, enum dmb_option option)
+{
+  uint32_t inst = THUMB_DMB | _u4(option);
+  emit_wide_thumb(_jit, inst);
+}
+
+/* Emit the wide THUMB_LDREX opcode: rn in bits 16-19, rt in bits 12-15
+   and the low 8 bits of offset in bits 0-7. */
+static void
+T1_LDREX(jit_state_t *_jit, int32_t rt, int32_t rn, int8_t offset)
+{
+  uint32_t inst =
+    THUMB_LDREX | (_u4(rn) << 16) | (_u4(rt) << 12) | _u8(offset);
+  emit_wide_thumb(_jit, inst);
+}
+
+/* Emit the wide THUMB_STREX opcode: rn in bits 16-19, rt in bits 12-15,
+   rd in bits 8-11 and the low 8 bits of offset in bits 0-7. */
+static void
+T1_STREX(jit_state_t *_jit, int32_t rd, int32_t rt, int32_t rn, int8_t offset)
+{
+  uint32_t inst =
+    THUMB_STREX | (_u4(rn) << 16) | (_u4(rt) << 12) | (_u4(rd) << 8) | _u8(offset);
+  emit_wide_thumb(_jit, inst);
+}
+
+/* LDRSB encoders.  T2_LDRSBI sets THUMB2_U on the 8-bit-immediate form
+   and T2_LDRSBIN leaves it clear; T2_LDRSBWI takes a 12-bit immediate.
+   The T2_* wrappers call their void helper as a plain statement, since
+   ISO C does not allow `return expr;` in a function returning void. */
+static void
+T1_LDRSB(jit_state_t *_jit, int32_t rt, int32_t rn, int32_t rm)
+{
+  emit_u16_with_pool(_jit,
+                     THUMB_LDRSB | (_u3(rm) << 6) | (_u3(rn) << 3) | _u3(rt));
+}
+
+static void
+T2_LDRSB(jit_state_t *_jit, int32_t rt, int32_t rn, int32_t rm)
+{
+  torxr(_jit, THUMB2_LDRSB, rn, rt, rm);
+}
+
+static void
+T2_LDRSBI(jit_state_t *_jit, int32_t rt, int32_t rn, int32_t im)
+{
+  torri8(_jit, THUMB2_LDRSBI | THUMB2_U, rn, rt, im);
+}
+
+static void
+T2_LDRSBWI(jit_state_t *_jit, int32_t rt, int32_t rn, int32_t im)
+{
+  torri12(_jit, THUMB2_LDRSBWI, rn, rt, im);
+}
+
+static void
+T2_LDRSBIN(jit_state_t *_jit, int32_t rt, int32_t rn, int32_t im)
+{
+  torri8(_jit, THUMB2_LDRSBI, rn, rt, im);
+}
+
+/* LDRB encoders.  T2_LDRBI sets THUMB2_U on the 8-bit-immediate form
+   and T2_LDRBIN leaves it clear; T2_LDRBWI takes a 12-bit immediate.
+   The T2_* wrappers call their void helper as a plain statement, since
+   ISO C does not allow `return expr;` in a function returning void. */
+static void
+T1_LDRB(jit_state_t *_jit, int32_t rt, int32_t rn, int32_t rm)
+{
+  emit_u16_with_pool(_jit,
+                     THUMB_LDRB | (_u3(rm) << 6) | (_u3(rn) << 3) | _u3(rt));
+}
+
+static void
+T2_LDRB(jit_state_t *_jit, int32_t rt, int32_t rn, int32_t rm)
+{
+  torxr(_jit, THUMB2_LDRB, rn, rt, rm);
+}
+
+/* 16-bit form; im is truncated to 5 bits. */
+static void
+T1_LDRBI(jit_state_t *_jit, int32_t rt, int32_t rn, int32_t im)
+{
+  emit_u16_with_pool(_jit,
+                     THUMB_LDRBI | (_u5(im) << 6) | (_u3(rn) << 3) | _u3(rt));
+}
+
+static void
+T2_LDRBI(jit_state_t *_jit, int32_t rt, int32_t rn, int32_t im)
+{
+  torri8(_jit, THUMB2_LDRBI | THUMB2_U, rn, rt, im);
+}
+
+static void
+T2_LDRBWI(jit_state_t *_jit, int32_t rt, int32_t rn, int32_t im)
+{
+  torri12(_jit, THUMB2_LDRBWI, rn, rt, im);
+}
+
+static void
+T2_LDRBIN(jit_state_t *_jit, int32_t rt, int32_t rn, int32_t im)
+{
+  torri8(_jit, THUMB2_LDRBI, rn, rt, im);
+}
+
+/* LDRSH encoders.  T2_LDRSHI sets THUMB2_U on the 8-bit-immediate form
+   and T2_LDRSHIN leaves it clear; T2_LDRSHWI takes a 12-bit immediate.
+   The T2_* wrappers call their void helper as a plain statement, since
+   ISO C does not allow `return expr;` in a function returning void. */
+static void
+T1_LDRSH(jit_state_t *_jit, int32_t rt, int32_t rn, int32_t rm)
+{
+  emit_u16_with_pool(_jit,
+                     THUMB_LDRSH | (_u3(rm) << 6) | (_u3(rn) << 3) | _u3(rt));
+}
+
+static void
+T2_LDRSH(jit_state_t *_jit, int32_t rt, int32_t rn, int32_t rm)
+{
+  torxr(_jit, THUMB2_LDRSH, rn, rt, rm);
+}
+
+static void
+T2_LDRSHI(jit_state_t *_jit, int32_t rt, int32_t rn, int32_t im)
+{
+  torri8(_jit, THUMB2_LDRSHI | THUMB2_U, rn, rt, im);
+}
+
+static void
+T2_LDRSHWI(jit_state_t *_jit, int32_t rt, int32_t rn, int32_t im)
+{
+  torri12(_jit, THUMB2_LDRSHWI, rn, rt, im);
+}
+
+static void
+T2_LDRSHIN(jit_state_t *_jit, int32_t rt, int32_t rn, int32_t im)
+{
+  torri8(_jit, THUMB2_LDRSHI, rn, rt, im);
+}
+
+/* LDRH encoders.  T2_LDRHI sets THUMB2_U on the 8-bit-immediate form
+   and T2_LDRHIN leaves it clear; T2_LDRHWI takes a 12-bit immediate.
+   The T2_* wrappers call their void helper as a plain statement, since
+   ISO C does not allow `return expr;` in a function returning void. */
+static void
+T1_LDRH(jit_state_t *_jit, int32_t rt, int32_t rn, int32_t rm)
+{
+  emit_u16_with_pool(_jit,
+                     THUMB_LDRH | (_u3(rm) << 6) | (_u3(rn) << 3) | _u3(rt));
+}
+
+static void
+T2_LDRH(jit_state_t *_jit, int32_t rt, int32_t rn, int32_t rm)
+{
+  torxr(_jit, THUMB2_LDRH, rn, rt, rm);
+}
+
+/* 16-bit form; im is truncated to 5 bits. */
+static void
+T1_LDRHI(jit_state_t *_jit, int32_t rt, int32_t rn, int32_t im)
+{
+  emit_u16_with_pool(_jit,
+                     THUMB_LDRHI | (_u5(im) << 6) | (_u3(rn) << 3) | _u3(rt));
+}
+
+static void
+T2_LDRHI(jit_state_t *_jit, int32_t rt, int32_t rn, int32_t im)
+{
+  torri8(_jit, THUMB2_LDRHI | THUMB2_U, rn, rt, im);
+}
+
+static void
+T2_LDRHWI(jit_state_t *_jit, int32_t rt, int32_t rn, int32_t im)
+{
+  torri12(_jit, THUMB2_LDRHWI, rn, rt, im);
+}
+
+static void
+T2_LDRHIN(jit_state_t *_jit, int32_t rt, int32_t rn, int32_t im)
+{
+  torri8(_jit, THUMB2_LDRHI, rn, rt, im);
+}
+
+/* LDR encoders.  T2_LDRI sets THUMB2_U on the 8-bit-immediate form and
+   T2_LDRIN leaves it clear; T2_LDRWI takes a 12-bit immediate.  The
+   T2_* wrappers call their void helper as a plain statement, since
+   ISO C does not allow `return expr;` in a function returning void. */
+static void
+T1_LDR(jit_state_t *_jit, int32_t rt, int32_t rn, int32_t rm)
+{
+  emit_u16_with_pool(_jit,
+                     THUMB_LDR | (_u3(rm) << 6) | (_u3(rn) << 3) | _u3(rt));
+}
+
+static void
+T2_LDR(jit_state_t *_jit, int32_t rt, int32_t rn, int32_t rm)
+{
+  torxr(_jit, THUMB2_LDR, rn, rt, rm);
+}
+
+/* 16-bit form; im is truncated to 5 bits. */
+static void
+T1_LDRI(jit_state_t *_jit, int32_t rt, int32_t rn, int32_t im)
+{
+  emit_u16_with_pool(_jit,
+                     THUMB_LDRI | (_u5(im) << 6) | (_u3(rn) << 3) | _u3(rt));
+}
+
+/* 16-bit THUMB_LDRISP form: no base register operand, 8-bit immediate. */
+static void
+T1_LDRISP(jit_state_t *_jit, int32_t rt, int32_t im)
+{
+  emit_u16_with_pool(_jit, THUMB_LDRISP | (_u3(rt) << 8) | _u8(im));
+}
+
+static void
+T2_LDRI(jit_state_t *_jit, int32_t rt, int32_t rn, int32_t im)
+{
+  torri8(_jit, THUMB2_LDRI | THUMB2_U, rn, rt, im);
+}
+
+static void
+T2_LDRWI(jit_state_t *_jit, int32_t rt, int32_t rn, int32_t im)
+{
+  torri12(_jit, THUMB2_LDRWI, rn, rt, im);
+}
+
+static void
+T2_LDRIN(jit_state_t *_jit, int32_t rt, int32_t rn, int32_t im)
+{
+  torri8(_jit, THUMB2_LDRI, rn, rt, im);
+}
+
+/* STRB encoders.  T2_STRBI sets THUMB2_U on the 8-bit-immediate form
+   and T2_STRBIN leaves it clear; T2_STRBWI takes a 12-bit immediate.
+   The T2_* wrappers call their void helper as a plain statement, since
+   ISO C does not allow `return expr;` in a function returning void. */
+static void
+T1_STRB(jit_state_t *_jit, int32_t rt, int32_t rn, int32_t rm)
+{
+  emit_u16_with_pool(_jit,
+                     THUMB_STRB | (_u3(rm) << 6) | (_u3(rn) << 3) | _u3(rt));
+}
+
+static void
+T2_STRB(jit_state_t *_jit, int32_t rt, int32_t rn, int32_t rm)
+{
+  torxr(_jit, THUMB2_STRB, rn, rt, rm);
+}
+
+/* 16-bit form; im is truncated to 5 bits. */
+static void
+T1_STRBI(jit_state_t *_jit, int32_t rt, int32_t rn, int32_t im)
+{
+  emit_u16_with_pool(_jit,
+                     THUMB_STRBI | (_u5(im) << 6) | (_u3(rn) << 3) | _u3(rt));
+}
+
+static void
+T2_STRBI(jit_state_t *_jit, int32_t rt, int32_t rn, int32_t im)
+{
+  torri8(_jit, THUMB2_STRBI | THUMB2_U, rn, rt, im);
+}
+
+static void
+T2_STRBWI(jit_state_t *_jit, int32_t rt, int32_t rn, int32_t im)
+{
+  torri12(_jit, THUMB2_STRBWI, rn, rt, im);
+}
+
+static void
+T2_STRBIN(jit_state_t *_jit, int32_t rt, int32_t rn, int32_t im)
+{
+  torri8(_jit, THUMB2_STRBI, rn, rt, im);
+}
+
+/* STRH encoders.  T2_STRHI sets THUMB2_U on the 8-bit-immediate form
+   and T2_STRHIN leaves it clear; T2_STRHWI takes a 12-bit immediate.
+   The T2_* wrappers call their void helper as a plain statement, since
+   ISO C does not allow `return expr;` in a function returning void. */
+static void
+T1_STRH(jit_state_t *_jit, int32_t rt, int32_t rn, int32_t rm)
+{
+  emit_u16_with_pool(_jit,
+                     THUMB_STRH | (_u3(rm) << 6) | (_u3(rn) << 3) | _u3(rt));
+}
+
+static void
+T2_STRH(jit_state_t *_jit, int32_t rt, int32_t rn, int32_t rm)
+{
+  torxr(_jit, THUMB2_STRH, rn, rt, rm);
+}
+
+/* 16-bit form; im is truncated to 5 bits. */
+static void
+T1_STRHI(jit_state_t *_jit, int32_t rt, int32_t rn, int32_t im)
+{
+  emit_u16_with_pool(_jit,
+                     THUMB_STRHI | (_u5(im) << 6) | (_u3(rn) << 3) | _u3(rt));
+}
+
+static void
+T2_STRHI(jit_state_t *_jit, int32_t rt, int32_t rn, int32_t im)
+{
+  torri8(_jit, THUMB2_STRHI | THUMB2_U, rn, rt, im);
+}
+
+static void
+T2_STRHWI(jit_state_t *_jit, int32_t rt, int32_t rn, int32_t im)
+{
+  torri12(_jit, THUMB2_STRHWI, rn, rt, im);
+}
+
+static void
+T2_STRHIN(jit_state_t *_jit, int32_t rt, int32_t rn, int32_t im)
+{
+  torri8(_jit, THUMB2_STRHI, rn, rt, im);
+}
+
+/* STR encoders.  T2_STRI sets THUMB2_U on the 8-bit-immediate form and
+   T2_STRIN leaves it clear; T2_STRWI takes a 12-bit immediate.  The
+   T2_* wrappers call their void helper as a plain statement, since
+   ISO C does not allow `return expr;` in a function returning void. */
+static void
+T1_STR(jit_state_t *_jit, int32_t rt, int32_t rn, int32_t rm)
+{
+  emit_u16_with_pool(_jit,
+                     THUMB_STR | (_u3(rm) << 6) | (_u3(rn) << 3) | _u3(rt));
+}
+
+static void
+T2_STR(jit_state_t *_jit, int32_t rt, int32_t rn, int32_t rm)
+{
+  torxr(_jit, THUMB2_STR, rn, rt, rm);
+}
+
+/* 16-bit form; im is truncated to 5 bits. */
+static void
+T1_STRI(jit_state_t *_jit, int32_t rt, int32_t rn, int32_t im)
+{
+  emit_u16_with_pool(_jit,
+                     THUMB_STRI | (_u5(im) << 6) | (_u3(rn) << 3) | _u3(rt));
+}
+
+/* 16-bit THUMB_STRISP form: no base register operand, 8-bit immediate. */
+static void
+T1_STRISP(jit_state_t *_jit, int32_t rt, int32_t im)
+{
+  emit_u16_with_pool(_jit, THUMB_STRISP | (_u3(rt) << 8) | (_u8(im)));
+}
+
+static void
+T2_STRI(jit_state_t *_jit, int32_t rt, int32_t rn, int32_t im)
+{
+  torri8(_jit, THUMB2_STRI | THUMB2_U, rn, rt, im);
+}
+
+static void
+T2_STRWI(jit_state_t *_jit, int32_t rt, int32_t rn, int32_t im)
+{
+  torri12(_jit, THUMB2_STRWI, rn, rt, im);
+}
+
+static void
+T2_STRIN(jit_state_t *_jit, int32_t rt, int32_t rn, int32_t im)
+{
+  torri8(_jit, THUMB2_STRI, rn, rt, im);
+}
+
+/* Emit the bare 16-bit THUMB_BRK opcode; it takes no operands.  */
+static void
+T1_BRK(jit_state_t *_jit)
+{
+ emit_u16_with_pool(_jit, THUMB_BRK);
+}
+
+/* Emit one T1_NOP for every two units of i0.  The trailing ASSERT requires
+   the countdown to land exactly on zero, so i0 must be even and not
+   negative.  */
+static void
+nop(jit_state_t *_jit, int32_t i0)
+{
+  while (i0 > 0) {
+    T1_NOP(_jit);
+    i0 -= 2;
+  }
+  ASSERT(i0 == 0);
+}
+
+/* Copy r1 into r0 with T1_MOV; emits nothing when both name the same
+   register.  */
+static void
+movr(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  if (r0 == r1)
+    return;
+  T1_MOV(_jit, r0, r1);
+}
+
+/* Hint handed to _movi by its callers.  */
+enum preserve_flags {
+  PRESERVE_FLAGS,
+  FLAGS_UNIMPORTANT
+};
+
+/* Load the constant i0 into r0, trying encodings in this order:
+     - T1_MOVI when r0 is a low register and i0 has no bit set above bit 6;
+     - T2_MOVI when encode_thumb_immediate accepts i0;
+     - T2_MVNI when encode_thumb_immediate accepts ~i0;
+     - T2_MOVWI for the low half-word, followed by T2_MOVTI only when the
+       high half-word is non-zero.
+   The original tested the T1_MOVI condition twice -- once guarded by
+   flags == PRESERVE_FLAGS and once unguarded -- with identical bodies, so
+   `flags' never changed what was emitted.  The duplicate is folded here and
+   the emitted code is unchanged.
+   NOTE(review): if PRESERVE_FLAGS was meant to avoid T1_MOVI, that needs a
+   separate decision against the T1_MOVI encoding; confirm with callers
+   (addxi/subxi).  */
+static void
+_movi(jit_state_t *_jit, int32_t r0, jit_word_t i0, enum preserve_flags flags)
+{
+  int i;
+
+  /* Accepted for the callers' benefit; it selects nothing today.  */
+  (void) flags;
+
+  if (r0 < 8 && !(i0 & 0xffffff80))
+    T1_MOVI(_jit, r0, i0);
+  else if ((i = encode_thumb_immediate(i0)) != -1)
+    T2_MOVI(_jit, r0, i);
+  else if ((i = encode_thumb_immediate(~i0)) != -1)
+    T2_MVNI(_jit, r0, i);
+  else {
+    T2_MOVWI(_jit, r0, (uint16_t)i0);
+    if (i0 & 0xffff0000)
+      T2_MOVTI(_jit, r0, (uint16_t)((unsigned)i0 >> 16));
+  }
+}
+
+/* Load the constant i0 into r0; shorthand for _movi with
+   FLAGS_UNIMPORTANT.  */
+static void
+movi(jit_state_t *_jit, int32_t r0, jit_word_t i0)
+{
+  _movi(_jit, r0, i0, FLAGS_UNIMPORTANT);
+}
+
+/* Return non-zero when offset lies in the closed interval
+   [-0xfff, 0xfff].  */
+static int
+offset_in_load_from_pool_range(int32_t offset)
+{
+  if (offset < -0xfff)
+    return 0;
+  return offset <= 0xfff;
+}
+
+/* Recover the signed offset stored in inst: the low 12 bits hold the
+   magnitude and bit 23 selects the sign (set means positive).  */
+static int32_t
+decode_load_from_pool_offset(uint32_t inst)
+{
+  int32_t magnitude = inst & 0xfff;
+  int positive = (inst >> 23) & 1;
+
+  if (positive)
+    return magnitude;
+  return -magnitude;
+}
+
+/* Encode off as a 12-bit magnitude in the low bits plus a sign selector in
+   bit 23 (set for off >= 0).  off must satisfy
+   offset_in_load_from_pool_range; the result is checked by decoding it
+   again.  */
+static uint32_t
+encode_load_from_pool_offset(int32_t off)
+{
+  ASSERT(offset_in_load_from_pool_range(off));
+  uint32_t positive = off >= 0;
+  int32_t magnitude = positive ? off : -off;
+  uint32_t encoded = (magnitude & 0xfff) | (positive << 23);
+  ASSERT(decode_load_from_pool_offset(encoded) == off);
+  return encoded;
+}
+
+/* Return inst with its offset replaced by off: only the THUMB2_LDRP bits and
+   the 4-bit field at bit 12 are kept from inst, and the freshly encoded
+   offset is OR-ed in.  */
+static uint32_t
+patch_load_from_pool(uint32_t inst, int32_t off)
+{
+  const uint32_t kept_bits = THUMB2_LDRP | (0xf << 12);
+  uint32_t skeleton = inst & kept_bits;
+
+  return skeleton | encode_load_from_pool_offset(off);
+}
+
+/* Read the wide Thumb instruction at loc and return its decoded
+   load-from-pool offset.  */
+static int32_t
+read_load_from_pool_offset(uint32_t *loc)
+{
+  uint32_t inst = read_wide_thumb(loc);
+
+  return decode_load_from_pool_offset(inst);
+}
+
+/* Rewrite, in place, the offset of the wide Thumb load-from-pool instruction
+   at loc so that it becomes v.  */
+static void
+patch_load_from_pool_offset(uint32_t *loc, int32_t v)
+{
+  uint32_t current = read_wide_thumb(loc);
+
+  write_wide_thumb(loc, patch_load_from_pool(current, v));
+}
+
+/* Emit the load-from-pool instruction inst at the current pc and return the
+   relocation that describes it.  The base is (pc + 4) rounded down to a
+   multiple of 4, and the initial offset is pc minus that base.  The whole
+   sequence is retried until add_pending_literal accepts the relocation.
+   NOTE(review): presumably a rejection means pc has moved (e.g. a pool was
+   flushed), which is why base and relocation are recomputed -- confirm.  */
+static jit_reloc_t
+emit_load_from_pool(jit_state_t *_jit, uint32_t inst)
+{
+  for (;;) {
+    uint8_t *base = (uint8_t *)((_jit->pc.w + 4) & ~3);
+    uint8_t shift = 0;
+    int32_t initial_off = (_jit->pc.uc - base) >> shift;
+    jit_reloc_t reloc =
+      jit_reloc (_jit, JIT_RELOC_LOAD_FROM_POOL, 0, _jit->pc.uc, base, shift);
+    uint8_t offset_bits = 12;
+
+    if (!add_pending_literal(_jit, reloc, offset_bits))
+      continue;
+
+    emit_wide_thumb(_jit, patch_load_from_pool(inst, initial_off));
+    return reloc;
+  }
+}
+
+/* Emit a THUMB2_LDRP load-from-pool whose 4-bit register field (bit 12) is
+   Rt, and return its relocation.  */
+static jit_reloc_t
+movi_from_pool(jit_state_t *_jit, int32_t Rt)
+{
+  uint32_t inst = THUMB2_LDRP | (_u4(Rt) << 12);
+
+  return emit_load_from_pool(_jit, inst);
+}
+
+/* Emit a patchable load into r0; currently just movi_from_pool.  */
+static jit_reloc_t
+mov_addr(jit_state_t *_jit, int32_t r0)
+{
+  jit_reloc_t reloc = movi_from_pool(_jit, r0);
+
+  return reloc;
+}
+
+/* r0 = ~r1: T1_NOT when both registers are below 8, T2_NOT otherwise.  */
+static void
+comr(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  int both_low = (r0 | r1) < 8;
+
+  if (both_low) {
+    T1_NOT(_jit, r0, r1);
+  } else {
+    T2_NOT(_jit, r0, r1);
+  }
+}
+
+/* r0 = -r1, emitted as a reverse-subtract: T1_RSBI when both registers are
+   below 8, otherwise T2_RSBI with an explicit immediate of 0.  */
+static void
+negr(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  int both_low = (r0 | r1) < 8;
+
+  if (both_low) {
+    T1_RSBI(_jit, r0, r1);
+  } else {
+    T2_RSBI(_jit, r0, r1, 0);
+  }
+}
+
+/* r0 = r1 + r2.  Uses T1_ADD when all three registers are below 8, the
+   two-operand T1_ADDX when r0 is also one of the sources, and T2_ADD for
+   everything else.  */
+static void
+addr(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+{
+  if ((r0 | r1 | r2) < 8) {
+    T1_ADD(_jit, r0, r1, r2);
+    return;
+  }
+  if (r0 == r1) {
+    T1_ADDX(_jit, r0, r2);
+    return;
+  }
+  if (r0 == r2) {
+    T1_ADDX(_jit, r0, r1);
+    return;
+  }
+  T2_ADD(_jit, r0, r1, r2);
+}
+
+/* r0 = r1 + i0.  Encodings are tried from shortest to longest: the 3-bit and
+   8-bit 16-bit forms (each also as the matching subtract of -i0), the
+   encode_thumb_immediate forms, the encode_thumb_word_immediate forms, and
+   finally a temporary register holding i0.  */
+static void
+addi(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0)
+{
+  int enc;
+  int low_pair = (r0 | r1) < 8;
+  int low_in_place = r0 < 8 && r0 == r1;
+
+  if (low_pair && !(i0 & ~7)) {
+    T1_ADDI3(_jit, r0, r1, i0);
+  } else if (low_pair && !(-i0 & ~7)) {
+    T1_SUBI3(_jit, r0, r1, -i0);
+  } else if (low_in_place && !(i0 & ~0xff)) {
+    T1_ADDI8(_jit, r0, i0);
+  } else if (low_in_place && !(-i0 & ~0xff)) {
+    T1_SUBI8(_jit, r0, -i0);
+  } else if ((enc = encode_thumb_immediate(i0)) != -1) {
+    T2_ADDI(_jit, r0, r1, enc);
+  } else if ((enc = encode_thumb_immediate(-i0)) != -1) {
+    T2_SUBI(_jit, r0, r1, enc);
+  } else if ((enc = encode_thumb_word_immediate(i0)) != -1) {
+    T2_ADDWI(_jit, r0, r1, enc);
+  } else if ((enc = encode_thumb_word_immediate(-i0)) != -1) {
+    T2_SUBWI(_jit, r0, r1, enc);
+  } else {
+    int32_t tmp = jit_gpr_regno(get_temp_gpr(_jit));
+    movi(_jit, tmp, i0);
+    T2_ADD(_jit, r0, r1, tmp);
+    unget_temp_gpr(_jit);
+  }
+}
+
+/* r0 = r1 + r2, leaving the carry for a following addx*.  The 16-bit T1_ADD
+   sets carry by itself when not inside an IT block; otherwise the
+   flag-setting T2_ADDS is used.  */
+static void
+addcr(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+{
+  int all_low = (r0 | r1 | r2) < 8;
+
+  if (all_low) {
+    T1_ADD(_jit, r0, r1, r2);
+  } else {
+    T2_ADDS(_jit, r0, r1, r2);
+  }
+}
+
+/* r0 = r1 + i0 with the flag-setting encodings: the 16-bit immediate forms
+   first (also as subtracts of -i0), then T2_ADDSI / T2_SUBSI when the
+   immediate encodes, and finally T2_ADDS against a temporary register.  */
+static void
+addci(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0)
+{
+  int enc;
+  int low_pair = (r0 | r1) < 8;
+  int low_in_place = r0 < 8 && r0 == r1;
+
+  if (low_pair && !(i0 & ~7)) {
+    T1_ADDI3(_jit, r0, r1, i0);
+  } else if (low_pair && !(-i0 & ~7)) {
+    T1_SUBI3(_jit, r0, r1, -i0);
+  } else if (low_in_place && !(i0 & ~0xff)) {
+    T1_ADDI8(_jit, r0, i0);
+  } else if (low_in_place && !(-i0 & ~0xff)) {
+    T1_SUBI8(_jit, r0, -i0);
+  } else if ((enc = encode_thumb_immediate(i0)) != -1) {
+    T2_ADDSI(_jit, r0, r1, enc);
+  } else if ((enc = encode_thumb_immediate(-i0)) != -1) {
+    T2_SUBSI(_jit, r0, r1, enc);
+  } else {
+    int32_t tmp = jit_gpr_regno(get_temp_gpr(_jit));
+    movi(_jit, tmp, i0);
+    T2_ADDS(_jit, r0, r1, tmp);
+    unget_temp_gpr(_jit);
+  }
+}
+
+/* r0 = r1 + r2 + carry.  Carry keeps being set because this may not be the
+   last add-with-carry of a chain.  The two-operand T1_ADC (which sets carry
+   by itself outside an IT block) is used when all registers are below 8 and
+   r0 is one of the sources; T2_ADCS otherwise.  */
+static void
+addxr(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+{
+  int all_low = (r0 | r1 | r2) < 8;
+  int in_place = r0 == r1 || r0 == r2;
+
+  if (all_low && in_place) {
+    int32_t other = r0 == r1 ? r2 : r1;
+    T1_ADC(_jit, r0, other);
+  } else {
+    T2_ADCS(_jit, r0, r1, r2);
+  }
+}
+
+/* r0 = r1 + i0 + carry, setting flags.
+   ADC computes r1 + imm + C, whereas SBC computes r1 + ~imm + C.  The SBC
+   form that matches "ADC with i0" therefore takes the bitwise complement
+   ~i0 as its immediate.  The original tried -i0 here, which produced a
+   result one less than requested whenever that branch was taken (e.g.
+   i0 == -2).
+   When neither i0 nor ~i0 encodes, i0 is loaded into a register: r0 itself
+   if it does not alias r1, a temporary otherwise.  */
+static void
+addxi(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0)
+{
+  int i;
+  if ((i = encode_thumb_immediate(i0)) != -1)
+    T2_ADCSI(_jit, r0, r1, i);
+  else if ((i = encode_thumb_immediate(~i0)) != -1)
+    T2_SBCSI(_jit, r0, r1, i);
+  else if (r0 != r1) {
+    _movi(_jit, r0, i0, PRESERVE_FLAGS);
+    T2_ADCS(_jit, r0, r1, r0);
+  } else {
+    jit_gpr_t reg = get_temp_gpr(_jit);
+    _movi(_jit, jit_gpr_regno(reg), i0, PRESERVE_FLAGS);
+    T2_ADCS(_jit, r0, r1, jit_gpr_regno(reg));
+    unget_temp_gpr(_jit);
+  }
+}
+
+/* r0 = r1 - r2: T1_SUB when all three registers are below 8, T2_SUB
+   otherwise.  */
+static void
+subr(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+{
+  int all_low = (r0 | r1 | r2) < 8;
+
+  if (all_low) {
+    T1_SUB(_jit, r0, r1, r2);
+  } else {
+    T2_SUB(_jit, r0, r1, r2);
+  }
+}
+
+/* r0 = r1 - i0.  Mirror image of addi: the 3-bit and 8-bit 16-bit forms (each
+   also as the matching add of -i0), the encode_thumb_immediate forms, the
+   encode_thumb_word_immediate forms, and finally a temporary register.  */
+static void
+subi(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0)
+{
+  int enc;
+  int low_pair = (r0 | r1) < 8;
+  int low_in_place = r0 < 8 && r0 == r1;
+
+  if (low_pair && !(i0 & ~7)) {
+    T1_SUBI3(_jit, r0, r1, i0);
+  } else if (low_pair && !(-i0 & ~7)) {
+    T1_ADDI3(_jit, r0, r1, -i0);
+  } else if (low_in_place && !(i0 & ~0xff)) {
+    T1_SUBI8(_jit, r0, i0);
+  } else if (low_in_place && !(-i0 & ~0xff)) {
+    T1_ADDI8(_jit, r0, -i0);
+  } else if ((enc = encode_thumb_immediate(i0)) != -1) {
+    T2_SUBI(_jit, r0, r1, enc);
+  } else if ((enc = encode_thumb_immediate(-i0)) != -1) {
+    T2_ADDI(_jit, r0, r1, enc);
+  } else if ((enc = encode_thumb_word_immediate(i0)) != -1) {
+    T2_SUBWI(_jit, r0, r1, enc);
+  } else if ((enc = encode_thumb_word_immediate(-i0)) != -1) {
+    T2_ADDWI(_jit, r0, r1, enc);
+  } else {
+    int32_t tmp = jit_gpr_regno(get_temp_gpr(_jit));
+    movi(_jit, tmp, i0);
+    T2_SUB(_jit, r0, r1, tmp);
+    unget_temp_gpr(_jit);
+  }
+}
+
+/* r0 = r1 - r2, leaving the carry for a following subx*.  The 16-bit T1_SUB
+   sets carry by itself when not inside an IT block; otherwise the
+   flag-setting T2_SUBS is used.  */
+static void
+subcr(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+{
+  int all_low = (r0 | r1 | r2) < 8;
+
+  if (all_low) {
+    T1_SUB(_jit, r0, r1, r2);
+  } else {
+    T2_SUBS(_jit, r0, r1, r2);
+  }
+}
+
+/* r0 = r1 - i0 with the flag-setting encodings: the 16-bit immediate forms
+   first (also as adds of -i0), then T2_SUBSI / T2_ADDSI when the immediate
+   encodes, and finally T2_SUBS against a temporary register.  */
+static void
+subci(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0)
+{
+  int enc;
+  int low_pair = (r0 | r1) < 8;
+  int low_in_place = r0 < 8 && r0 == r1;
+
+  if (low_pair && !(i0 & ~7)) {
+    T1_SUBI3(_jit, r0, r1, i0);
+  } else if (low_pair && !(-i0 & ~7)) {
+    T1_ADDI3(_jit, r0, r1, -i0);
+  } else if (low_in_place && !(i0 & ~0xff)) {
+    T1_SUBI8(_jit, r0, i0);
+  } else if (low_in_place && !(-i0 & ~0xff)) {
+    T1_ADDI8(_jit, r0, -i0);
+  } else if ((enc = encode_thumb_immediate(i0)) != -1) {
+    T2_SUBSI(_jit, r0, r1, enc);
+  } else if ((enc = encode_thumb_immediate(-i0)) != -1) {
+    T2_ADDSI(_jit, r0, r1, enc);
+  } else {
+    int32_t tmp = jit_gpr_regno(get_temp_gpr(_jit));
+    movi(_jit, tmp, i0);
+    T2_SUBS(_jit, r0, r1, tmp);
+    unget_temp_gpr(_jit);
+  }
+}
+
+/* r0 = r1 - r2 with borrow.  Carry keeps being set because this may not be
+   the last subtract-with-carry of a chain.  The two-operand T1_SBC (which
+   sets carry by itself outside an IT block) needs all registers below 8 and
+   r0 == r1; T2_SBCS covers the rest.  */
+static void
+subxr(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+{
+  int all_low = (r0 | r1 | r2) < 8;
+
+  if (all_low && r0 == r1) {
+    T1_SBC(_jit, r0, r2);
+  } else {
+    T2_SBCS(_jit, r0, r1, r2);
+  }
+}
+
+/* r0 = r1 - i0 with borrow, setting flags.
+   Two corrections over the original:
+     - the immediates feed Thumb-2 instructions (T2_SBCSI / T2_ADCSI), so
+       they are validated with encode_thumb_immediate, as addxi does, rather
+       than encode_arm_immediate;
+     - SBC computes r1 + ~imm + C and ADC computes r1 + imm + C, so the ADC
+       form equivalent to "SBC with i0" takes ~i0.  The original used -i0,
+       which is off by one.
+   When neither i0 nor ~i0 encodes, i0 is loaded into a register: r0 itself
+   if it does not alias r1, a temporary otherwise.  */
+static void
+subxi(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0)
+{
+  int i;
+  if ((i = encode_thumb_immediate(i0)) != -1)
+    T2_SBCSI(_jit, r0, r1, i);
+  else if ((i = encode_thumb_immediate(~i0)) != -1)
+    T2_ADCSI(_jit, r0, r1, i);
+  else if (r0 != r1) {
+    _movi(_jit, r0, i0, PRESERVE_FLAGS);
+    T2_SBCS(_jit, r0, r1, r0);
+  } else {
+    jit_gpr_t reg = get_temp_gpr(_jit);
+    _movi(_jit, jit_gpr_regno(reg), i0, PRESERVE_FLAGS);
+    T2_SBCS(_jit, r0, r1, jit_gpr_regno(reg));
+    unget_temp_gpr(_jit);
+  }
+}
+
+/* r0 = r1 * r2.  The two-operand T1_MUL is used when r0 is one of the sources
+   and both registers involved are below 8; T2_MUL otherwise.  */
+static void
+mulr(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+{
+  if (r0 == r2 && (r0 | r1) < 8) {
+    T1_MUL(_jit, r0, r1);
+    return;
+  }
+  if (r0 == r1 && (r0 | r2) < 8) {
+    T1_MUL(_jit, r0, r2);
+    return;
+  }
+  T2_MUL(_jit, r0, r1, r2);
+}
+
+/* r0 = r1 * i0: load i0 into a temporary register and defer to mulr.  */
+static void
+muli(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0)
+{
+  int32_t tmp = jit_gpr_regno(get_temp_gpr(_jit));
+
+  movi(_jit, tmp, i0);
+  mulr(_jit, r0, r1, tmp);
+  unget_temp_gpr(_jit);
+}
+
+/* Widening multiply of r2 by r3 into the register pair (r0, r1), using
+   T2_SMULL when sign is non-zero and T2_UMULL otherwise.  Source operands
+   are arranged as follows:
+     - r2 == r3: one operand is first copied to a temporary register;
+     - neither r0 nor r1 is r2: operands are passed as (r2, r3);
+     - otherwise: operands are passed swapped, as (r3, r2).  */
+static void
+iqmulr(jit_state_t *_jit, int32_t r0, int32_t r1,
+       int32_t r2, int32_t r3, jit_bool_t sign)
+{
+  int32_t lhs, rhs;
+  int used_temp = r2 == r3;
+
+  if (used_temp) {
+    lhs = jit_gpr_regno(get_temp_gpr(_jit));
+    movr(_jit, lhs, r2);
+    rhs = r2;
+  } else if (r0 != r2 && r1 != r2) {
+    lhs = r2;
+    rhs = r3;
+  } else {
+    lhs = r3;
+    rhs = r2;
+  }
+
+  if (sign)
+    T2_SMULL(_jit, r0, r1, lhs, rhs);
+  else
+    T2_UMULL(_jit, r0, r1, lhs, rhs);
+
+  if (used_temp)
+    unget_temp_gpr(_jit);
+}
+
+/* Widening multiply of r2 by the constant i0 into (r0, r1): i0 is loaded
+   into a temporary register and the work is done by iqmulr.  */
+static void
+iqmuli(jit_state_t *_jit, int32_t r0, int32_t r1,
+       int32_t r2, jit_word_t i0, jit_bool_t sign)
+{
+  int32_t tmp = jit_gpr_regno(get_temp_gpr(_jit));
+
+  movi(_jit, tmp, i0);
+  iqmulr(_jit, r0, r1, r2, tmp, sign);
+  unget_temp_gpr(_jit);
+}
+
+/* Signed widening multiply, register operands (iqmulr with sign = 1).  */
+static void
+qmulr(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2, int32_t r3)
+{
+  iqmulr(_jit, r0, r1, r2, r3, 1);
+}
+
+/* Unsigned widening multiply, register operands (iqmulr with sign = 0).  */
+static void
+qmulr_u(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2, int32_t r3)
+{
+  iqmulr(_jit, r0, r1, r2, r3, 0);
+}
+
+/* Signed widening multiply by a constant (iqmuli with sign = 1).  */
+static void
+qmuli(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2, int32_t i0)
+{
+  iqmuli(_jit, r0, r1, r2, i0, 1);
+}
+
+/* Unsigned widening multiply by a constant (iqmuli with sign = 0).  */
+static void
+qmuli_u(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2, int32_t i0)
+{
+  iqmuli(_jit, r0, r1, r2, i0, 0);
+}
+
+/* r0 = r1 / r2 via T2_SDIV.  */
+static void
+divr(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+{
+  T2_SDIV(_jit, r0, r1, r2);
+}
+
+/* r0 = r1 / i0: i0 goes through a temporary register, then divr.  */
+static void
+divi(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0)
+{
+  int32_t tmp = jit_gpr_regno(get_temp_gpr(_jit));
+
+  movi(_jit, tmp, i0);
+  divr(_jit, r0, r1, tmp);
+  unget_temp_gpr(_jit);
+}
+
+/* r0 = r1 / r2 via T2_UDIV.  */
+static void
+divr_u(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+{
+  T2_UDIV(_jit, r0, r1, r2);
+}
+
+/* r0 = r1 / i0: i0 goes through a temporary register, then divr_u.  */
+static void
+divi_u(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0)
+{
+  int32_t tmp = jit_gpr_regno(get_temp_gpr(_jit));
+
+  movi(_jit, tmp, i0);
+  divr_u(_jit, r0, r1, tmp);
+  unget_temp_gpr(_jit);
+}
+
+/* Divide r2 by r3 (divr when sign is non-zero, divr_u otherwise), writing the
+ quotient to r0 and then T1_MLS(r1, r3, r0, r2) to r1.
+ NOTE(review): T1_MLS presumably computes r2 - r3 * r0, i.e. the remainder;
+ its definition is outside this view -- confirm.
+ The quotient is written before T1_MLS reads r2 and r3, so whenever r0
+ aliases one of them that source is first copied to a temporary register and
+ the copy is used in both instructions. */
+static void
+iqdivr(jit_state_t *_jit, int32_t r0, int32_t r1,
+ int32_t r2, int32_t r3, jit_bool_t sign)
+{
+ /* A temporary is needed only when the quotient register is also a source. */
+ int need_tmp = r0 == r2 || r0 == r3;
+ if (need_tmp) {
+ int32_t tmp = jit_gpr_regno(get_temp_gpr(_jit));
+ if (r0 == r2) {
+ movr(_jit, tmp, r2);
+ r2 = tmp;
+ }
+ if (r0 == r3) {
+ /* r2 may already have been redirected to tmp just above.
+ NOTE(review): assuming tmp is distinct from r3, this test looks like it
+ is always true at this point -- confirm. */
+ if (r2 != r3)
+ movr(_jit, tmp, r3);
+ r3 = tmp;
+ }
+ }
+ if (sign)
+ divr(_jit, r0, r2, r3);
+ else
+ divr_u(_jit, r0, r2, r3);
+ T1_MLS(_jit, r1, r3, r0, r2);
+ if (need_tmp)
+ unget_temp_gpr(_jit);
+}
+
+/* Same as iqdivr with the constant divisor i0, which is first loaded into a
+   temporary register.  */
+static void
+iqdivi(jit_state_t *_jit, int32_t r0, int32_t r1,
+       int32_t r2, jit_word_t i0, jit_bool_t sign)
+{
+  int32_t tmp = jit_gpr_regno(get_temp_gpr(_jit));
+
+  movi(_jit, tmp, i0);
+  iqdivr(_jit, r0, r1, r2, tmp, sign);
+  unget_temp_gpr(_jit);
+}
+
+/* Signed divide with both results, register operands (iqdivr, sign = 1).  */
+static void
+qdivr(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2, int32_t r3)
+{
+  iqdivr(_jit, r0, r1, r2, r3, 1);
+}
+
+/* Unsigned divide with both results, register operands (iqdivr, sign = 0).  */
+static void
+qdivr_u(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2, int32_t r3)
+{
+  iqdivr(_jit, r0, r1, r2, r3, 0);
+}
+
+/* Signed divide with both results by a constant (iqdivi, sign = 1).  */
+static void
+qdivi(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2, int32_t i0)
+{
+  iqdivi(_jit, r0, r1, r2, i0, 1);
+}
+
+/* Unsigned divide with both results by a constant (iqdivi, sign = 0).  */
+static void
+qdivi_u(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2, int32_t i0)
+{
+  iqdivi(_jit, r0, r1, r2, i0, 0);
+}
+
+/* Remainder of r1 by r2 into r0: runs iqdivr with r0 as both of its output
+   registers, so the value written last (iqdivr's second output) is what
+   remains in r0.  */
+static void
+iremr(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2, jit_bool_t sign)
+{
+  iqdivr(_jit, r0, r0, r1, r2, sign);
+}
+
+/* Signed remainder, register operands.  */
+static void
+remr(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+{
+  iremr(_jit, r0, r1, r2, 1);
+}
+
+/* Signed remainder by a constant loaded into a temporary register.  */
+static void
+remi(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0)
+{
+  int32_t tmp = jit_gpr_regno(get_temp_gpr(_jit));
+
+  movi(_jit, tmp, i0);
+  remr(_jit, r0, r1, tmp);
+  unget_temp_gpr(_jit);
+}
+
+/* Unsigned remainder, register operands.  */
+static void
+remr_u(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+{
+  iremr(_jit, r0, r1, r2, 0);
+}
+
+/* Unsigned remainder by a constant loaded into a temporary register.  */
+static void
+remi_u(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0)
+{
+  int32_t tmp = jit_gpr_regno(get_temp_gpr(_jit));
+
+  movi(_jit, tmp, i0);
+  remr_u(_jit, r0, r1, tmp);
+  unget_temp_gpr(_jit);
+}
+
+/* r0 = r1 & r2.  The two-operand T1_AND needs all registers below 8 and r0
+   to be one of the sources; T2_AND covers the rest.  */
+static void
+andr(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+{
+  int all_low = (r0 | r1 | r2) < 8;
+  int in_place = r0 == r1 || r0 == r2;
+
+  if (all_low && in_place) {
+    int32_t other = r0 == r1 ? r2 : r1;
+    T1_AND(_jit, r0, other);
+  } else {
+    T2_AND(_jit, r0, r1, r2);
+  }
+}
+
+/* r0 = r1 & i0.  Uses T2_ANDI when i0 encodes, T2_BICI when ~i0 encodes, and
+   otherwise loads i0 into a register -- r0 itself unless it aliases r1, in
+   which case a temporary -- before a register T2_AND.  */
+static void
+andi(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0)
+{
+  int enc;
+
+  if ((enc = encode_thumb_immediate(i0)) != -1) {
+    T2_ANDI(_jit, r0, r1, enc);
+    return;
+  }
+  if ((enc = encode_thumb_immediate(~i0)) != -1) {
+    T2_BICI(_jit, r0, r1, enc);
+    return;
+  }
+  if (r0 != r1) {
+    movi(_jit, r0, i0);
+    T2_AND(_jit, r0, r1, r0);
+    return;
+  }
+
+  int32_t tmp = jit_gpr_regno(get_temp_gpr(_jit));
+  movi(_jit, tmp, i0);
+  T2_AND(_jit, r0, r1, tmp);
+  unget_temp_gpr(_jit);
+}
+
+/* r0 = r1 | r2.  The two-operand T1_ORR needs all registers below 8 and r0
+   to be one of the sources; T2_ORR covers the rest.  */
+static void
+orr(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+{
+  int all_low = (r0 | r1 | r2) < 8;
+  int in_place = r0 == r1 || r0 == r2;
+
+  if (all_low && in_place) {
+    int32_t other = r0 == r1 ? r2 : r1;
+    T1_ORR(_jit, r0, other);
+  } else {
+    T2_ORR(_jit, r0, r1, r2);
+  }
+}
+
+/* r0 = r1 | i0.  Uses T2_ORRI when i0 encodes; otherwise i0 is loaded into a
+   register -- r0 itself unless it aliases r1, in which case a temporary --
+   before a register T2_ORR.  */
+static void
+ori(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0)
+{
+  int enc = encode_thumb_immediate(i0);
+
+  if (enc != -1) {
+    T2_ORRI(_jit, r0, r1, enc);
+    return;
+  }
+  if (r0 != r1) {
+    movi(_jit, r0, i0);
+    T2_ORR(_jit, r0, r1, r0);
+    return;
+  }
+
+  int32_t tmp = jit_gpr_regno(get_temp_gpr(_jit));
+  movi(_jit, tmp, i0);
+  T2_ORR(_jit, r0, r1, tmp);
+  unget_temp_gpr(_jit);
+}
+
+/* r0 = r1 ^ r2.  The two-operand T1_EOR needs all registers below 8 and r0
+   to be one of the sources; T2_EOR covers the rest.  */
+static void
+xorr(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+{
+  int all_low = (r0 | r1 | r2) < 8;
+  int in_place = r0 == r1 || r0 == r2;
+
+  if (all_low && in_place) {
+    int32_t other = r0 == r1 ? r2 : r1;
+    T1_EOR(_jit, r0, other);
+  } else {
+    T2_EOR(_jit, r0, r1, r2);
+  }
+}
+
+/* r0 = r1 ^ i0.  Uses T2_EORI when i0 encodes; otherwise i0 is loaded into a
+   register -- r0 itself unless it aliases r1, in which case a temporary --
+   before a register T2_EOR.  */
+static void
+xori(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0)
+{
+  int enc = encode_thumb_immediate(i0);
+
+  if (enc != -1) {
+    T2_EORI(_jit, r0, r1, enc);
+    return;
+  }
+  if (r0 != r1) {
+    movi(_jit, r0, i0);
+    T2_EOR(_jit, r0, r1, r0);
+    return;
+  }
+
+  int32_t tmp = jit_gpr_regno(get_temp_gpr(_jit));
+  movi(_jit, tmp, i0);
+  T2_EOR(_jit, r0, r1, tmp);
+  unget_temp_gpr(_jit);
+}
+
+/* r0 = r1 << r2.  The two-operand T1_LSL needs all registers below 8 and
+   r0 == r1; T2_LSL covers the rest.  */
+static void
+lshr(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+{
+  int all_low = (r0 | r1 | r2) < 8;
+
+  if (all_low && r0 == r1) {
+    T1_LSL(_jit, r0, r2);
+  } else {
+    T2_LSL(_jit, r0, r1, r2);
+  }
+}
+
+/* r0 = r1 << i0 for 0 <= i0 <= 31 (asserted).  A zero shift degenerates to a
+   register move; otherwise T1_LSLI is used for low registers and T2_LSLI for
+   the rest.  */
+static void
+lshi(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0)
+{
+  ASSERT(i0 >= 0 && i0 <= 31);
+  if (i0 == 0) {
+    movr(_jit, r0, r1);
+    return;
+  }
+  if ((r0 | r1) < 8) {
+    T1_LSLI(_jit, r0, r1, i0);
+  } else {
+    T2_LSLI(_jit, r0, r1, i0);
+  }
+}
+
+/* r0 = r1 >> r2 using the ASR encodings.  The two-operand T1_ASR needs all
+   registers below 8 and r0 == r1; T2_ASR covers the rest.  */
+static void
+rshr(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+{
+  int all_low = (r0 | r1 | r2) < 8;
+
+  if (all_low && r0 == r1) {
+    T1_ASR(_jit, r0, r2);
+  } else {
+    T2_ASR(_jit, r0, r1, r2);
+  }
+}
+
+/* r0 = r1 >> i0 (ASR encodings) for 0 <= i0 <= 31 (asserted).  A zero shift
+   degenerates to a register move; otherwise T1_ASRI is used for low
+   registers and T2_ASRI for the rest.  */
+static void
+rshi(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0)
+{
+  ASSERT(i0 >= 0 && i0 <= 31);
+  if (i0 == 0) {
+    movr(_jit, r0, r1);
+    return;
+  }
+  if ((r0 | r1) < 8) {
+    T1_ASRI(_jit, r0, r1, i0);
+  } else {
+    T2_ASRI(_jit, r0, r1, i0);
+  }
+}
+
+/* r0 = r1 >> r2 using the LSR encodings.  The two-operand T1_LSR needs all
+   registers below 8 and r0 == r1; T2_LSR covers the rest.  */
+static void
+rshr_u(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+{
+  int all_low = (r0 | r1 | r2) < 8;
+
+  if (all_low && r0 == r1) {
+    T1_LSR(_jit, r0, r2);
+  } else {
+    T2_LSR(_jit, r0, r1, r2);
+  }
+}
+
+/* r0 = r1 >> i0 (LSR encodings) for 0 <= i0 <= 31 (asserted).  A zero shift
+   degenerates to a register move; otherwise T1_LSRI is used for low
+   registers and T2_LSRI for the rest.  */
+static void
+rshi_u(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0)
+{
+  ASSERT(i0 >= 0 && i0 <= 31);
+  if (i0 == 0) {
+    movr(_jit, r0, r1);
+    return;
+  }
+  if ((r0 | r1) < 8) {
+    T1_LSRI(_jit, r0, r1, i0);
+  } else {
+    T2_LSRI(_jit, r0, r1, i0);
+  }
+}
+
+/* Jump to the address held in r0 (T1_BX).  */
+static void
+jmpr(jit_state_t *_jit, int32_t r0)
+{
+  T1_BX(_jit, r0);
+}
+
+/* Emit an unconditional T2_B and return its relocation for later
+   patching.  */
+static jit_reloc_t
+jmp(jit_state_t *_jit)
+{
+  jit_reloc_t reloc = T2_B(_jit);
+
+  return reloc;
+}
+
+/* Jump to the absolute address i0: emit jmp and patch it to i0 right
+   away.  */
+static void
+jmpi(jit_state_t *_jit, jit_word_t i0)
+{
+  jit_reloc_t reloc = jmp(_jit);
+
+  jit_patch_there(_jit, reloc, (void*)i0);
+}
+
+/* Compare r0 with r1, then emit a conditional branch on cc and return its
+   relocation.  The compare is T1_CMP when both registers are below 8,
+   T1_CMPX when bit 3 is set in both register numbers, and T2_CMP
+   otherwise.  */
+static jit_reloc_t
+bccr(jit_state_t *_jit, int cc, int32_t r0, int32_t r1)
+{
+  if ((r0 | r1) < 8) {
+    T1_CMP(_jit, r0, r1);
+  } else if ((r0 & r1) & 8) {
+    T1_CMPX(_jit, r0, r1);
+  } else {
+    T2_CMP(_jit, r0, r1);
+  }
+  return T2_CC_B(_jit, cc);
+}
+
+/* Compare r0 with the constant i1, then emit a conditional branch on cc and
+   return its relocation.  The compare is T1_CMPI for an 8-bit constant,
+   T2_CMPI when i1 encodes, T2_CMNI when -i1 encodes, and otherwise a
+   register T2_CMP against a temporary holding i1.
+   NOTE(review): the short form is gated on r0 < 7 while sibling helpers test
+   r0 < 8 -- confirm whether r7 is excluded on purpose.  */
+static jit_reloc_t
+bcci(jit_state_t *_jit, int cc, int32_t r0, jit_word_t i1)
+{
+  int enc;
+
+  if (r0 < 7 && !(i1 & 0xffffff00)) {
+    T1_CMPI(_jit, r0, i1);
+  } else if ((enc = encode_thumb_immediate(i1)) != -1) {
+    T2_CMPI(_jit, r0, enc);
+  } else if ((enc = encode_thumb_immediate(-i1)) != -1) {
+    T2_CMNI(_jit, r0, enc);
+  } else {
+    int32_t tmp = jit_gpr_regno(get_temp_gpr(_jit));
+    movi(_jit, tmp, i1);
+    T2_CMP(_jit, r0, tmp);
+    unget_temp_gpr(_jit);
+  }
+  return T2_CC_B(_jit, cc);
+}
+
+/* Compare-and-branch family.  Each helper compares r0 with a register (bccr)
+ or a constant (bcci), branches on the named ARM condition code, and returns
+ the branch relocation. */
+
+/* Branch if r0 < r1: condition ARM_CC_LT. */
+static jit_reloc_t
+bltr(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+ return bccr(_jit, ARM_CC_LT, r0, r1);
+}
+
+/* Branch if r0 < i1: condition ARM_CC_LT. */
+static jit_reloc_t
+blti(jit_state_t *_jit, int32_t r0, int32_t i1)
+{
+ return bcci(_jit, ARM_CC_LT, r0, i1);
+}
+
+/* Unsigned variant of bltr: condition ARM_CC_LO. */
+static jit_reloc_t
+bltr_u(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+ return bccr(_jit, ARM_CC_LO, r0, r1);
+}
+
+/* Unsigned variant of blti: condition ARM_CC_LO. */
+static jit_reloc_t
+blti_u(jit_state_t *_jit, int32_t r0, int32_t i1)
+{
+ return bcci(_jit, ARM_CC_LO, r0, i1);
+}
+
+/* Branch if r0 <= r1: condition ARM_CC_LE. */
+static jit_reloc_t
+bler(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+ return bccr(_jit, ARM_CC_LE, r0, r1);
+}
+
+/* Branch if r0 <= i1: condition ARM_CC_LE. */
+static jit_reloc_t
+blei(jit_state_t *_jit, int32_t r0, int32_t i1)
+{
+ return bcci(_jit, ARM_CC_LE, r0, i1);
+}
+
+/* Unsigned variant of bler: condition ARM_CC_LS. */
+static jit_reloc_t
+bler_u(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+ return bccr(_jit, ARM_CC_LS, r0, r1);
+}
+
+/* Unsigned variant of blei: condition ARM_CC_LS. */
+static jit_reloc_t
+blei_u(jit_state_t *_jit, int32_t r0, int32_t i1)
+{
+ return bcci(_jit, ARM_CC_LS, r0, i1);
+}
+
+/* Branch if r0 == r1: condition ARM_CC_EQ. */
+static jit_reloc_t
+beqr(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+ return bccr(_jit, ARM_CC_EQ, r0, r1);
+}
+
+/* Branch if r0 == i1: condition ARM_CC_EQ. */
+static jit_reloc_t
+beqi(jit_state_t *_jit, int32_t r0, int32_t i1)
+{
+ return bcci(_jit, ARM_CC_EQ, r0, i1);
+}
+
+/* Branch if r0 >= r1: condition ARM_CC_GE. */
+static jit_reloc_t
+bger(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+ return bccr(_jit, ARM_CC_GE, r0, r1);
+}
+
+/* Branch if r0 >= i1: condition ARM_CC_GE. */
+static jit_reloc_t
+bgei(jit_state_t *_jit, int32_t r0, int32_t i1)
+{
+ return bcci(_jit, ARM_CC_GE, r0, i1);
+}
+
+/* Unsigned variant of bger: condition ARM_CC_HS. */
+static jit_reloc_t
+bger_u(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+ return bccr(_jit, ARM_CC_HS, r0, r1);
+}
+
+/* Unsigned variant of bgei: condition ARM_CC_HS. */
+static jit_reloc_t
+bgei_u(jit_state_t *_jit, int32_t r0, int32_t i1)
+{
+ return bcci(_jit, ARM_CC_HS, r0, i1);
+}
+
+/* Branch if r0 > r1: condition ARM_CC_GT. */
+static jit_reloc_t
+bgtr(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+ return bccr(_jit, ARM_CC_GT, r0, r1);
+}
+
+/* Branch if r0 > i1: condition ARM_CC_GT. */
+static jit_reloc_t
+bgti(jit_state_t *_jit, int32_t r0, int32_t i1)
+{
+ return bcci(_jit, ARM_CC_GT, r0, i1);
+}
+
+/* Unsigned variant of bgtr: condition ARM_CC_HI. */
+static jit_reloc_t
+bgtr_u(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+ return bccr(_jit, ARM_CC_HI, r0, r1);
+}
+
+/* Unsigned variant of bgti: condition ARM_CC_HI. */
+static jit_reloc_t
+bgti_u(jit_state_t *_jit, int32_t r0, int32_t i1)
+{
+ return bcci(_jit, ARM_CC_HI, r0, i1);
+}
+
+/* Branch if r0 != r1: condition ARM_CC_NE. */
+static jit_reloc_t
+bner(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+ return bccr(_jit, ARM_CC_NE, r0, r1);
+}
+
+/* Branch if r0 != i1: condition ARM_CC_NE. */
+static jit_reloc_t
+bnei(jit_state_t *_jit, int32_t r0, int32_t i1)
+{
+ return bcci(_jit, ARM_CC_NE, r0, i1);
+}
+
+/* r0 += r1 with flags set, followed by a conditional branch on cc; returns
+   the branch relocation.  T1_ADD is used when both registers are below 8,
+   T2_ADDS otherwise.  */
+static jit_reloc_t
+baddr(jit_state_t *_jit, int cc, int32_t r0, int32_t r1)
+{
+  int both_low = (r0 | r1) < 8;
+
+  if (both_low) {
+    T1_ADD(_jit, r0, r0, r1);
+  } else {
+    T2_ADDS(_jit, r0, r0, r1);
+  }
+  return T2_CC_B(_jit, cc);
+}
+
+/* r0 += i1 with flags set, followed by a conditional branch on cc; returns
+   the branch relocation.  The add is encoded with the 16-bit immediate forms
+   when r0 is below 8 (also as a subtract of -i1), then T2_ADDSI / T2_SUBSI
+   when the immediate encodes, and finally T2_ADDS against a temporary.  */
+static jit_reloc_t
+baddi(jit_state_t *_jit, int cc, int32_t r0, int i1)
+{
+  int enc;
+  int low = r0 < 8;
+
+  if (low && !(i1 & ~7)) {
+    T1_ADDI3(_jit, r0, r0, i1);
+  } else if (low && !(-i1 & ~7)) {
+    T1_SUBI3(_jit, r0, r0, -i1);
+  } else if (low && !(i1 & ~0xff)) {
+    T1_ADDI8(_jit, r0, i1);
+  } else if (low && !(-i1 & ~0xff)) {
+    T1_SUBI8(_jit, r0, -i1);
+  } else if ((enc = encode_thumb_immediate(i1)) != -1) {
+    T2_ADDSI(_jit, r0, r0, enc);
+  } else if ((enc = encode_thumb_immediate(-i1)) != -1) {
+    T2_SUBSI(_jit, r0, r0, enc);
+  } else {
+    int32_t tmp = jit_gpr_regno(get_temp_gpr(_jit));
+    movi(_jit, tmp, i1);
+    T2_ADDS(_jit, r0, r0, tmp);
+    unget_temp_gpr(_jit);
+  }
+  return T2_CC_B(_jit, cc);
+}
+
+/* Add-and-branch family.  Each helper adds a register (baddr) or a constant
+ (baddi) to r0 in place, branches on the named ARM condition code, and
+ returns the branch relocation. */
+
+/* r0 += r1, branch on ARM_CC_VS. */
+static jit_reloc_t
+boaddr(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+ return baddr(_jit, ARM_CC_VS, r0, r1);
+}
+
+/* r0 += i1, branch on ARM_CC_VS. */
+static jit_reloc_t
+boaddi(jit_state_t *_jit, int32_t r0, int32_t i1)
+{
+ return baddi(_jit, ARM_CC_VS, r0, i1);
+}
+
+/* r0 += r1, branch on ARM_CC_HS. */
+static jit_reloc_t
+boaddr_u(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+ return baddr(_jit, ARM_CC_HS, r0, r1);
+}
+
+/* r0 += i1, branch on ARM_CC_HS. */
+static jit_reloc_t
+boaddi_u(jit_state_t *_jit, int32_t r0, int32_t i1)
+{
+ return baddi(_jit, ARM_CC_HS, r0, i1);
+}
+
+/* r0 += r1, branch on ARM_CC_VC. */
+static jit_reloc_t
+bxaddr(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+ return baddr(_jit, ARM_CC_VC, r0, r1);
+}
+
+/* r0 += i1, branch on ARM_CC_VC. */
+static jit_reloc_t
+bxaddi(jit_state_t *_jit, int32_t r0, int32_t i1)
+{
+ return baddi(_jit, ARM_CC_VC, r0, i1);
+}
+
+/* r0 += r1, branch on ARM_CC_LO. */
+static jit_reloc_t
+bxaddr_u(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+ return baddr(_jit, ARM_CC_LO, r0, r1);
+}
+
+/* r0 += i1, branch on ARM_CC_LO. */
+static jit_reloc_t
+bxaddi_u(jit_state_t *_jit, int32_t r0, int32_t i1)
+{
+ return baddi(_jit, ARM_CC_LO, r0, i1);
+}
+
+/* r0 -= r1 with flags set, followed by a conditional branch on cc; returns
+   the branch relocation.  T1_SUB is used when both registers are below 8,
+   T2_SUBS otherwise.  */
+static jit_reloc_t
+bsubr(jit_state_t *_jit, int cc, int32_t r0, int32_t r1)
+{
+  int both_low = (r0 | r1) < 8;
+
+  if (both_low) {
+    T1_SUB(_jit, r0, r0, r1);
+  } else {
+    T2_SUBS(_jit, r0, r0, r1);
+  }
+  return T2_CC_B(_jit, cc);
+}
+
+/* r0 -= i1 with flags set, followed by a conditional branch on cc; returns
+   the branch relocation.  The subtract is encoded with the 16-bit immediate
+   forms when r0 is below 8 (also as an add of -i1), then T2_SUBSI when i1
+   encodes, T2_ADDSI when only -i1 encodes, and finally T2_SUBS against a
+   temporary register.
+   The original emitted T2_SUBSI for the "-i1 encodes" case, which subtracts
+   -i1 and therefore adds i1.  The negated immediate has to go with an add,
+   as subci and the T1_ADDI3 / T1_ADDI8 branches here already do.  */
+static jit_reloc_t
+bsubi(jit_state_t *_jit, int cc, int32_t r0, int i1)
+{
+  int i;
+  if (r0 < 8 && !(i1 & ~7))
+    T1_SUBI3(_jit, r0, r0, i1);
+  else if (r0 < 8 && !(-i1 & ~7))
+    T1_ADDI3(_jit, r0, r0, -i1);
+  else if (r0 < 8 && !(i1 & ~0xff))
+    T1_SUBI8(_jit, r0, i1);
+  else if (r0 < 8 && !(-i1 & ~0xff))
+    T1_ADDI8(_jit, r0, -i1);
+  else if ((i = encode_thumb_immediate(i1)) != -1)
+    T2_SUBSI(_jit, r0, r0, i);
+  else if ((i = encode_thumb_immediate(-i1)) != -1)
+    T2_ADDSI(_jit, r0, r0, i);   /* r0 - i1 == r0 + (-i1) */
+  else {
+    jit_gpr_t reg = get_temp_gpr(_jit);
+    movi(_jit, jit_gpr_regno(reg), i1);
+    T2_SUBS(_jit, r0, r0, jit_gpr_regno(reg));
+    unget_temp_gpr(_jit);
+  }
+  return T2_CC_B(_jit, cc);
+}
+
+/* Subtract-and-branch family.  Each helper subtracts a register (bsubr) or a
+ constant (bsubi) from r0 in place, branches on the named ARM condition
+ code, and returns the branch relocation. */
+
+/* r0 -= r1, branch on ARM_CC_VS. */
+static jit_reloc_t
+bosubr(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+ return bsubr(_jit, ARM_CC_VS, r0, r1);
+}
+
+/* r0 -= i1, branch on ARM_CC_VS. */
+static jit_reloc_t
+bosubi(jit_state_t *_jit, int32_t r0, int32_t i1)
+{
+ return bsubi(_jit, ARM_CC_VS, r0, i1);
+}
+
+/* r0 -= r1, branch on ARM_CC_LO. */
+static jit_reloc_t
+bosubr_u(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+ return bsubr(_jit, ARM_CC_LO, r0, r1);
+}
+
+/* r0 -= i1, branch on ARM_CC_LO. */
+static jit_reloc_t
+bosubi_u(jit_state_t *_jit, int32_t r0, int32_t i1)
+{
+ return bsubi(_jit, ARM_CC_LO, r0, i1);
+}
+
+/* r0 -= r1, branch on ARM_CC_VC. */
+static jit_reloc_t
+bxsubr(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+ return bsubr(_jit, ARM_CC_VC, r0, r1);
+}
+
+/* r0 -= i1, branch on ARM_CC_VC. */
+static jit_reloc_t
+bxsubi(jit_state_t *_jit, int32_t r0, int32_t i1)
+{
+ return bsubi(_jit, ARM_CC_VC, r0, i1);
+}
+
+/* r0 -= r1, branch on ARM_CC_HS. */
+static jit_reloc_t
+bxsubr_u(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+ return bsubr(_jit, ARM_CC_HS, r0, r1);
+}
+
+/* r0 -= i1, branch on ARM_CC_HS. */
+static jit_reloc_t
+bxsubi_u(jit_state_t *_jit, int32_t r0, int32_t i1)
+{
+ return bsubi(_jit, ARM_CC_HS, r0, i1);
+}
+
+/* Test r0 against the mask in r1 (T1_TST for low registers, T2_TST
+   otherwise), then branch on cc and return the branch relocation.  */
+static jit_reloc_t
+bmxr(jit_state_t *_jit, int cc, int32_t r0, int32_t r1)
+{
+  int both_low = (r0 | r1) < 8;
+
+  if (both_low) {
+    T1_TST(_jit, r0, r1);
+  } else {
+    T2_TST(_jit, r0, r1);
+  }
+  return T2_CC_B(_jit, cc);
+}
+
+/* Test r0 against the constant mask i1, then branch on cc and return the
+   branch relocation.  T2_TSTI is used when i1 encodes; otherwise i1 is
+   loaded into a temporary register for T2_TST.  */
+static jit_reloc_t
+bmxi(jit_state_t *_jit, int cc, int32_t r0, jit_word_t i1)
+{
+  int enc = encode_thumb_immediate(i1);
+
+  if (enc != -1) {
+    T2_TSTI(_jit, r0, enc);
+  } else {
+    int32_t tmp = jit_gpr_regno(get_temp_gpr(_jit));
+    movi(_jit, tmp, i1);
+    T2_TST(_jit, r0, tmp);
+    unget_temp_gpr(_jit);
+  }
+  return T2_CC_B(_jit, cc);
+}
+
+/* Mask-test-and-branch family built on bmxr / bmxi: the *s* forms branch on
+ ARM_CC_NE and the *c* forms on ARM_CC_EQ. */
+
+/* Test r0 against r1, branch on ARM_CC_NE. */
+static jit_reloc_t
+bmsr(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+ return bmxr(_jit, ARM_CC_NE, r0, r1);
+}
+
+/* Test r0 against i1, branch on ARM_CC_NE. */
+static jit_reloc_t
+bmsi(jit_state_t *_jit, int32_t r0, int32_t i1)
+{
+ return bmxi(_jit, ARM_CC_NE, r0, i1);
+}
+
+/* Test r0 against r1, branch on ARM_CC_EQ. */
+static jit_reloc_t
+bmcr(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+ return bmxr(_jit, ARM_CC_EQ, r0, r1);
+}
+
+/* Test r0 against i1, branch on ARM_CC_EQ. */
+static jit_reloc_t
+bmci(jit_state_t *_jit, int32_t r0, int32_t i1)
+{
+ return bmxi(_jit, ARM_CC_EQ, r0, i1);
+}
+
+/* Load into r0 from the address in r1, using T2_LDRSBI with a zero
+   immediate.  */
+static void
+ldr_c(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  T2_LDRSBI(_jit, r0, r1, 0);
+}
+
+/* Load into r0 from the absolute address i0: the address is placed in a
+   temporary register and T2_LDRSBI is applied with a zero immediate.  */
+static void
+ldi_c(jit_state_t *_jit, int32_t r0, jit_word_t i0)
+{
+  int32_t tmp = jit_gpr_regno(get_temp_gpr(_jit));
+
+  movi(_jit, tmp, i0);
+  T2_LDRSBI(_jit, r0, tmp, 0);
+  unget_temp_gpr(_jit);
+}
+
+/* Load into r0 with register operands r1 and r2: T1_LDRSB when all three
+   registers are below 8, T2_LDRSB otherwise.  */
+static void
+ldxr_c(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+{
+  int all_low = (r0 | r1 | r2) < 8;
+
+  if (all_low) {
+    T1_LDRSB(_jit, r0, r1, r2);
+  } else {
+    T2_LDRSB(_jit, r0, r1, r2);
+  }
+}
+
+/* Predicate consulted by the indexed load/store helpers that follow.  It is
+ fixed at 0 here, so the branches it guards are never taken. */
+#define jit_ldrt_strt_p() 0
+
+/* Load into r0 from r1 plus the constant offset i0 (LDRSB encodings).
+   Offsets in [-255, -1] use T2_LDRSBIN with the magnitude, offsets in
+   [0, 4095] use T2_LDRSBWI, and anything else is first loaded into an index
+   register: r0 itself unless it aliases r1, in which case a temporary.  */
+static void
+ldxi_c(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0)
+{
+  if (jit_ldrt_strt_p() && i0 >= 0 && i0 <= 255) {
+    T2_LDRSBI(_jit, r0, r1, i0);
+    return;
+  }
+  if (i0 < 0 && i0 >= -255) {
+    T2_LDRSBIN(_jit, r0, r1, -i0);
+    return;
+  }
+  if (i0 >= 0 && i0 <= 4095) {
+    T2_LDRSBWI(_jit, r0, r1, i0);
+    return;
+  }
+
+  int use_temp = r0 == r1;
+  int32_t idx = use_temp ? jit_gpr_regno(get_temp_gpr(_jit)) : r0;
+
+  movi(_jit, idx, i0);
+  if ((r0 | r1 | idx) < 8)
+    T1_LDRSB(_jit, r0, r1, idx);
+  else
+    T2_LDRSB(_jit, r0, r1, idx);
+  if (use_temp)
+    unget_temp_gpr(_jit);
+}
+
+/* Load into r0 from the address in r1, using T2_LDRBI with a zero
+   immediate.  */
+static void
+ldr_uc(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  T2_LDRBI(_jit, r0, r1, 0);
+}
+
+/* Load into r0 from the absolute address i0: the address is placed in a
+   temporary register and T2_LDRBI is applied with a zero immediate.  */
+static void
+ldi_uc(jit_state_t *_jit, int32_t r0, jit_word_t i0)
+{
+  int32_t tmp = jit_gpr_regno(get_temp_gpr(_jit));
+
+  movi(_jit, tmp, i0);
+  T2_LDRBI(_jit, r0, tmp, 0);
+  unget_temp_gpr(_jit);
+}
+
+/* Load into r0 with register operands r1 and r2: T1_LDRB when all three
+   registers are below 8, T2_LDRB otherwise.  */
+static void
+ldxr_uc(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+{
+  int all_low = (r0 | r1 | r2) < 8;
+
+  if (all_low) {
+    T1_LDRB(_jit, r0, r1, r2);
+  } else {
+    T2_LDRB(_jit, r0, r1, r2);
+  }
+}
+
+/* Load into r0 from r1 plus the constant offset i0 (LDRB encodings).  Low
+   registers with an offset in [0, 31] use the 16-bit T1_LDRBI; offsets in
+   [-255, -1] use T2_LDRBIN with the magnitude; offsets in [0, 4095] use
+   T2_LDRBWI; anything else is first loaded into an index register (r0
+   unless it aliases r1, then a temporary).  */
+static void
+ldxi_uc(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0)
+{
+  if ((r0 | r1) < 8 && i0 >= 0 && i0 < 0x20) {
+    T1_LDRBI(_jit, r0, r1, i0);
+    return;
+  }
+  if (jit_ldrt_strt_p() && i0 >= 0 && i0 <= 255) {
+    T2_LDRBI(_jit, r0, r1, i0);
+    return;
+  }
+  if (i0 < 0 && i0 >= -255) {
+    T2_LDRBIN(_jit, r0, r1, -i0);
+    return;
+  }
+  if (i0 >= 0 && i0 <= 4095) {
+    T2_LDRBWI(_jit, r0, r1, i0);
+    return;
+  }
+
+  int use_temp = r0 == r1;
+  int32_t idx = use_temp ? jit_gpr_regno(get_temp_gpr(_jit)) : r0;
+
+  movi(_jit, idx, i0);
+  if ((r0 | r1 | idx) < 8)
+    T1_LDRB(_jit, r0, r1, idx);
+  else
+    T2_LDRB(_jit, r0, r1, idx);
+  if (use_temp)
+    unget_temp_gpr(_jit);
+}
+
+/* Load into r0 from the address in r1, using T2_LDRSHI with a zero
+   immediate.  */
+static void
+ldr_s(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  T2_LDRSHI(_jit, r0, r1, 0);
+}
+
+/* Load into r0 from the absolute address i0: the address is placed in a
+   temporary register and T2_LDRSHI is applied with a zero immediate.  */
+static void
+ldi_s(jit_state_t *_jit, int32_t r0, jit_word_t i0)
+{
+  int32_t tmp = jit_gpr_regno(get_temp_gpr(_jit));
+
+  movi(_jit, tmp, i0);
+  T2_LDRSHI(_jit, r0, tmp, 0);
+  unget_temp_gpr(_jit);
+}
+
+/* Load into r0 with register operands r1 and r2: T1_LDRSH when all three
+   registers are below 8, T2_LDRSH otherwise.  */
+static void
+ldxr_s(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+{
+  int all_low = (r0 | r1 | r2) < 8;
+
+  if (all_low) {
+    T1_LDRSH(_jit, r0, r1, r2);
+  } else {
+    T2_LDRSH(_jit, r0, r1, r2);
+  }
+}
+
+/* Load into r0 from r1 plus the constant offset i0 (LDRSH encodings).
+   Offsets in [-255, -1] use T2_LDRSHIN with the magnitude, offsets in
+   [0, 4095] use T2_LDRSHWI, and anything else is first loaded into an index
+   register: r0 itself unless it aliases r1, in which case a temporary.  */
+static void
+ldxi_s(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0)
+{
+  if (jit_ldrt_strt_p() && i0 >= 0 && i0 <= 255) {
+    T2_LDRSHI(_jit, r0, r1, i0);
+    return;
+  }
+  if (i0 < 0 && i0 >= -255) {
+    T2_LDRSHIN(_jit, r0, r1, -i0);
+    return;
+  }
+  if (i0 >= 0 && i0 <= 4095) {
+    T2_LDRSHWI(_jit, r0, r1, i0);
+    return;
+  }
+
+  int use_temp = r0 == r1;
+  int32_t idx = use_temp ? jit_gpr_regno(get_temp_gpr(_jit)) : r0;
+
+  movi(_jit, idx, i0);
+  if ((r0 | r1 | idx) < 8)
+    T1_LDRSH(_jit, r0, r1, idx);
+  else
+    T2_LDRSH(_jit, r0, r1, idx);
+  if (use_temp)
+    unget_temp_gpr(_jit);
+}
+
+/* Load into r0 from the address in r1, using T2_LDRHI with a zero
+   immediate.  */
+static void
+ldr_us(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  T2_LDRHI(_jit, r0, r1, 0);
+}
+
+/* Load into r0 from the absolute address i0: the address is placed in a
+   temporary register and T2_LDRHI is applied with a zero immediate.  */
+static void
+ldi_us(jit_state_t *_jit, int32_t r0, jit_word_t i0)
+{
+  int32_t tmp = jit_gpr_regno(get_temp_gpr(_jit));
+
+  movi(_jit, tmp, i0);
+  T2_LDRHI(_jit, r0, tmp, 0);
+  unget_temp_gpr(_jit);
+}
+
+/* Load into r0 with register operands r1 and r2: T1_LDRH when all three
+   registers are below 8, T2_LDRH otherwise.  */
+static void
+ldxr_us(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+{
+  int all_low = (r0 | r1 | r2) < 8;
+
+  if (all_low) {
+    T1_LDRH(_jit, r0, r1, r2);
+  } else {
+    T2_LDRH(_jit, r0, r1, r2);
+  }
+}
+
+/* Load into r0 from r1 plus the constant offset i0 (LDRH encodings).  Low
+   registers with an even, non-negative offset whose half is below 0x20 use
+   the 16-bit T1_LDRHI (passing i0 >> 1); offsets in [-255, -1] use
+   T2_LDRHIN with the magnitude; offsets in [0, 4095] use T2_LDRHWI;
+   anything else is first loaded into an index register (r0 unless it
+   aliases r1, then a temporary).  */
+static void
+ldxi_us(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0)
+{
+  if ((r0 | r1) < 8 && i0 >= 0 && !(i0 & 1) && (i0 >> 1) < 0x20) {
+    T1_LDRHI(_jit, r0, r1, i0 >> 1);
+    return;
+  }
+  if (jit_ldrt_strt_p() && i0 >= 0 && i0 <= 255) {
+    T2_LDRHI(_jit, r0, r1, i0);
+    return;
+  }
+  if (i0 < 0 && i0 >= -255) {
+    T2_LDRHIN(_jit, r0, r1, -i0);
+    return;
+  }
+  if (i0 >= 0 && i0 <= 4095) {
+    T2_LDRHWI(_jit, r0, r1, i0);
+    return;
+  }
+
+  int use_temp = r0 == r1;
+  int32_t idx = use_temp ? jit_gpr_regno(get_temp_gpr(_jit)) : r0;
+
+  movi(_jit, idx, i0);
+  if ((r0 | r1 | idx) < 8)
+    T1_LDRH(_jit, r0, r1, idx);
+  else
+    T2_LDRH(_jit, r0, r1, idx);
+  if (use_temp)
+    unget_temp_gpr(_jit);
+}
+
+/* Load into r0 from the address in r1, using T2_LDRI with a zero
+   immediate.  */
+static void
+ldr_i(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  T2_LDRI(_jit, r0, r1, 0);
+}
+
+/* Load into r0 from the absolute address i0: the address is placed in a
+   temporary register and T2_LDRI is applied with a zero immediate.  */
+static void
+ldi_i(jit_state_t *_jit, int32_t r0, jit_word_t i0)
+{
+  int32_t tmp = jit_gpr_regno(get_temp_gpr(_jit));
+
+  movi(_jit, tmp, i0);
+  T2_LDRI(_jit, r0, tmp, 0);
+  unget_temp_gpr(_jit);
+}
+
+/* Load into r0 with register operands r1 and r2: T1_LDR when all three
+   registers are below 8, T2_LDR otherwise.  */
+static void
+ldxr_i(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+{
+  int all_low = (r0 | r1 | r2) < 8;
+
+  if (all_low) {
+    T1_LDR(_jit, r0, r1, r2);
+  } else {
+    T2_LDR(_jit, r0, r1, r2);
+  }
+}
+
+/* Load into r0 from r1 plus the constant offset i0 (LDR encodings), trying
+   in order:
+     - T1_LDRI for low registers and a word-aligned offset whose quarter is
+       below 0x20;
+     - T1_LDRISP when r1 is JIT_SP, r0 is low and the word-aligned offset's
+       quarter fits in 8 bits;
+     - T2_LDRIN with the magnitude for offsets in [-255, -1];
+     - T2_LDRWI for offsets in [0, 4095];
+     - a register-indexed load with i0 in r0 (or in a temporary when r0
+       aliases r1).
+   The original accepted only i0 > -255 for T2_LDRIN, unlike every sibling
+   loader, which accepts i0 >= -255.  -255 has magnitude 255 and is passed
+   to the same torri8-based emitter by those siblings, so it is accepted
+   here as well instead of falling through to the longer register path.  */
+static void
+ldxi_i(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0)
+{
+  if ((r0|r1) < 8 && i0 >= 0 && !(i0 & 3) && (i0 >> 2) < 0x20)
+    T1_LDRI(_jit, r0, r1, i0 >> 2);
+  else if (r1 == jit_gpr_regno(JIT_SP) && r0 < 8 &&
+           i0 >= 0 && !(i0 & 3) && (i0 >> 2) <= 255)
+    T1_LDRISP(_jit, r0, i0 >> 2);
+  else if (jit_ldrt_strt_p() && i0 >= 0 && i0 <= 255)
+    T2_LDRI(_jit, r0, r1, i0);
+  else if (i0 < 0 && i0 >= -255)
+    T2_LDRIN(_jit, r0, r1, -i0);
+  else if (i0 >= 0 && i0 <= 4095)
+    T2_LDRWI(_jit, r0, r1, i0);
+  else if (r0 != r1) {
+    movi(_jit, r0, i0);
+    if ((r0|r1) < 8)
+      T1_LDR(_jit, r0, r1, r0);
+    else
+      T2_LDR(_jit, r0, r1, r0);
+  } else {
+    jit_gpr_t reg = get_temp_gpr(_jit);
+    movi(_jit, jit_gpr_regno(reg), i0);
+    if ((r0|r1|jit_gpr_regno(reg)) < 8)
+      T1_LDR(_jit, r0, r1, jit_gpr_regno(reg));
+    else
+      T2_LDR(_jit, r0, r1, jit_gpr_regno(reg));
+    unget_temp_gpr(_jit);
+  }
+}
+
+static void
+str_c(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+ T2_STRBI(_jit, r1, r0, 0);
+}
+
+ /* Byte store to the absolute address i0: the address goes into a scratch
+  * register via movi, then T2_STRBI is emitted with offset 0. */
+ static void
+ sti_c(jit_state_t *_jit, jit_word_t i0, int32_t r0)
+ {
+   int32_t base = jit_gpr_regno(get_temp_gpr(_jit));
+
+   movi(_jit, base, i0);
+   T2_STRBI(_jit, r0, base, 0);
+   unget_temp_gpr(_jit);
+ }
+
+ /* Register-indexed byte store; T1 encoder when every register number is
+  * below 8, T2 encoder otherwise. */
+ static void
+ stxr_c(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+ {
+   if ((r0|r1|r2) >= 8)
+     T2_STRB(_jit, r2, r1, r0);
+   else
+     T1_STRB(_jit, r2, r1, r0);
+ }
+
+ /* Byte store of r1 at base r0 plus immediate i0.  Each immediate encoder
+  * is tried in turn; when none accepts the offset it is loaded into a
+  * scratch register and a register-indexed store is emitted instead. */
+ static void
+ stxi_c(jit_state_t *_jit, jit_word_t i0, int32_t r0, int32_t r1)
+ {
+   if ((r0|r1) < 8 && i0 >= 0 && i0 < 0x20) {
+     T1_STRBI(_jit, r1, r0, i0);
+     return;
+   }
+   if (jit_ldrt_strt_p() && i0 >= 0 && i0 <= 255) {
+     T2_STRBI(_jit, r1, r0, i0);
+     return;
+   }
+   if (i0 < 0 && i0 >= -255) {
+     T2_STRBIN(_jit, r1, r0, -i0);
+     return;
+   }
+   if (i0 >= 0 && i0 <= 4095) {
+     T2_STRBWI(_jit, r1, r0, i0);
+     return;
+   }
+
+   /* No immediate form fits. */
+   int32_t off = jit_gpr_regno(get_temp_gpr(_jit));
+   movi(_jit, off, i0);
+   if ((r0|r1|off) < 8)
+     T1_STRB(_jit, r1, r0, off);
+   else
+     T2_STRB(_jit, r1, r0, off);
+   unget_temp_gpr(_jit);
+ }
+
+ /* Halfword store through a register: T2_STRHI with r1, r0 and offset 0. */
+ static void
+ str_s(jit_state_t *_jit, int32_t r0, int32_t r1)
+ {
+   T2_STRHI(_jit, r1, r0, 0);
+ }
+
+ /* Halfword store to the absolute address i0, via a scratch register that
+  * receives the address. */
+ static void
+ sti_s(jit_state_t *_jit, jit_word_t i0, int32_t r0)
+ {
+   int32_t base = jit_gpr_regno(get_temp_gpr(_jit));
+
+   movi(_jit, base, i0);
+   T2_STRHI(_jit, r0, base, 0);
+   unget_temp_gpr(_jit);
+ }
+
+ /* Register-indexed halfword store; T1 encoder for all-low registers,
+  * T2 encoder otherwise. */
+ static void
+ stxr_s(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+ {
+   if ((r0|r1|r2) >= 8)
+     T2_STRH(_jit, r2, r1, r0);
+   else
+     T1_STRH(_jit, r2, r1, r0);
+ }
+
+ /* Halfword store of r1 at base r0 plus immediate i0.  Immediate encoders
+  * are tried first; the fallback loads the offset into a scratch register
+  * and emits a register-indexed store. */
+ static void
+ stxi_s(jit_state_t *_jit, jit_word_t i0, int32_t r0, int32_t r1)
+ {
+   if ((r0|r1) < 8 && i0 >= 0 && !(i0 & 1) && (i0 >> 1) < 0x20) {
+     T1_STRHI(_jit, r1, r0, i0 >> 1);
+     return;
+   }
+   if (jit_ldrt_strt_p() && i0 >= 0 && i0 <= 255) {
+     T2_STRHI(_jit, r1, r0, i0);
+     return;
+   }
+   if (i0 < 0 && i0 >= -255) {
+     T2_STRHIN(_jit, r1, r0, -i0);
+     return;
+   }
+   if (i0 >= 0 && i0 <= 4095) {
+     T2_STRHWI(_jit, r1, r0, i0);
+     return;
+   }
+
+   /* No immediate form fits. */
+   int32_t off = jit_gpr_regno(get_temp_gpr(_jit));
+   movi(_jit, off, i0);
+   if ((r0|r1|off) < 8)
+     T1_STRH(_jit, r1, r0, off);
+   else
+     T2_STRH(_jit, r1, r0, off);
+   unget_temp_gpr(_jit);
+ }
+
+ /* Word store through a register: T2_STRI with r1, r0 and offset 0. */
+ static void
+ str_i(jit_state_t *_jit, int32_t r0, int32_t r1)
+ {
+   T2_STRI(_jit, r1, r0, 0);
+ }
+
+ /* Word store to the absolute address i0, via a scratch register that
+  * receives the address. */
+ static void
+ sti_i(jit_state_t *_jit, jit_word_t i0, int32_t r0)
+ {
+   int32_t base = jit_gpr_regno(get_temp_gpr(_jit));
+
+   movi(_jit, base, i0);
+   T2_STRI(_jit, r0, base, 0);
+   unget_temp_gpr(_jit);
+ }
+
+ /* Register-indexed word store; T1 encoder for all-low registers, T2
+  * encoder otherwise. */
+ static void
+ stxr_i(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+ {
+   if ((r0|r1|r2) >= 8)
+     T2_STR(_jit, r2, r1, r0);
+   else
+     T1_STR(_jit, r2, r1, r0);
+ }
+
+ /* Word store of r1 at base r0 plus immediate i0.  Immediate encoders
+  * (including the SP-relative T1 form when r0 is JIT_SP) are tried first;
+  * the fallback loads the offset into a scratch register and emits a
+  * register-indexed store. */
+ static void
+ stxi_i(jit_state_t *_jit, jit_word_t i0, int32_t r0, int32_t r1)
+ {
+   if ((r0|r1) < 8 && i0 >= 0 && !(i0 & 3) && (i0 >> 2) < 0x20) {
+     T1_STRI(_jit, r1, r0, i0 >> 2);
+     return;
+   }
+   if (r0 == jit_gpr_regno(JIT_SP) && r1 < 8 &&
+       i0 >= 0 && !(i0 & 3) && (i0 >> 2) <= 255) {
+     T1_STRISP(_jit, r1, i0 >> 2);
+     return;
+   }
+   if (jit_ldrt_strt_p() && i0 >= 0 && i0 <= 255) {
+     T2_STRI(_jit, r1, r0, i0);
+     return;
+   }
+   if (i0 < 0 && i0 >= -255) {
+     T2_STRIN(_jit, r1, r0, -i0);
+     return;
+   }
+   if (i0 >= 0 && i0 <= 4095) {
+     T2_STRWI(_jit, r1, r0, i0);
+     return;
+   }
+
+   /* No immediate form fits. */
+   int32_t off = jit_gpr_regno(get_temp_gpr(_jit));
+   movi(_jit, off, i0);
+   if ((r0|r1|off) < 8)
+     T1_STR(_jit, r1, r0, off);
+   else
+     T2_STR(_jit, r1, r0, off);
+   unget_temp_gpr(_jit);
+ }
+
+ /* 16-bit byte swap: emit REV from r1 into r0, then an unsigned right
+  * shift of r0 by 16. */
+ static void
+ bswapr_us(jit_state_t *_jit, int32_t r0, int32_t r1)
+ {
+   if ((r0|r1) >= 8)
+     T2_REV(_jit, r0, r1);
+   else
+     T1_REV(_jit, r0, r1);
+
+   rshi_u(_jit, r0, r0, 16);
+ }
+
+ /* inline glibc htonl (without register clobber): a single REV from r1
+  * into r0, T1 encoder for low registers, T2 encoder otherwise. */
+ static void
+ bswapr_ui(jit_state_t *_jit, int32_t r0, int32_t r1)
+ {
+   if ((r0|r1) >= 8)
+     T2_REV(_jit, r0, r1);
+   else
+     T1_REV(_jit, r0, r1);
+ }
+
+ /* Emit SXTB from r1 into r0 (T1 encoder for low registers, else T2). */
+ static void
+ extr_c(jit_state_t *_jit, int32_t r0, int32_t r1)
+ {
+   if ((r0|r1) >= 8)
+     T2_SXTB(_jit, r0, r1);
+   else
+     T1_SXTB(_jit, r0, r1);
+ }
+
+ /* Emit UXTB from r1 into r0 (T1 encoder for low registers, else T2). */
+ static void
+ extr_uc(jit_state_t *_jit, int32_t r0, int32_t r1)
+ {
+   if ((r0|r1) >= 8)
+     T2_UXTB(_jit, r0, r1);
+   else
+     T1_UXTB(_jit, r0, r1);
+ }
+
+ /* Emit SXTH from r1 into r0 (T1 encoder for low registers, else T2). */
+ static void
+ extr_s(jit_state_t *_jit, int32_t r0, int32_t r1)
+ {
+   if ((r0|r1) >= 8)
+     T2_SXTH(_jit, r0, r1);
+   else
+     T1_SXTH(_jit, r0, r1);
+ }
+
+ /* Emit UXTH from r1 into r0 (T1 encoder for low registers, else T2). */
+ static void
+ extr_us(jit_state_t *_jit, int32_t r0, int32_t r1)
+ {
+   if ((r0|r1) >= 8)
+     T2_UXTH(_jit, r0, r1);
+   else
+     T1_UXTH(_jit, r0, r1);
+ }
+
+ /* Call through a register: emits T1_BLX on r0. */
+ static void
+ callr(jit_state_t *_jit, int32_t r0)
+ {
+   T1_BLX(_jit, r0);
+ }
+
+ /* Call an absolute address: emit T2_BLI and patch its relocation to i0. */
+ static void
+ calli(jit_state_t *_jit, jit_word_t i0)
+ {
+   jit_reloc_t call_site = T2_BLI(_jit);
+
+   jit_patch_there(_jit, call_site, (void*)i0);
+ }
+
+ /* Jump to i0 while setting the link register; emits the same T2_BLI plus
+  * patch sequence as calli. */
+ static void
+ jmpi_with_link(jit_state_t *_jit, jit_word_t i0)
+ {
+   jit_reloc_t branch = T2_BLI(_jit);
+
+   jit_patch_there(_jit, branch, (void*)i0);
+ }
+
+ /* Intentionally emits nothing on this target. */
+ static void
+ push_link_register(jit_state_t *_jit)
+ {
+   /* no code to emit */
+ }
+
+ /* Intentionally emits nothing on this target. */
+ static void
+ pop_link_register(jit_state_t *_jit)
+ {
+   /* no code to emit */
+ }
+
+ /* Return: emits T1_BX on the link register. */
+ static void
+ ret(jit_state_t *_jit)
+ {
+   int32_t link = jit_gpr_regno(_LR);
+
+   T1_BX(_jit, link);
+ }
+
+ /* Return the immediate i0: move it into _R0, then emit the return. */
+ static void
+ reti(jit_state_t *_jit, int32_t i0)
+ {
+   int32_t result_reg = jit_gpr_regno(_R0);
+
+   movi(_jit, result_reg, i0);
+   ret(_jit);
+ }
+
+ /* Return the value held in r0: move it into _R0, then emit the return. */
+ static void
+ retr(jit_state_t *_jit, int32_t r0)
+ {
+   int32_t result_reg = jit_gpr_regno(_R0);
+
+   movr(_jit, result_reg, r0);
+   ret(_jit);
+ }
+
+ /* Fetch a call result from _R0 into r0 through extr_c. */
+ static void
+ retval_c(jit_state_t *_jit, int32_t r0)
+ {
+   int32_t result_reg = jit_gpr_regno(_R0);
+
+   extr_c(_jit, r0, result_reg);
+ }
+
+ /* Fetch a call result from _R0 into r0 through extr_uc. */
+ static void
+ retval_uc(jit_state_t *_jit, int32_t r0)
+ {
+   int32_t result_reg = jit_gpr_regno(_R0);
+
+   extr_uc(_jit, r0, result_reg);
+ }
+
+ /* Fetch a call result from _R0 into r0 through extr_s. */
+ static void
+ retval_s(jit_state_t *_jit, int32_t r0)
+ {
+   int32_t result_reg = jit_gpr_regno(_R0);
+
+   extr_s(_jit, r0, result_reg);
+ }
+
+ /* Fetch a call result from _R0 into r0 through extr_us. */
+ static void
+ retval_us(jit_state_t *_jit, int32_t r0)
+ {
+   int32_t result_reg = jit_gpr_regno(_R0);
+
+   extr_us(_jit, r0, result_reg);
+ }
+
+ /* Fetch a call result from _R0 into r0 with a plain register move. */
+ static void
+ retval_i(jit_state_t *_jit, int32_t r0)
+ {
+   int32_t result_reg = jit_gpr_regno(_R0);
+
+   movr(_jit, r0, result_reg);
+ }
+
+ /* Reserve room for one wide instruction by emitting two zero halfwords,
+  * and return the address where they start so that
+  * patch_jmp_without_veneer can fill them in later. */
+ static uint32_t*
+ jmp_without_veneer(jit_state_t *_jit)
+ {
+   uint32_t *placeholder = _jit->pc.ui;
+
+   for (int half = 0; half < 2; half++)
+     emit_u16(_jit, 0);
+
+   return placeholder;
+ }
+
+ /* Overwrite the placeholder at loc with a THUMB2_B branch to the current
+  * emission address.  The displacement is taken relative to loc + 4. */
+ static void
+ patch_jmp_without_veneer(jit_state_t *_jit, uint32_t *loc)
+ {
+   uint8_t *target = (uint8_t*)jit_address(_jit);
+   uint8_t *origin = ((uint8_t *)loc) + 4;
+   int32_t disp = target - origin;
+
+   write_wide_thumb(loc, THUMB2_B | encode_thumb_jump(disp));
+ }
+
+ struct veneer /* In-memory layout of what emit_veneer writes: two 16-bit
+ * halfwords followed by a 32-bit word.  patch_veneer rewrites only `addr'. */
+ {
+ uint16_t ldr; /* first halfword emitted by emit_veneer: thumb1_ldr | (tmp << 8) */
+ uint16_t br; /* second halfword emitted by emit_veneer: THUMB_BX | (tmp << 3) */
+ uint32_t addr; /* target address word; the field patch_veneer overwrites */
+ };
+
+ /* Retarget an already-emitted veneer: only its address word changes. */
+ static void
+ patch_veneer(uint32_t *loc, jit_pointer_t addr)
+ {
+   ((struct veneer*) loc)->addr = (uintptr_t) addr;
+ }
+
+ /* Emit a veneer: a Thumb-1 LDR into JIT_TMP1, a BX through that register,
+  * and then the 32-bit target itself.  JIT_TMP1 must be a low register. */
+ static void
+ emit_veneer(jit_state_t *_jit, jit_pointer_t target)
+ {
+   const uint16_t thumb1_ldr = 0x4800;
+   int32_t tmp = jit_gpr_regno(JIT_TMP1);
+
+   ASSERT(tmp < 8);
+
+   // Loaded addr is 4 bytes after the LDR, which is aligned, so offset is 0.
+   uint16_t load_insn = thumb1_ldr | (tmp << 8);
+   uint16_t jump_insn = THUMB_BX|(_u4(tmp)<<3);
+
+   emit_u16(_jit, load_insn);
+   emit_u16(_jit, jump_insn);
+   emit_u32(_jit, (uint32_t) target);
+ }
+
+ /* Atomic word load: ldr_i of dst from loc, with a DMB ISH emitted both
+  * before and after it. */
+ static void
+ ldr_atomic(jit_state_t *_jit, int32_t dst, int32_t loc)
+ {
+   T1_DMB(_jit, DMB_ISH);   /* barrier before the access */
+   ldr_i(_jit, dst, loc);
+   T1_DMB(_jit, DMB_ISH);   /* barrier after the access */
+ }
+
+ /* Atomic word store: str_i of val to loc, with a DMB ISH emitted both
+  * before and after it. */
+ static void
+ str_atomic(jit_state_t *_jit, int32_t loc, int32_t val)
+ {
+   T1_DMB(_jit, DMB_ISH);   /* barrier before the access */
+   str_i(_jit, loc, val);
+   T1_DMB(_jit, DMB_ISH);   /* barrier after the access */
+ }
+
+ /* Atomic exchange built from an LDREX/STREX retry loop between two DMB ISH
+  * barriers.  The old value is read into dst, or into a second scratch
+  * register when dst is the same register as val or loc, and moved into
+  * dst after the loop. */
+ static void
+ swap_atomic(jit_state_t *_jit, int32_t dst, int32_t loc, int32_t val)
+ {
+   /* The STREX status register is always a scratch register. */
+   int32_t status = jit_gpr_regno(get_temp_gpr(_jit));
+   const int dst_aliases_input = (dst == val || dst == loc);
+   int32_t old = dst_aliases_input ? jit_gpr_regno(get_temp_gpr(_jit)) : dst;
+
+   T1_DMB(_jit, DMB_ISH);
+   void *loop_top = jit_address(_jit);
+   T1_LDREX(_jit, old, loc, 0);
+   T1_STREX(_jit, status, val, loc, 0);
+   /* Non-zero status: go round again. */
+   jit_patch_there(_jit, bnei(_jit, status, 0), loop_top);
+   T1_DMB(_jit, DMB_ISH);
+   movr(_jit, dst, old);
+
+   if (dst_aliases_input)
+     unget_temp_gpr(_jit);
+   unget_temp_gpr(_jit);
+ }
+
+ /* Atomic compare-and-swap.  LDREX reads the current value; if it differs
+  * from `expected' the store is skipped, otherwise STREX of `desired' is
+  * retried until its status register is zero.  The value read ends up in
+  * dst.  A scratch register stands in for dst while dst aliases any input. */
+ static void
+ cas_atomic(jit_state_t *_jit, int32_t dst, int32_t loc, int32_t expected,
+            int32_t desired)
+ {
+   const int dst_aliases_input =
+     (dst == loc || dst == expected || dst == desired);
+   int32_t seen = dst_aliases_input ? jit_gpr_regno(get_temp_gpr(_jit)) : dst;
+
+   T1_DMB(_jit, DMB_ISH);
+   void *loop_top = jit_address(_jit);
+   T1_LDREX(_jit, seen, loc, 0);
+   jit_reloc_t mismatch = bner(_jit, seen, expected);
+
+   int status = jit_gpr_regno(get_temp_gpr(_jit));
+   T1_STREX(_jit, status, desired, loc, 0);
+   jit_patch_there(_jit, bnei(_jit, status, 0), loop_top);
+   unget_temp_gpr(_jit);
+
+   jit_patch_here(_jit, mismatch);
+   T1_DMB(_jit, DMB_ISH);
+   movr(_jit, dst, seen);
+
+   if (dst_aliases_input)
+     unget_temp_gpr(_jit);
+ }
+
+ /* Emit a breakpoint instruction (T1_BRK). */
+ static void
+ breakpoint(jit_state_t *_jit)
+ {
+   T1_BRK(_jit);
+ }
diff --git a/deps/lightening/lightening/arm-vfp.c b/deps/lightening/lightening/arm-vfp.c
new file mode 100644
index 0000000..208edc3
--- /dev/null
+++ b/deps/lightening/lightening/arm-vfp.c
@@ -0,0 +1,1168 @@
+/*
+ * Copyright (C) 2012-2017, 2019 Free Software Foundation, Inc.
+ *
+ * This file is part of GNU lightning.
+ *
+ * GNU lightning is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ *
+ * GNU lightning is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+ * License for more details.
+ *
+ * Authors:
+ * Paulo Cesar Pereira de Andrade
+ */
+
+ /* VFP opcode templates and field bits.  Composite values are wrapped in
+  * parentheses so they stay a single operand wherever they are expanded. */
+ #define ARM_V_F64 0x00000100
+ #define ARM_VADD_F 0x0e300a00
+ #define ARM_VSUB_F 0x0e300a40
+ #define ARM_VMUL_F 0x0e200a00
+ #define ARM_VDIV_F 0x0e800a00
+ #define ARM_VABS_F 0x0eb00ac0
+ #define ARM_VNEG_F 0x0eb10a40
+ #define ARM_VSQRT_F 0x0eb10ac0
+ #define ARM_VMOV_F 0x0eb00a40
+ #define ARM_VMOV_A_S 0x0e100a10 /* vmov rn, sn */
+ #define ARM_VMOV_S_A 0x0e000a10 /* vmov sn, rn */
+ #define ARM_VMOV_D_AA 0x0c400b10 /* vmov dn, rn,rn */
+ #define ARM_VCMP 0x0eb40a40
+ #define ARM_VMRS 0x0ef10a10
+ #define ARM_VCVT_2I 0x00040000 /* to integer */
+ #define ARM_VCVT_2S 0x00010000 /* to signed */
+ #define ARM_VCVT_RS 0x00000080 /* round to zero or signed */
+ #define ARM_VCVT 0x0eb80a40
+ #define ARM_VCVT_S32_F32 (ARM_VCVT|ARM_VCVT_2I|ARM_VCVT_2S|ARM_VCVT_RS)
+ #define ARM_VCVT_S32_F64 (ARM_VCVT|ARM_VCVT_2I|ARM_VCVT_2S|ARM_VCVT_RS|ARM_V_F64)
+ #define ARM_VCVT_F32_S32 (ARM_VCVT|ARM_VCVT_RS)
+ #define ARM_VCVT_F64_S32 (ARM_VCVT|ARM_VCVT_RS|ARM_V_F64)
+ #define ARM_VCVT_F 0x0eb70ac0
+ #define ARM_VCVT_F32_F64 ARM_VCVT_F
+ #define ARM_VCVT_F64_F32 (ARM_VCVT_F|ARM_V_F64)
+ #define ARM_P 0x00800000 /* positive offset */
+ #define ARM_V_D 0x00400000
+ #define ARM_V_N 0x00000080
+ #define ARM_V_M 0x00000020
+ #define ARM_V_I32 0x00200000
+ #define ARM_VMOVI 0x02800010
+ #define ARM_VMVNI 0x02800030
+ #define ARM_VLDR 0x0d100a00
+ #define ARM_VSTR 0x0d000a00
+ #define ARM_VM 0x0c000a00
+ #define ARM_VMOV_A_D 0x0e100b10
+ #define ARM_VMOV_D_A 0x0e000b10
+
+ #define vfp_regno(rn) ((rn) >> 1)
+
+ /* Emit a pre-built immediate opcode `oi' with the halved register number
+  * of r0 placed at bits 15:12.  r0 must be even and `oi' must leave that
+  * field clear. */
+ static void
+ vodi(jit_state_t *_jit, int oi, int r0)
+ {
+   int vd;
+
+   ASSERT(!(oi & 0x0000f000));
+   ASSERT(!(r0 & 1));
+   vd = r0 >> 1;
+   emit_wide_thumb(_jit, oi|(_u4(vd)<<12));
+ }
+
+ /* Two-operand encoder.  The low bit of each register number selects the
+  * ARM_V_D (r0) or ARM_V_M (r1) flag; the remaining bits go into the
+  * 4-bit fields at bit 12 and bit 0. */
+ static void
+ vo_ss(jit_state_t *_jit, int o, int r0, int r1)
+ {
+   int vd, vm;
+
+   ASSERT(!(o & 0xf000f00f));
+   if (r0 & 1)
+     o |= ARM_V_D;
+   if (r1 & 1)
+     o |= ARM_V_M;
+   vd = r0 >> 1;
+   vm = r1 >> 1;
+   emit_wide_thumb(_jit, ARM_CC_AL|o|(_u4(vd)<<12)|_u4(vm));
+ }
+
+ /* Two-operand encoder that requires both register numbers to be even;
+  * their halves go into the 4-bit fields at bit 12 and bit 0. */
+ static void
+ vo_dd(jit_state_t *_jit, int o, int r0, int r1)
+ {
+   int vd, vm;
+
+   ASSERT(!(o & 0xf000f00f));
+   ASSERT(!(r0 & 1) && !(r1 & 1));
+   vd = r0 >> 1;
+   vm = r1 >> 1;
+   emit_wide_thumb(_jit, ARM_CC_AL|o|(_u4(vd)<<12)|_u4(vm));
+ }
+
+ /* Encoder taking r0 unchanged at bit 12 and r1 split into the ARM_V_N
+  * flag (low bit) plus a 4-bit field at bit 16 (remaining bits). */
+ static void
+ vors_(jit_state_t *_jit, int o, int r0, int r1)
+ {
+   int vn;
+
+   ASSERT(!(o & 0xf000f00f));
+   if (r1 & 1)
+     o |= ARM_V_N;
+   vn = r1 >> 1;
+   emit_wide_thumb(_jit, ARM_CC_AL|o|(_u4(vn)<<16)|(_u4(r0)<<12));
+ }
+
+ /* Like vors_, except that the low bit of r1 sets ARM_V_I32 rather than
+  * ARM_V_N. */
+ static void
+ vori_(jit_state_t *_jit, int o, int r0, int r1)
+ {
+   int vn;
+
+   ASSERT(!(o & 0xf000f00f));
+   /* use same bit pattern, to set opc1... */
+   if (r1 & 1)
+     o |= ARM_V_I32;
+   vn = r1 >> 1;
+   emit_wide_thumb(_jit, ARM_CC_AL|o|(_u4(vn)<<16)|(_u4(r0)<<12));
+ }
+
+ /* Encoder with r1 at bit 16 and r0 at bit 12 used as given, and an
+  * even-numbered r2 whose half goes into the low 4-bit field. */
+ static void
+ vorrd(jit_state_t *_jit, int o, int r0, int r1, int r2)
+ {
+   int vm;
+
+   ASSERT(!(o & 0xf00ff00f));
+   ASSERT(!(r2 & 1));
+   vm = r2 >> 1;
+   emit_wide_thumb(_jit, ARM_CC_AL|o|(_u4(r1)<<16)|(_u4(r0)<<12)|_u4(vm));
+ }
+
+ /* Three-operand encoder.  The low bit of r0/r1/r2 selects ARM_V_D,
+  * ARM_V_N and ARM_V_M respectively; the remaining bits fill the 4-bit
+  * fields at bits 12, 16 and 0. */
+ static void
+ vosss(jit_state_t *_jit, int o, int r0, int r1, int r2)
+ {
+   int vd, vn, vm;
+
+   ASSERT(!(o & 0xf00ff00f));
+   if (r0 & 1)
+     o |= ARM_V_D;
+   if (r1 & 1)
+     o |= ARM_V_N;
+   if (r2 & 1)
+     o |= ARM_V_M;
+   vd = r0 >> 1;
+   vn = r1 >> 1;
+   vm = r2 >> 1;
+   emit_wide_thumb(_jit, ARM_CC_AL|o|(_u4(vn)<<16)|(_u4(vd)<<12)|_u4(vm));
+ }
+
+ /* Three-operand encoder that requires all register numbers to be even;
+  * their halves fill the 4-bit fields at bits 12, 16 and 0. */
+ static void
+ voddd(jit_state_t *_jit, int o, int r0, int r1, int r2)
+ {
+   int vd, vn, vm;
+
+   ASSERT(!(o & 0xf00ff00f));
+   ASSERT(!(r0 & 1) && !(r1 & 1) && !(r2 & 1));
+   vd = r0 >> 1;
+   vn = r1 >> 1;
+   vm = r2 >> 1;
+   emit_wide_thumb(_jit, ARM_CC_AL|o|(_u4(vn)<<16)|(_u4(vd)<<12)|_u4(vm));
+ }
+
+ /* Load/store encoder: r1 goes to bit 16, the halved r0 to bit 12 and the
+  * 8-bit i0 to the low byte.  An odd r0 sets ARM_V_D and is only allowed
+  * when ARM_V_F64 is not set in `o'. */
+ static void
+ vldst(jit_state_t *_jit, int o, int r0, int r1, int i0)
+ {
+   int vd;
+
+   /* i0 << 2 is byte offset */
+   ASSERT(!(o & 0xf00ff0ff));
+   if (r0 & 1) {
+     ASSERT(!(o & ARM_V_F64));
+     o |= ARM_V_D;
+   }
+   vd = r0 >> 1;
+   emit_wide_thumb(_jit, ARM_CC_AL|o|(_u4(r1)<<16)|(_u4(vd)<<12)|_u8(i0));
+ }
+
+ /* VADD, single precision: ARM_VADD_F through vosss. */
+ static void
+ VADD_F32(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+ {
+   vosss(_jit, ARM_VADD_F, r0, r1, r2);
+ }
+
+ /* VADD, double precision: ARM_VADD_F with ARM_V_F64 through voddd. */
+ static void
+ VADD_F64(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+ {
+   voddd(_jit, ARM_VADD_F | ARM_V_F64, r0, r1, r2);
+ }
+
+ /* VSUB, single precision: ARM_VSUB_F through vosss. */
+ static void
+ VSUB_F32(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+ {
+   vosss(_jit, ARM_VSUB_F, r0, r1, r2);
+ }
+
+ /* VSUB, double precision: ARM_VSUB_F with ARM_V_F64 through voddd. */
+ static void
+ VSUB_F64(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+ {
+   voddd(_jit, ARM_VSUB_F | ARM_V_F64, r0, r1, r2);
+ }
+
+ /* VMUL, single precision: ARM_VMUL_F through vosss. */
+ static void
+ VMUL_F32(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+ {
+   vosss(_jit, ARM_VMUL_F, r0, r1, r2);
+ }
+
+ /* VMUL, double precision: ARM_VMUL_F with ARM_V_F64 through voddd. */
+ static void
+ VMUL_F64(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+ {
+   voddd(_jit, ARM_VMUL_F | ARM_V_F64, r0, r1, r2);
+ }
+
+ /* VDIV, single precision: ARM_VDIV_F through vosss. */
+ static void
+ VDIV_F32(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+ {
+   vosss(_jit, ARM_VDIV_F, r0, r1, r2);
+ }
+
+ /* VDIV, double precision: ARM_VDIV_F with ARM_V_F64 through voddd. */
+ static void
+ VDIV_F64(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+ {
+   voddd(_jit, ARM_VDIV_F | ARM_V_F64, r0, r1, r2);
+ }
+
+ /* VABS, single precision: ARM_VABS_F through vo_ss. */
+ static void
+ VABS_F32(jit_state_t *_jit, int32_t r0, int32_t r1)
+ {
+   vo_ss(_jit, ARM_VABS_F, r0, r1);
+ }
+
+ /* VABS, double precision: ARM_VABS_F with ARM_V_F64 through vo_dd. */
+ static void
+ VABS_F64(jit_state_t *_jit, int32_t r0, int32_t r1)
+ {
+   vo_dd(_jit, ARM_VABS_F | ARM_V_F64, r0, r1);
+ }
+
+ /* VNEG, single precision: ARM_VNEG_F through vo_ss. */
+ static void
+ VNEG_F32(jit_state_t *_jit, int32_t r0, int32_t r1)
+ {
+   vo_ss(_jit, ARM_VNEG_F, r0, r1);
+ }
+
+ /* VNEG, double precision: ARM_VNEG_F with ARM_V_F64 through vo_dd. */
+ static void
+ VNEG_F64(jit_state_t *_jit, int32_t r0, int32_t r1)
+ {
+   vo_dd(_jit, ARM_VNEG_F | ARM_V_F64, r0, r1);
+ }
+
+ /* VSQRT, single precision: ARM_VSQRT_F through vo_ss. */
+ static void
+ VSQRT_F32(jit_state_t *_jit, int32_t r0, int32_t r1)
+ {
+   vo_ss(_jit, ARM_VSQRT_F, r0, r1);
+ }
+
+ /* VSQRT, double precision: ARM_VSQRT_F with ARM_V_F64 through vo_dd. */
+ static void
+ VSQRT_F64(jit_state_t *_jit, int32_t r0, int32_t r1)
+ {
+   vo_dd(_jit, ARM_VSQRT_F | ARM_V_F64, r0, r1);
+ }
+
+ /* VMOV between VFP registers, single precision: ARM_VMOV_F via vo_ss. */
+ static void
+ VMOV_F32(jit_state_t *_jit, int32_t r0, int32_t r1)
+ {
+   vo_ss(_jit, ARM_VMOV_F, r0, r1);
+ }
+
+ /* VMOV between VFP registers, double precision: ARM_VMOV_F with
+  * ARM_V_F64 via vo_dd. */
+ static void
+ VMOV_F64(jit_state_t *_jit, int32_t r0, int32_t r1)
+ {
+   vo_dd(_jit, ARM_VMOV_F | ARM_V_F64, r0, r1);
+ }
+
+ /* ARM_VMOV_D_AA (vmov dn, rn,rn): r0 is the VFP register, r1 and r2 the
+  * two core registers.  Note the argument rotation expected by vorrd. */
+ static void
+ VMOV_D_AA(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+ {
+   vorrd(_jit, ARM_VMOV_D_AA, r1, r2, r0);
+ }
+
+ /* ARM_VMOV_S_A (vmov sn, rn): r0 is the VFP register, r1 the core
+  * register; vors_ takes them in the opposite order. */
+ static void
+ VMOV_S_A(jit_state_t *_jit, int32_t r0, int32_t r1)
+ {
+   vors_(_jit, ARM_VMOV_S_A, r1, r0);
+ }
+
+ /* VCMP, single precision: ARM_VCMP through vo_ss. */
+ static void
+ VCMP_F32(jit_state_t *_jit, int32_t r0, int32_t r1)
+ {
+   vo_ss(_jit, ARM_VCMP, r0, r1);
+ }
+
+ /* VCMP, double precision: ARM_VCMP with ARM_V_F64 through vo_dd. */
+ static void
+ VCMP_F64(jit_state_t *_jit, int32_t r0, int32_t r1)
+ {
+   vo_dd(_jit, ARM_VCMP | ARM_V_F64, r0, r1);
+ }
+
+ /* Emit ARM_VMRS with 0xf in the register field at bits 15:12. */
+ static void
+ VMRS(jit_state_t *_jit)
+ {
+   emit_wide_thumb(_jit, ARM_CC_AL | ARM_VMRS | (0xf << 12));
+ }
+
+ /* VCVT using the ARM_VCVT_S32_F32 opcode, encoded with vo_ss. */
+ static void
+ VCVT_S32_F32(jit_state_t *_jit, int32_t r0, int32_t r1)
+ {
+   vo_ss(_jit, ARM_VCVT_S32_F32, r0, r1);
+ }
+
+ /* VCVT using the ARM_VCVT_S32_F64 opcode, encoded with vo_ss. */
+ static void
+ VCVT_S32_F64(jit_state_t *_jit, int32_t r0, int32_t r1)
+ {
+   vo_ss(_jit, ARM_VCVT_S32_F64, r0, r1);
+ }
+
+ /* VCVT using the ARM_VCVT_F32_S32 opcode, encoded with vo_ss. */
+ static void
+ VCVT_F32_S32(jit_state_t *_jit, int32_t r0, int32_t r1)
+ {
+   vo_ss(_jit, ARM_VCVT_F32_S32, r0, r1);
+ }
+
+ /* VCVT using the ARM_VCVT_F64_S32 opcode, encoded with vo_ss. */
+ static void
+ VCVT_F64_S32(jit_state_t *_jit, int32_t r0, int32_t r1)
+ {
+   vo_ss(_jit, ARM_VCVT_F64_S32, r0, r1);
+ }
+
+ /* VCVT using the ARM_VCVT_F32_F64 opcode, encoded with vo_ss. */
+ static void
+ VCVT_F32_F64(jit_state_t *_jit, int32_t r0, int32_t r1)
+ {
+   vo_ss(_jit, ARM_VCVT_F32_F64, r0, r1);
+ }
+
+ /* VCVT using the ARM_VCVT_F64_F32 opcode, encoded with vo_ss. */
+ static void
+ VCVT_F64_F32(jit_state_t *_jit, int32_t r0, int32_t r1)
+ {
+   vo_ss(_jit, ARM_VCVT_F64_F32, r0, r1);
+ }
+
+ /* Emit ARM_VMOV_A_D through vori_, with r0 and r1 passed in order. */
+ static void
+ VMOV_A_S32(jit_state_t *_jit, int32_t r0, int32_t r1)
+ {
+   vori_(_jit, ARM_VMOV_A_D, r0, r1);
+ }
+
+ /* Emit ARM_VMOV_D_A through vori_; the two operands are swapped on the
+  * way in. */
+ static void
+ VMOV_V_I32(jit_state_t *_jit, int32_t r0, int32_t r1)
+ {
+   vori_(_jit, ARM_VMOV_D_A, r1, r0);
+ }
+
+ /* Emit an immediate move into r0.
+  * "oi" should be the result of encode_vfp_double */
+ static void
+ VIMM(jit_state_t *_jit, int32_t oi, int32_t r0)
+ {
+   vodi(_jit, oi, r0);
+ }
+
+ /* index is multiplied by four */
+ /* VLDR, single precision, without ARM_P (the positive-offset bit). */
+ static void
+ VLDRN_F32(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t i0)
+ {
+   vldst(_jit, ARM_VLDR, r0, r1, i0);
+ }
+
+ /* VLDR, single precision, with ARM_P (positive offset) set. */
+ static void
+ VLDR_F32(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t i0)
+ {
+   vldst(_jit, ARM_VLDR | ARM_P, r0, r1, i0);
+ }
+
+ /* VLDR, double precision, without ARM_P (the positive-offset bit). */
+ static void
+ VLDRN_F64(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t i0)
+ {
+   vldst(_jit, ARM_VLDR | ARM_V_F64, r0, r1, i0);
+ }
+
+ /* VLDR, double precision, with ARM_P (positive offset) set. */
+ static void
+ VLDR_F64(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t i0)
+ {
+   vldst(_jit, ARM_VLDR | ARM_V_F64 | ARM_P, r0, r1, i0);
+ }
+
+ /* VSTR, single precision, without ARM_P (the positive-offset bit). */
+ static void
+ VSTRN_F32(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t i0)
+ {
+   vldst(_jit, ARM_VSTR, r0, r1, i0);
+ }
+
+ /* VSTR, single precision, with ARM_P (positive offset) set. */
+ static void
+ VSTR_F32(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t i0)
+ {
+   vldst(_jit, ARM_VSTR | ARM_P, r0, r1, i0);
+ }
+
+ /* VSTR, double precision, without ARM_P (the positive-offset bit). */
+ static void
+ VSTRN_F64(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t i0)
+ {
+   vldst(_jit, ARM_VSTR | ARM_V_F64, r0, r1, i0);
+ }
+
+ /* VSTR, double precision, with ARM_P (positive offset) set. */
+ static void
+ VSTR_F64(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t i0)
+ {
+   vldst(_jit, ARM_VSTR | ARM_V_F64 | ARM_P, r0, r1, i0);
+ }
+
+ /* Single-precision absolute value: forwards to VABS_F32. */
+ static void
+ absr_f(jit_state_t *_jit, int32_t r0, int32_t r1)
+ {
+   VABS_F32(_jit, r0, r1);
+ }
+
+ /* Double-precision absolute value: forwards to VABS_F64. */
+ static void
+ absr_d(jit_state_t *_jit, int32_t r0, int32_t r1)
+ {
+   VABS_F64(_jit, r0, r1);
+ }
+
+ /* Single-precision negation: forwards to VNEG_F32. */
+ static void
+ negr_f(jit_state_t *_jit, int32_t r0, int32_t r1)
+ {
+   VNEG_F32(_jit, r0, r1);
+ }
+
+ /* Double-precision negation: forwards to VNEG_F64. */
+ static void
+ negr_d(jit_state_t *_jit, int32_t r0, int32_t r1)
+ {
+   VNEG_F64(_jit, r0, r1);
+ }
+
+ /* Single-precision square root: forwards to VSQRT_F32. */
+ static void
+ sqrtr_f(jit_state_t *_jit, int32_t r0, int32_t r1)
+ {
+   VSQRT_F32(_jit, r0, r1);
+ }
+
+ /* Double-precision square root: forwards to VSQRT_F64. */
+ static void
+ sqrtr_d(jit_state_t *_jit, int32_t r0, int32_t r1)
+ {
+   VSQRT_F64(_jit, r0, r1);
+ }
+
+ /* Single-precision add: forwards to VADD_F32. */
+ static void
+ addr_f(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+ {
+   VADD_F32(_jit, r0, r1, r2);
+ }
+
+ /* Double-precision add: forwards to VADD_F64. */
+ static void
+ addr_d(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+ {
+   VADD_F64(_jit, r0, r1, r2);
+ }
+
+ /* Single-precision subtract: forwards to VSUB_F32. */
+ static void
+ subr_f(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+ {
+   VSUB_F32(_jit, r0, r1, r2);
+ }
+
+ /* Double-precision subtract: forwards to VSUB_F64. */
+ static void
+ subr_d(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+ {
+   VSUB_F64(_jit, r0, r1, r2);
+ }
+
+ /* Single-precision multiply: forwards to VMUL_F32. */
+ static void
+ mulr_f(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+ {
+   VMUL_F32(_jit, r0, r1, r2);
+ }
+
+ /* Double-precision multiply: forwards to VMUL_F64. */
+ static void
+ mulr_d(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+ {
+   VMUL_F64(_jit, r0, r1, r2);
+ }
+
+ /* Single-precision divide: forwards to VDIV_F32. */
+ static void
+ divr_f(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+ {
+   VDIV_F32(_jit, r0, r1, r2);
+ }
+
+ /* Double-precision divide: forwards to VDIV_F64. */
+ static void
+ divr_d(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+ {
+   VDIV_F64(_jit, r0, r1, r2);
+ }
+
+ /* Single-precision compare of r0 with r1: forwards to VCMP_F32. */
+ static void
+ cmp_f(jit_state_t *_jit, int32_t r0, int32_t r1)
+ {
+   VCMP_F32(_jit, r0, r1);
+ }
+
+ /* Double-precision compare of r0 with r1: forwards to VCMP_F64. */
+ static void
+ cmp_d(jit_state_t *_jit, int32_t r0, int32_t r1)
+ {
+   VCMP_F64(_jit, r0, r1);
+ }
+
+ /* Tail shared by the direct float branches: emit VMRS, then a
+  * conditional branch on `cc' whose relocation is handed back. */
+ static jit_reloc_t
+ vbcmp_x(jit_state_t *_jit, int cc)
+ {
+   jit_reloc_t taken;
+
+   VMRS(_jit);
+   taken = T2_CC_B(_jit, cc);
+   return taken;
+ }
+
+ /* Single-precision compare of r0 with r1 followed by a branch on cc. */
+ static jit_reloc_t
+ vbcmp_f(jit_state_t *_jit, int cc, int32_t r0, int32_t r1)
+ {
+   VCMP_F32(_jit, r0, r1);
+   return vbcmp_x(_jit, cc);
+ }
+
+ /* Double-precision compare of r0 with r1 followed by a branch on cc. */
+ static jit_reloc_t
+ vbcmp_d(jit_state_t *_jit, int cc, int32_t r0, int32_t r1)
+ {
+   VCMP_F64(_jit, r0, r1);
+   return vbcmp_x(_jit, cc);
+ }
+
+ /* Tail shared by the negated float branches: after VMRS, a conditional
+  * branch on `cc' hops over an unconditional branch.  The unconditional
+  * branch is the relocation returned, so it is taken when `cc' fails. */
+ static jit_reloc_t
+ vbncmp_x(jit_state_t *_jit, int cc)
+ {
+   VMRS(_jit);
+
+   jit_reloc_t skip = T2_CC_B(_jit, cc);
+   jit_reloc_t taken = T2_B(_jit);
+
+   jit_patch_here(_jit, skip);
+   return taken;
+ }
+
+ /* Single-precision compare of r0 with r1, then branch when cc fails. */
+ static jit_reloc_t
+ vbncmp_f(jit_state_t *_jit, int cc, int32_t r0, int32_t r1)
+ {
+   VCMP_F32(_jit, r0, r1);
+   return vbncmp_x(_jit, cc);
+ }
+
+ /* Double-precision compare of r0 with r1, then branch when cc fails. */
+ static jit_reloc_t
+ vbncmp_d(jit_state_t *_jit, int cc, int32_t r0, int32_t r1)
+ {
+   VCMP_F64(_jit, r0, r1);
+   return vbncmp_x(_jit, cc);
+ }
+
+ /* Float less-than branch: compare, then branch on ARM_CC_MI. */
+ static jit_reloc_t
+ bltr_f(jit_state_t *_jit, int32_t r0, int32_t r1)
+ {
+   const int cond = ARM_CC_MI;
+
+   return vbcmp_f(_jit, cond, r0, r1);
+ }
+
+ /* Double less-than branch: compare, then branch on ARM_CC_MI. */
+ static jit_reloc_t
+ bltr_d(jit_state_t *_jit, int32_t r0, int32_t r1)
+ {
+   const int cond = ARM_CC_MI;
+
+   return vbcmp_d(_jit, cond, r0, r1);
+ }
+
+ /* Float less-or-equal branch: compare, then branch on ARM_CC_LS. */
+ static jit_reloc_t
+ bler_f(jit_state_t *_jit, int32_t r0, int32_t r1)
+ {
+   const int cond = ARM_CC_LS;
+
+   return vbcmp_f(_jit, cond, r0, r1);
+ }
+
+ /* Double less-or-equal branch: compare, then branch on ARM_CC_LS. */
+ static jit_reloc_t
+ bler_d(jit_state_t *_jit, int32_t r0, int32_t r1)
+ {
+   const int cond = ARM_CC_LS;
+
+   return vbcmp_d(_jit, cond, r0, r1);
+ }
+
+ /* Float equality branch: compare, then branch on ARM_CC_EQ. */
+ static jit_reloc_t
+ beqr_f(jit_state_t *_jit, int32_t r0, int32_t r1)
+ {
+   const int cond = ARM_CC_EQ;
+
+   return vbcmp_f(_jit, cond, r0, r1);
+ }
+
+ /* Double equality branch: compare, then branch on ARM_CC_EQ. */
+ static jit_reloc_t
+ beqr_d(jit_state_t *_jit, int32_t r0, int32_t r1)
+ {
+   const int cond = ARM_CC_EQ;
+
+   return vbcmp_d(_jit, cond, r0, r1);
+ }
+
+ /* Float greater-or-equal branch: compare, then branch on ARM_CC_GE. */
+ static jit_reloc_t
+ bger_f(jit_state_t *_jit, int32_t r0, int32_t r1)
+ {
+   const int cond = ARM_CC_GE;
+
+   return vbcmp_f(_jit, cond, r0, r1);
+ }
+
+ /* Double greater-or-equal branch: compare, then branch on ARM_CC_GE. */
+ static jit_reloc_t
+ bger_d(jit_state_t *_jit, int32_t r0, int32_t r1)
+ {
+   const int cond = ARM_CC_GE;
+
+   return vbcmp_d(_jit, cond, r0, r1);
+ }
+
+ /* Float greater-than branch: compare, then branch on ARM_CC_GT. */
+ static jit_reloc_t
+ bgtr_f(jit_state_t *_jit, int32_t r0, int32_t r1)
+ {
+   const int cond = ARM_CC_GT;
+
+   return vbcmp_f(_jit, cond, r0, r1);
+ }
+
+ /* Double greater-than branch: compare, then branch on ARM_CC_GT. */
+ static jit_reloc_t
+ bgtr_d(jit_state_t *_jit, int32_t r0, int32_t r1)
+ {
+   const int cond = ARM_CC_GT;
+
+   return vbcmp_d(_jit, cond, r0, r1);
+ }
+
+ /* Float not-equal branch: compare, then branch on ARM_CC_NE. */
+ static jit_reloc_t
+ bner_f(jit_state_t *_jit, int32_t r0, int32_t r1)
+ {
+   const int cond = ARM_CC_NE;
+
+   return vbcmp_f(_jit, cond, r0, r1);
+ }
+
+ /* Double not-equal branch: compare, then branch on ARM_CC_NE. */
+ static jit_reloc_t
+ bner_d(jit_state_t *_jit, int32_t r0, int32_t r1)
+ {
+   const int cond = ARM_CC_NE;
+
+   return vbcmp_d(_jit, cond, r0, r1);
+ }
+
+ /* Float unordered-or-less-than branch: taken when ARM_CC_GE fails. */
+ static jit_reloc_t
+ bunltr_f(jit_state_t *_jit, int32_t r0, int32_t r1)
+ {
+   const int skip_cond = ARM_CC_GE;
+
+   return vbncmp_f(_jit, skip_cond, r0, r1);
+ }
+
+ /* Double unordered-or-less-than branch: taken when ARM_CC_GE fails. */
+ static jit_reloc_t
+ bunltr_d(jit_state_t *_jit, int32_t r0, int32_t r1)
+ {
+   const int skip_cond = ARM_CC_GE;
+
+   return vbncmp_d(_jit, skip_cond, r0, r1);
+ }
+
+ /* Float unordered-or-less-equal branch: taken when ARM_CC_GT fails. */
+ static jit_reloc_t
+ bunler_f(jit_state_t *_jit, int32_t r0, int32_t r1)
+ {
+   const int skip_cond = ARM_CC_GT;
+
+   return vbncmp_f(_jit, skip_cond, r0, r1);
+ }
+
+ /* Double unordered-or-less-equal branch: taken when ARM_CC_GT fails. */
+ static jit_reloc_t
+ bunler_d(jit_state_t *_jit, int32_t r0, int32_t r1)
+ {
+   const int skip_cond = ARM_CC_GT;
+
+   return vbncmp_d(_jit, skip_cond, r0, r1);
+ }
+
+ /* Float unordered-or-greater-than branch: branch on ARM_CC_HI. */
+ static jit_reloc_t
+ bungtr_f(jit_state_t *_jit, int32_t r0, int32_t r1)
+ {
+   const int cond = ARM_CC_HI;
+
+   return vbcmp_f(_jit, cond, r0, r1);
+ }
+
+ /* Double unordered-or-greater-than branch: branch on ARM_CC_HI. */
+ static jit_reloc_t
+ bungtr_d(jit_state_t *_jit, int32_t r0, int32_t r1)
+ {
+   const int cond = ARM_CC_HI;
+
+   return vbcmp_d(_jit, cond, r0, r1);
+ }
+
+ /* Float ordered branch: compare, then branch on ARM_CC_VC. */
+ static jit_reloc_t
+ bordr_f(jit_state_t *_jit, int32_t r0, int32_t r1)
+ {
+   const int cond = ARM_CC_VC;
+
+   return vbcmp_f(_jit, cond, r0, r1);
+ }
+
+ /* Double ordered branch: compare, then branch on ARM_CC_VC. */
+ static jit_reloc_t
+ bordr_d(jit_state_t *_jit, int32_t r0, int32_t r1)
+ {
+   const int cond = ARM_CC_VC;
+
+   return vbcmp_d(_jit, cond, r0, r1);
+ }
+
+ /* Float unordered branch: compare, then branch on ARM_CC_VS. */
+ static jit_reloc_t
+ bunordr_f(jit_state_t *_jit, int32_t r0, int32_t r1)
+ {
+   const int cond = ARM_CC_VS;
+
+   return vbcmp_f(_jit, cond, r0, r1);
+ }
+
+ /* Double unordered branch: compare, then branch on ARM_CC_VS. */
+ static jit_reloc_t
+ bunordr_d(jit_state_t *_jit, int32_t r0, int32_t r1)
+ {
+   const int cond = ARM_CC_VS;
+
+   return vbcmp_d(_jit, cond, r0, r1);
+ }
+
+ /* Tail of the unordered-or-equal branches.  After VMRS:
+  *   - VS jumps straight to the unconditional branch,
+  *   - NE jumps past it,
+  *   - everything else falls into it.
+  * The unconditional branch is the relocation returned. */
+ static jit_reloc_t
+ buneqr_x(jit_state_t *_jit)
+ {
+   VMRS(_jit);
+
+   jit_reloc_t on_vs = T2_CC_B(_jit, ARM_CC_VS);
+   jit_reloc_t on_ne = T2_CC_B(_jit, ARM_CC_NE);
+
+   jit_patch_here(_jit, on_vs);
+   jit_reloc_t taken = T2_B(_jit);
+   jit_patch_here(_jit, on_ne);
+
+   return taken;
+ }
+
+ /* Float unordered-or-equal branch: VCMP_F32, then the buneqr_x tail. */
+ static jit_reloc_t
+ buneqr_f(jit_state_t *_jit, int32_t r0, int32_t r1)
+ {
+   VCMP_F32(_jit, r0, r1);
+   return buneqr_x(_jit);
+ }
+
+ /* Double unordered-or-equal branch: VCMP_F64, then the buneqr_x tail. */
+ static jit_reloc_t
+ buneqr_d(jit_state_t *_jit, int32_t r0, int32_t r1)
+ {
+   VCMP_F64(_jit, r0, r1);
+   return buneqr_x(_jit);
+ }
+
+ /* Tail of the unordered-or-greater-equal branches.  After VMRS, MI jumps
+  * past the real branch, which is conditional on HS and is returned. */
+ static jit_reloc_t
+ bunger_x(jit_state_t *_jit)
+ {
+   VMRS(_jit);
+
+   jit_reloc_t on_mi = T2_CC_B(_jit, ARM_CC_MI);
+   jit_reloc_t taken = T2_CC_B(_jit, ARM_CC_HS);
+
+   jit_patch_here(_jit, on_mi);
+   return taken;
+ }
+
+ /* Float unordered-or-greater-equal branch: VCMP_F32, then bunger_x. */
+ static jit_reloc_t
+ bunger_f(jit_state_t *_jit, int32_t r0, int32_t r1)
+ {
+   VCMP_F32(_jit, r0, r1);
+   return bunger_x(_jit);
+ }
+
+ /* Double unordered-or-greater-equal branch: VCMP_F64, then bunger_x. */
+ static jit_reloc_t
+ bunger_d(jit_state_t *_jit, int32_t r0, int32_t r1)
+ {
+   VCMP_F64(_jit, r0, r1);
+   return bunger_x(_jit);
+ }
+
+ /* Tail of the less-or-greater branches.  After VMRS, both VS and EQ jump
+  * past an unconditional branch; that branch is the relocation returned. */
+ static jit_reloc_t
+ bltgtr_x(jit_state_t *_jit)
+ {
+   VMRS(_jit);
+
+   jit_reloc_t on_vs = T2_CC_B(_jit, ARM_CC_VS);
+   jit_reloc_t on_eq = T2_CC_B(_jit, ARM_CC_EQ);
+   jit_reloc_t taken = T2_B(_jit);
+
+   jit_patch_here(_jit, on_vs);
+   jit_patch_here(_jit, on_eq);
+   return taken;
+ }
+
+ /* Float less-or-greater branch: VCMP_F32, then the bltgtr_x tail. */
+ static jit_reloc_t
+ bltgtr_f(jit_state_t *_jit, int32_t r0, int32_t r1)
+ {
+   VCMP_F32(_jit, r0, r1);
+   return bltgtr_x(_jit);
+ }
+
+ /* Double less-or-greater branch: VCMP_F64, then the bltgtr_x tail. */
+ static jit_reloc_t
+ bltgtr_d(jit_state_t *_jit, int32_t r0, int32_t r1)
+ {
+   VCMP_F64(_jit, r0, r1);
+   return bltgtr_x(_jit);
+ }
+
+ /* Single-precision load through register r1: VLDR_F32 with index 0. */
+ static void
+ ldr_f(jit_state_t *_jit, int32_t r0, int32_t r1)
+ {
+   VLDR_F32(_jit, r0, r1, 0);
+ }
+
+ /* Double-precision load through register r1: VLDR_F64 with index 0. */
+ static void
+ ldr_d(jit_state_t *_jit, int32_t r0, int32_t r1)
+ {
+   VLDR_F64(_jit, r0, r1, 0);
+ }
+
+ /* Single-precision store of r1 through register r0: VSTR_F32, index 0. */
+ static void
+ str_f(jit_state_t *_jit, int32_t r0, int32_t r1)
+ {
+   VSTR_F32(_jit, r1, r0, 0);
+ }
+
+ /* Double-precision store of r1 through register r0: VSTR_F64, index 0. */
+ static void
+ str_d(jit_state_t *_jit, int32_t r0, int32_t r1)
+ {
+   VSTR_F64(_jit, r1, r0, 0);
+ }
+
+ /* Single-precision register move; nothing is emitted when source and
+  * destination are the same register. */
+ static void
+ movr_f(jit_state_t *_jit, int32_t r0, int32_t r1)
+ {
+   if (r0 == r1)
+     return;
+
+   VMOV_F32(_jit, r0, r1);
+ }
+
+ /* Double-precision register move; nothing is emitted when source and
+  * destination are the same register. */
+ static void
+ movr_d(jit_state_t *_jit, int32_t r0, int32_t r1)
+ {
+   if (r0 == r1)
+     return;
+
+   VMOV_F64(_jit, r0, r1);
+ }
+
+ static int
+ encode_vfp_double(int mov, int inv, unsigned lo, unsigned hi)
+ { /* Try to express the two 32-bit halves lo/hi as an immediate-move
+ * opcode word.  On success the result is ARM_VMOVI (or ARM_VMVNI when
+ * `inv' is set) combined with a mode field and the scattered bits of an
+ * 8-bit immediate; when no pattern below matches the result is -1.
+ * In this file movi_d calls it with mov=1, first with inv=0 on the raw
+ * halves and then with inv=1 on their complements, and hands the result
+ * to VIMM. */
+ int code, mode, imm, mask;
+
+ if (hi != lo) { /* differing halves: only the per-byte I64 form can match */
+ if (mov && !inv) {
+ /* (I64)
+ * aaaaaaaabbbbbbbbccccccccddddddddeeeeeeeeffffffffgggggggghhhhhhhh
+ */
+ for (mode = 0, mask = 0xff; mode < 4; mask <<= 8, mode++) { /* every byte must be 0x00 or 0xff */
+ imm = lo & mask;
+ if (imm != mask && imm != 0)
+ goto fail;
+ imm = hi & mask;
+ if (imm != mask && imm != 0)
+ goto fail;
+ }
+ mode = 0xe20;
+ imm = (((hi & 0x80000000) >> 24) | ((hi & 0x00800000) >> 17) |
+ ((hi & 0x00008000) >> 10) | ((hi & 0x00000080) >> 3) |
+ ((lo & 0x80000000) >> 28) | ((lo & 0x00800000) >> 21) |
+ ((lo & 0x00008000) >> 14) | ((lo & 0x00000080) >> 7)); /* top bit of each of the 8 bytes becomes one imm bit */
+ goto success;
+ }
+ goto fail;
+ }
+ /* (I32)
+ * 00000000 00000000 00000000 abcdefgh
+ * 00000000 00000000 abcdefgh 00000000
+ * 00000000 abcdefgh 00000000 00000000
+ * abcdefgh 00000000 00000000 00000000 */
+ for (mode = 0, mask = 0xff; mode < 4; mask <<= 8, mode++) {
+ if ((lo & mask) == lo) {
+ imm = lo >> (mode << 3);
+ mode <<= 9;
+ goto success;
+ }
+ }
+ /* (I16)
+ * 00000000 abcdefgh 00000000 abcdefgh
+ * abcdefgh 00000000 abcdefgh 00000000 */
+ for (mode = 0, mask = 0xff; mode < 2; mask <<= 8, mode++) {
+ if ((lo & mask) && ((lo & (mask << 16)) >> 16) == (lo & mask)) {
+ imm = lo >> (mode << 3);
+ mode = 0x800 | (mode << 9);
+ goto success;
+ }
+ }
+ if (mov) {
+ /* (I32)
+ * 00000000 00000000 abcdefgh 11111111
+ * 00000000 abcdefgh 11111111 11111111 */
+ for (mode = 0, mask = 0xff; mode < 2;
+ mask = (mask << 8) | 0xff, mode++) {
+ if ((lo & mask) == mask &&
+ !((lo & ~mask) >> 8) &&
+ (imm = lo >> (8 + (mode << 8)))) { /* NOTE(review): the shift count is 8 + (mode << 8), i.e. 264 when mode == 1, which is wider than `lo'; possibly (mode << 3) was intended -- confirm */
+ mode = 0xc00 | (mode << 8);
+ goto success;
+ }
+ }
+ if (!inv) {
+ /* (F32)
+ * aBbbbbbc defgh000 00000000 00000000
+ * from the ARM Architecture Reference Manual:
+ * In this entry, B = NOT(b). The bit pattern represents the
+ * floating-point number (-1)^s* 2^exp * mantissa, where
+ * S = UInt(a),
+ * exp = UInt(NOT(b):c:d)-3 and
+ * mantissa = (16+UInt(e:f:g:h))/16. */
+ if ((lo & 0x7ffff) == 0 &&
+ (((lo & 0x7e000000) == 0x3e000000) ||
+ ((lo & 0x7e000000) == 0x40000000))) {
+ mode = 0xf00;
+ imm = ((lo >> 24) & 0x80) | ((lo >> 19) & 0x7f);
+ goto success;
+ }
+ }
+ }
+
+ fail:
+ /* need another approach (load from memory, move from arm register, etc) */
+ return -1;
+
+ success:
+ code = inv ? ARM_VMVNI : ARM_VMOVI;
+ switch ((mode & 0xf00) >> 8) { /* adjust or sanity-check mode by its bits 11:8 */
+ case 0x0: case 0x2: case 0x4: case 0x6:
+ case 0x8: case 0xa:
+ if (inv) mode |= 0x20;
+ if (!mov) mode |= 0x100;
+ break;
+ case 0x1: case 0x3: case 0x5: case 0x7:
+ /* should actually not reach here */
+ ASSERT(!inv); /* fallthrough */
+ case 0x9: case 0xb:
+ ASSERT(!mov);
+ break;
+ case 0xc: case 0xd:
+ /* should actually not reach here */
+ ASSERT(inv); /* fallthrough */
+ case 0xe:
+ ASSERT(mode & 0x20);
+ ASSERT(mov && !inv);
+ break;
+ default:
+ ASSERT(!(mode & 0x20));
+ break;
+ }
+ imm = ((imm & 0x80) << 17) | ((imm & 0x70) << 12) | (imm & 0x0f); /* imm bit 7 -> bit 24, bits 6:4 -> bits 18:16, bits 3:0 stay in place */
+ code |= mode | imm;
+
+ if (code & 0x1000000) /* bit 24 selects the 0xff or the 0xef top byte */
+ code |= 0xff000000;
+ else
+ code |= 0xef000000;
+
+ return code;
+ }
+
+ /* Load a single-precision constant: its bit pattern is placed in a
+  * scratch core register with movi and transferred with VMOV_S_A. */
+ static void
+ movi_f(jit_state_t *_jit, int32_t r0, jit_float32_t i0)
+ {
+   union { int32_t i; jit_float32_t f; } bits = { .f = i0 };
+   int32_t scratch = jit_gpr_regno(get_temp_gpr(_jit));
+
+   movi(_jit, scratch, bits.i);
+   VMOV_S_A(_jit, r0, scratch);
+   unget_temp_gpr(_jit);
+ }
+
+ /* Load a double-precision constant.  An immediate encoding is tried
+  * first, on the raw halves and then on their complements; if neither
+  * encodes, both halves go through scratch core registers and VMOV_D_AA. */
+ static void
+ movi_d(jit_state_t *_jit, int32_t r0, jit_float64_t i0)
+ {
+   union { int32_t i[2]; jit_float64_t d; } bits = { .d = i0 };
+   int32_t code = encode_vfp_double(1, 0, bits.i[0], bits.i[1]);
+
+   if (code == -1)
+     code = encode_vfp_double(1, 1, ~bits.i[0], ~bits.i[1]);
+
+   if (code != -1) {
+     VIMM(_jit, code, r0);
+     return;
+   }
+
+   int32_t first = jit_gpr_regno(get_temp_gpr(_jit));
+   int32_t second = jit_gpr_regno(get_temp_gpr(_jit));
+   movi(_jit, first, bits.i[0]);
+   movi(_jit, second, bits.i[1]);
+   VMOV_D_AA(_jit, r0, first, second);
+   unget_temp_gpr(_jit);
+   unget_temp_gpr(_jit);
+ }
+
+ /* Conversion between float widths: forwards to VCVT_F64_F32. */
+ static void
+ extr_d_f(jit_state_t *_jit, int32_t r0, int32_t r1)
+ {
+   VCVT_F64_F32(_jit, r0, r1);
+ }
+
+ /* Conversion between float widths: forwards to VCVT_F32_F64. */
+ static void
+ extr_f_d(jit_state_t *_jit, int32_t r0, int32_t r1)
+ {
+   VCVT_F32_F64(_jit, r0, r1);
+ }
+
+ /* Integer to single precision: move core register r1 into VFP register
+  * r0, then convert r0 in place with VCVT_F32_S32. */
+ static void
+ extr_f(jit_state_t *_jit, int32_t r0, int32_t r1)
+ {
+   VMOV_V_I32(_jit, r0, r1);     /* transfer the integer bits */
+   VCVT_F32_S32(_jit, r0, r0);   /* convert in place */
+ }
+
+ /* Integer to double precision: move core register r1 into VFP register
+  * r0, then convert r0 in place with VCVT_F64_S32. */
+ static void
+ extr_d(jit_state_t *_jit, int32_t r0, int32_t r1)
+ {
+   VMOV_V_I32(_jit, r0, r1);     /* transfer the integer bits */
+   VCVT_F64_S32(_jit, r0, r0);   /* convert in place */
+ }
+
+ /* Single precision to integer: convert r1 into a scratch VFP register
+  * with VCVT_S32_F32, then move the result to core register r0. */
+ static void
+ truncr_f_i(jit_state_t *_jit, int32_t r0, int32_t r1)
+ {
+   jit_fpr_t scratch = get_temp_fpr(_jit);
+
+   VCVT_S32_F32(_jit, jit_fpr_regno(scratch), r1);
+   VMOV_A_S32(_jit, r0, jit_fpr_regno(scratch));
+   unget_temp_fpr(_jit);
+ }
+
+ /* Double precision to integer: convert r1 into a scratch VFP register
+  * with VCVT_S32_F64, then move the result to core register r0. */
+ static void
+ truncr_d_i(jit_state_t *_jit, int32_t r0, int32_t r1)
+ {
+   jit_fpr_t scratch = get_temp_fpr(_jit);
+
+   VCVT_S32_F64(_jit, jit_fpr_regno(scratch), r1);
+   VMOV_A_S32(_jit, r0, jit_fpr_regno(scratch));
+   unget_temp_fpr(_jit);
+ }
+
+ /* Single-precision load from the absolute address i0, via a scratch core
+  * register that receives the address. */
+ static void
+ ldi_f(jit_state_t *_jit, int32_t r0, jit_word_t i0)
+ {
+   int32_t base = jit_gpr_regno(get_temp_gpr(_jit));
+
+   movi(_jit, base, i0);
+   VLDR_F32(_jit, r0, base, 0);
+   unget_temp_gpr(_jit);
+ }
+
+ /* Double-precision load from the absolute address i0, via a scratch core
+  * register that receives the address. */
+ static void
+ ldi_d(jit_state_t *_jit, int32_t r0, jit_word_t i0)
+ {
+   int32_t base = jit_gpr_regno(get_temp_gpr(_jit));
+
+   movi(_jit, base, i0);
+   VLDR_F64(_jit, r0, base, 0);
+   unget_temp_gpr(_jit);
+ }
+
+ /* Single-precision load from r1 + r2: the sum is formed in a scratch
+  * core register, which then serves as the base of a VLDR_F32. */
+ static void
+ ldxr_f(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+ {
+   int32_t base = jit_gpr_regno(get_temp_gpr(_jit));
+
+   addr(_jit, base, r1, r2);
+   VLDR_F32(_jit, r0, base, 0);
+   unget_temp_gpr(_jit);
+ }
+
+ /* Double-precision load from r1 + r2: the sum is formed in a scratch
+  * core register, which then serves as the base of a VLDR_F64. */
+ static void
+ ldxr_d(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+ {
+   int32_t base = jit_gpr_regno(get_temp_gpr(_jit));
+
+   addr(_jit, base, r1, r2);
+   VLDR_F64(_jit, r0, base, 0);
+   unget_temp_gpr(_jit);
+ }
+
+ /* Single-precision load from r1 + i0.  i0 must be a multiple of four.
+  * Magnitudes below 1024 use the instruction's own index field (VLDR for
+  * non-negative offsets, VLDRN for negative ones); anything larger is
+  * added to or subtracted from r1 in a scratch register first. */
+ static void
+ ldxi_f(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0)
+ {
+   const int negative = !(i0 >= 0);
+
+   /* From here on i0 holds the magnitude of the offset. */
+   if (negative)
+     i0 = -i0;
+   ASSERT(!(i0 & 3));
+
+   if (i0 < 1024) {
+     if (negative)
+       VLDRN_F32(_jit, r0, r1, i0 >> 2);
+     else
+       VLDR_F32(_jit, r0, r1, i0 >> 2);
+     return;
+   }
+
+   jit_gpr_t reg = get_temp_gpr(_jit);
+   if (negative)
+     subi(_jit, jit_gpr_regno(reg), r1, i0);
+   else
+     addi(_jit, jit_gpr_regno(reg), r1, i0);
+   VLDR_F32(_jit, r0, jit_gpr_regno(reg), 0);
+   unget_temp_gpr(_jit);
+ }
+
+ /* Double-precision load from r1 + i0.  i0 must be a multiple of four.
+  * Magnitudes below 1024 use the instruction's own index field (VLDR for
+  * non-negative offsets, VLDRN for negative ones); anything larger is
+  * added to or subtracted from r1 in a scratch register first. */
+ static void
+ ldxi_d(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0)
+ {
+   const int negative = !(i0 >= 0);
+
+   /* From here on i0 holds the magnitude of the offset. */
+   if (negative)
+     i0 = -i0;
+   ASSERT(!(i0 & 3));
+
+   if (i0 < 1024) {
+     if (negative)
+       VLDRN_F64(_jit, r0, r1, i0 >> 2);
+     else
+       VLDR_F64(_jit, r0, r1, i0 >> 2);
+     return;
+   }
+
+   jit_gpr_t reg = get_temp_gpr(_jit);
+   if (negative)
+     subi(_jit, jit_gpr_regno(reg), r1, i0);
+   else
+     addi(_jit, jit_gpr_regno(reg), r1, i0);
+   VLDR_F64(_jit, r0, jit_gpr_regno(reg), 0);
+   unget_temp_gpr(_jit);
+ }
+
+ /* Single-precision store of r0 to the absolute address i0, via a scratch
+  * core register that receives the address. */
+ static void
+ sti_f(jit_state_t *_jit, jit_word_t i0, int32_t r0)
+ {
+   int32_t base = jit_gpr_regno(get_temp_gpr(_jit));
+
+   movi(_jit, base, i0);
+   VSTR_F32(_jit, r0, base, 0);
+   unget_temp_gpr(_jit);
+ }
+
+ /* Double-precision store of r0 to the absolute address i0, via a scratch
+  * core register that receives the address. */
+ static void
+ sti_d(jit_state_t *_jit, jit_word_t i0, int32_t r0)
+ {
+   int32_t base = jit_gpr_regno(get_temp_gpr(_jit));
+
+   movi(_jit, base, i0);
+   VSTR_F64(_jit, r0, base, 0);
+   unget_temp_gpr(_jit);
+ }
+
+ /* Single-precision store of r2 at r0 + r1: the sum is formed in a
+  * scratch core register, which then serves as the base of a VSTR_F32. */
+ static void
+ stxr_f(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+ {
+   int32_t base = jit_gpr_regno(get_temp_gpr(_jit));
+
+   addr(_jit, base, r0, r1);
+   VSTR_F32(_jit, r2, base, 0);
+   unget_temp_gpr(_jit);
+ }
+
+ /* Double-precision store of r2 at r0 + r1: the sum is formed in a
+  * scratch core register, which then serves as the base of a VSTR_F64. */
+ static void
+ stxr_d(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+ {
+   int32_t base = jit_gpr_regno(get_temp_gpr(_jit));
+
+   addr(_jit, base, r0, r1);
+   VSTR_F64(_jit, r2, base, 0);
+   unget_temp_gpr(_jit);
+ }
+
+ /* Single-precision store of r1 at r0 + i0.  i0 must be a multiple of
+  * four.  Magnitudes below 1024 use the instruction's own index field
+  * (VSTR for non-negative offsets, VSTRN for negative ones); anything
+  * larger is added to or subtracted from r0 in a scratch register. */
+ static void
+ stxi_f(jit_state_t *_jit, jit_word_t i0, int32_t r0, int32_t r1)
+ {
+   const int negative = !(i0 >= 0);
+
+   /* From here on i0 holds the magnitude of the offset. */
+   if (negative)
+     i0 = -i0;
+   ASSERT(!(i0 & 3));
+
+   if (i0 < 1024) {
+     if (negative)
+       VSTRN_F32(_jit, r1, r0, i0 >> 2);
+     else
+       VSTR_F32(_jit, r1, r0, i0 >> 2);
+     return;
+   }
+
+   jit_gpr_t reg = get_temp_gpr(_jit);
+   if (negative)
+     subi(_jit, jit_gpr_regno(reg), r0, i0);
+   else
+     addi(_jit, jit_gpr_regno(reg), r0, i0);
+   VSTR_F32(_jit, r1, jit_gpr_regno(reg), 0);
+   unget_temp_gpr(_jit);
+ }
+
+ /* Double-precision store of r1 at r0 + i0.  i0 must be a multiple of
+  * four.  Offsets whose magnitude is below 1024 use the instruction's own
+  * index field, which holds the offset divided by four: VSTR_F64 for
+  * non-negative offsets, VSTRN_F64 for negative ones.  Larger offsets are
+  * added to or subtracted from r0 in a scratch register, which then
+  * serves as the base with a zero index. */
+ static void
+ stxi_d(jit_state_t *_jit, jit_word_t i0, int32_t r0, int32_t r1)
+ {
+   if (i0 >= 0) {
+     ASSERT(!(i0 & 3));
+     /* The bound is 1024, as in stxi_f, ldxi_f and ldxi_d.  It used to be
+      * written 0124, an octal literal equal to 84, which pushed offsets
+      * 84..1020 onto the scratch-register path for no reason. */
+     if (i0 < 1024)
+       VSTR_F64(_jit, r1, r0, i0 >> 2);
+     else {
+       jit_gpr_t reg = get_temp_gpr(_jit);
+       addi(_jit, jit_gpr_regno(reg), r0, i0);
+       VSTR_F64(_jit, r1, jit_gpr_regno(reg), 0);
+       unget_temp_gpr(_jit);
+     }
+   }
+   else {
+     i0 = -i0;
+     ASSERT(!(i0 & 3));
+     if (i0 < 1024)
+       VSTRN_F64(_jit, r1, r0, i0 >> 2);
+     else {
+       jit_gpr_t reg = get_temp_gpr(_jit);
+       subi(_jit, jit_gpr_regno(reg), r0, i0);
+       VSTR_F64(_jit, r1, jit_gpr_regno(reg), 0);
+       unget_temp_gpr(_jit);
+     }
+   }
+ }
+
+ /* Return a double: move r into _D0 (a no-op if already there), then
+  * emit the return. */
+ static void
+ retr_d(jit_state_t *_jit, int32_t r)
+ {
+   int32_t result_reg = jit_fpr_regno(_D0);
+
+   movr_d(_jit, result_reg, r);
+   ret(_jit);
+ }
+
+ /* Return a float: move r into _S0 (a no-op if already there), then emit
+  * the return. */
+ static void
+ retr_f(jit_state_t *_jit, int32_t r)
+ {
+   int32_t result_reg = jit_fpr_regno(_S0);
+
+   movr_f(_jit, result_reg, r);
+   ret(_jit);
+ }
+
+ /* Fetch a float call result from _S0 into r0. */
+ static void
+ retval_f(jit_state_t *_jit, int32_t r0)
+ {
+   int32_t result_reg = jit_fpr_regno(_S0);
+
+   movr_f(_jit, r0, result_reg);
+ }
+
+ /* Fetch a double call result from _D0 into r0. */
+ static void
+ retval_d(jit_state_t *_jit, int32_t r0)
+ {
+   int32_t result_reg = jit_fpr_regno(_D0);
+
+   movr_d(_jit, r0, result_reg);
+ }
diff --git a/deps/lightening/lightening/arm.c b/deps/lightening/lightening/arm.c
new file mode 100644
index 0000000..f44f04d
--- /dev/null
+++ b/deps/lightening/lightening/arm.c
@@ -0,0 +1,148 @@
+/*
+ * Copyright (C) 2012-2019 Free Software Foundation, Inc.
+ *
+ * This file is part of GNU lightning.
+ *
+ * GNU lightning is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ *
+ * GNU lightning is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+ * License for more details.
+ *
+ * Authors:
+ * Paulo Cesar Pereira de Andrade
+ */
+
+/*
+ * Types
+ */
+/* One 32-bit value that can also be accessed as two 16-bit halves.
+ * NOTE(review): presumably used to split/join Thumb instruction
+ * halfwords in arm-cpu.c -- confirm against its users.  */
+typedef union _jit_thumb_t {
+ int32_t i;
+ int16_t s[2];
+} jit_thumb_t;
+
+/* libgcc */
+extern void __clear_cache(void *, void *);
+
+#include "arm-cpu.c"
+#include "arm-vfp.c"
+
+/* Registers handed out, in this order, to non-floating-point arguments
+ * by next_abi_arg(); once they are exhausted, arguments go on the stack.  */
+static const jit_gpr_t abi_gpr_args[] = {
+ _R0, _R1, _R2, _R3
+};
+/* Number of entries in abi_gpr_args.  */
+static const int abi_gpr_arg_count = sizeof(abi_gpr_args) / sizeof(abi_gpr_args[0]);
+
+/* Cursor used by next_abi_arg() to assign a location to each argument
+ * of a call or function signature, one argument per call.  */
+struct abi_arg_iterator
+{
+ // Argument descriptors being walked, and how many there are.
+ const jit_operand_t *args;
+ size_t argc;
+
+ // Index of the next argument to place.
+ size_t arg_idx;
+ // Index into abi_gpr_args of the next free argument GPR.
+ size_t gpr_idx;
+ // Bitmask over s0-s15: bit i is set once single-precision slot i is taken.
+ uint32_t vfp_used_registers;
+ // Bytes of stack argument area assigned so far.
+ size_t stack_size;
+ // Not touched by the code in this file's view; TODO confirm its users.
+ size_t stack_padding;
+};
+
+static size_t page_size;
+
+/* One-time platform setup: records the system page size (consumed by
+ * jit_flush) and reports success unconditionally.  */
+jit_bool_t
+jit_get_cpu(void)
+{
+  const long queried = sysconf(_SC_PAGE_SIZE);
+
+  page_size = queried;
+  // FIXME check version, thumb, hardware fp support
+  return 1;
+}
+
+/* Per-state platform initialisation.  Nothing to do on ARM, so this
+ * always succeeds.  */
+jit_bool_t
+jit_init(jit_state_t *_jit)
+{
+  (void) _jit;
+  return 1;
+}
+
+/* Size in bytes of the frame present on function entry; zero here.  */
+static size_t
+jit_initial_frame_size (void)
+{
+  const size_t entry_frame_bytes = 0;
+
+  return entry_frame_bytes;
+}
+
+/* Rewind ITER so that it walks the ARGC descriptors in ARGS from the
+ * start, with no registers or stack assigned yet.  */
+static void
+reset_abi_arg_iterator(struct abi_arg_iterator *iter, size_t argc,
+                       const jit_operand_t *args)
+{
+  // Zero every counter and the VFP allocation mask in one go.
+  memset(iter, 0, sizeof(struct abi_arg_iterator));
+
+  iter->args = args;
+  iter->argc = argc;
+}
+
+/* Assign a location to the next argument of ITER and store it in *ARG.
+ * Non-floating-point arguments take the next register of abi_gpr_args
+ * while any remain; floating-point arguments take the lowest free,
+ * suitably aligned slot among s0-s15; everything else is placed on the
+ * stack relative to JIT_SP.  */
+static void
+next_abi_arg(struct abi_arg_iterator *iter, jit_operand_t *arg)
+{
+  ASSERT(iter->arg_idx < iter->argc);
+  enum jit_operand_abi abi = iter->args[iter->arg_idx].abi;
+  iter->arg_idx++;
+
+  if (is_gpr_arg(abi) && iter->gpr_idx < abi_gpr_arg_count) {
+    jit_gpr_t chosen = abi_gpr_args[iter->gpr_idx];
+    iter->gpr_idx++;
+    *arg = jit_operand_gpr (abi, chosen);
+    return;
+  }
+
+  if (is_fpr_arg(abi)) {
+    // The ARM VFP ABI passes floating-point arguments in d0-d7
+    // (s0-s15), and allows for "back-filling".  Say you have a
+    // function:
+    //
+    //   void f(float a, double b, float c);
+    //
+    // A gets allocated to s0, then b to d1 (which aliases s2+s3), then
+    // c to s1.
+    uint32_t width = abi == JIT_OPERAND_ABI_FLOAT ? 1 : 2;
+    uint32_t mask = (1 << width) - 1;
+    size_t slot = 0;
+    while (slot < 16) {
+      uint32_t wanted = mask << slot;
+      if ((iter->vfp_used_registers & wanted) == 0) {
+        iter->vfp_used_registers |= wanted;
+        *arg = jit_operand_fpr (abi, JIT_FPR(slot));
+        return;
+      }
+      slot += width;
+    }
+  }
+
+  // Stack fallback.  Doubles passed on the stack need to be aligned up
+  // to the next 8 byte boundary.
+  const int is_double = abi == JIT_OPERAND_ABI_DOUBLE;
+  if (is_double)
+    iter->stack_size = jit_align_up(iter->stack_size, 8);
+
+  *arg = jit_operand_mem (abi, JIT_SP, iter->stack_size);
+
+  iter->stack_size += is_double ? 8 : 4;
+}
+
+/* Make freshly written code in [fptr, tptr) visible to instruction
+ * fetch: the range is widened to whole pages (using the page size
+ * recorded by jit_get_cpu) and passed to libgcc's __clear_cache.  */
+static void
+jit_flush(void *fptr, void *tptr)
+{
+  const size_t page_mask = -page_size;
+  jit_word_t first = (jit_word_t)fptr & page_mask;
+  jit_word_t last = ((jit_word_t)tptr + page_size - 1) & page_mask;
+
+  __clear_cache((void *)first, (void *)last);
+}
+
+/* Stack alignment, in bytes, used for this platform.  */
+static inline size_t
+jit_stack_alignment(void)
+{
+  enum { STACK_ALIGNMENT_BYTES = 8 };
+
+  return STACK_ALIGNMENT_BYTES;
+}
+
+/* Hook called by jit_patch_there() after patching the most recently
+ * emitted relocation.  Intentionally a no-op on ARM.  */
+static void
+jit_try_shorten(jit_state_t *_jit, jit_reloc_t reloc, jit_pointer_t addr)
+{
+  (void) _jit;
+  (void) reloc;
+  (void) addr;
+}
+
+/* Turn a code address into a callable pointer by setting its low bit,
+ * which marks the target as Thumb-mode code.  */
+static void*
+bless_function_pointer(void *ptr)
+{
+  uintptr_t bits = (uintptr_t)ptr;
+
+  bits |= 1;
+  return (void*) bits;
+}
diff --git a/deps/lightening/lightening/arm.h b/deps/lightening/lightening/arm.h
new file mode 100644
index 0000000..6131330
--- /dev/null
+++ b/deps/lightening/lightening/arm.h
@@ -0,0 +1,135 @@
+/*
+ * Copyright (C) 2012-2019 Free Software Foundation, Inc.
+ *
+ * This file is part of GNU lightning.
+ *
+ * GNU lightning is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ *
+ * GNU lightning is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+ * License for more details.
+ *
+ * Authors:
+ * Paulo Cesar Pereira de Andrade
+ */
+
+#ifndef _jit_arm_h
+#define _jit_arm_h
+
+
+/* Enables the literal-pool code paths that lightening.c guards with
+ * this macro.  */
+#define JIT_NEEDS_LITERAL_POOL 1
+
+/* Core registers, named by register number.  */
+#define _R0 JIT_GPR(0)
+#define _R1 JIT_GPR(1)
+#define _R2 JIT_GPR(2)
+#define _R3 JIT_GPR(3)
+#define _R4 JIT_GPR(4)
+#define _R5 JIT_GPR(5)
+#define _R6 JIT_GPR(6)
+#define _R7 JIT_GPR(7)
+#define _R8 JIT_GPR(8)
+#define _R9 JIT_GPR(9)
+#define _R10 JIT_GPR(10)
+#define _R11 JIT_GPR(11)
+#define _R12 JIT_GPR(12)
+#define _R13 JIT_GPR(13)
+#define _R14 JIT_GPR(14)
+#define _R15 JIT_GPR(15)
+
+/* Double-precision registers.  Each _Dn is numbered 2*n, i.e. by the
+ * first of the two single-precision registers it overlaps (_D1 shares
+ * its number with _S2).  */
+#define _D0 JIT_FPR(0)
+#define _D1 JIT_FPR(2)
+#define _D2 JIT_FPR(4)
+#define _D3 JIT_FPR(6)
+#define _D4 JIT_FPR(8)
+#define _D5 JIT_FPR(10)
+#define _D6 JIT_FPR(12)
+#define _D7 JIT_FPR(14)
+#define _D8 JIT_FPR(16)
+#define _D9 JIT_FPR(18)
+#define _D10 JIT_FPR(20)
+#define _D11 JIT_FPR(22)
+#define _D12 JIT_FPR(24)
+#define _D13 JIT_FPR(26)
+#define _D14 JIT_FPR(28)
+#define _D15 JIT_FPR(30)
+
+/* Single-precision registers, named by register number.  */
+#define _S0 JIT_FPR(0)
+#define _S1 JIT_FPR(1)
+#define _S2 JIT_FPR(2)
+#define _S3 JIT_FPR(3)
+#define _S4 JIT_FPR(4)
+#define _S5 JIT_FPR(5)
+#define _S6 JIT_FPR(6)
+#define _S7 JIT_FPR(7)
+#define _S8 JIT_FPR(8)
+#define _S9 JIT_FPR(9)
+#define _S10 JIT_FPR(10)
+#define _S11 JIT_FPR(11)
+#define _S12 JIT_FPR(12)
+#define _S13 JIT_FPR(13)
+#define _S14 JIT_FPR(14)
+#define _S15 JIT_FPR(15)
+#define _S16 JIT_FPR(16)
+#define _S17 JIT_FPR(17)
+#define _S18 JIT_FPR(18)
+#define _S19 JIT_FPR(19)
+#define _S20 JIT_FPR(20)
+#define _S21 JIT_FPR(21)
+#define _S22 JIT_FPR(22)
+#define _S23 JIT_FPR(23)
+#define _S24 JIT_FPR(24)
+#define _S25 JIT_FPR(25)
+#define _S26 JIT_FPR(26)
+#define _S27 JIT_FPR(27)
+#define _S28 JIT_FPR(28)
+#define _S29 JIT_FPR(29)
+#define _S30 JIT_FPR(30)
+#define _S31 JIT_FPR(31)
+
+/* Portable register names mapped onto ARM registers.
+ * NOTE(review): by the usual lightening convention JIT_Rn/JIT_Fn are
+ * caller-save and JIT_Vn/JIT_VFn callee-save -- confirm in lightening.h.
+ * JIT_TMP0 and JIT_TMP1 are the scratch registers handed out, in that
+ * order, by get_temp_gpr() in lightening.c.  */
+#define JIT_R0 _R0
+#define JIT_R1 _R1
+#define JIT_R2 _R2
+#define JIT_R3 _R3
+#define JIT_TMP0 _R12
+
+#define JIT_V0 _R4
+#define JIT_V1 _R5
+#define JIT_V2 _R6
+#define JIT_TMP1 _R7
+#define JIT_V3 _R8
+#define JIT_V4 _R9
+#define JIT_V5 _R10
+#define JIT_V6 _R11
+
+/* Link register, stack pointer and program counter.  */
+#define JIT_LR _R14
+#define JIT_SP _R13
+#define _LR _R14
+#define _PC _R15
+
+/* Portable floating-point register names; all are double registers.  */
+#define JIT_F0 _D0
+#define JIT_F1 _D1
+#define JIT_F2 _D2
+#define JIT_F3 _D3
+#define JIT_F4 _D4
+#define JIT_F5 _D5
+#define JIT_F6 _D6
+#define JIT_F7 _D7
+
+/* JIT_FTMP is the scratch FPR handed out by get_temp_fpr().  */
+#define JIT_VF0 _D8
+#define JIT_VF1 _D9
+#define JIT_VF2 _D10
+#define JIT_VF3 _D11
+#define JIT_VF4 _D12
+#define JIT_VF5 _D13
+#define JIT_VF6 _D14
+#define JIT_FTMP _D15
+
+/* NOTE(review): presumably the extra registers, beyond the JIT_V and
+ * JIT_VF sets, that the platform code must save on entry -- confirm
+ * against the users of these lists.  */
+#define JIT_PLATFORM_CALLEE_SAVE_GPRS _LR, JIT_TMP1
+#define JIT_PLATFORM_CALLEE_SAVE_FPRS JIT_FTMP
+
+
+#endif /* _jit_arm_h */
diff --git a/deps/lightening/lightening/endian.h b/deps/lightening/lightening/endian.h
new file mode 100644
index 0000000..3b34a15
--- /dev/null
+++ b/deps/lightening/lightening/endian.h
@@ -0,0 +1,95 @@
+/*
+ * Copyright (C) 2012-2019 Free Software Foundation, Inc.
+ *
+ * This file is part of GNU lightning.
+ *
+ * GNU lightning is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ *
+ * GNU lightning is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+ * License for more details.
+ *
+ * Authors:
+ * Paulo Cesar Pereira de Andrade
+ * Andy Wingo
+ */
+
+#ifndef _jit_endian_h
+#define _jit_endian_h
+
+#include <unistd.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <string.h>
+#include <stddef.h>
+
+/* Provide __WORDSIZE (pointer width in bits) where the system headers
+ * do not: take it from WORDSIZE, __SIZEOF_POINTER__ or _MIPS_SZPTR when
+ * available, otherwise compare UINTPTR_MAX with UINT64_MAX and fall
+ * back to 32.  */
+#ifndef __WORDSIZE
+# if defined(WORDSIZE) /* ppc darwin */
+# define __WORDSIZE WORDSIZE
+# elif defined(__SIZEOF_POINTER__) /* ppc aix */
+# define __WORDSIZE (__SIZEOF_POINTER__ << 3)
+# elif defined(_MIPS_SZPTR) /* mips irix */
+# if _MIPS_SZPTR == 32
+# define __WORDSIZE 32
+# else
+# define __WORDSIZE 64
+# endif
+# else /* From FreeBSD 9.1 stdint.h */
+# if defined(UINTPTR_MAX) && defined(UINT64_MAX) && \
+ (UINTPTR_MAX == UINT64_MAX)
+# define __WORDSIZE 64
+# else
+# define __WORDSIZE 32
+# endif
+# endif
+#endif
+/* Provide the __LITTLE_ENDIAN and __BIG_ENDIAN tag values, reusing the
+ * platform's or compiler's constants when present and otherwise using
+ * 1234 and 4321.  */
+#ifndef __LITTLE_ENDIAN
+# if defined(LITTLE_ENDIAN) /* ppc darwin */
+# define __LITTLE_ENDIAN LITTLE_ENDIAN
+# elif defined(__ORDER_LITTLE_ENDIAN__) /* ppc aix */
+# define __LITTLE_ENDIAN __ORDER_LITTLE_ENDIAN__
+# else
+# define __LITTLE_ENDIAN 1234
+# endif
+#endif
+#ifndef __BIG_ENDIAN
+# if defined(BIG_ENDIAN) /* ppc darwin */
+# define __BIG_ENDIAN BIG_ENDIAN
+# elif defined(__ORDER_BIG_ENDIAN__) /* ppc aix */
+# define __BIG_ENDIAN __ORDER_BIG_ENDIAN__
+# else
+# define __BIG_ENDIAN 4321
+# endif
+#endif
+/* Provide __BYTE_ORDER: prefer BYTE_ORDER or __BYTE_ORDER__, then fall
+ * back on known architectures; compilation fails if none applies.  */
+#ifndef __BYTE_ORDER
+# if defined(BYTE_ORDER) /* ppc darwin */
+# define __BYTE_ORDER BYTE_ORDER
+# elif defined(__BYTE_ORDER__) /* ppc aix */
+# define __BYTE_ORDER __BYTE_ORDER__
+# elif defined(__i386__) /* 32 bit x86 solaris */
+# define __BYTE_ORDER __LITTLE_ENDIAN
+# elif defined(__x86_64__) /* 64 bit x86 solaris */
+# define __BYTE_ORDER __LITTLE_ENDIAN
+# elif defined(__MIPSEB) /* mips irix */
+# define __BYTE_ORDER __BIG_ENDIAN
+# else
+# error cannot figure __BYTE_ORDER
+# endif
+#endif
+
+/* CHOOSE_32_64(x, y) expands to x on 32-bit targets and to y on 64-bit
+ * targets; any other word size is rejected at compile time.  */
+#if __WORDSIZE == 32
+#define CHOOSE_32_64(x, y) x
+#elif __WORDSIZE == 64
+#define CHOOSE_32_64(x, y) y
+#else
+#error unhandled __WORDSIZE
+#endif
+
+/* WHEN_64(x) expands to x on 64-bit targets and to nothing on 32-bit.  */
+#define WHEN_64(x) CHOOSE_32_64(/**/, x)
+
+
+#endif /* _jit_endian_h */
diff --git a/deps/lightening/lightening/lightening.c b/deps/lightening/lightening/lightening.c
new file mode 100644
index 0000000..937fd14
--- /dev/null
+++ b/deps/lightening/lightening/lightening.c
@@ -0,0 +1,1762 @@
+/* Copyright (C) 2012-2020 Free Software Foundation, Inc.
+ *
+ * This file is part of GNU lightning.
+ *
+ * GNU lightning is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ *
+ * GNU lightning is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+ * License for more details.
+ *
+ * Authors:
+ * Paulo Cesar Pereira de Andrade
+ */
+
+#if HAVE_CONFIG_H
+# include "config.h"
+#endif
+
+#include <assert.h>
+#include <fcntl.h>
+#include <limits.h>
+#include <stdio.h>
+#include <sys/mman.h>
+
+#include "../lightening.h"
+
+/* Always-on assertion: calls abort() when x is false, regardless of
+ * NDEBUG.  */
+#define ASSERT(x) do { if (!(x)) abort(); } while (0)
+
+/* maybe_unused silences unused-definition warnings and UNLIKELY marks a
+ * condition as rarely true; both degrade to plain C without GCC
+ * extensions.  */
+#if defined(__GNUC__)
+# define maybe_unused __attribute__ ((unused))
+# define UNLIKELY(exprn) __builtin_expect(exprn, 0)
+#else
+# define maybe_unused /**/
+# define UNLIKELY(exprn) exprn
+#endif
+
+/* The emission cursor, viewable as a pointer to 8-, 16-, 32- or 64-bit
+ * units (so emit_u8/16/32/64 can store and advance by the right amount)
+ * or as a signed/unsigned integer address.  */
+union jit_pc
+{
+ uint8_t *uc;
+ uint16_t *us;
+ uint32_t *ui;
+ uint64_t *ul;
+ intptr_t w;
+ uintptr_t uw;
+};
+
+#ifdef JIT_NEEDS_LITERAL_POOL
+/* One pending literal: the relocation that refers to it and the value
+ * to place in the pool (set via patch_pending_literal).  */
+struct jit_literal_pool_entry
+{
+ jit_reloc_t reloc;
+ uintptr_t value;
+};
+
+/* Literals waiting to be emitted into the instruction stream.  */
+struct jit_literal_pool
+{
+ // Byte offset from the buffer start; once the cursor reaches it the
+ // emit_*_with_pool helpers flush the pool.
+ uint32_t deadline;
+ // Non-zero while entries are pending; presumably the entry count --
+ // TODO confirm in add_pending_literal.
+ uint32_t size;
+ // Presumably the number of slots allocated in entries[] -- TODO confirm.
+ uint32_t capacity;
+ struct jit_literal_pool_entry entries[];
+};
+#endif // JIT_NEEDS_LITERAL_POOL
+
+/* All state of one code generator instance.  */
+struct jit_state
+{
+ // Current emission position; NULL when no buffer is attached.
+ union jit_pc pc;
+ // First byte of the buffer given to jit_begin(); NULL when idle.
+ uint8_t *start;
+ // Not touched by the code in view; TODO confirm its users.
+ uint8_t *last_instruction_start;
+ // One past the last usable byte of the buffer.
+ uint8_t *limit;
+ // Number of temporaries currently handed out by get_temp_gpr() and
+ // get_temp_fpr() respectively.
+ uint8_t temp_gpr_saved;
+ uint8_t temp_fpr_saved;
+ // Set by the emit_* helpers when a write would pass `limit'.
+ uint8_t overflow;
+ // Non-zero between jit_begin_data() and jit_end_data().
+ uint8_t emitting_data;
+ // Consulted by abi_gpr_to_mem() to pick word-sized argument stores;
+ // set elsewhere.
+ uint8_t preparing_call;
+ int frame_size; // Used to know when to align stack.
+#ifdef JIT_NEEDS_LITERAL_POOL
+ struct jit_literal_pool *pool;
+#endif
+ // Allocator pair supplied to (or defaulted by) jit_new_state().
+ void* (*alloc)(size_t);
+ void (*free)(void*);
+};
+
+static jit_bool_t jit_get_cpu(void);
+static jit_bool_t jit_init(jit_state_t *);
+static void jit_flush(void *fptr, void *tptr);
+static void jit_try_shorten(jit_state_t *_jit, jit_reloc_t reloc,
+ jit_pointer_t addr);
+static void* bless_function_pointer(void *ptr);
+
+struct abi_arg_iterator;
+
+#ifdef JIT_NEEDS_LITERAL_POOL
+static struct jit_literal_pool* alloc_literal_pool(jit_state_t *_jit,
+ size_t capacity);
+static void reset_literal_pool(jit_state_t *_jit,
+ struct jit_literal_pool *pool);
+static jit_bool_t add_pending_literal(jit_state_t *_jit, jit_reloc_t src,
+ uint8_t max_offset_bits);
+static void remove_pending_literal(jit_state_t *_jit, jit_reloc_t src);
+static void patch_pending_literal(jit_state_t *_jit, jit_reloc_t src,
+ uintptr_t value);
+enum guard_pool { GUARD_NEEDED, NO_GUARD_NEEDED };
+static void emit_literal_pool(jit_state_t *_jit, enum guard_pool guard);
+
+static int32_t read_jmp_offset(uint32_t *loc);
+static int offset_in_jmp_range(ptrdiff_t offset, int flags);
+static void patch_jmp_offset(uint32_t *loc, ptrdiff_t offset);
+static void patch_veneer_jmp_offset(uint32_t *loc, ptrdiff_t offset);
+static int32_t read_jcc_offset(uint32_t *loc);
+static int offset_in_jcc_range(ptrdiff_t offset, int flags);
+static void patch_jcc_offset(uint32_t *loc, ptrdiff_t offset);
+static void patch_veneer_jcc_offset(uint32_t *loc, ptrdiff_t offset);
+static void patch_veneer(uint32_t *loc, jit_pointer_t addr);
+static int32_t read_load_from_pool_offset(uint32_t *loc);
+#endif
+
+#ifdef JIT_USE_IMMEDIATE_RELOC
+static void patch_immediate_reloc(uint32_t *loc, jit_pointer_t addr);
+#endif
+
+static jit_bool_t is_fpr_arg(enum jit_operand_abi arg);
+static jit_bool_t is_gpr_arg(enum jit_operand_abi arg);
+#if JIT_ASYMMETRIC_STACK
+static void reset_call_arg_iterator(struct abi_arg_iterator *iter, size_t argc,
+ const jit_operand_t *args);
+
+static void reset_load_arg_iterator(struct abi_arg_iterator *iter, size_t argc,
+ const jit_operand_t *args);
+#else
+static void reset_abi_arg_iterator(struct abi_arg_iterator *iter, size_t argc,
+ const jit_operand_t *args);
+#endif
+static void next_abi_arg(struct abi_arg_iterator *iter,
+ jit_operand_t *arg);
+
+/* Library-wide initialisation: runs the platform's CPU probe and
+ * returns its verdict.  */
+jit_bool_t
+init_jit(void)
+{
+  jit_bool_t cpu_ok = jit_get_cpu ();
+
+  return cpu_ok;
+}
+
+/* Create a code generator state.  ALLOC_FN and FREE_FN default to
+ * malloc and free when NULL.  Aborts if the state itself cannot be
+ * allocated; returns NULL (after releasing the state) when the
+ * platform's jit_init() fails.  */
+jit_state_t *
+jit_new_state(void* (*alloc_fn)(size_t), void (*free_fn)(void*))
+{
+  if (alloc_fn == NULL)
+    alloc_fn = malloc;
+  if (free_fn == NULL)
+    free_fn = free;
+
+  jit_state_t *_jit = alloc_fn (sizeof (jit_state_t));
+  if (_jit == NULL)
+    abort ();
+
+  memset(_jit, 0, sizeof (jit_state_t));
+  _jit->alloc = alloc_fn;
+  _jit->free = free_fn;
+
+  if (jit_init (_jit)) {
+#ifdef JIT_NEEDS_LITERAL_POOL
+    _jit->pool = alloc_literal_pool(_jit, 0);
+#endif
+    return _jit;
+  }
+
+  // Platform initialisation failed: give everything back.
+#ifdef JIT_NEEDS_LITERAL_POOL
+  free_fn (_jit->pool);
+#endif
+  free_fn (_jit);
+  return NULL;
+}
+
+/* Release a state created by jit_new_state(), using the free function
+ * stored in it.  */
+void
+jit_destroy_state(jit_state_t *_jit)
+{
+  void (*release)(void*) = _jit->free;
+
+#ifdef JIT_NEEDS_LITERAL_POOL
+  release (_jit->pool);
+#endif
+  release (_jit);
+}
+
+/* Address of the current emission position.  While emitting data the
+ * raw address is returned; otherwise it is converted with
+ * jit_address_to_function_pointer().  Requires an attached buffer.  */
+jit_pointer_t
+jit_address(jit_state_t *_jit)
+{
+  ASSERT (_jit->start);
+
+  jit_pointer_t here = _jit->pc.uc;
+  if (_jit->emitting_data)
+    return here;
+  return jit_address_to_function_pointer (here);
+}
+
+/* Attach the buffer BUF of LENGTH bytes to the state and start emitting
+ * at its beginning.  The state must be idle (no buffer attached).
+ *
+ * The literal-pool section is guarded with #ifdef, like every other
+ * JIT_NEEDS_LITERAL_POOL site in this file (it was the only `#if').  */
+void
+jit_begin(jit_state_t *_jit, uint8_t* buf, size_t length)
+{
+  ASSERT (!_jit->start);
+
+  _jit->pc.uc = _jit->start = buf;
+  _jit->limit = buf + length;
+  _jit->overflow = 0;
+  _jit->frame_size = 0;
+  _jit->emitting_data = 0;
+#ifdef JIT_NEEDS_LITERAL_POOL
+  // Nothing may be pending from a previous run; with an empty pool the
+  // flush deadline is the end of the buffer.
+  ASSERT(_jit->pool->size == 0);
+  _jit->pool->deadline = length;
+#endif
+}
+
+/* Report whether any emit since jit_begin() ran past the end of the
+ * buffer.  Requires an attached buffer.  */
+jit_bool_t
+jit_has_overflow(jit_state_t *_jit)
+{
+  ASSERT (_jit->start);
+
+  const jit_bool_t overflowed = _jit->overflow;
+  return overflowed;
+}
+
+/* Detach the buffer and return the state to idle, discarding whatever
+ * was emitted so far.  */
+void
+jit_reset(jit_state_t *_jit)
+{
+  ASSERT (_jit);
+
+  _jit->limit = NULL;
+  _jit->start = NULL;
+  _jit->pc.uc = NULL;
+
+  _jit->overflow = 0;
+  _jit->frame_size = 0;
+  _jit->emitting_data = 0;
+#ifdef JIT_NEEDS_LITERAL_POOL
+  reset_literal_pool(_jit, _jit->pool);
+#endif
+}
+
+/* Convert a code address into a callable function pointer via the
+ * platform's bless_function_pointer().  */
+jit_function_pointer_t
+jit_address_to_function_pointer(jit_pointer_t p)
+{
+  void *blessed = bless_function_pointer(p);
+
+  return blessed;
+}
+
+/* Finish the current emission.  Pending literals are flushed first.
+ * If LENGTH is non-NULL it receives the distance from the buffer start
+ * to the cursor; this is stored even when the buffer overflowed.  On
+ * overflow the state is reset and NULL is returned.  Otherwise the code
+ * range is flushed with jit_flush(), the state goes back to idle and
+ * the start of the buffer is returned as a function pointer.  */
+void*
+jit_end(jit_state_t *_jit, size_t *length)
+{
+#ifdef JIT_NEEDS_LITERAL_POOL
+  if (_jit->pool->size != 0)
+    emit_literal_pool(_jit, NO_GUARD_NEEDED);
+#endif
+
+  uint8_t *const code_begin = _jit->start;
+  uint8_t *const code_end = _jit->pc.uc;
+
+  if (length != NULL)
+    *length = code_end - code_begin;
+
+  if (_jit->overflow) {
+    jit_reset(_jit);
+    return NULL;
+  }
+
+  ASSERT(code_begin);
+  ASSERT(code_begin <= code_end);
+  ASSERT(code_end <= _jit->limit);
+  ASSERT(!_jit->emitting_data);
+
+  jit_flush (code_begin, code_end);
+
+  // Back to idle, ready for the next jit_begin().
+  _jit->limit = NULL;
+  _jit->start = NULL;
+  _jit->pc.uc = NULL;
+  _jit->overflow = 0;
+  _jit->frame_size = 0;
+#ifdef JIT_NEEDS_LITERAL_POOL
+  reset_literal_pool(_jit, _jit->pool);
+#endif
+
+  return jit_address_to_function_pointer(code_begin);
+}
+
+/* Return 1 when X has exactly one bit set, 0 otherwise (including 0).  */
+static int
+is_power_of_two (unsigned x)
+{
+  if (x == 0)
+    return 0;
+
+  // Clearing the lowest set bit leaves zero only for a single-bit value.
+  return (x & (x - 1)) == 0;
+}
+
+/* Round VAL up to the next multiple of A.  VAL is returned unchanged
+ * when A is zero or VAL is already a multiple of A.  */
+inline static jit_word_t
+jit_align_up(jit_word_t val, jit_uword_t a)
+{
+  if (a != 0) {
+    jit_word_t excess = val % a;
+
+    if (excess != 0)
+      val += a - excess;
+  }
+
+  return val;
+}
+
+/* Round VAL down to a multiple of A.  VAL is returned unchanged when A
+ * is zero.  */
+inline static jit_word_t
+jit_align_down(jit_word_t val, jit_uword_t a)
+{
+  if (a != 0)
+    val -= val % a;
+
+  return val;
+}
+
+/* Hand out the next scratch GPR: JIT_TMP0 first, then JIT_TMP1 ..
+ * JIT_TMP5 for as many as the platform defines.  The in-use count is
+ * incremented in every case; asking for more temporaries than exist
+ * aborts.  Pair each call with unget_temp_gpr().  */
+static jit_gpr_t
+get_temp_gpr(jit_state_t *_jit)
+{
+  const uint8_t slot = _jit->temp_gpr_saved++;
+
+  if (slot == 0)
+    return JIT_TMP0;
+#ifdef JIT_TMP1
+  if (slot == 1)
+    return JIT_TMP1;
+#endif
+#ifdef JIT_TMP2
+  if (slot == 2)
+    return JIT_TMP2;
+#endif
+#ifdef JIT_TMP3
+  if (slot == 3)
+    return JIT_TMP3;
+#endif
+#ifdef JIT_TMP4
+  if (slot == 4)
+    return JIT_TMP4;
+#endif
+#ifdef JIT_TMP5
+  if (slot == 5)
+    return JIT_TMP5;
+#endif
+
+  abort();
+}
+
+/* Hand out the single scratch FPR, JIT_FTMP.  The in-use count is
+ * incremented in every case; a second request while it is still held
+ * aborts.  Pair each call with unget_temp_fpr().  */
+static jit_fpr_t
+get_temp_fpr(jit_state_t *_jit)
+{
+  const uint8_t slot = _jit->temp_fpr_saved++;
+
+  if (slot == 0)
+    return JIT_FTMP;
+
+  abort();
+}
+
+/* Give back the scratch FPR most recently obtained from
+ * get_temp_fpr(); aborts if none is outstanding.  */
+static void
+unget_temp_fpr(jit_state_t *_jit)
+{
+  ASSERT(_jit->temp_fpr_saved != 0);
+  _jit->temp_fpr_saved -= 1;
+}
+
+/* Give back the scratch GPR most recently obtained from
+ * get_temp_gpr(); aborts if none is outstanding.  */
+static void
+unget_temp_gpr(jit_state_t *_jit)
+{
+  ASSERT(_jit->temp_gpr_saved != 0);
+  _jit->temp_gpr_saved -= 1;
+}
+
+/* Append one byte at the cursor.  If it does not fit, nothing is
+ * written and the overflow flag is set; the cursor advances either
+ * way.  */
+static inline void emit_u8(jit_state_t *_jit, uint8_t u8) {
+  uint8_t *slot = _jit->pc.uc;
+
+  if (UNLIKELY(slot + 1 > _jit->limit))
+    _jit->overflow = 1;
+  else
+    *slot = u8;
+
+  _jit->pc.uc = slot + 1;
+}
+
+/* Append a 16-bit unit at the cursor.  If it does not fit, nothing is
+ * written and the overflow flag is set; the cursor advances either
+ * way.  */
+static inline void emit_u16(jit_state_t *_jit, uint16_t u16) {
+  uint16_t *slot = _jit->pc.us;
+
+  if (UNLIKELY(slot + 1 > (uint16_t*)_jit->limit))
+    _jit->overflow = 1;
+  else
+    *slot = u16;
+
+  _jit->pc.us = slot + 1;
+}
+
+/* Append a 32-bit unit at the cursor.  If it does not fit, nothing is
+ * written and the overflow flag is set; the cursor advances either
+ * way.  */
+static inline void emit_u32(jit_state_t *_jit, uint32_t u32) {
+  uint32_t *slot = _jit->pc.ui;
+
+  if (UNLIKELY(slot + 1 > (uint32_t*)_jit->limit))
+    _jit->overflow = 1;
+  else
+    *slot = u32;
+
+  _jit->pc.ui = slot + 1;
+}
+
+#ifdef JIT_NEEDS_LITERAL_POOL
+/* emit_u16(), then flush the literal pool (with a guard) once the
+ * cursor has reached the pool's deadline.  */
+static inline void emit_u16_with_pool(jit_state_t *_jit, uint16_t u16) {
+  emit_u16(_jit, u16);
+
+  uint8_t *deadline = _jit->start + _jit->pool->deadline;
+  if (UNLIKELY(_jit->pc.uc >= deadline))
+    emit_literal_pool(_jit, GUARD_NEEDED);
+}
+
+/* emit_u32(), then flush the literal pool (with a guard) once the
+ * cursor has reached the pool's deadline.  */
+static inline void emit_u32_with_pool(jit_state_t *_jit, uint32_t u32) {
+  emit_u32(_jit, u32);
+
+  uint8_t *deadline = _jit->start + _jit->pool->deadline;
+  if (UNLIKELY(_jit->pc.uc >= deadline))
+    emit_literal_pool(_jit, GUARD_NEEDED);
+}
+#endif
+
+/* Append a 64-bit unit at the cursor.  If it does not fit, nothing is
+ * written and the overflow flag is set; the cursor advances either
+ * way.  */
+static inline void emit_u64(jit_state_t *_jit, uint64_t u64) {
+  uint64_t *slot = _jit->pc.ul;
+
+  if (UNLIKELY(slot + 1 > (uint64_t*)_jit->limit))
+    _jit->overflow = 1;
+  else
+    *slot = u64;
+
+  _jit->pc.ul = slot + 1;
+}
+
+/* Append a pointer-sized unit: 32 bits when uintptr_t is 4 bytes wide,
+ * 64 bits otherwise.  */
+static inline void emit_uintptr(jit_state_t *_jit, uintptr_t u) {
+  if (sizeof(uintptr_t) != 4) {
+    emit_u64 (_jit, u);
+    return;
+  }
+
+  emit_u32 (_jit, u);
+}
+
+/* Build a relocation record for the patchable field at LOC.
+ *   kind              - relocation kind (and flags)
+ *   inst_start_offset - bytes from the instruction start to LOC
+ *   pc_base           - address that relative displacements count from;
+ *                       must lie 0..255 bytes past the instruction start
+ *   rsh               - right shift applied to displacements
+ * The record stores LOC as an offset from the start of the buffer.  */
+static inline jit_reloc_t
+jit_reloc(jit_state_t *_jit, enum jit_reloc_kind kind,
+          uint8_t inst_start_offset, uint8_t *loc, uint8_t *pc_base,
+          uint8_t rsh)
+{
+  uint8_t *inst_start = loc - inst_start_offset;
+
+  ASSERT(rsh < __WORDSIZE);
+  ASSERT(pc_base >= inst_start);
+  ASSERT(pc_base - inst_start < 256);
+
+  jit_reloc_t ret;
+  ret.kind = kind;
+  ret.inst_start_offset = inst_start_offset;
+  ret.pc_base_offset = pc_base - inst_start;
+  ret.rsh = rsh;
+  ret.offset = loc - _jit->start;
+  return ret;
+}
+
+/* Emit a zeroed pointer-sized placeholder and return an absolute
+ * relocation for it.  INST_START is the offset of the placeholder from
+ * the start of its instruction.  */
+static inline jit_reloc_t
+emit_abs_reloc (jit_state_t *_jit, uint8_t inst_start)
+{
+  uint8_t *placeholder = _jit->pc.uc;
+
+  emit_uintptr (_jit, 0);
+
+  // The PC base is the cursor after the placeholder.
+  uint8_t *after = _jit->pc.uc;
+  return jit_reloc(_jit, JIT_RELOC_ABSOLUTE, inst_start, placeholder, after, 0);
+}
+
+/* Resolve RELOC to the current emission address.  */
+void
+jit_patch_here(jit_state_t *_jit, jit_reloc_t reloc)
+{
+  jit_pointer_t here = jit_address (_jit);
+
+  jit_patch_there (_jit, reloc, here);
+}
+
+/* Resolve RELOC so that it refers to ADDR.
+ *
+ * Does nothing once the buffer has overflowed.  For plain relocations
+ * (absolute, REL8/16/32/64) the target field is overwritten in place;
+ * relative ones store the displacement from the relocation's PC base,
+ * shifted right by reloc.rsh, and abort if it does not fit.  For the
+ * literal-pool kinds the branch/load is either patched directly, or the
+ * pending pool entry or the already emitted veneer/literal is updated.
+ * When the patched field ends exactly at the cursor, the platform gets
+ * a chance to shorten the instruction via jit_try_shorten().  */
+void
+jit_patch_there(jit_state_t* _jit, jit_reloc_t reloc, jit_pointer_t addr)
+{
+ if (_jit->overflow)
+ return;
+ union jit_pc loc;
+ uint8_t *end;
+ loc.uc = _jit->start + reloc.offset;
+ // Recover the PC base recorded by jit_reloc() and the displacement
+ // from it; the displacement must be a multiple of 1 << rsh.
+ uint8_t *pc_base = loc.uc - reloc.inst_start_offset + reloc.pc_base_offset;
+ ptrdiff_t diff = (uint8_t*)addr - pc_base;
+ ASSERT((diff & ((1 << reloc.rsh) - 1)) == 0);
+ diff >>= reloc.rsh;
+#ifdef JIT_NEEDS_LITERAL_POOL
+ // Bits of `kind' outside JIT_RELOC_MASK are passed on as range flags.
+ int flags = reloc.kind & ~JIT_RELOC_MASK;
+#endif
+
+ switch (reloc.kind & JIT_RELOC_MASK)
+ {
+ case JIT_RELOC_ABSOLUTE:
+ // sizeof(diff) stands in for the pointer width here.
+ if (sizeof(diff) == 4)
+ *loc.ui = (uintptr_t)addr;
+ else
+ *loc.ul = (uintptr_t)addr;
+ end = loc.uc + sizeof(diff);
+ break;
+ case JIT_RELOC_REL8:
+ ASSERT (INT8_MIN <= diff && diff <= INT8_MAX);
+ *loc.uc = diff;
+ end = loc.uc + 1;
+ break;
+ case JIT_RELOC_REL16:
+ ASSERT (INT16_MIN <= diff && diff <= INT16_MAX);
+ *loc.us = diff;
+ end = loc.uc + 2;
+ break;
+#ifdef JIT_NEEDS_LITERAL_POOL
+ case JIT_RELOC_JMP_WITH_VENEER: {
+ // A branch whose encoded target is still its own location has not
+ // been given a veneer yet.
+ int32_t voff = read_jmp_offset(loc.ui);
+ uint8_t *target = pc_base + (voff << reloc.rsh);
+ if (target == loc.uc) {
+ // PC still in range to reify direct branch.
+ if (offset_in_jmp_range(diff, flags)) {
+ // Target also in range: reify direct branch.
+ patch_jmp_offset(loc.ui, diff);
+ remove_pending_literal(_jit, reloc);
+ } else {
+ // Target out of range; branch to veneer.
+ patch_pending_literal(_jit, reloc, (uintptr_t) addr);
+ }
+ } else {
+ // Already emitted a veneer. In this case, patch the veneer
+ // directly.
+ patch_veneer((uint32_t *) target, addr);
+ }
+ return;
+ }
+ case JIT_RELOC_JCC_WITH_VENEER: {
+ // Same scheme as JIT_RELOC_JMP_WITH_VENEER, using the jcc helpers.
+ int32_t voff = read_jcc_offset(loc.ui);
+ uint8_t *target = pc_base + (voff << reloc.rsh);
+ if (target == loc.uc) {
+ if (offset_in_jcc_range(diff, flags)) {
+ patch_jcc_offset(loc.ui, diff);
+ remove_pending_literal(_jit, reloc);
+ } else {
+ patch_pending_literal(_jit, reloc, (uintptr_t) addr);
+ }
+ } else {
+ patch_veneer((uint32_t *) target, addr);
+ }
+ return;
+ }
+ case JIT_RELOC_LOAD_FROM_POOL: {
+ // Either the literal is still pending (update the pool entry) or it
+ // has been emitted (overwrite the emitted word).
+ int32_t voff = read_load_from_pool_offset(loc.ui);
+ uint8_t *target = pc_base + (voff << reloc.rsh);
+ if (target == loc.uc) {
+ patch_pending_literal(_jit, reloc, (uintptr_t) addr);
+ } else {
+ *(uintptr_t *) target = (uintptr_t) addr;
+ }
+ return;
+ }
+#endif
+#ifdef JIT_USE_IMMEDIATE_RELOC
+ case JIT_RELOC_IMMEDIATE: {
+ patch_immediate_reloc(loc.ui, addr);
+ return;
+ }
+#endif
+ case JIT_RELOC_REL32:
+ ASSERT (INT32_MIN <= diff && diff <= INT32_MAX);
+ *loc.ui = diff;
+ end = loc.uc + 4;
+ break;
+ case JIT_RELOC_REL64:
+ *loc.ul = diff;
+ end = loc.uc + 8;
+ break;
+ default:
+ abort ();
+ }
+
+ // Only the most recently emitted instruction can be shortened.
+ if (end == _jit->pc.uc)
+ jit_try_shorten (_jit, reloc, addr);
+}
+
+/* Switch the state into data-emission mode.  MAX_SIZE_OR_ZERO is an
+ * upper bound on the number of data bytes that will follow, or 0 when
+ * unknown.  If literals are pending and the data could reach the pool
+ * deadline (or the size is unknown), the pool is emitted first.  */
+void
+jit_begin_data(jit_state_t *j, size_t max_size_or_zero)
+{
+#ifdef JIT_NEEDS_LITERAL_POOL
+  if (j->pool->size) {
+    // Emitting data won't add entries to the pool, so flushing now is
+    // enough to keep the data from overwriting the deadline.
+    uint8_t *deadline = j->start + j->pool->deadline;
+    int may_reach_deadline =
+      max_size_or_zero == 0 || j->pc.uc + max_size_or_zero >= deadline;
+
+    if (may_reach_deadline)
+      emit_literal_pool(j, NO_GUARD_NEEDED);
+  }
+#endif
+
+  ASSERT(!j->emitting_data);
+  j->emitting_data = 1;
+}
+
+/* Leave data-emission mode; aborts if it was not active.  */
+void
+jit_end_data(jit_state_t *j)
+{
+  ASSERT(j->emitting_data != 0);
+
+  j->emitting_data = 0;
+}
+
+/* Public data emitter: append one byte.  Only valid between
+ * jit_begin_data() and jit_end_data().  */
+void
+jit_emit_u8(jit_state_t *j, uint8_t u8)
+{
+  ASSERT(j->emitting_data != 0);
+
+  emit_u8(j, u8);
+}
+
+/* Public data emitter: append a 16-bit value.  Only valid between
+ * jit_begin_data() and jit_end_data().  */
+void
+jit_emit_u16(jit_state_t *j, uint16_t u16)
+{
+  ASSERT(j->emitting_data != 0);
+
+  emit_u16(j, u16);
+}
+
+/* Public data emitter: append a 32-bit value.  Only valid between
+ * jit_begin_data() and jit_end_data().  */
+void
+jit_emit_u32(jit_state_t *j, uint32_t u32)
+{
+  ASSERT(j->emitting_data != 0);
+
+  emit_u32(j, u32);
+}
+
+/* Public data emitter: append a 64-bit value.  Only valid between
+ * jit_begin_data() and jit_end_data().  */
+void
+jit_emit_u64(jit_state_t *j, uint64_t u64)
+{
+  ASSERT(j->emitting_data != 0);
+
+  emit_u64(j, u64);
+}
+
+/* Public data emitter: reserve a pointer-sized slot and return an
+ * absolute relocation for it, to be resolved with jit_patch_here() or
+ * jit_patch_there().  Only valid in data-emission mode.  */
+jit_reloc_t
+jit_emit_addr(jit_state_t *j)
+{
+  ASSERT(j->emitting_data != 0);
+
+  // The slot is its own "instruction", so it starts at offset 0.
+  return emit_abs_reloc(j, 0);
+}
+
+#if defined(__i386__) || defined(__x86_64__)
+# include "x86.c"
+#elif defined(__mips__)
+# include "mips.c"
+#elif defined(__arm__)
+# include "arm.c"
+#elif defined(__ppc__) || defined(__powerpc__)
+# include "ppc.c"
+#elif defined(__aarch64__)
+# include "aarch64.c"
+#elif defined(__s390__) || defined(__s390x__)
+# include "s390.c"
+#endif
+
+/* JIT_IMPL_n(stem, ret, types...) defines the public entry point
+ * jit_<stem>, taking the state plus n operands of type jit_<type>_t.
+ * It forwards to the backend function <stem>, passing each operand
+ * through the matching unwrap_<type> macro.  */
+#define JIT_IMPL_0(stem, ret) \
+ ret jit_##stem (jit_state_t* _jit) \
+ { \
+ return stem(_jit); \
+ }
+#define JIT_IMPL_1(stem, ret, ta) \
+ ret jit_##stem (jit_state_t* _jit, jit_##ta##_t a) \
+ { \
+ return stem(_jit, unwrap_##ta(a)); \
+ }
+#define JIT_IMPL_2(stem, ret, ta, tb) \
+ ret jit_##stem (jit_state_t* _jit, jit_##ta##_t a, jit_##tb##_t b) \
+ { \
+ return stem(_jit, unwrap_##ta(a), unwrap_##tb(b)); \
+ }
+#define JIT_IMPL_3(stem, ret, ta, tb, tc) \
+ ret jit_##stem (jit_state_t* _jit, jit_##ta##_t a, jit_##tb##_t b, jit_##tc##_t c) \
+ { \
+ return stem(_jit, unwrap_##ta(a), unwrap_##tb(b), unwrap_##tc(c)); \
+ }
+#define JIT_IMPL_4(stem, ret, ta, tb, tc, td) \
+ ret jit_##stem (jit_state_t* _jit, jit_##ta##_t a, jit_##tb##_t b, jit_##tc##_t c, jit_##td##_t d) \
+ { \
+ return stem(_jit, unwrap_##ta(a), unwrap_##tb(b), unwrap_##tc(c), unwrap_##td(d)); \
+ }
+
+/* One macro per instruction signature kind.  The first character of
+ * the kind encodes the return type (R = jit_reloc_t, _ = void); the
+ * remaining four encode the operands: G = gpr, F = fpr, i = imm,
+ * u = uimm, o = off, p = pointer, f = float32, d = float64, _ = none.  */
+#define JIT_IMPL_RFF__(stem) JIT_IMPL_2(stem, jit_reloc_t, fpr, fpr)
+#define JIT_IMPL_RGG__(stem) JIT_IMPL_2(stem, jit_reloc_t, gpr, gpr)
+#define JIT_IMPL_RG___(stem) JIT_IMPL_1(stem, jit_reloc_t, gpr)
+#define JIT_IMPL_RGi__(stem) JIT_IMPL_2(stem, jit_reloc_t, gpr, imm)
+#define JIT_IMPL_RGu__(stem) JIT_IMPL_2(stem, jit_reloc_t, gpr, uimm)
+#define JIT_IMPL_R____(stem) JIT_IMPL_0(stem, jit_reloc_t)
+#define JIT_IMPL__FFF_(stem) JIT_IMPL_3(stem, void, fpr, fpr, fpr)
+#define JIT_IMPL__FF__(stem) JIT_IMPL_2(stem, void, fpr, fpr)
+#define JIT_IMPL__FGG_(stem) JIT_IMPL_3(stem, void, fpr, gpr, gpr)
+#define JIT_IMPL__FG__(stem) JIT_IMPL_2(stem, void, fpr, gpr)
+#define JIT_IMPL__FGo_(stem) JIT_IMPL_3(stem, void, fpr, gpr, off)
+#define JIT_IMPL__F___(stem) JIT_IMPL_1(stem, void, fpr)
+#define JIT_IMPL__Fd__(stem) JIT_IMPL_2(stem, void, fpr, float64)
+#define JIT_IMPL__Ff__(stem) JIT_IMPL_2(stem, void, fpr, float32)
+#define JIT_IMPL__Fp__(stem) JIT_IMPL_2(stem, void, fpr, pointer)
+#define JIT_IMPL__GF__(stem) JIT_IMPL_2(stem, void, gpr, fpr)
+#define JIT_IMPL__GGF_(stem) JIT_IMPL_3(stem, void, gpr, gpr, fpr)
+#define JIT_IMPL__GGGG(stem) JIT_IMPL_4(stem, void, gpr, gpr, gpr, gpr)
+#define JIT_IMPL__GGG_(stem) JIT_IMPL_3(stem, void, gpr, gpr, gpr)
+#define JIT_IMPL__GGGi(stem) JIT_IMPL_4(stem, void, gpr, gpr, gpr, imm)
+#define JIT_IMPL__GGGu(stem) JIT_IMPL_4(stem, void, gpr, gpr, gpr, uimm)
+#define JIT_IMPL__GG__(stem) JIT_IMPL_2(stem, void, gpr, gpr)
+#define JIT_IMPL__GGi_(stem) JIT_IMPL_3(stem, void, gpr, gpr, imm)
+#define JIT_IMPL__GGo_(stem) JIT_IMPL_3(stem, void, gpr, gpr, off)
+#define JIT_IMPL__GGu_(stem) JIT_IMPL_3(stem, void, gpr, gpr, uimm)
+#define JIT_IMPL__G___(stem) JIT_IMPL_1(stem, void, gpr)
+#define JIT_IMPL__Gi__(stem) JIT_IMPL_2(stem, void, gpr, imm)
+#define JIT_IMPL__Gp__(stem) JIT_IMPL_2(stem, void, gpr, pointer)
+#define JIT_IMPL______(stem) JIT_IMPL_0(stem, void)
+#define JIT_IMPL__i___(stem) JIT_IMPL_1(stem, void, imm)
+#define JIT_IMPL__oGF_(stem) JIT_IMPL_3(stem, void, off, gpr, fpr)
+#define JIT_IMPL__oGG_(stem) JIT_IMPL_3(stem, void, off, gpr, gpr)
+#define JIT_IMPL__pF__(stem) JIT_IMPL_2(stem, void, pointer, fpr)
+#define JIT_IMPL__pG__(stem) JIT_IMPL_2(stem, void, pointer, gpr)
+#define JIT_IMPL__p___(stem) JIT_IMPL_1(stem, void, pointer)
+#define JIT_IMPL__GGGo(stem) JIT_IMPL_4(stem, void, gpr, gpr, gpr, off)
+#define JIT_IMPL__oGGG(stem) JIT_IMPL_4(stem, void, off, gpr, gpr, gpr)
+
+/* Conversions from public operand types to what the backend functions
+ * take: registers become register numbers, pointers become uintptr_t,
+ * everything else is passed through unchanged.  */
+#define unwrap_gpr(r) jit_gpr_regno(r)
+#define unwrap_fpr(r) jit_fpr_regno(r)
+#define unwrap_imm(i) i
+#define unwrap_uimm(u) u
+#define unwrap_off(o) o
+#define unwrap_pointer(p) ((uintptr_t) p)
+#define unwrap_float32(f) f
+#define unwrap_float64(d) d
+
+/* Instantiate a jit_<stem> wrapper for every instruction listed by
+ * FOR_EACH_INSTRUCTION, plus file-local wrappers for the helpers that
+ * only exist on ABIs passing floating-point values in GPRs.  */
+#define IMPL_INSTRUCTION(kind, stem) JIT_IMPL_##kind(stem)
+FOR_EACH_INSTRUCTION(IMPL_INSTRUCTION)
+#ifdef JIT_PASS_DOUBLES_IN_GPR_PAIRS
+/* internal use only */
+static void jit_movr_d_ww(jit_state_t *_jit, jit_fpr_t f0, jit_gpr_t r0, jit_gpr_t r1);
+static void jit_movr_ww_d(jit_state_t *_jit, jit_gpr_t r0, jit_gpr_t r1, jit_fpr_t f0);
+static void jit_ldxi_ww(jit_state_t *_jit, jit_gpr_t r0, jit_gpr_t r1,
+ jit_gpr_t r2, jit_off_t o0);
+static void jit_stxi_ww(jit_state_t *_jit, jit_off_t o0, jit_gpr_t r0,
+ jit_gpr_t r1, jit_gpr_t r2);
+IMPL_INSTRUCTION(_FGG_, movr_d_ww)
+IMPL_INSTRUCTION(_GGF_, movr_ww_d)
+IMPL_INSTRUCTION(_GGGo, ldxi_ww)
+IMPL_INSTRUCTION(_oGGG, stxi_ww)
+#endif
+#ifdef JIT_PASS_FLOATS_IN_GPRS
+/* internal use only; the static prototypes give the wrappers generated
+ * below internal linkage */
+static void jit_movr_f_w(jit_state_t *_jit, jit_fpr_t f0, jit_gpr_t r0);
+static void jit_movr_w_f(jit_state_t *_jit, jit_gpr_t r0, jit_fpr_t f0);
+static void jit_ldxi_w(jit_state_t *_jit, jit_gpr_t r0, jit_gpr_t r1,
+ jit_off_t o0);
+static void jit_stxi_w(jit_state_t *_jit, jit_off_t o0, jit_gpr_t r0,
+ jit_gpr_t r1);
+IMPL_INSTRUCTION(_FG__, movr_f_w)
+IMPL_INSTRUCTION(_GF__, movr_w_f)
+IMPL_INSTRUCTION(_GGo_, ldxi_w)
+IMPL_INSTRUCTION(_oGG_, stxi_w)
+#endif
+#undef IMPL_INSTRUCTION
+
+/* Pad the instruction stream with no-ops until the cursor is a multiple
+ * of ALIGN, which must be a power of two (asserted).  Emits nothing
+ * when the cursor is already aligned.
+ *
+ * The mask is built in uintptr_t.  Previously `~(align - 1)' was
+ * computed in 32-bit unsigned arithmetic and then zero-extended, which
+ * on 64-bit hosts also cleared the upper half of the address.  */
+void
+jit_align(jit_state_t *_jit, unsigned align)
+{
+  ASSERT (is_power_of_two (align));
+  uintptr_t here = _jit->pc.w;
+  uintptr_t mask = (uintptr_t) align - 1;
+  uintptr_t there = (here + mask) & ~mask;
+  if (there - here)
+    nop(_jit, there - here);
+}
+
+/* Return 1 for the floating-point operand ABIs (float, double) and 0
+ * for the integer and pointer ones; aborts on any other value.  */
+static jit_bool_t
+is_fpr_arg(enum jit_operand_abi arg)
+{
+  switch (arg)
+    {
+    case JIT_OPERAND_ABI_FLOAT:
+    case JIT_OPERAND_ABI_DOUBLE:
+      return 1;
+
+    case JIT_OPERAND_ABI_UINT8:
+    case JIT_OPERAND_ABI_INT8:
+    case JIT_OPERAND_ABI_UINT16:
+    case JIT_OPERAND_ABI_INT16:
+    case JIT_OPERAND_ABI_UINT32:
+    case JIT_OPERAND_ABI_INT32:
+    case JIT_OPERAND_ABI_UINT64:
+    case JIT_OPERAND_ABI_INT64:
+    case JIT_OPERAND_ABI_POINTER:
+      return 0;
+
+    default:
+      abort();
+    }
+}
+
+/* Complement of is_fpr_arg(): 1 for integer and pointer operand ABIs,
+ * 0 for float and double.  */
+static jit_bool_t
+is_gpr_arg(enum jit_operand_abi arg)
+{
+  const jit_bool_t wants_fpr = is_fpr_arg(arg);
+
+  return !wants_fpr;
+}
+
+/* Load the immediate IMM into DST as an argument of kind ABI.  Aborts
+ * when IMM is outside the range of a sub-word ABI type, or when ABI is
+ * not an integer or pointer kind handled for this word size.  */
+static void
+abi_imm_to_gpr(jit_state_t *_jit, enum jit_operand_abi abi, jit_gpr_t dst,
+               jit_imm_t imm)
+{
+  switch (abi) {
+  case JIT_OPERAND_ABI_UINT8:
+    ASSERT(0 <= imm && imm <= UINT8_MAX);
+    break;
+  case JIT_OPERAND_ABI_INT8:
+    ASSERT(INT8_MIN <= imm && imm <= INT8_MAX);
+    break;
+  case JIT_OPERAND_ABI_UINT16:
+    ASSERT(0 <= imm && imm <= UINT16_MAX);
+    break;
+  case JIT_OPERAND_ABI_INT16:
+    ASSERT(INT16_MIN <= imm && imm <= INT16_MAX);
+    break;
+#if __WORDSIZE > 32
+  case JIT_OPERAND_ABI_UINT32:
+    ASSERT(0 <= imm && imm <= UINT32_MAX);
+    break;
+  case JIT_OPERAND_ABI_INT32:
+    ASSERT(INT32_MIN <= imm && imm <= INT32_MAX);
+    break;
+  case JIT_OPERAND_ABI_UINT64:
+  case JIT_OPERAND_ABI_INT64:
+    break;
+#else
+  // On 32-bit targets a 32-bit immediate is accepted without a check.
+  case JIT_OPERAND_ABI_UINT32:
+  case JIT_OPERAND_ABI_INT32:
+    break;
+#endif
+  case JIT_OPERAND_ABI_POINTER:
+    break;
+  default:
+    abort();
+  }
+
+  jit_movi (_jit, dst, imm);
+}
+
+/* Store the argument held in SRC to BASE + OFFSET, with sub-word kinds
+ * written using the generic jit_stxi store rather than a narrow one.
+ * Aborts on ABI kinds that are not handled for this configuration.  */
+static void
+abi_gpr_to_mem_walign(jit_state_t *_jit, enum jit_operand_abi abi,
+                      jit_gpr_t base, ptrdiff_t offset, jit_gpr_t src)
+{
+  // Invariant: GPR memory destination operand sizes are rounded up to words.
+  // True for ARM, AArch64, IA32, and X86-64.  Some ABIs expect to be able to
+  // load operands from the stack via a full-word read, so we need to make sure
+  // we don't leave garbage in the high bytes of (for example) the stack slot
+  // for a uint8_t arg.
+  switch (abi) {
+  case JIT_OPERAND_ABI_UINT8:
+  case JIT_OPERAND_ABI_INT8:
+  case JIT_OPERAND_ABI_UINT16:
+  case JIT_OPERAND_ABI_INT16:
+  case JIT_OPERAND_ABI_UINT32:
+  case JIT_OPERAND_ABI_INT32:
+#if __WORDSIZE == 32
+  case JIT_OPERAND_ABI_POINTER:
+#endif
+    jit_stxi(_jit, offset, base, src);
+    return;
+#if __WORDSIZE == 64
+  case JIT_OPERAND_ABI_UINT64:
+  case JIT_OPERAND_ABI_INT64:
+  case JIT_OPERAND_ABI_POINTER:
+    jit_stxi_l(_jit, offset, base, src);
+    return;
+#endif
+#if JIT_PASS_FLOATS_IN_GPRS
+  case JIT_OPERAND_ABI_FLOAT:
+    jit_stxi_w(_jit, offset, base, src);
+    return;
+#endif
+  default:
+    abort();
+  }
+}
+
+/* Store the argument held in SRC to BASE + OFFSET using a store of the
+ * operand's own width (the _c, _s, _i or _l variant).  Aborts on ABI
+ * kinds that are not handled for this configuration.  */
+static void
+abi_gpr_to_mem_nalign(jit_state_t *_jit, enum jit_operand_abi abi,
+                      jit_gpr_t base, ptrdiff_t offset, jit_gpr_t src)
+{
+  switch (abi) {
+  case JIT_OPERAND_ABI_UINT8:
+  case JIT_OPERAND_ABI_INT8:
+    jit_stxi_c(_jit, offset, base, src);
+    return;
+  case JIT_OPERAND_ABI_UINT16:
+  case JIT_OPERAND_ABI_INT16:
+    jit_stxi_s(_jit, offset, base, src);
+    return;
+  case JIT_OPERAND_ABI_UINT32:
+  case JIT_OPERAND_ABI_INT32:
+#if __WORDSIZE == 32
+  case JIT_OPERAND_ABI_POINTER:
+#endif
+    jit_stxi_i(_jit, offset, base, src);
+    return;
+#if __WORDSIZE == 64
+  case JIT_OPERAND_ABI_UINT64:
+  case JIT_OPERAND_ABI_INT64:
+  case JIT_OPERAND_ABI_POINTER:
+    jit_stxi_l(_jit, offset, base, src);
+    return;
+#endif
+#if JIT_PASS_FLOATS_IN_GPRS
+  case JIT_OPERAND_ABI_FLOAT:
+    jit_stxi_w(_jit, offset, base, src);
+    return;
+#endif
+  default:
+    abort();
+  }
+}
+
+/* Store a GPR to memory.  The word-aligned variant is used only while
+   call arguments are being prepared on targets that define
+   JIT_CALL_STACK_ALIGN_WORD; everything else gets the exact-width
+   store.  */
+static void
+abi_gpr_to_mem(jit_state_t *_jit, enum jit_operand_abi abi,
+               jit_gpr_t base, ptrdiff_t offset, jit_gpr_t src)
+{
+  if (!JIT_CALL_STACK_ALIGN_WORD || !_jit->preparing_call) {
+    abi_gpr_to_mem_nalign(_jit, abi, base, offset, src);
+    return;
+  }
+
+  abi_gpr_to_mem_walign(_jit, abi, base, offset, src);
+}
+
+/* Store the FPR SRC to BASE+OFFSET as a float or a double, depending
+   on ABI.  Any other ABI aborts.  */
+static void
+abi_fpr_to_mem(jit_state_t *_jit, enum jit_operand_abi abi,
+               jit_gpr_t base, ptrdiff_t offset, jit_fpr_t src)
+{
+  if (abi == JIT_OPERAND_ABI_FLOAT)
+    jit_stxi_f(_jit, offset, base, src);
+  else if (abi == JIT_OPERAND_ABI_DOUBLE)
+    jit_stxi_d(_jit, offset, base, src);
+  else
+    abort();
+}
+
+/* Load the value at BASE+OFFSET into DST with the load that matches
+   ABI: the unsigned ABIs use the zero-extending loads (_uc, _us, _ui)
+   and the signed ones the plain loads.  Unknown ABIs abort.  */
+static void
+abi_mem_to_gpr(jit_state_t *_jit, enum jit_operand_abi abi,
+               jit_gpr_t dst, jit_gpr_t base, ptrdiff_t offset)
+{
+  switch (abi) {
+  case JIT_OPERAND_ABI_UINT8:
+    jit_ldxi_uc(_jit, dst, base, offset);
+    return;
+  case JIT_OPERAND_ABI_INT8:
+    jit_ldxi_c(_jit, dst, base, offset);
+    return;
+  case JIT_OPERAND_ABI_UINT16:
+    jit_ldxi_us(_jit, dst, base, offset);
+    return;
+  case JIT_OPERAND_ABI_INT16:
+    jit_ldxi_s(_jit, dst, base, offset);
+    return;
+  case JIT_OPERAND_ABI_INT32:
+    jit_ldxi_i(_jit, dst, base, offset);
+    return;
+#if __WORDSIZE == 32
+  case JIT_OPERAND_ABI_UINT32:
+  case JIT_OPERAND_ABI_POINTER:
+    jit_ldxi_i(_jit, dst, base, offset);
+    return;
+#endif
+#if __WORDSIZE == 64
+  case JIT_OPERAND_ABI_UINT32:
+    jit_ldxi_ui(_jit, dst, base, offset);
+    return;
+  case JIT_OPERAND_ABI_UINT64:
+  case JIT_OPERAND_ABI_INT64:
+  case JIT_OPERAND_ABI_POINTER:
+    jit_ldxi_l(_jit, dst, base, offset);
+    return;
+#endif
+#if JIT_PASS_FLOATS_IN_GPRS
+  case JIT_OPERAND_ABI_FLOAT:
+    jit_ldxi_w(_jit, dst, base, offset);
+    return;
+#endif
+  default:
+    abort();
+  }
+}
+
+/* Load a float or a double (chosen by ABI) from BASE+OFFSET into the
+   FPR DST.  Any other ABI aborts.  */
+static void
+abi_mem_to_fpr(jit_state_t *_jit, enum jit_operand_abi abi,
+               jit_fpr_t dst, jit_gpr_t base, ptrdiff_t offset)
+{
+  if (abi == JIT_OPERAND_ABI_FLOAT)
+    jit_ldxi_f(_jit, dst, base, offset);
+  else if (abi == JIT_OPERAND_ABI_DOUBLE)
+    jit_ldxi_d(_jit, dst, base, offset);
+  else
+    abort();
+}
+
+/* Store the immediate IMM to BASE+OFFSET by materialising it in a
+   temporary GPR first.  ABI must not be a floating-point ABI.  */
+static void
+abi_imm_to_mem(jit_state_t *_jit, enum jit_operand_abi abi, jit_gpr_t base,
+               ptrdiff_t offset, jit_imm_t imm)
+{
+  jit_gpr_t scratch;
+
+  ASSERT(!is_fpr_arg(abi));
+
+  scratch = get_temp_gpr(_jit);
+  abi_imm_to_gpr(_jit, abi, scratch, imm);
+  abi_gpr_to_mem(_jit, abi, base, offset, scratch);
+  unget_temp_gpr(_jit);
+}
+
+/* Copy one operand from SRC_BASE+SRC_OFFSET to BASE+OFFSET through a
+   temporary register: an FPR when ABI is not a GPR ABI, a GPR
+   otherwise.  */
+static void
+abi_mem_to_mem(jit_state_t *_jit, enum jit_operand_abi abi, jit_gpr_t base,
+               ptrdiff_t offset, jit_gpr_t src_base, ptrdiff_t src_offset)
+{
+  if (!is_gpr_arg (abi)) {
+    jit_fpr_t scratch = get_temp_fpr(_jit);
+    abi_mem_to_fpr(_jit, abi, scratch, src_base, src_offset);
+    abi_fpr_to_mem(_jit, abi, base, offset, scratch);
+    unget_temp_fpr(_jit);
+    return;
+  }
+
+  jit_gpr_t scratch = get_temp_gpr(_jit);
+  abi_mem_to_gpr(_jit, abi, scratch, src_base, src_offset);
+  abi_gpr_to_mem(_jit, abi, base, offset, scratch);
+  unget_temp_gpr(_jit);
+}
+
+// Pack a (source kind, destination kind) pair into a single integer:
+// the source kind is shifted left by 4 and OR-ed with the destination
+// kind.  NOTE(review): this assumes every operand kind value fits in
+// 4 bits -- confirm against the operand kind enum.
+#define MOVE_KIND(a, b) ((((int) a) << 4) | ((int) b))
+
+// Declare the enumerator MOVE_<a>_TO_<b> with the MOVE_KIND value for a
+// move from an operand of kind <a> to an operand of kind <b>.
+#define MOVE_KIND_ENUM(a, b) \
+ MOVE_##a##_TO_##b = MOVE_KIND(JIT_OPERAND_KIND_##a, JIT_OPERAND_KIND_##b)
+// The source/destination kind combinations that have a named constant.
+// The GPR_PAIR and FPR<->GPR combinations only exist when the
+// corresponding JIT_PASS_* configuration macro is enabled.
+enum move_kind {
+ MOVE_KIND_ENUM(IMM, GPR),
+ MOVE_KIND_ENUM(GPR, GPR),
+ MOVE_KIND_ENUM(MEM, GPR),
+ MOVE_KIND_ENUM(FPR, FPR),
+ MOVE_KIND_ENUM(MEM, FPR),
+ MOVE_KIND_ENUM(IMM, MEM),
+ MOVE_KIND_ENUM(GPR, MEM),
+ MOVE_KIND_ENUM(FPR, MEM),
+ MOVE_KIND_ENUM(MEM, MEM),
+#if JIT_PASS_DOUBLES_IN_GPR_PAIRS
+ MOVE_KIND_ENUM(FPR, GPR_PAIR),
+ MOVE_KIND_ENUM(GPR_PAIR, FPR),
+ MOVE_KIND_ENUM(MEM, GPR_PAIR),
+ MOVE_KIND_ENUM(GPR_PAIR, MEM),
+ /* needed to make sure nobody overwrites anything */
+ MOVE_KIND_ENUM(GPR, GPR_PAIR),
+ MOVE_KIND_ENUM(GPR_PAIR, GPR),
+#endif
+#if JIT_PASS_FLOATS_IN_GPRS
+ MOVE_KIND_ENUM(FPR, GPR),
+ MOVE_KIND_ENUM(GPR, FPR),
+#endif
+};
+#undef MOVE_KIND_ENUM
+
+/* Emit the code that copies SRC into DST.  The pair of operand kinds
+   selects the primitive; the ABI recorded on the source operand selects
+   the width for loads, stores and immediates.  Any kind combination
+   without a case below aborts.
+
+   Each case calls the void helper and then returns: ISO C does not
+   allow `return expr;' in a function returning void.  */
+static void
+move_operand(jit_state_t *_jit, jit_operand_t dst, jit_operand_t src)
+{
+  switch (MOVE_KIND (src.kind, dst.kind)) {
+  case MOVE_IMM_TO_GPR:
+    abi_imm_to_gpr(_jit, src.abi, dst.loc.gpr.gpr, src.loc.imm);
+    return;
+
+  case MOVE_GPR_TO_GPR:
+    jit_movr(_jit, dst.loc.gpr.gpr, src.loc.gpr.gpr);
+    return;
+
+  case MOVE_MEM_TO_GPR:
+    abi_mem_to_gpr(_jit, src.abi, dst.loc.gpr.gpr, src.loc.mem.base,
+                   src.loc.mem.offset);
+    return;
+
+  case MOVE_FPR_TO_FPR:
+    ASSERT(src.abi == dst.abi);
+    if (src.abi == JIT_OPERAND_ABI_DOUBLE)
+      jit_movr_d(_jit, dst.loc.fpr.fpr, src.loc.fpr.fpr);
+    else
+      jit_movr_f(_jit, dst.loc.fpr.fpr, src.loc.fpr.fpr);
+    return;
+
+  case MOVE_MEM_TO_FPR:
+    abi_mem_to_fpr(_jit, src.abi, dst.loc.fpr.fpr, src.loc.mem.base,
+                   src.loc.mem.offset);
+    return;
+
+  case MOVE_IMM_TO_MEM:
+    abi_imm_to_mem(_jit, src.abi, dst.loc.mem.base, dst.loc.mem.offset,
+                   src.loc.imm);
+    return;
+
+  case MOVE_GPR_TO_MEM:
+    abi_gpr_to_mem(_jit, src.abi, dst.loc.mem.base, dst.loc.mem.offset,
+                   src.loc.gpr.gpr);
+    return;
+
+  case MOVE_FPR_TO_MEM:
+    abi_fpr_to_mem(_jit, src.abi, dst.loc.mem.base, dst.loc.mem.offset,
+                   src.loc.fpr.fpr);
+    return;
+
+  case MOVE_MEM_TO_MEM:
+    abi_mem_to_mem(_jit, src.abi, dst.loc.mem.base, dst.loc.mem.offset,
+                   src.loc.mem.base, src.loc.mem.offset);
+    return;
+
+#if JIT_PASS_DOUBLES_IN_GPR_PAIRS
+  case MOVE_GPR_PAIR_TO_FPR:
+    ASSERT(dst.abi == JIT_OPERAND_ABI_DOUBLE);
+    jit_movr_d_ww(_jit, dst.loc.fpr.fpr, src.loc.gpr_pair.l,
+                  src.loc.gpr_pair.h);
+    return;
+
+  case MOVE_FPR_TO_GPR_PAIR:
+    ASSERT(src.abi == JIT_OPERAND_ABI_DOUBLE);
+    jit_movr_ww_d(_jit, dst.loc.gpr_pair.l, dst.loc.gpr_pair.h,
+                  src.loc.fpr.fpr);
+    return;
+
+  case MOVE_MEM_TO_GPR_PAIR:
+    ASSERT(src.abi == JIT_OPERAND_ABI_DOUBLE);
+    jit_ldxi_ww(_jit, dst.loc.gpr_pair.l, dst.loc.gpr_pair.h,
+                src.loc.mem.base, src.loc.mem.offset);
+    return;
+
+  case MOVE_GPR_PAIR_TO_MEM:
+    ASSERT(dst.abi == JIT_OPERAND_ABI_DOUBLE);
+    jit_stxi_ww(_jit, dst.loc.mem.offset, dst.loc.mem.base,
+                src.loc.gpr_pair.l, src.loc.gpr_pair.h);
+    return;
+#endif
+
+#if JIT_PASS_FLOATS_IN_GPRS
+  case MOVE_GPR_TO_FPR:
+    jit_movr_f_w(_jit, dst.loc.fpr.fpr, src.loc.gpr.gpr);
+    return;
+
+  case MOVE_FPR_TO_GPR:
+    jit_movr_w_f(_jit, dst.loc.gpr.gpr, src.loc.fpr.fpr);
+    return;
+#endif
+
+  default:
+    abort();
+  }
+}
+
+// A direct transliteration of "Tilting at windmills with Coq: formal
+// verification of a compilation algorithm for parallel moves" by
+// Laurence Rideau, Bernard Paul Serpette, and Xavier Leroy:
+// https://xavierleroy.org/publi/parallel-move.pdf
+
+// Progress marker kept for each move: TO_MOVE before it has been
+// visited, BEING_MOVED while move_one is working on it, and MOVED once
+// its move has been emitted.
+enum move_status { TO_MOVE, BEING_MOVED, MOVED };
+
+/* Return nonzero when SRC and DST name the same location (same GPR,
+   same FPR, or same base register and offset), so no move is needed.
+   Every other kind combination yields 0.  */
+static inline int
+already_in_place(jit_operand_t src, jit_operand_t dst)
+{
+  int kind = MOVE_KIND(src.kind, dst.kind);
+
+  if (kind == MOVE_GPR_TO_GPR)
+    return jit_same_gprs (src.loc.gpr.gpr, dst.loc.gpr.gpr);
+
+  if (kind == MOVE_FPR_TO_FPR)
+    return jit_same_fprs (src.loc.fpr.fpr, dst.loc.fpr.fpr);
+
+  if (kind == MOVE_MEM_TO_MEM)
+    return jit_same_gprs (src.loc.mem.base, dst.loc.mem.base) &&
+      src.loc.mem.offset == dst.loc.mem.offset;
+
+  return 0;
+}
+
+/* Return nonzero when writing DST would destroy something SRC still
+   needs: either the two are the same location, or DST is a register
+   that SRC uses (as a memory base, as half of a GPR pair, or as the GPR
+   recorded on an FPR operand).  */
+static inline int
+write_would_clobber(jit_operand_t src, jit_operand_t dst)
+{
+  if (already_in_place (src, dst))
+    return 1;
+
+  switch (MOVE_KIND(src.kind, dst.kind)) {
+  case MOVE_MEM_TO_GPR:
+    return jit_same_gprs(src.loc.mem.base, dst.loc.gpr.gpr);
+
+#if JIT_PASS_DOUBLES_IN_GPR_PAIRS
+  case MOVE_GPR_PAIR_TO_GPR:
+    return jit_same_gprs(src.loc.gpr_pair.h, dst.loc.gpr.gpr)
+      || jit_same_gprs(src.loc.gpr_pair.l, dst.loc.gpr.gpr);
+
+  case MOVE_GPR_PAIR_TO_MEM:
+    return jit_same_gprs(src.loc.gpr_pair.h, dst.loc.mem.base)
+      || jit_same_gprs(src.loc.gpr_pair.l, dst.loc.mem.base);
+#endif
+
+#if JIT_PASS_FLOATS_IN_GPRS
+  case MOVE_FPR_TO_GPR:
+    return jit_same_gprs(src.loc.fpr.gpr, dst.loc.gpr.gpr);
+#endif
+
+  default:
+    return 0;
+  }
+}
+
+/* Return the addend stored on a GPR or MEM operand.  Operands of any
+   other kind abort.  */
+static inline ptrdiff_t
+operand_addend(jit_operand_t op)
+{
+  if (op.kind == JIT_OPERAND_KIND_GPR)
+    return op.loc.gpr.addend;
+
+  if (op.kind == JIT_OPERAND_KIND_MEM)
+    return op.loc.mem.addend;
+
+  abort();
+}
+
+// Emit the move for argument I, first making sure that no other move
+// still needs to read a location that writing dst[i] would clobber.
+// DST, SRC and STATUS are parallel arrays of ARGC entries.  A move that
+// is already in place returns without emitting anything and without
+// changing its status.
+static void
+move_one(jit_state_t *_jit, jit_operand_t *dst, jit_operand_t *src,
+ size_t argc, enum move_status *status, size_t i)
+{
+ int tmp_gpr = 0, tmp_fpr = 0;
+
+ if (already_in_place(src[i], dst[i]))
+ return;
+
+ status[i] = BEING_MOVED;
+ for (size_t j = 0; j < argc; j++) {
+ if (write_would_clobber(src[j], dst[i])) {
+ switch (status[j]) {
+ case TO_MOVE:
+ // Move J has not started and reads what we are about to overwrite:
+ // emit it first.
+ move_one(_jit, dst, src, argc, status, j);
+ break;
+ case BEING_MOVED: {
+ // Move J is still in progress (this move or one further up the
+ // recursion).  Copy its source into a temporary register and make
+ // move J read from that temporary instead.
+ jit_operand_t tmp;
+ // NOTE(review): an operand *kind* is cast to an ABI value here;
+ // src[j].abi looks like what was intended -- confirm against the
+ // enum definitions before relying on the FPR branch being taken.
+ if (is_fpr_arg ((enum jit_operand_abi)src[j].kind)) {
+ tmp_fpr = 1;
+ tmp = jit_operand_fpr(src[j].abi, get_temp_fpr(_jit));
+ } else {
+ tmp_gpr = 1;
+ /* Preserve addend, if any, from source operand, to be applied
+ at the end. */
+ tmp = jit_operand_gpr_with_addend(src[j].abi, get_temp_gpr(_jit),
+ operand_addend(src[j]));
+ }
+ move_operand (_jit, tmp, src[j]);
+ src[j] = tmp;
+ break;
+ }
+ case MOVED:
+ break;
+ default:
+ abort ();
+ }
+ }
+ }
+
+ move_operand (_jit, dst[i], src[i]);
+ status[i] = MOVED;
+ // NOTE(review): at most one temporary is released here; because of the
+ // `else if', an FPR temporary is not released when a GPR temporary was
+ // also taken during this call.
+ if (tmp_gpr)
+ unget_temp_gpr(_jit);
+ else if (tmp_fpr)
+ unget_temp_fpr(_jit);
+}
+
+/* Add the source operand's addend, if nonzero, to the value already
+   moved into DST.  A GPR destination is incremented in place; a MEM
+   destination is loaded into a temporary GPR, incremented and stored
+   back.  Other kind combinations are left alone.  */
+static void
+apply_addend(jit_state_t *_jit, jit_operand_t dst, jit_operand_t src)
+{
+  switch (MOVE_KIND(src.kind, dst.kind)) {
+  case MOVE_GPR_TO_GPR:
+  case MOVE_MEM_TO_GPR: {
+    ptrdiff_t addend = operand_addend(src);
+    if (addend != 0)
+      jit_addi(_jit, dst.loc.gpr.gpr, dst.loc.gpr.gpr, addend);
+    break;
+  }
+  case MOVE_GPR_TO_MEM:
+  case MOVE_MEM_TO_MEM: {
+    ptrdiff_t addend = operand_addend(src);
+    if (addend == 0)
+      break;
+
+    jit_gpr_t scratch = get_temp_gpr(_jit);
+    abi_mem_to_gpr(_jit, dst.abi, scratch, dst.loc.mem.base,
+                   dst.loc.mem.offset);
+    jit_addi(_jit, scratch, scratch, addend);
+    abi_gpr_to_mem(_jit, dst.abi, dst.loc.mem.base, dst.loc.mem.offset,
+                   scratch);
+    unget_temp_gpr(_jit);
+    break;
+  }
+  default:
+    break;
+  }
+}
+
+/* Emit code that moves each src[i] to dst[i] as one parallel move, so
+   that no source is overwritten before it has been read.
+
+   Preconditions: No dest operand is IMM.  No dest operand aliases
+   another dest operand.  No dest MEM operand uses a base register which
+   is used as a dest GPR.  No dst operand has an addend.  The registers
+   returned by get_temp_gpr and get_temp_fpr do not appear in source or
+   dest args.
+
+   With JIT_PASS_FLOATS_IN_GPRS, a FPR destination fed from a GPR source
+   gets that GPR recorded in dst[i].loc.fpr.gpr.  */
+void
+jit_move_operands(jit_state_t *_jit, jit_operand_t *dst, jit_operand_t *src,
+                  size_t argc)
+{
+  // Nothing to move; this also keeps the status array below from being
+  // a zero-length VLA, which ISO C does not permit.
+  if (argc == 0)
+    return;
+
+  // Check preconditions, except the condition about tmp registers.
+  {
+    uint64_t src_gprs = 0;
+    uint64_t dst_gprs = 0;
+    uint64_t dst_fprs = 0;
+    uint64_t dst_mem_base_gprs = 0;
+    for (size_t i = 0; i < argc; i++) {
+      switch (src[i].kind) {
+      case JIT_OPERAND_KIND_GPR:
+        src_gprs |= 1ULL << jit_gpr_regno(src[i].loc.gpr.gpr);
+        break;
+#if JIT_PASS_DOUBLES_IN_GPR_PAIRS
+      case JIT_OPERAND_KIND_GPR_PAIR: {
+        // Record the registers of the *source* pair.  dst[i] need not be
+        // a GPR pair at all here, so its gpr_pair member must not be read.
+        uint64_t bit0 = 1ULL << jit_gpr_regno(src[i].loc.gpr_pair.l);
+        uint64_t bit1 = 1ULL << jit_gpr_regno(src[i].loc.gpr_pair.h);
+        src_gprs |= bit0 | bit1;
+        break;
+      }
+#endif
+      case JIT_OPERAND_KIND_FPR:
+      case JIT_OPERAND_KIND_IMM:
+      case JIT_OPERAND_KIND_MEM:
+        break;
+      default:
+        abort();
+      }
+      switch (dst[i].kind) {
+      case JIT_OPERAND_KIND_GPR: {
+        ASSERT(dst[i].loc.gpr.addend == 0);
+        uint64_t bit = 1ULL << jit_gpr_regno(dst[i].loc.gpr.gpr);
+        ASSERT((dst_gprs & bit) == 0);
+        dst_gprs |= bit;
+        break;
+      }
+      case JIT_OPERAND_KIND_FPR: {
+#if JIT_PASS_FLOATS_IN_GPRS
+        if (src[i].kind == JIT_OPERAND_KIND_GPR) {
+          dst[i].loc.fpr.gpr = src[i].loc.gpr.gpr;
+        }
+#endif
+        uint64_t bit = 1ULL << jit_fpr_regno(dst[i].loc.fpr.fpr);
+        ASSERT((dst_fprs & bit) == 0);
+        dst_fprs |= bit;
+        break;
+      }
+      case JIT_OPERAND_KIND_MEM: {
+        ASSERT(dst[i].loc.mem.addend == 0);
+        uint64_t bit = 1ULL << jit_gpr_regno(dst[i].loc.mem.base);
+        dst_mem_base_gprs |= bit;
+        break;
+      }
+#if JIT_PASS_DOUBLES_IN_GPR_PAIRS
+      case JIT_OPERAND_KIND_GPR_PAIR: {
+        uint64_t bit0 = 1ULL << jit_gpr_regno(dst[i].loc.gpr_pair.l);
+        uint64_t bit1 = 1ULL << jit_gpr_regno(dst[i].loc.gpr_pair.h);
+        dst_gprs |= bit0 | bit1;
+        break;
+      }
+#endif
+      case JIT_OPERAND_KIND_IMM:
+      default:
+        abort();
+        break;
+      }
+    }
+    ASSERT(((src_gprs | dst_gprs) & dst_mem_base_gprs) == 0);
+  }
+
+  enum move_status status[argc];
+  for (size_t i = 0; i < argc; i++)
+    status[i] = TO_MOVE;
+  for (size_t i = 0; i < argc; i++)
+    if (status[i] == TO_MOVE)
+      move_one(_jit, dst, src, argc, status, i);
+
+  // Apply addends at the end.  We could do it earlier in some cases but
+  // at least at the end we know that an in-place increment of one
+  // operand won't alias another.
+  for (size_t i = 0; i < argc; i++)
+    apply_addend(_jit, dst[i], src[i]);
+}
+
+/* Grow the frame by at least EXPAND bytes, rounding the resulting frame
+   size up to jit_stack_alignment(), and emit the SP adjustment.
+   Returns the number of bytes SP was actually lowered by.  */
+size_t
+jit_align_stack(jit_state_t *_jit, size_t expand)
+{
+  size_t wanted = _jit->frame_size + expand;
+  // Align stack to double-word boundaries.  This isn't really a
+  // principle but it does work for Aarch32, AArch64 and x86-64.
+  size_t align = jit_stack_alignment ();
+  size_t rounded = (wanted + align - 1) & ~(align - 1);
+  size_t grown = rounded - _jit->frame_size;
+
+  if (grown != 0)
+    jit_subi (_jit, JIT_SP, JIT_SP, grown);
+
+  _jit->frame_size = rounded;
+  return grown;
+}
+
+/* Give DIFF bytes of stack back: emit the SP increment (skipped when
+   DIFF is zero) and reduce the recorded frame size by DIFF.  */
+void
+jit_shrink_stack(jit_state_t *_jit, size_t diff)
+{
+  if (diff != 0) {
+    jit_addi (_jit, JIT_SP, JIT_SP, diff);
+  }
+
+  _jit->frame_size = _jit->frame_size - diff;
+}
+
+// Register tables used by jit_enter_jit_abi/jit_leave_jit_abi.  The two
+// "platform" lists come from per-target macros; the two "user" lists
+// contain whichever JIT_Vn / JIT_VFn registers the target defines.
+static const jit_gpr_t platform_callee_save_gprs[] = {
+ JIT_PLATFORM_CALLEE_SAVE_GPRS
+};
+
+static const jit_fpr_t platform_callee_save_fprs[] = {
+ JIT_PLATFORM_CALLEE_SAVE_FPRS
+};
+
+// JIT_V0..JIT_V2 are listed unconditionally; JIT_V3..JIT_V9 only when
+// the target defines them.
+static const jit_gpr_t user_callee_save_gprs[] = {
+ JIT_V0, JIT_V1, JIT_V2
+#ifdef JIT_V3
+ , JIT_V3
+#endif
+#ifdef JIT_V4
+ , JIT_V4
+#endif
+#ifdef JIT_V5
+ , JIT_V5
+#endif
+#ifdef JIT_V6
+ , JIT_V6
+#endif
+#ifdef JIT_V7
+ , JIT_V7
+#endif
+#ifdef JIT_V8
+ , JIT_V8
+#endif
+#ifdef JIT_V9
+ , JIT_V9
+#endif
+ };
+
+// NOTE(review): only the JIT_VF0 entry lacks a leading comma, so this
+// initializer is well-formed only if JIT_VF0 is defined whenever any
+// later JIT_VFn is; if no JIT_VFn is defined the list is empty.
+static const jit_fpr_t user_callee_save_fprs[] = {
+#ifdef JIT_VF0
+ JIT_VF0
+#endif
+#ifdef JIT_VF1
+ , JIT_VF1
+#endif
+#ifdef JIT_VF2
+ , JIT_VF2
+#endif
+#ifdef JIT_VF3
+ , JIT_VF3
+#endif
+#ifdef JIT_VF4
+ , JIT_VF4
+#endif
+#ifdef JIT_VF5
+ , JIT_VF5
+#endif
+#ifdef JIT_VF6
+ , JIT_VF6
+#endif
+#ifdef JIT_VF7
+ , JIT_VF7
+#endif
+};
+
+// Number of elements in a true array (not valid on a pointer).
+#define ARRAY_SIZE(X) (sizeof (X)/sizeof ((X)[0]))
+// Element counts of the four tables above.
+static const size_t pv_count = ARRAY_SIZE(platform_callee_save_gprs);
+static const size_t pf_count = ARRAY_SIZE(platform_callee_save_fprs);
+static const size_t v_count = ARRAY_SIZE(user_callee_save_gprs);
+static const size_t vf_count = ARRAY_SIZE(user_callee_save_fprs);
+
+/* Emit the entry sequence for JIT code: reset the frame size, run the
+   target prolog if it needs one, reserve an aligned save area and store
+   the first VF user FPRs, the first V user GPRs, and all platform
+   callee-save FPRs and GPRs into it, in that order from SP upwards.
+
+   Every FPR is stored as a double and therefore occupies 8 bytes,
+   whatever the word size; GPRs occupy one word each.  The FRAME_SIZE
+   argument is unused.
+
+   Returns the number of bytes SP was lowered by.  jit_leave_jit_abi
+   shrinks the stack by its own frame_size argument, so callers are
+   expected to pass this value there.  */
+size_t
+jit_enter_jit_abi(jit_state_t *_jit, size_t v, size_t vf, size_t frame_size)
+{
+  (void)frame_size;
+
+  ASSERT(v <= v_count);
+  ASSERT(vf <= vf_count);
+
+  _jit->frame_size = jit_initial_frame_size();
+
+#if JIT_NEEDS_PROLOG
+  jit_prolog(_jit);
+#endif
+
+  size_t reserved =
+    jit_align_stack(_jit, (pv_count + v) * (__WORDSIZE / 8)
+                          + (pf_count + vf) * 8);
+
+  size_t offset = 0;
+  for (size_t i = 0; i < vf; i++, offset += 8)
+    jit_stxi_d(_jit, offset, JIT_SP, user_callee_save_fprs[i]);
+  for (size_t i = 0; i < v; i++, offset += __WORDSIZE / 8)
+    jit_stxi(_jit, offset, JIT_SP, user_callee_save_gprs[i]);
+  // Platform FPRs are written with a double store, so step by 8 bytes:
+  // stepping by the word size would make the slots overlap on 32-bit
+  // targets.
+  for (size_t i = 0; i < pf_count; i++, offset += 8)
+    jit_stxi_d(_jit, offset, JIT_SP, platform_callee_save_fprs[i]);
+  for (size_t i = 0; i < pv_count; i++, offset += __WORDSIZE / 8)
+    jit_stxi(_jit, offset, JIT_SP, platform_callee_save_gprs[i]);
+  ASSERT(offset <= reserved);
+
+  return reserved;
+}
+
+/* Emit the exit sequence matching jit_enter_jit_abi: reload the saved
+   registers from the same offsets, release FRAME_SIZE bytes of stack,
+   subtract the initial frame size from the recorded frame size and run
+   the target epilog if it needs one.  V and VF must be the values given
+   to jit_enter_jit_abi so the offsets line up.  */
+void
+jit_leave_jit_abi(jit_state_t *_jit, size_t v, size_t vf, size_t frame_size)
+{
+  ASSERT(v <= v_count);
+  ASSERT(vf <= vf_count);
+  ASSERT((pv_count + v) * (__WORDSIZE / 8) + (pf_count + vf) * 8
+         <= frame_size);
+
+  size_t offset = 0;
+  for (size_t i = 0; i < vf; i++, offset += 8)
+    jit_ldxi_d(_jit, user_callee_save_fprs[i], JIT_SP, offset);
+  for (size_t i = 0; i < v; i++, offset += __WORDSIZE / 8)
+    jit_ldxi(_jit, user_callee_save_gprs[i], JIT_SP, offset);
+  // Same 8-byte stride as the double stores in jit_enter_jit_abi.
+  for (size_t i = 0; i < pf_count; i++, offset += 8)
+    jit_ldxi_d(_jit, platform_callee_save_fprs[i], JIT_SP, offset);
+  for (size_t i = 0; i < pv_count; i++, offset += __WORDSIZE / 8)
+    jit_ldxi(_jit, platform_callee_save_gprs[i], JIT_SP, offset);
+  ASSERT(offset <= frame_size);
+
+  jit_shrink_stack(_jit, frame_size);
+  _jit->frame_size -= jit_initial_frame_size();
+
+#if JIT_NEEDS_PROLOG
+  jit_epilog(_jit);
+#endif
+}
+
+// Place the ARGC call arguments in ARGS where the ABI argument iterator
+// says they belong, reserving aligned stack space for the arguments it
+// puts on the stack.  ARGS is modified in place.  Returns the number of
+// bytes the stack was grown by, for the caller to release after the
+// call.  _jit->preparing_call is set for the duration of the function.
+//
+// Precondition: stack is already aligned.
+static size_t
+prepare_call_args(jit_state_t *_jit, size_t argc, jit_operand_t args[])
+{
+  _jit->preparing_call = 1;
+  // A variable-length array must have at least one element, and calls
+  // without arguments reach this point with argc == 0.
+  jit_operand_t dst[argc ? argc : 1];
+  struct abi_arg_iterator iter;
+
+  // Compute shuffle destinations and space for spilled arguments.
+#if JIT_ASYMMETRIC_STACK
+  reset_call_arg_iterator(&iter, argc, args);
+#else
+  reset_abi_arg_iterator(&iter, argc, args);
+#endif
+  for (size_t i = 0; i < argc; i++)
+    next_abi_arg(&iter, &dst[i]);
+
+  // Reserve space for spilled arguments and ensure stack alignment.
+  size_t stack_size = jit_align_stack(_jit, iter.stack_size);
+
+  // Fix up SP-relative operands: SP has just moved down by stack_size,
+  // so anything expressed relative to the old SP is compensated.
+  for (size_t i = 0; i < argc; i++) {
+    switch (args[i].kind) {
+    case JIT_OPERAND_KIND_GPR:
+      if (jit_same_gprs (args[i].loc.gpr.gpr, JIT_SP))
+        args[i].loc.gpr.addend += stack_size;
+      break;
+    case JIT_OPERAND_KIND_MEM:
+      if (jit_same_gprs (args[i].loc.mem.base, JIT_SP))
+        args[i].loc.mem.offset += stack_size;
+      break;
+
+#if JIT_PASS_FLOATS_IN_GPRS
+    case JIT_OPERAND_KIND_FPR:
+      // Record on the FPR source which GPR it is headed for.
+      if (dst[i].kind == JIT_OPERAND_KIND_GPR)
+        args[i].loc.fpr.gpr = dst[i].loc.gpr.gpr;
+      break;
+#endif
+
+    default:
+      break;
+    }
+  }
+
+  jit_move_operands(_jit, dst, args, argc);
+
+  _jit->preparing_call = 0;
+  return stack_size;
+}
+
+/* Emit a call to the fixed address F: place the ARGC arguments, emit
+   the call, then release the stack the arguments needed.  */
+void
+jit_calli(jit_state_t *_jit, jit_pointer_t f, size_t argc, jit_operand_t args[])
+{
+  size_t arg_stack = prepare_call_args(_jit, argc, args);
+  calli(_jit, (jit_word_t)f);
+  jit_shrink_stack(_jit, arg_stack);
+}
+
+/* Emit a call through the register F: place the ARGC arguments, emit
+   the call, then release the stack the arguments needed.  */
+void
+jit_callr(jit_state_t *_jit, jit_gpr_t f, size_t argc, jit_operand_t args[])
+{
+  size_t arg_stack = prepare_call_args(_jit, argc, args);
+  callr(_jit, jit_gpr_regno(f));
+  jit_shrink_stack(_jit, arg_stack);
+}
+
+/* Overwrite each of the ARGC entries of ARGS with the location the ABI
+   argument iterator yields for it.  The iterator's stack size is first
+   increased by the current frame size.  */
+void
+jit_locate_args(jit_state_t *_jit, size_t argc, jit_operand_t args[])
+{
+  struct abi_arg_iterator iter;
+  size_t i = 0;
+
+#if JIT_ASYMMETRIC_STACK
+  reset_load_arg_iterator(&iter, argc, args);
+#else
+  reset_abi_arg_iterator(&iter, argc, args);
+#endif
+  iter.stack_size += _jit->frame_size;
+
+  while (i < argc) {
+    next_abi_arg(&iter, &args[i]);
+    i++;
+  }
+}
+
+/* Move the incoming arguments from the locations the ABI assigns them
+   into the locations named by ARGS.
+
+   Precondition: args are distinct locations of type GPR or FPR.  All
+   addends of arg operands are zero.  No GPR arg is SP.  */
+void
+jit_load_args(jit_state_t *_jit, size_t argc, jit_operand_t args[])
+{
+  // Nothing to load; this also avoids declaring a zero-length VLA,
+  // which ISO C does not permit.
+  if (argc == 0)
+    return;
+
+  jit_operand_t src[argc];
+
+  // jit_locate_args reads the operands it is given and overwrites them,
+  // so run it on a copy and keep ARGS as the destinations.
+  memcpy(src, args, sizeof(src[0]) * argc);
+
+  jit_locate_args(_jit, argc, src);
+  jit_move_operands(_jit, args, src, argc);
+}
+
+#ifdef JIT_NEEDS_LITERAL_POOL
+/* Upper bound, in bytes, used for the space POOL would take if it were
+   emitted now.  */
+static uint32_t
+literal_pool_byte_size(struct jit_literal_pool *pool)
+{
+  // Check arch header for actual values for these literals, or if applicable,
+  // see default values defined in lightening.h
+  uint32_t bytes =
+    JIT_EXTRA_SPACE + JIT_JMP_MAX_SIZE + 7 + pool->size * JIT_LITERAL_MAX_SIZE;
+
+  return bytes;
+}
+
+/* Empty POOL: zero the entries that were in use, set the size back to
+   zero and move the deadline out to the end of the code buffer.  */
+static void
+reset_literal_pool(jit_state_t *_jit, struct jit_literal_pool *pool)
+{
+  size_t used_bytes = sizeof(pool->entries[0]) * pool->size;
+
+  pool->deadline = _jit->limit - _jit->start;
+  memset(pool->entries, 0, used_bytes);
+  pool->size = 0;
+}
+
+#define INITIAL_LITERAL_POOL_CAPACITY 12
+/* Allocate an empty literal pool with room for CAPACITY entries using
+   the JIT state's allocator.  A CAPACITY of zero selects
+   INITIAL_LITERAL_POOL_CAPACITY.  */
+static struct jit_literal_pool*
+alloc_literal_pool(jit_state_t *_jit, size_t capacity)
+{
+  size_t slots = capacity == 0 ? INITIAL_LITERAL_POOL_CAPACITY : capacity;
+  struct jit_literal_pool *pool =
+    _jit->alloc (sizeof (struct jit_literal_pool) +
+                 sizeof (struct jit_literal_pool_entry) * slots);
+
+  ASSERT (pool);
+  pool->capacity = slots;
+  pool->size = 0;
+  reset_literal_pool(_jit, pool);
+  return pool;
+}
+
+/* Replace the JIT state's pool with one of twice the capacity, carrying
+   over the existing entries and the deadline, and free the old pool.  */
+static void
+grow_literal_pool(jit_state_t *_jit)
+{
+  struct jit_literal_pool *old = _jit->pool;
+  struct jit_literal_pool *bigger = alloc_literal_pool(_jit, old->capacity * 2);
+
+  for (size_t i = 0; i < old->size; i++) {
+    bigger->entries[bigger->size] = old->entries[i];
+    bigger->size++;
+  }
+  bigger->deadline = old->deadline;
+
+  _jit->free (old);
+  _jit->pool = bigger;
+}
+
+// Queue ENTRY in the pending literal pool.  MAX_OFFSET is the furthest
+// the referring instruction can reach.
+// Returns 1 when the entry was queued, or when the JIT state has
+// already overflowed (nothing is done in that case).  Returns 0 when
+// the pool had to be emitted first because MAX_OFFSET does not exceed
+// the pool's current byte size; the entry is NOT queued in that case.
+static jit_bool_t
+add_literal_pool_entry(jit_state_t *_jit, struct jit_literal_pool_entry entry,
+ uint32_t max_offset)
+{
+ if (_jit->overflow)
+ return 1;
+
+ if (max_offset <= literal_pool_byte_size(_jit->pool)) {
+ emit_literal_pool(_jit, GUARD_NEEDED);
+ return 0;
+ }
+
+ if (_jit->pool->size == _jit->pool->capacity)
+ grow_literal_pool (_jit);
+
+ // Work out the latest buffer offset by which the pool must have been
+ // emitted for this entry, and keep the smallest such deadline.
+ uint32_t loc_offset = _jit->pc.uc - _jit->start;
+ uint32_t inst_offset = loc_offset - entry.reloc.inst_start_offset;
+ uint32_t pc_base_offset = inst_offset + entry.reloc.pc_base_offset;
+ uint32_t deadline =
+ pc_base_offset + (max_offset - literal_pool_byte_size(_jit->pool));
+ if (deadline < _jit->pool->deadline)
+ _jit->pool->deadline = deadline;
+
+ _jit->pool->entries[_jit->pool->size++] = entry;
+
+ return 1;
+}
+
+/* Queue a literal-pool entry for the relocation SRC with a value of 0
+   (to be filled in later).  MAX_OFFSET_BITS is the width of the
+   instruction's offset field; together with SRC.rsh it gives the reach
+   of the instruction, from which one maximum instruction size is
+   subtracted.  Returns what add_literal_pool_entry returns.  */
+static jit_bool_t
+add_pending_literal(jit_state_t *_jit, jit_reloc_t src,
+                    uint8_t max_offset_bits)
+{
+  struct jit_literal_pool_entry entry = { src, 0 };
+  uint32_t max_inst_size = JIT_INST_MAX_SIZE;
+  // Shift an unsigned 32-bit one: `1 << 31' on a plain int is undefined.
+  uint32_t max_offset =
+    (UINT32_C(1) << (max_offset_bits + src.rsh)) - max_inst_size;
+  return add_literal_pool_entry(_jit, entry, max_offset);
+}
+
+/* Drop the most recently queued pool entry whose relocation offset
+   equals SRC.offset, closing the gap it leaves.  Aborts if there is no
+   such entry.  */
+static void
+remove_pending_literal(jit_state_t *_jit, jit_reloc_t src)
+{
+  size_t count = _jit->pool->size;
+  size_t i = count;
+
+  while (i > 0) {
+    i--;
+    if (_jit->pool->entries[i].reloc.offset != src.offset)
+      continue;
+
+    // Shift the later entries down by one.
+    for (size_t k = i; k + 1 < count; k++)
+      _jit->pool->entries[k] = _jit->pool->entries[k + 1];
+    _jit->pool->size = count - 1;
+    return;
+  }
+
+  abort();
+}
+
+/* Set the value of the most recently queued pool entry whose relocation
+   offset equals SRC.offset.  The entry must not have a value yet.
+   Aborts if there is no such entry.  */
+static void
+patch_pending_literal(jit_state_t *_jit, jit_reloc_t src, uintptr_t value)
+{
+  size_t i = _jit->pool->size;
+
+  while (i > 0) {
+    i--;
+    if (_jit->pool->entries[i].reloc.offset != src.offset)
+      continue;
+
+    ASSERT(_jit->pool->entries[i].value == 0);
+    _jit->pool->entries[i].value = value;
+    return;
+  }
+
+  abort();
+}
+
+// Write every pending pool entry at the current emission point, patch
+// the instruction that refers to each one, then empty the pool.
+// Does nothing if the JIT state has overflowed or the pool is empty,
+// and stops without resetting the pool if an overflow is detected part
+// way through.  With GUARD_NEEDED a jump is emitted before the entries
+// and patched after them -- presumably so that straight-line execution
+// skips over the pool data.
+static void
+emit_literal_pool(jit_state_t *_jit, enum guard_pool guard)
+{
+ if (_jit->overflow)
+ return;
+
+ if (!_jit->pool->size)
+ return;
+
+ uint32_t *patch_loc = NULL;
+ if (guard == GUARD_NEEDED)
+ patch_loc = jmp_without_veneer(_jit);
+
+ // FIXME: Could de-duplicate constants.
+ for (size_t i = 0; i < _jit->pool->size; i++) {
+ // Align to word boundary without emitting pool.
+ if (_jit->pc.w & 1) emit_u8(_jit, 0);
+ if (_jit->pc.w & 2) emit_u16(_jit, 0);
+ if (sizeof(uintptr_t) > 4 && (_jit->pc.w & 4))
+ emit_u32(_jit, 0);
+ ASSERT((_jit->pc.w & (sizeof(uintptr_t) - 1)) == 0);
+ struct jit_literal_pool_entry *entry = &_jit->pool->entries[i];
+ // Distance from the referring instruction's PC base to this pool
+ // slot, scaled down by the relocation's right shift.
+ uint8_t *loc = _jit->start + entry->reloc.offset;
+ uint8_t *pc_base =
+ loc - entry->reloc.inst_start_offset + entry->reloc.pc_base_offset;
+ ptrdiff_t diff = _jit->pc.uc - pc_base;
+ diff >>= entry->reloc.rsh;
+
+ if (_jit->overflow)
+ return;
+
+ // Patch the referring instruction, then emit the slot contents: a
+ // veneer for the two jump kinds, the raw value for a pool load.
+ switch (entry->reloc.kind & JIT_RELOC_MASK) {
+ case JIT_RELOC_JMP_WITH_VENEER:
+ patch_veneer_jmp_offset((uint32_t*) loc, diff);
+ emit_veneer(_jit, (void*) entry->value);
+ break;
+ case JIT_RELOC_JCC_WITH_VENEER:
+ patch_veneer_jcc_offset((uint32_t*) loc, diff);
+ emit_veneer(_jit, (void*) entry->value);
+ break;
+ case JIT_RELOC_LOAD_FROM_POOL:
+ patch_load_from_pool_offset((uint32_t*) loc, diff);
+ emit_uintptr(_jit, entry->value);
+ break;
+ default:
+ abort();
+ }
+ }
+
+ if (_jit->overflow)
+ return;
+
+ if (guard == GUARD_NEEDED)
+ patch_jmp_without_veneer(_jit, patch_loc);
+
+ reset_literal_pool(_jit, _jit->pool);
+}
+#endif
diff --git a/deps/lightening/lightening/mips-cpu.c b/deps/lightening/lightening/mips-cpu.c
new file mode 100644
index 0000000..bf8b5ba
--- /dev/null
+++ b/deps/lightening/lightening/mips-cpu.c
@@ -0,0 +1,2674 @@
+/*
+ * Copyright (C) 2012-2017 Free Software Foundation, Inc.
+ *
+ * This file is part of GNU lightning.
+ *
+ * GNU lightning is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ *
+ * GNU lightning is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+ * License for more details.
+ *
+ * Authors:
+ * Paulo Cesar Pereira de Andrade
+ */
+
+/* One 32-bit instruction word (member w) overlaid with bit-field views
+   of the R, I and J layouts used by Rtype(), Itype() and Jtype().
+   The two preprocessor arms declare the same fields in opposite order
+   depending on __BYTE_ORDER.
+   NOTE(review): this relies on the compiler allocating bit-fields in an
+   order that follows the byte order -- confirm for each toolchain.  */
+typedef union {
+#if __BYTE_ORDER == __LITTLE_ENDIAN
+ struct {
+ uint32_t funct:6;
+ uint32_t shamt:5;
+ uint32_t rd:5;
+ uint32_t rt:5;
+ uint32_t rs:5;
+ uint32_t op:6;
+ } R;
+
+ struct {
+ int32_t i0:16;
+ uint32_t rt:5;
+ uint32_t rs:5;
+ uint32_t op:6;
+ } I;
+
+ struct {
+ uint32_t addr:26;
+ uint32_t op:6;
+ } J;
+#else
+ struct {
+ uint32_t op:6;
+ uint32_t rs:5;
+ uint32_t rt:5;
+ uint32_t rd:5;
+ uint32_t shamt:5;
+ uint32_t funct:6;
+ } R;
+
+ struct {
+ uint32_t op:6;
+ uint32_t rs:5;
+ uint32_t rt:5;
+ int32_t i0:16;
+ } I;
+
+ struct {
+ uint32_t op:6;
+ uint32_t addr:26;
+ } J;
+#endif
+ uint32_t w;
+} instr_t;
+
+/* Range predicates for immediates.  A signed 16-bit value spans
+   -32768..32767; the lower bound used to be mistyped as -32678, which
+   rejected the 90 smallest values that do fit.  */
+#define can_sign_extend_short_p(im) ((im) >= -32768 && (im) <= 32767)
+#define can_zero_extend_short_p(im) ((im) >= 0 && (im) <= 65535)
+#if __WORDSIZE == 32
+#define can_sign_extend_int_p(im) 1
+#define can_zero_extend_int_p(im) 1
+#else
+#define can_sign_extend_int_p(im) \
+ (((im) >= 0 && (im) <= 0x7fffffffL) || \
+ ((im) < 0 && (im) >= -0x80000000L))
+#define can_zero_extend_int_p(im) ((im) >= 0 && (im) <= 0xffffffff)
+#endif
+
+/* True when i0 fits in a signed field of the given width.  */
+#define simm16_p(i0) ((i0) <= 0x7fff && (i0) >= -0x8000)
+#define simm18_p(i0) ((i0) <= 0x1ffff && (i0) >= -0x20000)
+#define simm26_p(i0) ((i0) <= 0x1ffffff && (i0) >= -0x2000000)
+
+/* True when i0 has no bits set above an unsigned field of the given
+   width.  */
+#define uimm16_p(i0) (!((i0) & ~0xffff))
+#define uimm18_p(i0) (!((i0) & ~0x3ffff))
+#define uimm26_p(i0) (!((i0) & ~0x3ffffff))
+
+/* Field-width checks for the 6-bit opcode and 5-bit register fields.  */
+#define op_p(op) (!((op) & ~0x3f))
+#define reg_p(r) (!((r) & ~0x1f))
+
+/* Emit one instruction word through emit_u32_with_pool.  */
+#define em_wp(jit, inst) emit_u32_with_pool(jit, inst)
+
+/* Assemble an R-format instruction word from its six fields.  Every
+   argument is asserted to fit in its field.  */
+static uint32_t
+Rtype(int32_t op, int32_t rs, int32_t rt, int32_t rd, int32_t shamt,
+      int32_t funct)
+{
+  instr_t insn;
+
+  assert(op_p(op));
+  assert(reg_p(rs));
+  assert(reg_p(rt));
+  assert(reg_p(rd));
+  assert(!(shamt & ~0x1f));
+  assert(!(funct & ~0x3f));
+
+  insn.R.funct = funct;
+  insn.R.shamt = shamt;
+  insn.R.rd = rd;
+  insn.R.rt = rt;
+  insn.R.rs = rs;
+  insn.R.op = op;
+
+  return insn.w;
+}
+
+/* Assemble an I-format instruction word.  The immediate is asserted to
+   fit in 16 bits, read either as signed or as unsigned.  */
+static uint32_t
+Itype(int32_t op, int32_t rs, int32_t rt, int32_t i0)
+{
+  instr_t insn;
+
+  assert(op_p(op));
+  assert(reg_p(rs));
+  assert(reg_p(rt));
+  assert(simm16_p(i0) || uimm16_p(i0));
+
+  insn.I.i0 = i0;
+  insn.I.rt = rt;
+  insn.I.rs = rs;
+  insn.I.op = op;
+
+  return insn.w;
+}
+
+/* Assemble a J-format instruction word.  The address is asserted to fit
+   in 26 bits, read either as signed or as unsigned.  */
+static uint32_t
+Jtype(int32_t op, int32_t addr)
+{
+  instr_t insn;
+
+  assert(op_p(op));
+  assert(simm26_p(addr) || uimm26_p(addr));
+
+  insn.J.addr = addr;
+  insn.J.op = op;
+
+  return insn.w;
+}
+
+/*
+ * FIXME
+ *
+ * NOTE(review): the FIXME carries no text; it presumably refers to
+ * jit_mips2_p() below being hard-wired to 0 -- confirm.
+ *
+ * Numeric values for instruction fields.  Several names share a number
+ * (for example OP_J and OP_SRL are both 0x02) because they are used in
+ * different fields: the emitter macros pass some of them as the `op'
+ * argument of Rtype()/Itype() (e.g. OP_SPECIAL, OP_LUI) and others as
+ * the `funct' argument (e.g. OP_SLL, OP_ADDU).  OP_MFH and OP_MTH are
+ * each defined twice with the same value.
+ */
+#define jit_mips2_p() 0x00
+#define OP_SPECIAL 0x00
+#define OP_REGIMM 0x01
+#define OP_J 0x02
+#define OP_SRL 0x02
+#define OP_JAL 0x03
+#define OP_SRA 0x03
+#define OP_BEQ 0x04
+#define OP_BNE 0x05
+#define OP_BLEZ 0x06
+#define OP_BGTZ 0x07
+#define OP_ADDI 0x08
+#define OP_ADDIU 0x09
+#define OP_SLTI 0x0a
+#define OP_SLTIU 0x0b
+#define OP_ANDI 0x0c
+#define OP_ORI 0x0d
+#define OP_XORI 0x0e
+#define OP_LUI 0x0f
+#define OP_COP0 0x10
+#define OP_COP1 0x11
+#define OP_COP2 0x12
+#define OP_COP1X 0x13
+#define OP_BEQL 0x14
+#define OP_BNEL 0x15
+#define OP_BLEZL 0x16
+#define OP_BGTZL 0x17
+#define OP_DADDI 0x18
+#define OP_DADDIU 0x19
+#define OP_LDL 0x1a
+#define OP_LDR 0x1b
+#define OP_SPECIAL2 0x1c
+#define OP_JALX 0x1d
+#define OP_SPECIAL3 0x1f
+#define OP_LB 0x20
+#define OP_LH 0x21
+#define OP_LWL 0x22
+#define OP_LW 0x23
+#define OP_LBU 0x24
+#define OP_LHU 0x25
+#define OP_LWR 0x26
+#define OP_LWU 0x27
+#define OP_SB 0x28
+#define OP_SH 0x29
+#define OP_SWL 0x2a
+#define OP_SW 0x2b
+#define OP_SWR 0x2e
+#define OP_CACHE 0x2f
+#define OP_LL 0x30
+#define OP_LWC1 0x31
+#define OP_LWC2 0x32
+#define OP_PREF 0x33
+#define OP_LLD 0x34
+#define OP_LDC1 0x35
+#define OP_LDC2 0x36
+#define OP_LD 0x37
+#define OP_SC 0x38
+#define OP_SCD 0x3c
+#define OP_SDC1 0x3d
+#define OP_SDC2 0x3e
+#define OP_SWC1 0x39
+#define OP_SWC2 0x3a
+#define OP_SD 0x3f
+#define OP_MF 0x00
+#define OP_MFH 0x03
+#define OP_DMF 0x01
+#define OP_CF 0x02
+#define OP_MFH 0x03
+#define OP_MT 0x04
+#define OP_MTH 0x07
+#define OP_DMT 0x05
+#define OP_CT 0x06
+#define OP_MTH 0x07
+#define OP_BC 0x08
+#define OP_WRPGPR 0x0e
+#define OP_BGZAL 0x11
+#define OP_MFMC0 0x11
+#define OP_BCF 0x00
+#define OP_BLTZ 0x00
+#define OP_BCT 0x01
+#define OP_BGEZ 0x01
+#define OP_BCFL 0x02
+#define OP_BLTZL 0x02
+#define OP_BCTL 0x03
+#define OP_BGEZL 0x03
+#define OP_TGEI 0x08
+#define OP_TGEIU 0x09
+#define OP_TLTI 0x0a
+#define OP_TLTIU 0x0b
+#define OP_TEQI 0x0c
+#define OP_TNEI 0x0e
+#define OP_BLTZAL 0x10
+#define OP_BGEZAL 0x11
+#define OP_BLTZALL 0x12
+#define OP_BGEZALL 0x13
+#define OP_SYNCI 0x1f
+#define OP_WSBH 0x02
+#define OP_DBSH 0x02
+#define OP_DSHD 0x05
+#define OP_SEB 0x10
+#define OP_SEH 0x18
+#define OP_MADD 0x00
+#define OP_SLL 0x00
+#define OP_EXT 0x00
+#define OP_DEXTM 0x01
+#define OP_MADDU 0x01
+#define OP_MOVFT 0x01
+#define OP_TLBR 0x01
+#define OP_MUL 0x02
+#define OP_DEXTU 0x02
+#define OP_TLBWI 0x02
+#define OP_DEXT 0x03
+#define OP_SLLV 0x04
+#define OP_INS 0x04
+#define OP_MSUB 0x04
+#define OP_DINSM 0x05
+#define OP_MSUBU 0x05
+#define OP_SRLV 0x06
+#define OP_DINSU 0x06
+#define OP_TLBWR 0x06
+#define OP_SRAV 0x07
+#define OP_DINS 0x07
+#define OP_JR 0x08
+#define OP_TLBP 0x08
+#define OP_JALR 0x09
+#define OP_MOVZ 0x0a
+#define OP_MOVN 0x0b
+#define OP_SYSCALL 0x0c
+#define OP_BREAK 0x0d
+#define OP_PREFX 0x0f
+#define OP_SYNC 0x0f
+#define OP_MFHI 0x10
+#define OP_MTHI 0x11
+#define OP_MFLO 0x12
+#define OP_MTLO 0x13
+#define OP_DSLLV 0x14
+#define OP_DSRLV 0x16
+#define OP_DSRAV 0x17
+#define OP_MULT 0x18
+#define OP_ERET 0x18
+#define OP_MULTU 0x19
+#define OP_DIV 0x1a
+#define OP_DIVU 0x1b
+#define OP_DMULT 0x1c
+#define OP_DMULTU 0x1d
+#define OP_DDIV 0x1e
+#define OP_DDIVU 0x1f
+#define OP_DERET 0x1f
+#define OP_ADD 0x20
+#define OP_CLZ 0x20
+#define OP_BSHFL 0x20
+#define OP_ADDU 0x21
+#define OP_CLO 0x21
+#define OP_SUB 0x22
+#define OP_SUBU 0x23
+#define OP_AND 0x24
+#define OP_DCLZ 0x24
+#define OP_DBSHFL 0x24
+#define OP_OR 0x25
+#define OP_DCLO 0x25
+#define OP_XOR 0x26
+#define OP_NOR 0x27
+#define OP_SLT 0x2a
+#define OP_SLTU 0x2b
+#define OP_PCREL 0x2b
+#define OP_DADD 0x2c
+#define OP_DADDU 0x2d
+#define OP_DSUB 0x2e
+#define OP_DSUBU 0x2f
+#define OP_TGE 0x30
+#define OP_TGEU 0x31
+#define OP_TLT 0x32
+#define OP_TLTU 0x33
+#define OP_TEQ 0x34
+#define OP_TNE 0x36
+#define OP_DSLL 0x38
+#define OP_DSRL 0x3a
+#define OP_DSRA 0x3b
+#define OP_DSLL32 0x3c
+#define OP_DSRL32 0x3e
+#define OP_AUIPC 0x3e
+#define OP_DSRA32 0x3f
+#define OP_SDBBP 0x3f
+
+/*
+ * Instruction encoders. Each macro forwards its operands to Rtype or
+ * Itype (both defined elsewhere). Only the instructions this backend
+ * needs are listed; this is not a complete description of the ISA.
+ */
+/* _NOP ignores its argument: it always expands to an all-zero SLL. */
+#define _NOP(i0) Rtype(OP_SPECIAL, 0, 0, 0, 0, OP_SLL)
+#define _LUI(rt, i0) Itype(OP_LUI, 0, rt, i0)
+/* Add / subtract, 32-bit forms and their D-prefixed 64-bit forms. */
+#define _ADDU(rd, rs, rt) Rtype(OP_SPECIAL, rs, rt, rd, 00, OP_ADDU)
+#define _DADDU(rd, rs, rt) Rtype(OP_SPECIAL, rs, rt, rd, 00, OP_DADDU)
+#define _ADDIU(rt, rs, i0) Itype(OP_ADDIU, rs, rt, i0)
+#define _DADDIU(rt, rs, i0) Itype(OP_DADDIU, rs, rt, i0)
+#define _SUBU(rd, rs, rt) Rtype(OP_SPECIAL, rs, rt, rd, 00, OP_SUBU)
+#define _DSUBU(rd, rs, rt) Rtype(OP_SPECIAL, rs, rt, rd, 00, OP_DSUBU)
+/* Multiply / divide take no destination operand here; see _MFHI/_MFLO. */
+#define _MULT(rs, rt) Rtype(OP_SPECIAL, rs, rt, 00, 00, OP_MULT)
+#define _MULTU(rs, rt) Rtype(OP_SPECIAL, rs, rt, 00, 00, OP_MULTU)
+#define _DMULT(rs, rt) Rtype(OP_SPECIAL, rs, rt, 00, 00, OP_DMULT)
+#define _DMULTU(rs, rt) Rtype(OP_SPECIAL, rs, rt, 00, 00, OP_DMULTU)
+#define _DIV(rs, rt) Rtype(OP_SPECIAL, rs, rt, 00, 00, OP_DIV)
+#define _DIVU(rs, rt) Rtype(OP_SPECIAL, rs, rt, 00, 00, OP_DIVU)
+#define _DDIV(rs, rt) Rtype(OP_SPECIAL, rs, rt, 00, 00, OP_DDIV)
+#define _DDIVU(rs, rt) Rtype(OP_SPECIAL, rs, rt, 00, 00, OP_DDIVU)
+/*
+ * Shifts. The *V forms take the shift amount in a register (rs); the
+ * others take it as the immediate sa. The *32 forms are used by the
+ * 64-bit lshi/rshi/rshi_u for amounts of 32 and above, with 32
+ * subtracted from the amount.
+ */
+#define _SLLV(rd, rt, rs) Rtype(OP_SPECIAL, rs, rt, rd, 00, OP_SLLV)
+#define _SLL(rd, rt, sa) Rtype(OP_SPECIAL, 00, rt, rd, sa, OP_SLL)
+#define _DSLLV(rd, rt, rs) Rtype(OP_SPECIAL, rs, rt, rd, 00, OP_DSLLV)
+#define _DSLL(rd, rt, sa) Rtype(OP_SPECIAL, 00, rt, rd, sa, OP_DSLL)
+#define _DSLL32(rd, rt, sa) Rtype(OP_SPECIAL, 00, rt, rd, sa, OP_DSLL32)
+#define _SRAV(rd, rt, rs) Rtype(OP_SPECIAL, rs, rt, rd, 00, OP_SRAV)
+#define _SRA(rd, rt, sa) Rtype(OP_SPECIAL, 00, rt, rd, sa, OP_SRA)
+#define _SRLV(rd, rt, rs) Rtype(OP_SPECIAL, rs, rt, rd, 00, OP_SRLV)
+#define _SRL(rd, rt, sa) Rtype(OP_SPECIAL, 00, rt, rd, sa, OP_SRL)
+#define _DSRAV(rd, rt, rs) Rtype(OP_SPECIAL, rs, rt, rd, 00, OP_DSRAV)
+#define _DSRA(rd, rt, sa) Rtype(OP_SPECIAL, 00, rt, rd, sa, OP_DSRA)
+#define _DSRA32(rd, rt, sa) Rtype(OP_SPECIAL, 00, rt, rd, sa, OP_DSRA32)
+#define _DSRLV(rd, rt, rs) Rtype(OP_SPECIAL, rs, rt, rd, 00, OP_DSRLV)
+#define _DSRL(rd, rt, sa) Rtype(OP_SPECIAL, 00, rt, rd, sa, OP_DSRL)
+#define _DSRL32(rd, rt, sa) Rtype(OP_SPECIAL, 00, rt, rd, sa, OP_DSRL32)
+/*
+ * Bit-field insert. The fourth Rtype slot carries the index of the
+ * field's top bit (pos + size - 1) and the fifth slot the index of its
+ * bottom bit (pos). pos and size are parenthesized so that compound
+ * expressions passed by a caller are evaluated as a unit.
+ */
+#define _INS(rt, rs, pos, size) Rtype(OP_SPECIAL3, rs, rt, \
+ ((pos) + (size) - 1), (pos), 0x04)
+#define _DINS(rt, rs, pos, size) Rtype(OP_SPECIAL3, rs, rt, \
+ ((pos) + (size) - 1), (pos), 0x07)
+/*
+ * Rotate right by an immediate. These reuse the SRL/DSRL function codes
+ * with the value 1 in the first register slot. The source operand is
+ * named rt so that the parameter list matches the name used in the
+ * expansion; previously the parameter was called rs and the expansion
+ * referred to an rt that the macro did not declare.
+ */
+#define _ROTR(rd, rt, sa) Rtype(OP_SPECIAL, 01, rt, rd, sa, OP_SRL)
+#define _DROTR(rd, rt, sa) Rtype(OP_SPECIAL, 01, rt, rd, sa, OP_DSRL)
+/* Moves between a general register and HI/LO. */
+#define _MFHI(rd) Rtype(OP_SPECIAL, 00, 00, rd, 00, OP_MFHI)
+#define _MFLO(rd) Rtype(OP_SPECIAL, 00, 00, rd, 00, OP_MFLO)
+#define _MTHI(rs) Rtype(OP_SPECIAL, rs, 00, 00, 00, OP_MTHI)
+#define _MTLO(rs) Rtype(OP_SPECIAL, rs, 00, 00, 00, OP_MTLO)
+/* Bitwise logic, register and immediate forms. */
+#define _AND(rd, rs, rt) Rtype(OP_SPECIAL, rs, rt, rd, 00, OP_AND)
+#define _ANDI(rt, rs, i0) Itype(OP_ANDI, rs, rt, i0)
+#define _OR(rd, rs, rt) Rtype(OP_SPECIAL, rs, rt, rd, 00, OP_OR)
+#define _ORI(rt, rs, i0) Itype(OP_ORI, rs, rt, i0)
+#define _XOR(rd, rs, rt) Rtype(OP_SPECIAL, rs, rt, rd, 0, OP_XOR)
+#define _XORI(rt, rs, i0) Itype(OP_XORI, rs, rt, i0)
+/* Loads and stores: rt is the data register, of the offset, bs the base. */
+#define _LB(rt, of, bs) Itype(OP_LB, bs, rt, of)
+#define _LBU(rt, of, bs) Itype(OP_LBU, bs, rt, of)
+#define _LH(rt, of, bs) Itype(OP_LH, bs, rt, of)
+#define _LHU(rt, of, bs) Itype(OP_LHU, bs, rt, of)
+#define _LW(rt, of, bs) Itype(OP_LW, bs, rt, of)
+#define _LWU(rt, of, bs) Itype(OP_LWU, bs, rt, of)
+#define _LD(rt, of, bs) Itype(OP_LD, bs, rt, of)
+#define _SB(rt, of, bs) Itype(OP_SB, bs, rt, of)
+#define _SH(rt, of, bs) Itype(OP_SH, bs, rt, of)
+#define _SW(rt, of, bs) Itype(OP_SW, bs, rt, of)
+#define _SD(rt, of, bs) Itype(OP_SD, bs, rt, of)
+/* SPECIAL3/BSHFL group: the operation selector travels in the sa slot. */
+#define _WSBH(rd, rt) Rtype(OP_SPECIAL3, 00, rt, rd, OP_WSBH, OP_BSHFL)
+#define _SEB(rd, rt) Rtype(OP_SPECIAL3, 00, rt, rd, OP_SEB, OP_BSHFL)
+#define _SEH(rd, rt) Rtype(OP_SPECIAL3, 00, rt, rd, OP_SEH, OP_BSHFL)
+/* Set-on-less-than comparisons. */
+#define _SLT(rd, rs, rt) Rtype(OP_SPECIAL, rs, rt, rd, 00, OP_SLT)
+#define _SLTU(rd, rs, rt) Rtype(OP_SPECIAL, rs, rt, rd, 00, OP_SLTU)
+#define _SLTI(rt, rs, i0) Itype(OP_SLTI, rs, rt, i0)
+#define _SLTIU(rt, rs, i0) Itype(OP_SLTIU, rs, rt, i0)
+/* Branches; the REGIMM forms carry their selector in the rt slot. */
+#define _BLTZ(rs, of) Itype(OP_REGIMM, rs, OP_BLTZ, of)
+#define _BLEZ(rs, of) Itype(OP_BLEZ, rs, 00, of)
+#define _BEQ(rs, rt, of) Itype(OP_BEQ, rs, rt, of)
+#define _BGEZ(rs, of) Itype(OP_REGIMM, rs, OP_BGEZ, of)
+#define _BGTZ(rs, of) Itype(OP_BGTZ, rs, 00, of)
+#define _BNE(rs, rt, of) Itype(OP_BNE, rs, rt, of)
+/* Jumps and conditional move. */
+#define _JALR(rd, rs) Rtype(OP_SPECIAL, rs, 00, rd, 00, OP_JALR)
+#define _JR(rs) Rtype(OP_SPECIAL, rs, 00, 00, 00, OP_JR)
+#define _J(t) Jtype(OP_J, t)
+#define _MOVZ(rd, rs, rt) Rtype(OP_SPECIAL, rs, rt, rd, 00, OP_MOVZ)
+
+/* Miscellaneous: debug breakpoint, PC-relative add-upper, memory barrier. */
+#define _SDBBP() Rtype(OP_SPECIAL2, 0, 0, 0, 0, OP_SDBBP)
+#define _AUIPC(rs, im) Itype(OP_PCREL, rs, OP_AUIPC, im)
+#define _SYNC(st) Rtype(OP_SPECIAL, 0, 0, 0, st, OP_SYNC)
+/*
+ * LL/LLD and SC/SCD encoders (used through _WLL/_WSC below).
+ *
+ * NOTE: these use the pre-Release 6 instruction formats; some form of
+ * detection should probably be added eventually.
+ */
+#define _LL(rt, of, bs) Itype(OP_LL, bs, rt, of)
+#define _LLD(rt, of, bs) Itype(OP_LLD, bs, rt, of)
+#define _SC(rt, of, bs) Itype(OP_SC, bs, rt, of)
+#define _SCD(rt, of, bs) Itype(OP_SCD, bs, rt, of)
+/*
+ * Register copy written as an ORI. The immediate operand is
+ * rn(_ZERO), i.e. the register *number* of _ZERO.
+ * NOTE(review): presumably relies on that number being 0 -- confirm.
+ */
+#define _MOVR(rt, rd) _ORI(rt, rd, rn(_ZERO))
+
+/*
+ * Word-sized aliases: each _W* macro selects the D-prefixed (64-bit)
+ * encoder when __WORDSIZE is 64 and the plain 32-bit encoder otherwise,
+ * so the emitters below can be written once for both word sizes.
+ */
+#if __WORDSIZE == 64
+#define _WADDR(rd, rs, rt) _DADDU(rd, rs, rt)
+#define _WADDIU(rd, rs, i0) _DADDIU(rd, rs, i0)
+#define _WSUBR(rd, rs, rt) _DSUBU(rd, rs, rt)
+#define _WMULT(rs, rt) _DMULT(rs, rt)
+#define _WMULTU(rs, rt) _DMULTU(rs, rt)
+#define _WDIV(rs, rt) _DDIV(rs, rt)
+#define _WDIVU(rs, rt) _DDIVU(rs, rt)
+#define _WSLLV(rd, rt, rs) _DSLLV(rd, rt, rs)
+#define _WSRAV(rd, rt, rs) _DSRAV(rd, rt, rs)
+#define _WSRLV(rd, rt, rs) _DSRLV(rd, rt, rs)
+#define _WLD(rt, of, bs) _LD(rt, of, bs)
+#define _WLL(rt, of, bs) _LLD(rt, of, bs)
+#define _WSC(rt, of, bs) _SCD(rt, of, bs)
+#else
+#define _WADDR(rd, rs, rt) _ADDU(rd, rs, rt)
+#define _WADDIU(rd, rs, i0) _ADDIU(rd, rs, i0)
+#define _WSUBR(rd, rs, rt) _SUBU(rd, rs, rt)
+#define _WMULT(rs, rt) _MULT(rs, rt)
+#define _WMULTU(rs, rt) _MULTU(rs, rt)
+#define _WDIV(rs, rt) _DIV(rs, rt)
+#define _WDIVU(rs, rt) _DIVU(rs, rt)
+#define _WSLLV(rd, rt, rs) _SLLV(rd, rt, rs)
+#define _WSRAV(rd, rt, rs) _SRAV(rd, rt, rs)
+#define _WSRLV(rd, rt, rs) _SRLV(rd, rt, rs)
+#define _WLD(rt, of, bs) _LW(rt, of, bs)
+#define _WLL(rt, of, bs) _LL(rt, of, bs)
+#define _WSC(rt, of, bs) _SC(rt, of, bs)
+#endif
+
+static void addr(jit_state_t * _jit, int32_t r0, int32_t r1, int32_t r2);
+static void addi(jit_state_t * _jit, int32_t r0, int32_t r1,
+ jit_word_t i0);
+static void addcr(jit_state_t * _jit, int32_t r0, int32_t r1, int32_t r2);
+static void addci(jit_state_t * _jit, int32_t r0, int32_t r1,
+ jit_word_t i0);
+static void addxr(jit_state_t * _jit, int32_t r0, int32_t r1, int32_t r2);
+static void addxi(jit_state_t * _jit, int32_t r0, int32_t r1,
+ jit_word_t i0);
+
+static void subr(jit_state_t * _jit, int32_t r0, int32_t r1, int32_t r2);
+static void subi(jit_state_t * _jit, int32_t r0, int32_t r1,
+ jit_word_t i0);
+static void subcr(jit_state_t * _jit, int32_t r0, int32_t r1, int32_t r2);
+static void subci(jit_state_t * _jit, int32_t r0, int32_t r1,
+ jit_word_t i0);
+static void subxr(jit_state_t * _jit, int32_t r0, int32_t r1, int32_t r2);
+static void subxi(jit_state_t * _jit, int32_t r0, int32_t r1,
+ jit_word_t i0);
+
+static void mulr(jit_state_t * _jit, int32_t r0, int32_t r1, int32_t r2);
+static void muli(jit_state_t * _jit, int32_t r0, int32_t r1,
+ jit_word_t i0);
+
+static void divr(jit_state_t * _jit, int32_t r0, int32_t r1, int32_t r2);
+static void divi(jit_state_t * _jit, int32_t r0, int32_t r1,
+ jit_word_t i0);
+static void divr_u(jit_state_t * _jit, int32_t r0, int32_t r1, int32_t r2);
+static void divi_u(jit_state_t * _jit, int32_t r0, int32_t r1,
+ jit_word_t i0);
+
+static void remr(jit_state_t * _jit, int32_t r0, int32_t r1, int32_t r2);
+static void remi(jit_state_t * _jit, int32_t r0, int32_t r1,
+ jit_word_t i0);
+static void remr_u(jit_state_t * _jit, int32_t r0, int32_t r1, int32_t r2);
+static void remi_u(jit_state_t * _jit, int32_t r0, int32_t r1,
+ jit_word_t i0);
+
+static void andr(jit_state_t * _jit, int32_t r0, int32_t r1, int32_t r2);
+static void andi(jit_state_t * _jit, int32_t r0, int32_t r1,
+ jit_word_t i0);
+static void orr(jit_state_t * _jit, int32_t r0, int32_t r1, int32_t r2);
+static void ori(jit_state_t * _jit, int32_t r0, int32_t r1, jit_word_t i0);
+static void xorr(jit_state_t * _jit, int32_t r0, int32_t r1, int32_t r2);
+static void xori(jit_state_t * _jit, int32_t r0, int32_t r1,
+ jit_word_t i0);
+static void lshr(jit_state_t * _jit, int32_t r0, int32_t r1, int32_t r2);
+static void lshi(jit_state_t * _jit, int32_t r0, int32_t r1,
+ jit_word_t i0);
+static void rshr(jit_state_t * _jit, int32_t r0, int32_t r1, int32_t r2);
+static void rshi(jit_state_t * _jit, int32_t r0, int32_t r1,
+ jit_word_t i0);
+static void rshr_u(jit_state_t * _jit, int32_t r0, int32_t r1, int32_t r2);
+static void rshi_u(jit_state_t * _jit, int32_t r0, int32_t r1,
+ jit_word_t i0);
+
+static void qmulr(jit_state_t * _jit, int32_t r0, int32_t r1,
+ int32_t r2, int32_t r3);
+static void qmulr_u(jit_state_t * _jit, int32_t r0, int32_t r1,
+ int32_t r2, int32_t r3);
+static void qmuli(jit_state_t * _jit, int32_t r0, int32_t r1,
+ int32_t r2, jit_word_t i0);
+static void qmuli_u(jit_state_t * _jit, int32_t r0, int32_t r1,
+ int32_t r2, jit_word_t i0);
+
+static void qdivr(jit_state_t * _jit, int32_t r0, int32_t r1,
+ int32_t r2, int32_t r3);
+static void qdivr_u(jit_state_t * _jit, int32_t r0, int32_t r1,
+ int32_t r2, int32_t r3);
+static void qdivi(jit_state_t * _jit, int32_t r0, int32_t r1,
+ int32_t r2, jit_word_t i0);
+static void qdivi_u(jit_state_t * _jit, int32_t r0, int32_t r1,
+ int32_t r2, jit_word_t i0);
+
+static void negr(jit_state_t * _jit, int32_t r0, int32_t r1);
+static void comr(jit_state_t * _jit, int32_t r0, int32_t r1);
+
+static void movr(jit_state_t * _jit, int32_t r0, int32_t r1);
+static void movi(jit_state_t * _jit, int32_t r0, jit_word_t i0);
+
+static jit_reloc_t mov_addr(jit_state_t * _jit, int32_t r0);
+static jit_reloc_t movi_from_immediate(jit_state_t * _jit, int32_t r0);
+static void emit_immediate_reloc(jit_state_t * _jit, int32_t r0,
+ jit_bool_t in_veneer);
+
+static void extr_c(jit_state_t * _jit, int32_t r0, int32_t r1);
+static void extr_uc(jit_state_t * _jit, int32_t r0, int32_t r1);
+static void extr_s(jit_state_t * _jit, int32_t r0, int32_t r1);
+static void extr_us(jit_state_t * _jit, int32_t r0, int32_t r1);
+
+#if __WORDSIZE == 64
+static void extr_i(jit_state_t * _jit, int32_t r0, int32_t r1);
+static void extr_ui(jit_state_t * _jit, int32_t r0, int32_t r1);
+#endif
+
+static jit_reloc_t bltr(jit_state_t * _jit, int32_t r0, int32_t r1);
+static jit_reloc_t blti(jit_state_t * _jit, int32_t r0, jit_word_t i1);
+static jit_reloc_t bltr_u(jit_state_t * _jit, int32_t r0, int32_t r1);
+static jit_reloc_t blti_u(jit_state_t * _jit, int32_t r0, jit_word_t i1);
+static jit_reloc_t bler(jit_state_t * _jit, int32_t r0, int32_t r1);
+static jit_reloc_t blei(jit_state_t * _jit, int32_t r0, jit_word_t i1);
+static jit_reloc_t bler_u(jit_state_t * _jit, int32_t r0, int32_t r1);
+static jit_reloc_t blei_u(jit_state_t * _jit, int32_t r0, jit_word_t i1);
+static jit_reloc_t beqr(jit_state_t * _jit, int32_t r0, int32_t r1);
+static jit_reloc_t beqi(jit_state_t * _jit, int32_t r0, jit_word_t i1);
+static jit_reloc_t bger(jit_state_t * _jit, int32_t r0, int32_t r1);
+static jit_reloc_t bgei(jit_state_t * _jit, int32_t r0, jit_word_t i1);
+static jit_reloc_t bger_u(jit_state_t * _jit, int32_t r0, int32_t r1);
+static jit_reloc_t bgei_u(jit_state_t * _jit, int32_t r0, jit_word_t i1);
+static jit_reloc_t bgtr(jit_state_t * _jit, int32_t r0, int32_t r1);
+static jit_reloc_t bgti(jit_state_t * _jit, int32_t r0, jit_word_t i1);
+static jit_reloc_t bgtr_u(jit_state_t * _jit, int32_t r0, int32_t r1);
+static jit_reloc_t bgti_u(jit_state_t * _jit, int32_t r0, jit_word_t i1);
+static jit_reloc_t bner(jit_state_t * _jit, int32_t r0, int32_t r1);
+static jit_reloc_t bnei(jit_state_t * _jit, int32_t r0, jit_word_t i1);
+
+static jit_reloc_t bmsr(jit_state_t * _jit, int32_t r0, int32_t r1);
+static jit_reloc_t bmsi(jit_state_t * _jit, int32_t r0, jit_word_t i1);
+static jit_reloc_t bmcr(jit_state_t * _jit, int32_t r0, int32_t r1);
+static jit_reloc_t bmci(jit_state_t * _jit, int32_t r0, jit_word_t i1);
+static jit_reloc_t boaddr(jit_state_t * _jit, int32_t r0, int32_t r1);
+static jit_reloc_t boaddi(jit_state_t * _jit, int32_t r0, jit_word_t i1);
+static jit_reloc_t boaddr_u(jit_state_t * _jit, int32_t r0, int32_t r1);
+static jit_reloc_t boaddi_u(jit_state_t * _jit, int32_t r0, jit_word_t i1);
+static jit_reloc_t bxaddr(jit_state_t * _jit, int32_t r0, int32_t r1);
+static jit_reloc_t bxaddi(jit_state_t * _jit, int32_t r0, jit_word_t i1);
+static jit_reloc_t bxaddr_u(jit_state_t * _jit, int32_t r0, int32_t r1);
+static jit_reloc_t bxaddi_u(jit_state_t * _jit, int32_t r0, jit_word_t i1);
+static jit_reloc_t bosubr(jit_state_t * _jit, int32_t r0, int32_t r1);
+static jit_reloc_t bosubi(jit_state_t * _jit, int32_t r0, jit_word_t i1);
+static jit_reloc_t bosubr_u(jit_state_t * _jit, int32_t r0, int32_t r1);
+static jit_reloc_t bosubi_u(jit_state_t * _jit, int32_t r0, jit_word_t i1);
+static jit_reloc_t bxsubr(jit_state_t * _jit, int32_t r0, int32_t r1);
+static jit_reloc_t bxsubi(jit_state_t * _jit, int32_t r0, jit_word_t i1);
+static jit_reloc_t bxsubr_u(jit_state_t * _jit, int32_t r0, int32_t r1);
+static jit_reloc_t bxsubi_u(jit_state_t * _jit, int32_t r0, jit_word_t i1);
+
+static void str_c(jit_state_t * _jit, int32_t r0, int32_t r1);
+static void str_s(jit_state_t * _jit, int32_t r0, int32_t r1);
+static void str_i(jit_state_t * _jit, int32_t r0, int32_t r1);
+#if __WORDSIZE == 64
+static void str_l(jit_state_t * _jit, int32_t r0, int32_t r1);
+#endif
+
+static void sti_c(jit_state_t * _jit, jit_word_t i0, int32_t r0);
+static void sti_s(jit_state_t * _jit, jit_word_t i0, int32_t r0);
+static void sti_i(jit_state_t * _jit, jit_word_t i0, int32_t r0);
+#if __WORDSIZE == 64
+static void sti_l(jit_state_t * _jit, jit_word_t i0, int32_t r0);
+#endif
+
+static void stxr_c(jit_state_t * _jit, int32_t r0, int32_t r1, int32_t r2);
+static void stxr_s(jit_state_t * _jit, int32_t r0, int32_t r1, int32_t r2);
+static void stxr_i(jit_state_t * _jit, int32_t r0, int32_t r1, int32_t r2);
+#if __WORDSIZE == 64
+static void stxr_l(jit_state_t * _jit, int32_t r0, int32_t r1, int32_t r2);
+#endif
+
+static void stxi_c(jit_state_t * _jit, jit_word_t i0, int32_t r0,
+ int32_t r1);
+static void stxi_s(jit_state_t * _jit, jit_word_t i0, int32_t r0,
+ int32_t r1);
+static void stxi_i(jit_state_t * _jit, jit_word_t i0, int32_t r0,
+ int32_t r1);
+#if __WORDSIZE == 64
+static void stxi_l(jit_state_t * _jit, jit_word_t i0, int32_t r0,
+ int32_t r1);
+#endif
+
+static void ldr_c(jit_state_t * _jit, int32_t r0, int32_t r1);
+static void ldr_uc(jit_state_t * _jit, int32_t r0, int32_t r1);
+static void ldr_s(jit_state_t * _jit, int32_t r0, int32_t r1);
+static void ldr_us(jit_state_t * _jit, int32_t r0, int32_t r1);
+static void ldr_i(jit_state_t * _jit, int32_t r0, int32_t r1);
+#if __WORDSIZE == 64
+static void ldr_ui(jit_state_t * _jit, int32_t r0, int32_t r1);
+static void ldr_l(jit_state_t * _jit, int32_t r0, int32_t r1);
+#endif
+
+static void ldi_c(jit_state_t * _jit, int32_t r0, jit_word_t i0);
+static void ldi_uc(jit_state_t * _jit, int32_t r0, jit_word_t i0);
+static void ldi_s(jit_state_t * _jit, int32_t r0, jit_word_t i0);
+static void ldi_us(jit_state_t * _jit, int32_t r0, jit_word_t i0);
+static void ldi_i(jit_state_t * _jit, int32_t r0, jit_word_t i0);
+#if __WORDSIZE == 64
+static void ldi_ui(jit_state_t * _jit, int32_t r0, jit_word_t i0);
+static void ldi_l(jit_state_t * _jit, int32_t r0, jit_word_t i0);
+#endif
+
+static void ldxr_c(jit_state_t * _jit, int32_t r0, int32_t r1, int32_t r2);
+static void ldxr_uc(jit_state_t * _jit, int32_t r0, int32_t r1,
+ int32_t r2);
+static void ldxr_s(jit_state_t * _jit, int32_t r0, int32_t r1, int32_t r2);
+static void ldxr_us(jit_state_t * _jit, int32_t r0, int32_t r1,
+ int32_t r2);
+static void ldxr_i(jit_state_t * _jit, int32_t r0, int32_t r1, int32_t r2);
+#if __WORDSIZE == 64
+static void ldxr_ui(jit_state_t * _jit, int32_t r0, int32_t r1,
+ int32_t r2);
+static void ldxr_l(jit_state_t * _jit, int32_t r0, int32_t r1, int32_t r2);
+#endif
+
+static void ldxi_c(jit_state_t * _jit, int32_t r0, int32_t r1,
+ jit_word_t i0);
+static void ldxi_uc(jit_state_t * _jit, int32_t r0, int32_t r1,
+ jit_word_t i0);
+static void ldxi_us(jit_state_t * _jit, int32_t r0, int32_t r1,
+ jit_word_t i0);
+static void ldxi_s(jit_state_t * _jit, int32_t r0, int32_t r1,
+ jit_word_t i0);
+static void ldxi_i(jit_state_t * _jit, int32_t r0, int32_t r1,
+ jit_word_t i0);
+#if __WORDSIZE == 64
+static void ldxi_ui(jit_state_t * _jit, int32_t r0, int32_t r1,
+ jit_word_t i0);
+static void ldxi_l(jit_state_t * _jit, int32_t r0, int32_t r1,
+ jit_word_t i0);
+#endif
+
+static void ret(jit_state_t * _jit);
+static void retr(jit_state_t * _jit, int32_t r0);
+static void reti(jit_state_t * _jit, jit_word_t i0);
+static void retval_c(jit_state_t * _jit, int32_t r0);
+static void retval_uc(jit_state_t * _jit, int32_t r0);
+static void retval_s(jit_state_t * _jit, int32_t r0);
+static void retval_us(jit_state_t * _jit, int32_t r0);
+static void retval_i(jit_state_t * _jit, int32_t r0);
+#if __WORDSIZE == 64
+static void retval_ui(jit_state_t * _jit, int32_t r0);
+static void retval_l(jit_state_t * _jit, int32_t r0);
+#endif
+
+static uint32_t patch_jump(uint32_t inst, int32_t offset);
+static jit_reloc_t emit_jump(jit_state_t * _jit, uint32_t inst, uint32_t delay_slot);
+
+static void callr(jit_state_t * _jit, int32_t r0);
+static void calli(jit_state_t * _jit, jit_word_t i0);
+static void jmpi_with_link(jit_state_t * _jit, jit_word_t i0);
+static void pop_link_register(jit_state_t * _jit);
+static void push_link_register(jit_state_t * _jit);
+static void jmpr(jit_state_t * _jit, int32_t r0);
+static void jmpi(jit_state_t * _jit, jit_word_t i0);
+static jit_reloc_t jmp(jit_state_t * _jit);
+
+static void ldr_atomic(jit_state_t * _jit, int32_t dst, int32_t loc);
+static void str_atomic(jit_state_t * _jit, int32_t loc, int32_t val);
+static void swap_atomic(jit_state_t * _jit, int32_t dst, int32_t loc,
+ int32_t val);
+static void cas_atomic(jit_state_t * _jit, int32_t dst, int32_t loc,
+ int32_t expected, int32_t desired);
+
+static void bswapr_us(jit_state_t * _jit, int32_t r0, int32_t r1);
+static void bswapr_ui(jit_state_t * _jit, int32_t r0, int32_t r1);
+#if __WORDSIZE == 64
+static void bswapr_ul(jit_state_t * _jit, int32_t r0, int32_t r1);
+#endif
+
+static void nop(jit_state_t * _jit, int32_t i0);
+static void breakpoint(jit_state_t * _jit);
+
+/* Shorthand used throughout the emitters: forwards to jit_gpr_regno(). */
+#define rn(x) jit_gpr_regno(x)
+
+/* r0 = r1 + r2, as a single word-sized add (_WADDR). */
+static void
+addr(jit_state_t * _jit, int32_t r0, int32_t r1, int32_t r2)
+{
+ em_wp(_jit, _WADDR(r0, r1, r2));
+}
+
+/*
+ * r0 = r1 + i0.
+ *
+ * Zero degenerates to a register move. Immediates accepted by
+ * can_sign_extend_short_p() are folded into a single _WADDIU, the same
+ * short form addci(), subi() and movi() use. Anything else is first
+ * materialized in a temporary register and added with _WADDR.
+ */
+static void
+addi(jit_state_t * _jit, int32_t r0, int32_t r1, jit_word_t i0)
+{
+    if (i0 == 0)
+        movr(_jit, r0, r1);
+    else if (can_sign_extend_short_p(i0))
+        em_wp(_jit, _WADDIU(r0, r1, i0));
+    else {
+        jit_gpr_t t0 = get_temp_gpr(_jit);
+        movi(_jit, rn(t0), i0);
+        em_wp(_jit, _WADDR(r0, r1, rn(t0)));
+        unget_temp_gpr(_jit);
+    }
+}
+
+/*
+ * r0 = r1 + r2, with the carry-out left in JIT_CARRY.
+ *
+ * The carry is derived as an unsigned "sum < r1" test (_SLTU), so r1
+ * must still hold its original value after the add. When r0 aliases r1
+ * the sum is therefore formed in a temporary and copied over afterwards.
+ */
+static void
+addcr(jit_state_t * _jit, int32_t r0, int32_t r1, int32_t r2)
+{
+    if (r0 != r1) {
+        em_wp(_jit, _WADDR(r0, r1, r2));
+        em_wp(_jit, _SLTU(rn(JIT_CARRY), r0, r1));
+        return;
+    }
+
+    jit_gpr_t sum = get_temp_gpr(_jit);
+    em_wp(_jit, _WADDR(rn(sum), r1, r2));
+    em_wp(_jit, _SLTU(rn(JIT_CARRY), rn(sum), r1));
+    movr(_jit, r0, rn(sum));
+    unget_temp_gpr(_jit);
+}
+
+/*
+ * r0 = r1 + i0, with the carry-out left in JIT_CARRY.
+ *
+ * A temporary is always reserved: it holds the immediate when i0 is not
+ * accepted by can_sign_extend_short_p(), and it receives the sum when r0
+ * aliases r1 (r1 has to survive for the unsigned "sum < r1" carry test).
+ */
+static void
+addci(jit_state_t * _jit, int32_t r0, int32_t r1, jit_word_t i0)
+{
+    jit_gpr_t tmp = get_temp_gpr(_jit);
+    /* Where the sum is produced: the temporary if r0 would clobber r1. */
+    int32_t dst = (r0 == r1) ? rn(tmp) : r0;
+
+    if (can_sign_extend_short_p(i0))
+        em_wp(_jit, _WADDIU(dst, r1, i0));
+    else {
+        movi(_jit, rn(tmp), i0);
+        em_wp(_jit, _WADDR(dst, r1, rn(tmp)));
+    }
+    em_wp(_jit, _SLTU(rn(JIT_CARRY), dst, r1));
+
+    /* movr() emits nothing when both operands are the same register. */
+    movr(_jit, r0, dst);
+
+    unget_temp_gpr(_jit);
+}
+
+/*
+ * Add with carry-in: r0 = r1 + r2 + (JIT_CARRY on entry).
+ *
+ * The incoming carry is copied to a temporary first because addcr()
+ * overwrites JIT_CARRY.
+ * NOTE(review): JIT_CARRY on exit is whatever the second addcr() wrote;
+ * a carry produced by the first add is not merged into it -- confirm
+ * this is intended.
+ */
+static void
+addxr(jit_state_t * _jit, int32_t r0, int32_t r1, int32_t r2)
+{
+ jit_gpr_t t0 = get_temp_gpr(_jit);
+ movr(_jit, rn(t0), rn(JIT_CARRY));
+ addcr(_jit, r0, r1, r2);
+ addcr(_jit, r0, r0, rn(t0));
+ unget_temp_gpr(_jit);
+}
+
+/*
+ * Add immediate with carry-in: r0 = r1 + i0 + (JIT_CARRY on entry).
+ *
+ * Same scheme as addxr(): save the incoming carry, add the immediate
+ * with addci(), then add the saved carry with addcr().
+ * NOTE(review): JIT_CARRY on exit reflects the second add only.
+ */
+static void
+addxi(jit_state_t * _jit, int32_t r0, int32_t r1, jit_word_t i0)
+{
+ jit_gpr_t t0 = get_temp_gpr(_jit);
+ movr(_jit, rn(t0), rn(JIT_CARRY));
+ addci(_jit, r0, r1, i0);
+ addcr(_jit, r0, r0, rn(t0));
+ unget_temp_gpr(_jit);
+}
+
+/* r0 = r1 - r2, as a single word-sized subtract (_WSUBR). */
+static void
+subr(jit_state_t * _jit, int32_t r0, int32_t r1, int32_t r2)
+{
+ em_wp(_jit, _WSUBR(r0, r1, r2));
+}
+
+/*
+ * r0 = r1 - i0.
+ *
+ * Zero is a register move. A short immediate is subtracted by adding its
+ * negation with _WADDIU; a low half of 0x8000 is excluded from that path
+ * (presumably because the negated value would no longer be encodable).
+ * Everything else goes through a temporary register and _WSUBR.
+ */
+static void
+subi(jit_state_t * _jit, int32_t r0, int32_t r1, jit_word_t i0)
+{
+    if (i0 == 0) {
+        movr(_jit, r0, r1);
+        return;
+    }
+
+    if (can_sign_extend_short_p(i0) && (i0 & 0xffff) != 0x8000) {
+        em_wp(_jit, _WADDIU(r0, r1, -i0));
+        return;
+    }
+
+    jit_gpr_t imm = get_temp_gpr(_jit);
+    movi(_jit, rn(imm), i0);
+    em_wp(_jit, _WSUBR(r0, r1, rn(imm)));
+    unget_temp_gpr(_jit);
+}
+
+/*
+ * r0 = r1 - r2, with the borrow left in JIT_CARRY.
+ *
+ * The borrow is derived as an unsigned "r1 < difference" test (_SLTU),
+ * so r1 must survive the subtract; when r0 aliases r1 the difference is
+ * formed in a temporary and copied over afterwards.
+ */
+static void
+subcr(jit_state_t * _jit, int32_t r0, int32_t r1, int32_t r2)
+{
+    if (r0 != r1) {
+        em_wp(_jit, _WSUBR(r0, r1, r2));
+        em_wp(_jit, _SLTU(rn(JIT_CARRY), r1, r0));
+        return;
+    }
+
+    jit_gpr_t diff = get_temp_gpr(_jit);
+    em_wp(_jit, _WSUBR(rn(diff), r1, r2));
+    em_wp(_jit, _SLTU(rn(JIT_CARRY), r1, rn(diff)));
+    movr(_jit, r0, rn(diff));
+    unget_temp_gpr(_jit);
+}
+
+/*
+ * r0 = r1 - i0, with the borrow left in JIT_CARRY.
+ *
+ * A temporary is always reserved: it holds the immediate when the short
+ * negated-add form cannot be used, and it receives the difference when
+ * r0 aliases r1 (r1 has to survive for the "r1 < difference" test).
+ */
+static void
+subci(jit_state_t * _jit, int32_t r0, int32_t r1, jit_word_t i0)
+{
+    jit_gpr_t tmp = get_temp_gpr(_jit);
+    /* Where the difference is produced. */
+    int32_t dst = (r0 == r1) ? rn(tmp) : r0;
+
+    if (can_sign_extend_short_p(i0) && (i0 & 0xffff) != 0x8000)
+        em_wp(_jit, _WADDIU(dst, r1, -i0));
+    else {
+        movi(_jit, rn(tmp), i0);
+        em_wp(_jit, _WSUBR(dst, r1, rn(tmp)));
+    }
+    em_wp(_jit, _SLTU(rn(JIT_CARRY), r1, dst));
+
+    /* movr() emits nothing when both operands are the same register. */
+    movr(_jit, r0, dst);
+
+    unget_temp_gpr(_jit);
+}
+
+/*
+ * Subtract with borrow-in: r0 = r1 - r2 - (JIT_CARRY on entry).
+ *
+ * The incoming value of JIT_CARRY is copied to a temporary first because
+ * subcr() overwrites it.
+ * NOTE(review): JIT_CARRY on exit is whatever the second subcr() wrote;
+ * a borrow from the first subtract is not merged into it -- confirm.
+ */
+static void
+subxr(jit_state_t * _jit, int32_t r0, int32_t r1, int32_t r2)
+{
+ jit_gpr_t t0 = get_temp_gpr(_jit);
+ movr(_jit, rn(t0), rn(JIT_CARRY));
+ subcr(_jit, r0, r1, r2);
+ subcr(_jit, r0, r0, rn(t0));
+ unget_temp_gpr(_jit);
+}
+
+/*
+ * Subtract immediate with borrow-in: r0 = r1 - i0 - (JIT_CARRY on entry).
+ *
+ * Same scheme as subxr(): save the incoming JIT_CARRY, subtract the
+ * immediate with subci(), then subtract the saved value with subcr().
+ * NOTE(review): JIT_CARRY on exit reflects the second subtract only.
+ */
+static void
+subxi(jit_state_t * _jit, int32_t r0, int32_t r1, jit_word_t i0)
+{
+ jit_gpr_t t0 = get_temp_gpr(_jit);
+ movr(_jit, rn(t0), rn(JIT_CARRY));
+ subci(_jit, r0, r1, i0);
+ subcr(_jit, r0, r0, rn(t0));
+ unget_temp_gpr(_jit);
+}
+
+/*
+ * r0 = low word of r1 * r2: issues a word-sized unsigned multiply and
+ * then reads the result back with _MFLO. The HI half is not read.
+ */
+static void
+mulr(jit_state_t * _jit, int32_t r0, int32_t r1, int32_t r2)
+{
+ em_wp(_jit, _WMULTU(r1, r2));
+ em_wp(_jit, _MFLO(r0));
+}
+
+/* r0 = r1 * i0: loads i0 into a temporary and defers to mulr(). */
+static void
+muli(jit_state_t * _jit, int32_t r0, int32_t r1, jit_word_t i0)
+{
+    jit_gpr_t factor = get_temp_gpr(_jit);
+
+    movi(_jit, rn(factor), i0);
+    mulr(_jit, r0, r1, rn(factor));
+
+    unget_temp_gpr(_jit);
+}
+
+/* Signed quotient: issues _WDIV on r1, r2 and reads the result via _MFLO. */
+static void
+divr(jit_state_t * _jit, int32_t r0, int32_t r1, int32_t r2)
+{
+ em_wp(_jit, _WDIV(r1, r2));
+ em_wp(_jit, _MFLO(r0));
+}
+
+/* Signed r0 = r1 / i0: loads i0 into a temporary and defers to divr(). */
+static void
+divi(jit_state_t * _jit, int32_t r0, int32_t r1, jit_word_t i0)
+{
+    jit_gpr_t divisor = get_temp_gpr(_jit);
+
+    movi(_jit, rn(divisor), i0);
+    divr(_jit, r0, r1, rn(divisor));
+
+    unget_temp_gpr(_jit);
+}
+
+/* Unsigned quotient: issues _WDIVU on r1, r2 and reads the result via _MFLO. */
+static void
+divr_u(jit_state_t * _jit, int32_t r0, int32_t r1, int32_t r2)
+{
+ em_wp(_jit, _WDIVU(r1, r2));
+ em_wp(_jit, _MFLO(r0));
+}
+
+/* Unsigned r0 = r1 / i0: loads i0 into a temporary and defers to divr_u(). */
+static void
+divi_u(jit_state_t * _jit, int32_t r0, int32_t r1, jit_word_t i0)
+{
+    jit_gpr_t divisor = get_temp_gpr(_jit);
+
+    movi(_jit, rn(divisor), i0);
+    divr_u(_jit, r0, r1, rn(divisor));
+
+    unget_temp_gpr(_jit);
+}
+
+/* Signed remainder: issues _WDIV on r1, r2 and reads the result via _MFHI. */
+static void
+remr(jit_state_t * _jit, int32_t r0, int32_t r1, int32_t r2)
+{
+ em_wp(_jit, _WDIV(r1, r2));
+ em_wp(_jit, _MFHI(r0));
+}
+
+/* Signed r0 = r1 % i0: loads i0 into a temporary and defers to remr(). */
+static void
+remi(jit_state_t * _jit, int32_t r0, int32_t r1, jit_word_t i0)
+{
+    jit_gpr_t divisor = get_temp_gpr(_jit);
+
+    movi(_jit, rn(divisor), i0);
+    remr(_jit, r0, r1, rn(divisor));
+
+    unget_temp_gpr(_jit);
+}
+
+/* Unsigned remainder: issues _WDIVU on r1, r2 and reads the result via _MFHI. */
+static void
+remr_u(jit_state_t * _jit, int32_t r0, int32_t r1, int32_t r2)
+{
+ em_wp(_jit, _WDIVU(r1, r2));
+ em_wp(_jit, _MFHI(r0));
+}
+
+/* Unsigned r0 = r1 % i0: loads i0 into a temporary and defers to remr_u(). */
+static void
+remi_u(jit_state_t * _jit, int32_t r0, int32_t r1, jit_word_t i0)
+{
+    jit_gpr_t divisor = get_temp_gpr(_jit);
+
+    movi(_jit, rn(divisor), i0);
+    remr_u(_jit, r0, r1, rn(divisor));
+
+    unget_temp_gpr(_jit);
+}
+
+/* r0 = r1 << r2, shift amount taken from a register (_WSLLV). */
+static void
+lshr(jit_state_t * _jit, int32_t r0, int32_t r1, int32_t r2)
+{
+ em_wp(_jit, _WSLLV(r0, r1, r2));
+}
+
+/* r0 = r1 >> r2 (arithmetic), shift amount taken from a register (_WSRAV). */
+static void
+rshr(jit_state_t * _jit, int32_t r0, int32_t r1, int32_t r2)
+{
+ em_wp(_jit, _WSRAV(r0, r1, r2));
+}
+
+/* r0 = r1 >> r2 (logical), shift amount taken from a register (_WSRLV). */
+static void
+rshr_u(jit_state_t * _jit, int32_t r0, int32_t r1, int32_t r2)
+{
+ em_wp(_jit, _WSRLV(r0, r1, r2));
+}
+
+#if __WORDSIZE == 64
+/*
+ * 64-bit r0 = r1 << i0 for 0 <= i0 <= 63. Amounts of 32 and above use
+ * _DSLL32 with 32 subtracted from the amount; smaller ones use _DSLL.
+ */
+static void
+lshi(jit_state_t * _jit, int32_t r0, int32_t r1, jit_word_t i0)
+{
+    assert(i0 >= 0 && i0 <= 63);
+
+    if (i0 >= 32) {
+        em_wp(_jit, _DSLL32(r0, r1, i0 - 32));
+        return;
+    }
+    em_wp(_jit, _DSLL(r0, r1, i0));
+}
+
+/*
+ * 64-bit arithmetic r0 = r1 >> i0 for 0 <= i0 <= 63. Amounts of 32 and
+ * above use _DSRA32 with 32 subtracted; smaller ones use _DSRA.
+ */
+static void
+rshi(jit_state_t * _jit, int32_t r0, int32_t r1, jit_word_t i0)
+{
+    assert(i0 >= 0 && i0 <= 63);
+
+    if (i0 >= 32) {
+        em_wp(_jit, _DSRA32(r0, r1, i0 - 32));
+        return;
+    }
+    em_wp(_jit, _DSRA(r0, r1, i0));
+}
+
+/*
+ * 64-bit logical r0 = r1 >> i0 for 0 <= i0 <= 63. Amounts of 32 and
+ * above use _DSRL32 with 32 subtracted; smaller ones use _DSRL.
+ */
+static void
+rshi_u(jit_state_t * _jit, int32_t r0, int32_t r1, jit_word_t i0)
+{
+    assert(i0 >= 0 && i0 <= 63);
+
+    if (i0 >= 32) {
+        em_wp(_jit, _DSRL32(r0, r1, i0 - 32));
+        return;
+    }
+    em_wp(_jit, _DSRL(r0, r1, i0));
+}
+#else
+/*
+ * 32-bit r0 = r1 << i0. The amount goes into the shift-amount slot of
+ * the instruction word, so it is range-checked the same way the 64-bit
+ * variant checks its own range; an out-of-range amount would otherwise
+ * be encoded silently.
+ */
+static void
+lshi(jit_state_t * _jit, int32_t r0, int32_t r1, jit_word_t i0)
+{
+    assert(i0 >= 0 && i0 <= 31);
+    em_wp(_jit, _SLL(r0, r1, i0));
+}
+
+/*
+ * 32-bit arithmetic r0 = r1 >> i0. The amount is range-checked like in
+ * the 64-bit variant so that an out-of-range value is not encoded
+ * silently.
+ */
+static void
+rshi(jit_state_t * _jit, int32_t r0, int32_t r1, jit_word_t i0)
+{
+    assert(i0 >= 0 && i0 <= 31);
+    em_wp(_jit, _SRA(r0, r1, i0));
+}
+
+/*
+ * 32-bit logical r0 = r1 >> i0. The amount is range-checked like in the
+ * 64-bit variant so that an out-of-range value is not encoded silently.
+ */
+static void
+rshi_u(jit_state_t * _jit, int32_t r0, int32_t r1, jit_word_t i0)
+{
+    assert(i0 >= 0 && i0 <= 31);
+    em_wp(_jit, _SRL(r0, r1, i0));
+}
+#endif
+
+/*
+ * Full-width multiply of r2 by r3: the LO half is read into r0 and the
+ * HI half into r1. `sign` selects the signed (_WMULT) or unsigned
+ * (_WMULTU) instruction.
+ */
+static void
+iqmulr(jit_state_t * _jit, int32_t r0, int32_t r1,
+       int32_t r2, int32_t r3, jit_bool_t sign)
+{
+    em_wp(_jit, sign ? _WMULT(r2, r3) : _WMULTU(r2, r3));
+
+    /* LO first, then HI -- same order as before. */
+    em_wp(_jit, _MFLO(r0));
+    em_wp(_jit, _MFHI(r1));
+}
+
+/* Immediate form of iqmulr(): i0 is loaded into a temporary register first. */
+static void
+iqmuli(jit_state_t * _jit, int32_t r0, int32_t r1,
+       int32_t r2, jit_word_t i0, jit_bool_t sign)
+{
+    jit_gpr_t factor = get_temp_gpr(_jit);
+
+    movi(_jit, rn(factor), i0);
+    iqmulr(_jit, r0, r1, r2, rn(factor), sign);
+
+    unget_temp_gpr(_jit);
+}
+
+/* Signed full-width multiply: r0 = LO, r1 = HI of r2 * r3. */
+static void
+qmulr(jit_state_t * _jit, int32_t r0, int32_t r1, int32_t r2, int32_t r3)
+{
+ iqmulr(_jit, r0, r1, r2, r3, 1);
+}
+
+/* Unsigned full-width multiply: r0 = LO, r1 = HI of r2 * r3. */
+static void
+qmulr_u(jit_state_t * _jit, int32_t r0, int32_t r1, int32_t r2, int32_t r3)
+{
+ iqmulr(_jit, r0, r1, r2, r3, 0);
+}
+
+/* Signed full-width multiply by an immediate: r0 = LO, r1 = HI of r2 * i0. */
+static void
+qmuli(jit_state_t * _jit, int32_t r0, int32_t r1, int32_t r2,
+ jit_word_t i0)
+{
+ iqmuli(_jit, r0, r1, r2, i0, 1);
+}
+
+/* Unsigned full-width multiply by an immediate: r0 = LO, r1 = HI of r2 * i0. */
+static void
+qmuli_u(jit_state_t * _jit, int32_t r0, int32_t r1, int32_t r2,
+ jit_word_t i0)
+{
+ iqmuli(_jit, r0, r1, r2, i0, 0);
+}
+
+/*
+ * Combined divide of r2 by r3: the quotient (LO) is read into r0 and
+ * the remainder (HI) into r1. `sign` selects the signed (_WDIV) or
+ * unsigned (_WDIVU) instruction.
+ */
+static void
+iqdivr(jit_state_t * _jit, int32_t r0, int32_t r1,
+       int32_t r2, int32_t r3, jit_bool_t sign)
+{
+    em_wp(_jit, sign ? _WDIV(r2, r3) : _WDIVU(r2, r3));
+
+    /* LO first, then HI -- same order as before. */
+    em_wp(_jit, _MFLO(r0));
+    em_wp(_jit, _MFHI(r1));
+}
+
+/* Immediate form of iqdivr(): i0 is loaded into a temporary register first. */
+static void
+iqdivi(jit_state_t * _jit, int32_t r0, int32_t r1,
+       int32_t r2, jit_word_t i0, jit_bool_t sign)
+{
+    jit_gpr_t divisor = get_temp_gpr(_jit);
+
+    movi(_jit, rn(divisor), i0);
+    iqdivr(_jit, r0, r1, r2, rn(divisor), sign);
+
+    unget_temp_gpr(_jit);
+}
+
+/* Signed divide: r0 = quotient, r1 = remainder of r2 / r3. */
+static void
+qdivr(jit_state_t * _jit, int32_t r0, int32_t r1, int32_t r2, int32_t r3)
+{
+ iqdivr(_jit, r0, r1, r2, r3, 1);
+}
+
+/* Unsigned divide: r0 = quotient, r1 = remainder of r2 / r3. */
+static void
+qdivr_u(jit_state_t * _jit, int32_t r0, int32_t r1, int32_t r2, int32_t r3)
+{
+ iqdivr(_jit, r0, r1, r2, r3, 0);
+}
+
+/* Signed divide by an immediate: r0 = quotient, r1 = remainder of r2 / i0. */
+static void
+qdivi(jit_state_t * _jit, int32_t r0, int32_t r1, int32_t r2,
+ jit_word_t i0)
+{
+ iqdivi(_jit, r0, r1, r2, i0, 1);
+}
+
+/* Unsigned divide by an immediate: r0 = quotient, r1 = remainder of r2 / i0. */
+static void
+qdivi_u(jit_state_t * _jit, int32_t r0, int32_t r1, int32_t r2,
+ jit_word_t i0)
+{
+ iqdivi(_jit, r0, r1, r2, i0, 0);
+}
+
+/* r0 = -r1, computed as a subtraction from the zero register. */
+static void
+negr(jit_state_t * _jit, int32_t r0, int32_t r1)
+{
+ subr(_jit, r0, rn(_ZERO), r1);
+}
+
+/* r0 = ~r1, computed as an exclusive-or with the constant -1. */
+static void
+comr(jit_state_t * _jit, int32_t r0, int32_t r1)
+{
+ xori(_jit, r0, r1, -1);
+}
+
+/* r0 = r1 & r2. */
+static void
+andr(jit_state_t * _jit, int32_t r0, int32_t r1, int32_t r2)
+{
+ em_wp(_jit, _AND(r0, r1, r2));
+}
+
+/*
+ * r0 = r1 & i0. Immediates accepted by can_zero_extend_short_p() use
+ * the one-instruction _ANDI form; others are loaded into a temporary
+ * register and combined with _AND.
+ */
+static void
+andi(jit_state_t * _jit, int32_t r0, int32_t r1, jit_word_t i0)
+{
+    if (!can_zero_extend_short_p(i0)) {
+        jit_gpr_t mask = get_temp_gpr(_jit);
+        movi(_jit, rn(mask), i0);
+        em_wp(_jit, _AND(r0, r1, rn(mask)));
+        unget_temp_gpr(_jit);
+        return;
+    }
+
+    em_wp(_jit, _ANDI(r0, r1, i0));
+}
+
+/* r0 = r1 | r2. */
+static void
+orr(jit_state_t * _jit, int32_t r0, int32_t r1, int32_t r2)
+{
+ em_wp(_jit, _OR(r0, r1, r2));
+}
+
+/*
+ * r0 = r1 | i0. Immediates accepted by can_zero_extend_short_p() use
+ * the one-instruction _ORI form; others are loaded into a temporary
+ * register and combined through orr().
+ */
+static void
+ori(jit_state_t * _jit, int32_t r0, int32_t r1, jit_word_t i0)
+{
+    if (!can_zero_extend_short_p(i0)) {
+        jit_gpr_t bits = get_temp_gpr(_jit);
+        movi(_jit, rn(bits), i0);
+        orr(_jit, r0, r1, rn(bits));
+        unget_temp_gpr(_jit);
+        return;
+    }
+
+    em_wp(_jit, _ORI(r0, r1, i0));
+}
+
+/* r0 = r1 ^ r2. */
+static void
+xorr(jit_state_t * _jit, int32_t r0, int32_t r1, int32_t r2)
+{
+ em_wp(_jit, _XOR(r0, r1, r2));
+}
+
+/*
+ * r0 = r1 ^ i0. Immediates accepted by can_zero_extend_short_p() use
+ * the one-instruction _XORI form (with the value masked to 16 bits);
+ * others are loaded into a temporary register and combined through
+ * xorr().
+ */
+static void
+xori(jit_state_t * _jit, int32_t r0, int32_t r1, jit_word_t i0)
+{
+    if (!can_zero_extend_short_p(i0)) {
+        jit_gpr_t bits = get_temp_gpr(_jit);
+        movi(_jit, rn(bits), i0);
+        xorr(_jit, r0, r1, rn(bits));
+        unget_temp_gpr(_jit);
+        return;
+    }
+
+    em_wp(_jit, _XORI(r0, r1, i0 & 0xffff));
+}
+
+/*
+ * r0 = r1, encoded as an OR with the zero register. Nothing is emitted
+ * when source and destination are the same register.
+ */
+static void
+movr(jit_state_t * _jit, int32_t r0, int32_t r1)
+{
+ if (r0 != r1)
+ em_wp(_jit, _OR(r0, r1, rn(_ZERO)));
+}
+
+/*
+ * Load the constant i0 into r0.
+ *
+ * Cases, tried in order:
+ *  - i0 == 0: OR of the zero register with itself;
+ *  - can_sign_extend_short_p(i0): one _WADDIU from the zero register;
+ *  - can_zero_extend_short_p(i0): one _ORI from the zero register;
+ *  - otherwise the upper bits are built first (see below) and the low
+ *    16 bits are OR-ed in last, if they are non-zero.
+ */
+static void
+movi(jit_state_t * _jit, int32_t r0, jit_word_t i0)
+{
+ if (i0 == 0)
+ em_wp(_jit, _OR(r0, rn(_ZERO), rn(_ZERO)));
+ else if (can_sign_extend_short_p(i0))
+ em_wp(_jit, _WADDIU(r0, rn(_ZERO), i0));
+ else if (can_zero_extend_short_p(i0))
+ em_wp(_jit, _ORI(r0, rn(_ZERO), i0));
+ else {
+ /* Upper half via a single LUI when can_sign_extend_int_p() accepts i0. */
+ if (can_sign_extend_int_p(i0))
+ em_wp(_jit, _LUI(r0, i0 >> 16));
+ else if (can_zero_extend_int_p(i0)) {
+ /* Otherwise place bits 16..31 with ORI and shift them up. */
+ if (i0 & 0xffff0000) {
+ em_wp(_jit, _ORI(r0, rn(_ZERO), (i0 >> 16) & 0xffff));
+ lshi(_jit, r0, r0, 16);
+ }
+ }
+#if __WORDSIZE == 64
+ else {
+ /*
+ * General 64-bit constant: recurse on the upper 32 bits, then
+ * shift them into place, merging bits 16..31 on the way when
+ * they are non-zero.
+ */
+ movi(_jit, r0, (jit_uword_t) i0 >> 32);
+ if (i0 & 0xffff0000) {
+ lshi(_jit, r0, r0, 16);
+ em_wp(_jit, _ORI(r0, r0, (i0 >> 16) & 0xffff));
+ lshi(_jit, r0, r0, 16);
+ } else
+ lshi(_jit, r0, r0, 32);
+ }
+#endif
+ /* Finally merge the low 16 bits. */
+ if (i0 & 0xffff)
+ em_wp(_jit, _ORI(r0, r0, i0 & 0xffff));
+ }
+}
+
+/*
+ * In-memory view of the instruction sequence written by
+ * emit_immediate_reloc(), one member per instruction and in emission
+ * order, so that patch_immediate_reloc() can fill in the immediate
+ * fields afterwards. The 64-bit sequence has four extra instructions
+ * between the leading lui and the trailing ori.
+ */
+typedef struct {
+    instr_t lui;
+#if __WORDSIZE == 64
+    instr_t ori2;
+    instr_t dsl1;
+    instr_t ori1;
+    instr_t dsl0;
+#endif
+    instr_t ori0;
+} immediate_t;
+
+/*
+ * Write the address `addr` into the immediate fields of the sequence at
+ * `loc`, which is interpreted as an immediate_t. Each field receives one
+ * 16-bit slice of the address, most significant slice first.
+ *
+ * TODO: does this work for both BE and LE?
+ */
+static void
+patch_immediate_reloc(uint32_t * loc, jit_pointer_t addr)
+{
+    jit_word_t target = (jit_word_t) addr;
+    immediate_t *seq = (immediate_t *) loc;
+
+#if __WORDSIZE == 64
+    seq->lui.I.i0 = target >> 48;
+    seq->ori2.I.i0 = target >> 32;
+    seq->ori1.I.i0 = target >> 16;
+#else
+    seq->lui.I.i0 = target >> 16;
+#endif
+    seq->ori0.I.i0 = target & 0xffff;
+}
+
+/*
+ * Emit the placeholder sequence that patch_immediate_reloc() later fills
+ * in: every immediate is 0 and the layout matches immediate_t. When
+ * `in_veneer` is set the words go through emit_u32(), otherwise through
+ * emit_u32_with_pool().
+ */
+static void
+emit_immediate_reloc(jit_state_t * _jit, int32_t r0, jit_bool_t in_veneer)
+{
+    void (*put)(jit_state_t * _jit, uint32_t u32);
+
+    if (in_veneer)
+        put = emit_u32;
+    else
+        put = emit_u32_with_pool;
+
+    put(_jit, _LUI(r0, 0));
+#if __WORDSIZE == 64
+    put(_jit, _ORI(r0, r0, 0));
+    put(_jit, _DSLL(r0, r0, 16));
+    put(_jit, _ORI(r0, r0, 0));
+    put(_jit, _DSLL(r0, r0, 16));
+#endif
+    put(_jit, _ORI(r0, r0, 0));
+}
+
+/*
+ * Record a JIT_RELOC_IMMEDIATE relocation at the current emission
+ * position and emit the (non-veneer) placeholder load of r0 there.
+ * Returns the relocation so the caller can patch it later.
+ */
+static jit_reloc_t
+movi_from_immediate(jit_state_t * _jit, int32_t r0)
+{
+    /* The current position serves as both location and base of the reloc. */
+    uint8_t *site = _jit->pc.uc;
+    jit_reloc_t reloc =
+        jit_reloc(_jit, JIT_RELOC_IMMEDIATE, 0, site, site, 0);
+
+    emit_immediate_reloc(_jit, r0, 0);
+    return reloc;
+}
+
+/* Load a yet-unknown address into r0; thin wrapper over movi_from_immediate(). */
+static jit_reloc_t
+mov_addr(jit_state_t * _jit, int32_t r0)
+{
+ return movi_from_immediate(_jit, r0);
+}
+
+/* Load a byte from the address in r1 into r0 using _LB (offset 0). */
+static void
+ldr_c(jit_state_t * _jit, int32_t r0, int32_t r1)
+{
+ em_wp(_jit, _LB(r0, 0, r1));
+}
+
+/* Load a byte from the address in r1 into r0 using _LBU (offset 0). */
+static void
+ldr_uc(jit_state_t * _jit, int32_t r0, int32_t r1)
+{
+ em_wp(_jit, _LBU(r0, 0, r1));
+}
+
+/* Load a halfword from the address in r1 into r0 using _LH (offset 0). */
+static void
+ldr_s(jit_state_t * _jit, int32_t r0, int32_t r1)
+{
+ em_wp(_jit, _LH(r0, 0, r1));
+}
+
+/* Load a halfword from the address in r1 into r0 using _LHU (offset 0). */
+static void
+ldr_us(jit_state_t * _jit, int32_t r0, int32_t r1)
+{
+ em_wp(_jit, _LHU(r0, 0, r1));
+}
+
+/* Load a word from the address in r1 into r0 using _LW (offset 0). */
+static void
+ldr_i(jit_state_t * _jit, int32_t r0, int32_t r1)
+{
+ em_wp(_jit, _LW(r0, 0, r1));
+}
+
+#if __WORDSIZE == 64
+/* 64-bit only: load a word from the address in r1 into r0 using _LWU (offset 0). */
+static void
+ldr_ui(jit_state_t * _jit, int32_t r0, int32_t r1)
+{
+ em_wp(_jit, _LWU(r0, 0, r1));
+}
+
+/* 64-bit only: load a doubleword from the address in r1 into r0 using _LD (offset 0). */
+static void
+ldr_l(jit_state_t * _jit, int32_t r0, int32_t r1)
+{
+ em_wp(_jit, _LD(r0, 0, r1));
+}
+#endif
+
+/*
+ * Load a byte (_LB) from the absolute address i0 into r0. Addresses
+ * accepted by can_sign_extend_short_p() are used directly as an offset
+ * from the zero register; others are first loaded into a temporary.
+ */
+static void
+ldi_c(jit_state_t * _jit, int32_t r0, jit_word_t i0)
+{
+    if (!can_sign_extend_short_p(i0)) {
+        jit_gpr_t base = get_temp_gpr(_jit);
+        movi(_jit, rn(base), i0);
+        ldr_c(_jit, r0, rn(base));
+        unget_temp_gpr(_jit);
+        return;
+    }
+
+    em_wp(_jit, _LB(r0, i0, rn(_ZERO)));
+}
+
+/*
+ * Load a byte (_LBU) from the absolute address i0 into r0. Addresses
+ * accepted by can_sign_extend_short_p() are used directly as an offset
+ * from the zero register; others are first loaded into a temporary.
+ */
+static void
+ldi_uc(jit_state_t * _jit, int32_t r0, jit_word_t i0)
+{
+    if (!can_sign_extend_short_p(i0)) {
+        jit_gpr_t base = get_temp_gpr(_jit);
+        movi(_jit, rn(base), i0);
+        ldr_uc(_jit, r0, rn(base));
+        unget_temp_gpr(_jit);
+        return;
+    }
+
+    em_wp(_jit, _LBU(r0, i0, rn(_ZERO)));
+}
+
+/*
+ * LH into r0 from the absolute address i0.  When can_sign_extend_short_p()
+ * accepts i0 it is used directly as the offset from _ZERO; otherwise it is
+ * first loaded into a temporary register.
+ */
+static void
+ldi_s(jit_state_t * _jit, int32_t r0, jit_word_t i0)
+{
+    if (can_sign_extend_short_p(i0)) {
+        em_wp(_jit, _LH(r0, i0, rn(_ZERO)));
+        return;
+    }
+
+    jit_gpr_t scratch = get_temp_gpr(_jit);
+
+    movi(_jit, rn(scratch), i0);
+    ldr_s(_jit, r0, rn(scratch));
+    unget_temp_gpr(_jit);
+}
+
+/*
+ * LHU into r0 from the absolute address i0.  When can_sign_extend_short_p()
+ * accepts i0 it is used directly as the offset from _ZERO; otherwise it is
+ * first loaded into a temporary register.
+ */
+static void
+ldi_us(jit_state_t * _jit, int32_t r0, jit_word_t i0)
+{
+    if (can_sign_extend_short_p(i0)) {
+        em_wp(_jit, _LHU(r0, i0, rn(_ZERO)));
+        return;
+    }
+
+    jit_gpr_t scratch = get_temp_gpr(_jit);
+
+    movi(_jit, rn(scratch), i0);
+    ldr_us(_jit, r0, rn(scratch));
+    unget_temp_gpr(_jit);
+}
+
+/*
+ * LW into r0 from the absolute address i0.  When can_sign_extend_short_p()
+ * accepts i0 it is used directly as the offset from _ZERO; otherwise it is
+ * first loaded into a temporary register.
+ */
+static void
+ldi_i(jit_state_t * _jit, int32_t r0, jit_word_t i0)
+{
+    if (can_sign_extend_short_p(i0)) {
+        em_wp(_jit, _LW(r0, i0, rn(_ZERO)));
+        return;
+    }
+
+    jit_gpr_t scratch = get_temp_gpr(_jit);
+
+    movi(_jit, rn(scratch), i0);
+    ldr_i(_jit, r0, rn(scratch));
+    unget_temp_gpr(_jit);
+}
+
+#if __WORDSIZE == 64
+/*
+ * LWU into r0 from the absolute address i0.  When can_sign_extend_short_p()
+ * accepts i0 it is used directly as the offset from _ZERO; otherwise it is
+ * first loaded into a temporary register.
+ */
+static void
+ldi_ui(jit_state_t * _jit, int32_t r0, jit_word_t i0)
+{
+    if (can_sign_extend_short_p(i0)) {
+        em_wp(_jit, _LWU(r0, i0, rn(_ZERO)));
+        return;
+    }
+
+    jit_gpr_t scratch = get_temp_gpr(_jit);
+
+    movi(_jit, rn(scratch), i0);
+    ldr_ui(_jit, r0, rn(scratch));
+    unget_temp_gpr(_jit);
+}
+
+/*
+ * LD into r0 from the absolute address i0.  When can_sign_extend_short_p()
+ * accepts i0 it is used directly as the offset from _ZERO; otherwise it is
+ * first loaded into a temporary register.
+ */
+static void
+ldi_l(jit_state_t * _jit, int32_t r0, jit_word_t i0)
+{
+    if (can_sign_extend_short_p(i0)) {
+        em_wp(_jit, _LD(r0, i0, rn(_ZERO)));
+        return;
+    }
+
+    jit_gpr_t scratch = get_temp_gpr(_jit);
+
+    movi(_jit, rn(scratch), i0);
+    ldr_l(_jit, r0, rn(scratch));
+    unget_temp_gpr(_jit);
+}
+#endif
+
+/* ldr_c from the address r1 + r2; the sum is formed in a temporary register. */
+static void
+ldxr_c(jit_state_t * _jit, int32_t r0, int32_t r1, int32_t r2)
+{
+    jit_gpr_t sum = get_temp_gpr(_jit);
+    const uint32_t add_insn = _WADDR(rn(sum), r1, r2);
+
+    em_wp(_jit, add_insn);
+    ldr_c(_jit, r0, rn(sum));
+    unget_temp_gpr(_jit);
+}
+
+/*
+ * LB into r0 from the address r1 + i0.  i0 goes straight into the offset
+ * field when can_sign_extend_short_p() accepts it; otherwise r1 + i0 is
+ * computed into a temporary register first.
+ */
+static void
+ldxi_c(jit_state_t * _jit, int32_t r0, int32_t r1, jit_word_t i0)
+{
+    if (can_sign_extend_short_p(i0)) {
+        em_wp(_jit, _LB(r0, i0, r1));
+        return;
+    }
+
+    jit_gpr_t scratch = get_temp_gpr(_jit);
+
+    addi(_jit, rn(scratch), r1, i0);
+    ldr_c(_jit, r0, rn(scratch));
+    unget_temp_gpr(_jit);
+}
+
+/* ldr_uc from the address r1 + r2; the sum is formed in a temporary register. */
+static void
+ldxr_uc(jit_state_t * _jit, int32_t r0, int32_t r1, int32_t r2)
+{
+    jit_gpr_t sum = get_temp_gpr(_jit);
+    const uint32_t add_insn = _WADDR(rn(sum), r1, r2);
+
+    em_wp(_jit, add_insn);
+    ldr_uc(_jit, r0, rn(sum));
+    unget_temp_gpr(_jit);
+}
+
+/*
+ * LBU into r0 from the address r1 + i0.  i0 goes straight into the offset
+ * field when can_sign_extend_short_p() accepts it; otherwise r1 + i0 is
+ * computed into a temporary register first.
+ */
+static void
+ldxi_uc(jit_state_t * _jit, int32_t r0, int32_t r1, jit_word_t i0)
+{
+    if (can_sign_extend_short_p(i0)) {
+        em_wp(_jit, _LBU(r0, i0, r1));
+        return;
+    }
+
+    jit_gpr_t scratch = get_temp_gpr(_jit);
+
+    addi(_jit, rn(scratch), r1, i0);
+    ldr_uc(_jit, r0, rn(scratch));
+    unget_temp_gpr(_jit);
+}
+
+/* ldr_s from the address r1 + r2; the sum is formed in a temporary register. */
+static void
+ldxr_s(jit_state_t * _jit, int32_t r0, int32_t r1, int32_t r2)
+{
+    jit_gpr_t sum = get_temp_gpr(_jit);
+    const uint32_t add_insn = _WADDR(rn(sum), r1, r2);
+
+    em_wp(_jit, add_insn);
+    ldr_s(_jit, r0, rn(sum));
+    unget_temp_gpr(_jit);
+}
+
+/*
+ * LH into r0 from the address r1 + i0.  i0 goes straight into the offset
+ * field when can_sign_extend_short_p() accepts it; otherwise r1 + i0 is
+ * computed into a temporary register first.
+ */
+static void
+ldxi_s(jit_state_t * _jit, int32_t r0, int32_t r1, jit_word_t i0)
+{
+    if (can_sign_extend_short_p(i0)) {
+        em_wp(_jit, _LH(r0, i0, r1));
+        return;
+    }
+
+    jit_gpr_t scratch = get_temp_gpr(_jit);
+
+    addi(_jit, rn(scratch), r1, i0);
+    ldr_s(_jit, r0, rn(scratch));
+    unget_temp_gpr(_jit);
+}
+
+/* ldr_us from the address r1 + r2; the sum is formed in a temporary register. */
+static void
+ldxr_us(jit_state_t * _jit, int32_t r0, int32_t r1, int32_t r2)
+{
+    jit_gpr_t sum = get_temp_gpr(_jit);
+    const uint32_t add_insn = _WADDR(rn(sum), r1, r2);
+
+    em_wp(_jit, add_insn);
+    ldr_us(_jit, r0, rn(sum));
+    unget_temp_gpr(_jit);
+}
+
+/*
+ * LHU into r0 from the address r1 + i0.  i0 goes straight into the offset
+ * field when can_sign_extend_short_p() accepts it; otherwise r1 + i0 is
+ * computed into a temporary register first.
+ */
+static void
+ldxi_us(jit_state_t * _jit, int32_t r0, int32_t r1, jit_word_t i0)
+{
+    if (can_sign_extend_short_p(i0)) {
+        em_wp(_jit, _LHU(r0, i0, r1));
+        return;
+    }
+
+    jit_gpr_t scratch = get_temp_gpr(_jit);
+
+    addi(_jit, rn(scratch), r1, i0);
+    ldr_us(_jit, r0, rn(scratch));
+    unget_temp_gpr(_jit);
+}
+
+/* ldr_i from the address r1 + r2; the sum is formed in a temporary register. */
+static void
+ldxr_i(jit_state_t * _jit, int32_t r0, int32_t r1, int32_t r2)
+{
+    jit_gpr_t sum = get_temp_gpr(_jit);
+    const uint32_t add_insn = _WADDR(rn(sum), r1, r2);
+
+    em_wp(_jit, add_insn);
+    ldr_i(_jit, r0, rn(sum));
+    unget_temp_gpr(_jit);
+}
+
+/*
+ * LW into r0 from the address r1 + i0.  i0 goes straight into the offset
+ * field when can_sign_extend_short_p() accepts it; otherwise r1 + i0 is
+ * computed into a temporary register first.
+ */
+static void
+ldxi_i(jit_state_t * _jit, int32_t r0, int32_t r1, jit_word_t i0)
+{
+    if (can_sign_extend_short_p(i0)) {
+        em_wp(_jit, _LW(r0, i0, r1));
+        return;
+    }
+
+    jit_gpr_t scratch = get_temp_gpr(_jit);
+
+    addi(_jit, rn(scratch), r1, i0);
+    ldr_i(_jit, r0, rn(scratch));
+    unget_temp_gpr(_jit);
+}
+
+#if __WORDSIZE == 64
+/* ldr_ui from the address r1 + r2; the sum is formed in a temporary register. */
+static void
+ldxr_ui(jit_state_t * _jit, int32_t r0, int32_t r1, int32_t r2)
+{
+    jit_gpr_t sum = get_temp_gpr(_jit);
+    const uint32_t add_insn = _WADDR(rn(sum), r1, r2);
+
+    em_wp(_jit, add_insn);
+    ldr_ui(_jit, r0, rn(sum));
+    unget_temp_gpr(_jit);
+}
+
+/*
+ * LWU into r0 from the address r1 + i0.  i0 goes straight into the offset
+ * field when can_sign_extend_short_p() accepts it; otherwise r1 + i0 is
+ * computed into a temporary register first.
+ */
+static void
+ldxi_ui(jit_state_t * _jit, int32_t r0, int32_t r1, jit_word_t i0)
+{
+    if (can_sign_extend_short_p(i0)) {
+        em_wp(_jit, _LWU(r0, i0, r1));
+        return;
+    }
+
+    jit_gpr_t scratch = get_temp_gpr(_jit);
+
+    addi(_jit, rn(scratch), r1, i0);
+    ldr_ui(_jit, r0, rn(scratch));
+    unget_temp_gpr(_jit);
+}
+
+/* ldr_l from the address r1 + r2; the sum is formed in a temporary register. */
+static void
+ldxr_l(jit_state_t * _jit, int32_t r0, int32_t r1, int32_t r2)
+{
+    jit_gpr_t sum = get_temp_gpr(_jit);
+    const uint32_t add_insn = _WADDR(rn(sum), r1, r2);
+
+    em_wp(_jit, add_insn);
+    ldr_l(_jit, r0, rn(sum));
+    unget_temp_gpr(_jit);
+}
+
+/*
+ * LD into r0 from the address r1 + i0.  i0 goes straight into the offset
+ * field when can_sign_extend_short_p() accepts it; otherwise r1 + i0 is
+ * computed into a temporary register first.
+ */
+static void
+ldxi_l(jit_state_t * _jit, int32_t r0, int32_t r1, jit_word_t i0)
+{
+    if (can_sign_extend_short_p(i0)) {
+        em_wp(_jit, _LD(r0, i0, r1));
+        return;
+    }
+
+    jit_gpr_t scratch = get_temp_gpr(_jit);
+
+    addi(_jit, rn(scratch), r1, i0);
+    ldr_l(_jit, r0, rn(scratch));
+    unget_temp_gpr(_jit);
+}
+#endif
+
+/* Emit SB: store r1 at offset 0 of the address held in r0. */
+static void
+str_c(jit_state_t * _jit, int32_t r0, int32_t r1)
+{
+    const uint32_t insn = _SB(r1, 0, r0);
+
+    em_wp(_jit, insn);
+}
+
+/* Emit SH: store r1 at offset 0 of the address held in r0. */
+static void
+str_s(jit_state_t * _jit, int32_t r0, int32_t r1)
+{
+    const uint32_t insn = _SH(r1, 0, r0);
+
+    em_wp(_jit, insn);
+}
+
+/* Emit SW: store r1 at offset 0 of the address held in r0. */
+static void
+str_i(jit_state_t * _jit, int32_t r0, int32_t r1)
+{
+    const uint32_t insn = _SW(r1, 0, r0);
+
+    em_wp(_jit, insn);
+}
+
+#if __WORDSIZE == 64
+/* Emit SD: store r1 at offset 0 of the address held in r0. */
+static void
+str_l(jit_state_t * _jit, int32_t r0, int32_t r1)
+{
+    const uint32_t insn = _SD(r1, 0, r0);
+
+    em_wp(_jit, insn);
+}
+#endif
+
+/*
+ * SB of r0 to the absolute address i0.  When can_sign_extend_short_p()
+ * accepts i0 it is used directly as the offset from _ZERO; otherwise it is
+ * first loaded into a temporary register.
+ */
+static void
+sti_c(jit_state_t * _jit, jit_word_t i0, int32_t r0)
+{
+    if (can_sign_extend_short_p(i0)) {
+        em_wp(_jit, _SB(r0, i0, rn(_ZERO)));
+        return;
+    }
+
+    jit_gpr_t scratch = get_temp_gpr(_jit);
+
+    movi(_jit, rn(scratch), i0);
+    str_c(_jit, rn(scratch), r0);
+    unget_temp_gpr(_jit);
+}
+
+/*
+ * SH of r0 to the absolute address i0.  When can_sign_extend_short_p()
+ * accepts i0 it is used directly as the offset from _ZERO; otherwise it is
+ * first loaded into a temporary register.
+ */
+static void
+sti_s(jit_state_t * _jit, jit_word_t i0, int32_t r0)
+{
+    if (can_sign_extend_short_p(i0)) {
+        em_wp(_jit, _SH(r0, i0, rn(_ZERO)));
+        return;
+    }
+
+    jit_gpr_t scratch = get_temp_gpr(_jit);
+
+    movi(_jit, rn(scratch), i0);
+    str_s(_jit, rn(scratch), r0);
+    unget_temp_gpr(_jit);
+}
+
+/*
+ * SW of r0 to the absolute address i0.  When can_sign_extend_short_p()
+ * accepts i0 it is used directly as the offset from _ZERO; otherwise it is
+ * first loaded into a temporary register.
+ */
+static void
+sti_i(jit_state_t * _jit, jit_word_t i0, int32_t r0)
+{
+    if (can_sign_extend_short_p(i0)) {
+        em_wp(_jit, _SW(r0, i0, rn(_ZERO)));
+        return;
+    }
+
+    jit_gpr_t scratch = get_temp_gpr(_jit);
+
+    movi(_jit, rn(scratch), i0);
+    str_i(_jit, rn(scratch), r0);
+    unget_temp_gpr(_jit);
+}
+
+#if __WORDSIZE == 64
+/*
+ * SD of r0 to the absolute address i0.  When can_sign_extend_short_p()
+ * accepts i0 it is used directly as the offset from _ZERO; otherwise it is
+ * first loaded into a temporary register.
+ */
+static void
+sti_l(jit_state_t * _jit, jit_word_t i0, int32_t r0)
+{
+    if (can_sign_extend_short_p(i0)) {
+        em_wp(_jit, _SD(r0, i0, rn(_ZERO)));
+        return;
+    }
+
+    jit_gpr_t scratch = get_temp_gpr(_jit);
+
+    movi(_jit, rn(scratch), i0);
+    str_l(_jit, rn(scratch), r0);
+    unget_temp_gpr(_jit);
+}
+#endif
+
+/* str_c of r2 to the address r0 + r1; the sum is formed in a temporary register. */
+static void
+stxr_c(jit_state_t * _jit, int32_t r0, int32_t r1, int32_t r2)
+{
+    jit_gpr_t sum = get_temp_gpr(_jit);
+    const uint32_t add_insn = _WADDR(rn(sum), r0, r1);
+
+    em_wp(_jit, add_insn);
+    str_c(_jit, rn(sum), r2);
+    unget_temp_gpr(_jit);
+}
+
+/*
+ * SB of r1 to the address r0 + i0.  i0 goes straight into the offset field
+ * when can_sign_extend_short_p() accepts it; otherwise r0 + i0 is computed
+ * into a temporary register first.
+ */
+static void
+stxi_c(jit_state_t * _jit, jit_word_t i0, int32_t r0, int32_t r1)
+{
+    if (can_sign_extend_short_p(i0)) {
+        em_wp(_jit, _SB(r1, i0, r0));
+        return;
+    }
+
+    jit_gpr_t scratch = get_temp_gpr(_jit);
+
+    addi(_jit, rn(scratch), r0, i0);
+    str_c(_jit, rn(scratch), r1);
+    unget_temp_gpr(_jit);
+}
+
+/* str_s of r2 to the address r0 + r1; the sum is formed in a temporary register. */
+static void
+stxr_s(jit_state_t * _jit, int32_t r0, int32_t r1, int32_t r2)
+{
+    jit_gpr_t sum = get_temp_gpr(_jit);
+    const uint32_t add_insn = _WADDR(rn(sum), r0, r1);
+
+    em_wp(_jit, add_insn);
+    str_s(_jit, rn(sum), r2);
+    unget_temp_gpr(_jit);
+}
+
+/*
+ * SH of r1 to the address r0 + i0.  i0 goes straight into the offset field
+ * when can_sign_extend_short_p() accepts it; otherwise r0 + i0 is computed
+ * into a temporary register first.
+ */
+static void
+stxi_s(jit_state_t * _jit, jit_word_t i0, int32_t r0, int32_t r1)
+{
+    if (can_sign_extend_short_p(i0)) {
+        em_wp(_jit, _SH(r1, i0, r0));
+        return;
+    }
+
+    jit_gpr_t scratch = get_temp_gpr(_jit);
+
+    addi(_jit, rn(scratch), r0, i0);
+    str_s(_jit, rn(scratch), r1);
+    unget_temp_gpr(_jit);
+}
+
+/* str_i of r2 to the address r0 + r1; the sum is formed in a temporary register. */
+static void
+stxr_i(jit_state_t * _jit, int32_t r0, int32_t r1, int32_t r2)
+{
+    jit_gpr_t sum = get_temp_gpr(_jit);
+    const uint32_t add_insn = _WADDR(rn(sum), r0, r1);
+
+    em_wp(_jit, add_insn);
+    str_i(_jit, rn(sum), r2);
+    unget_temp_gpr(_jit);
+}
+
+/*
+ * SW of r1 to the address r0 + i0.  i0 goes straight into the offset field
+ * when can_sign_extend_short_p() accepts it; otherwise r0 + i0 is computed
+ * into a temporary register first.
+ */
+static void
+stxi_i(jit_state_t * _jit, jit_word_t i0, int32_t r0, int32_t r1)
+{
+    if (can_sign_extend_short_p(i0)) {
+        em_wp(_jit, _SW(r1, i0, r0));
+        return;
+    }
+
+    jit_gpr_t scratch = get_temp_gpr(_jit);
+
+    addi(_jit, rn(scratch), r0, i0);
+    str_i(_jit, rn(scratch), r1);
+    unget_temp_gpr(_jit);
+}
+
+#if __WORDSIZE == 64
+/* str_l of r2 to the address r0 + r1; the sum is formed in a temporary register. */
+static void
+stxr_l(jit_state_t * _jit, int32_t r0, int32_t r1, int32_t r2)
+{
+    jit_gpr_t sum = get_temp_gpr(_jit);
+    const uint32_t add_insn = _WADDR(rn(sum), r0, r1);
+
+    em_wp(_jit, add_insn);
+    str_l(_jit, rn(sum), r2);
+    unget_temp_gpr(_jit);
+}
+
+/*
+ * SD of r1 to the address r0 + i0.  i0 goes straight into the offset field
+ * when can_sign_extend_short_p() accepts it; otherwise r0 + i0 is computed
+ * into a temporary register first.
+ */
+static void
+stxi_l(jit_state_t * _jit, jit_word_t i0, int32_t r0, int32_t r1)
+{
+    if (can_sign_extend_short_p(i0)) {
+        em_wp(_jit, _SD(r1, i0, r0));
+        return;
+    }
+
+    jit_gpr_t scratch = get_temp_gpr(_jit);
+
+    addi(_jit, rn(scratch), r0, i0);
+    str_l(_jit, rn(scratch), r1);
+    unget_temp_gpr(_jit);
+}
+#endif
+
+/*
+ * Byte-swap the low 16 bits of r1 into r0:
+ * r0 = ((r1 & 0xff) << 8) | ((r1 >> 8) & 0xff).
+ *
+ * r1 is not read again once r0 has been written, so the statement order
+ * below also covers r0 and r1 naming the same register.
+ */
+static void
+bswapr_us(jit_state_t * _jit, int32_t r0, int32_t r1)
+{
+ jit_gpr_t t0 = get_temp_gpr(_jit);
+ /* t0 = second byte of r1, r0 = lowest byte of r1 */
+ rshi(_jit, rn(t0), r1, 8);
+ andi(_jit, r0, r1, 0xff);
+ andi(_jit, rn(t0), rn(t0), 0xff);
+ /* move the lowest byte up and merge in the second byte */
+ lshi(_jit, r0, r0, 8);
+ orr(_jit, r0, r0, rn(t0));
+ unget_temp_gpr(_jit);
+}
+
+/*
+ * Byte-swap the low 32 bits of r1 into r0 using three temporary registers.
+ *
+ * All reads of r1 happen before r0 is completed, with r0's first write being
+ * the last read of r1, so r0 and r1 may name the same register.
+ */
+static void
+bswapr_ui(jit_state_t * _jit, int32_t r0, int32_t r1)
+{
+ jit_gpr_t t0 = get_temp_gpr(_jit);
+ jit_gpr_t t1 = get_temp_gpr(_jit);
+ jit_gpr_t t2 = get_temp_gpr(_jit);
+ /* isolate bytes 3, 2 and 1 of r1 in t0, t1 and t2 */
+ rshi(_jit, rn(t0), r1, 24);
+ rshi(_jit, rn(t1), r1, 16);
+ rshi(_jit, rn(t2), r1, 8);
+ andi(_jit, rn(t0), rn(t0), 0xff);
+ andi(_jit, rn(t1), rn(t1), 0xff);
+ andi(_jit, rn(t2), rn(t2), 0xff);
+ /* byte 0 goes to bits 24..31 of the result */
+ andi(_jit, r0, r1, 0xff);
+ lshi(_jit, r0, r0, 24);
+ /* byte 2 -> bits 8..15, byte 3 -> bits 0..7, byte 1 -> bits 16..23 */
+ lshi(_jit, rn(t1), rn(t1), 8);
+ orr(_jit, r0, r0, rn(t0));
+ lshi(_jit, rn(t2), rn(t2), 16);
+ orr(_jit, r0, r0, rn(t1));
+ orr(_jit, r0, r0, rn(t2));
+ /* one release per temporary acquired above */
+ unget_temp_gpr(_jit);
+ unget_temp_gpr(_jit);
+ unget_temp_gpr(_jit);
+}
+
+#if __WORDSIZE == 64
+/*
+ * Byte-swap the 64-bit value in r1 into r0 by swapping each 32-bit half with
+ * bswapr_ui() and exchanging the halves.
+ */
+static void
+bswapr_ul(jit_state_t * _jit, int32_t r0, int32_t r1)
+{
+ jit_gpr_t t0 = get_temp_gpr(_jit);
+ /* t0 = upper half of r1, taken before r0 (possibly r1) is overwritten */
+ rshi_u(_jit, rn(t0), r1, 32);
+ /* r0 = swapped lower half, t0 = swapped upper half */
+ bswapr_ui(_jit, r0, r1);
+ bswapr_ui(_jit, rn(t0), rn(t0));
+ /* swapped lower half becomes the new upper half */
+ lshi(_jit, r0, r0, 32);
+ orr(_jit, r0, r0, rn(t0));
+ unget_temp_gpr(_jit);
+}
+#endif
+
+/* r0 = r1 & 0xff, emitted as a single ANDI. */
+static void
+extr_uc(jit_state_t * _jit, int32_t r0, int32_t r1)
+{
+    const uint32_t insn = _ANDI(r0, r1, 0xff);
+
+    em_wp(_jit, insn);
+}
+
+/* r0 = r1 & 0xffff, emitted as a single ANDI. */
+static void
+extr_us(jit_state_t * _jit, int32_t r0, int32_t r1)
+{
+    const uint32_t insn = _ANDI(r0, r1, 0xffff);
+
+    em_wp(_jit, insn);
+}
+
+/*
+ * Extend the low 8 bits of r1 into r0.  Uses SEB when jit_mips2_p() holds,
+ * otherwise a left shift followed by a right shift (rshi) by
+ * __WORDSIZE - 8.
+ */
+static void
+extr_c(jit_state_t * _jit, int32_t r0, int32_t r1)
+{
+    if (!jit_mips2_p()) {
+        lshi(_jit, r0, r1, __WORDSIZE - 8);
+        rshi(_jit, r0, r0, __WORDSIZE - 8);
+        return;
+    }
+
+    em_wp(_jit, _SEB(r0, r1));
+}
+
+/*
+ * Extend the low 16 bits of r1 into r0.  Uses SEH when jit_mips2_p() holds,
+ * otherwise a left shift followed by a right shift (rshi) by
+ * __WORDSIZE - 16.
+ */
+static void
+extr_s(jit_state_t * _jit, int32_t r0, int32_t r1)
+{
+    if (!jit_mips2_p()) {
+        lshi(_jit, r0, r1, __WORDSIZE - 16);
+        rshi(_jit, r0, r0, __WORDSIZE - 16);
+        return;
+    }
+
+    em_wp(_jit, _SEH(r0, r1));
+}
+
+#if __WORDSIZE == 64
+/* Emit SLL r0, r1, 0 (64-bit builds only). */
+static void
+extr_i(jit_state_t * _jit, int32_t r0, int32_t r1)
+{
+    const uint32_t insn = _SLL(r0, r1, 0);
+
+    em_wp(_jit, insn);
+}
+
+/*
+ * Keep only the low 32 bits of r1 in r0: shift left by 32, then shift back
+ * with rshi_u (64-bit builds only).
+ */
+static void
+extr_ui(jit_state_t * _jit, int32_t r0, int32_t r1)
+{
+    lshi(_jit, r0, r1, 32);
+
+    rshi_u(_jit, r0, r0, 32);
+}
+#endif
+
+/*
+ * Branch if r0 < r1: SLT into a temporary register, then BNE against _ZERO
+ * with a NOP in the delay slot.  Returns the jump's relocation.
+ */
+static jit_reloc_t
+bltr(jit_state_t * _jit, int32_t r0, int32_t r1)
+{
+    jit_reloc_t reloc;
+    jit_gpr_t flag = get_temp_gpr(_jit);
+
+    em_wp(_jit, _SLT(rn(flag), r0, r1));
+    reloc = emit_jump(_jit, _BNE(rn(flag), rn(_ZERO), 0), _NOP(1));
+    unget_temp_gpr(_jit);
+
+    return reloc;
+}
+
+/*
+ * Branch if r0 < r1, compared with SLTU: the result goes into a temporary
+ * register, followed by BNE against _ZERO with a NOP in the delay slot.
+ * Returns the jump's relocation.
+ */
+static jit_reloc_t
+bltr_u(jit_state_t * _jit, int32_t r0, int32_t r1)
+{
+    jit_reloc_t reloc;
+    jit_gpr_t flag = get_temp_gpr(_jit);
+
+    em_wp(_jit, _SLTU(rn(flag), r0, r1));
+    reloc = emit_jump(_jit, _BNE(rn(flag), rn(_ZERO), 0), _NOP(1));
+    unget_temp_gpr(_jit);
+
+    return reloc;
+}
+
+/* Immediate form of bltr(): i0 is loaded into a temporary register first. */
+static jit_reloc_t
+blti(jit_state_t * _jit, int32_t r0, jit_word_t i0)
+{
+    jit_reloc_t reloc;
+    jit_gpr_t imm = get_temp_gpr(_jit);
+
+    movi(_jit, rn(imm), i0);
+    reloc = bltr(_jit, r0, rn(imm));
+    unget_temp_gpr(_jit);
+
+    return reloc;
+}
+
+/* Immediate form of bltr_u(): i0 is loaded into a temporary register first. */
+static jit_reloc_t
+blti_u(jit_state_t * _jit, int32_t r0, jit_word_t i0)
+{
+    jit_reloc_t reloc;
+    jit_gpr_t imm = get_temp_gpr(_jit);
+
+    movi(_jit, rn(imm), i0);
+    reloc = bltr_u(_jit, r0, rn(imm));
+    unget_temp_gpr(_jit);
+
+    return reloc;
+}
+
+/* r0 <= r1 is expressed as r1 >= r0, so this is bger() with operands swapped. */
+static jit_reloc_t
+bler(jit_state_t * _jit, int32_t r0, int32_t r1)
+{
+    jit_reloc_t reloc = bger(_jit, r1, r0);
+
+    return reloc;
+}
+
+/* r0 <= r1 is expressed as r1 >= r0, so this is bger_u() with operands swapped. */
+static jit_reloc_t
+bler_u(jit_state_t * _jit, int32_t r0, int32_t r1)
+{
+    jit_reloc_t reloc = bger_u(_jit, r1, r0);
+
+    return reloc;
+}
+
+/* Immediate form of bler(): i0 is loaded into a temporary register first. */
+static jit_reloc_t
+blei(jit_state_t * _jit, int32_t r0, jit_word_t i0)
+{
+    jit_reloc_t reloc;
+    jit_gpr_t imm = get_temp_gpr(_jit);
+
+    movi(_jit, rn(imm), i0);
+    reloc = bler(_jit, r0, rn(imm));
+    unget_temp_gpr(_jit);
+
+    return reloc;
+}
+
+/* Immediate form of bler_u(): i0 is loaded into a temporary register first. */
+static jit_reloc_t
+blei_u(jit_state_t * _jit, int32_t r0, jit_word_t i0)
+{
+    jit_reloc_t reloc;
+    jit_gpr_t imm = get_temp_gpr(_jit);
+
+    movi(_jit, rn(imm), i0);
+    reloc = bler_u(_jit, r0, rn(imm));
+    unget_temp_gpr(_jit);
+
+    return reloc;
+}
+
+/* Branch if r0 == r1: BEQ with a NOP in the delay slot. */
+static jit_reloc_t
+beqr(jit_state_t * _jit, int32_t r0, int32_t r1)
+{
+    return emit_jump(_jit, _BEQ(r0, r1, 0), _NOP(1));
+}
+
+/* Immediate form of beqr(): i0 is loaded into a temporary register first. */
+static jit_reloc_t
+beqi(jit_state_t * _jit, int32_t r0, jit_word_t i0)
+{
+    jit_reloc_t reloc;
+    jit_gpr_t imm = get_temp_gpr(_jit);
+
+    movi(_jit, rn(imm), i0);
+    reloc = beqr(_jit, r0, rn(imm));
+    unget_temp_gpr(_jit);
+
+    return reloc;
+}
+
+/*
+ * Branch if r0 >= r1: compute r0 < r1 with SLT into a temporary register and
+ * branch, via beqr(), when that result equals _ZERO.
+ */
+static jit_reloc_t
+bger(jit_state_t * _jit, int32_t r0, int32_t r1)
+{
+    jit_reloc_t reloc;
+    jit_gpr_t flag = get_temp_gpr(_jit);
+
+    em_wp(_jit, _SLT(rn(flag), r0, r1));
+    reloc = beqr(_jit, rn(flag), rn(_ZERO));
+    unget_temp_gpr(_jit);
+
+    return reloc;
+}
+
+/*
+ * Branch if r0 >= r1: compute r0 < r1 with SLTU into a temporary register
+ * and branch, via beqr(), when that result equals _ZERO.
+ */
+static jit_reloc_t
+bger_u(jit_state_t * _jit, int32_t r0, int32_t r1)
+{
+    jit_reloc_t reloc;
+    jit_gpr_t flag = get_temp_gpr(_jit);
+
+    em_wp(_jit, _SLTU(rn(flag), r0, r1));
+    reloc = beqr(_jit, rn(flag), rn(_ZERO));
+    unget_temp_gpr(_jit);
+
+    return reloc;
+}
+
+/* Immediate form of bger(): i0 is loaded into a temporary register first. */
+static jit_reloc_t
+bgei(jit_state_t * _jit, int32_t r0, jit_word_t i0)
+{
+    jit_reloc_t reloc;
+    jit_gpr_t imm = get_temp_gpr(_jit);
+
+    movi(_jit, rn(imm), i0);
+    reloc = bger(_jit, r0, rn(imm));
+    unget_temp_gpr(_jit);
+
+    return reloc;
+}
+
+/* Immediate form of bger_u(): i0 is loaded into a temporary register first. */
+static jit_reloc_t
+bgei_u(jit_state_t * _jit, int32_t r0, jit_word_t i0)
+{
+    jit_reloc_t reloc;
+    jit_gpr_t imm = get_temp_gpr(_jit);
+
+    movi(_jit, rn(imm), i0);
+    reloc = bger_u(_jit, r0, rn(imm));
+    unget_temp_gpr(_jit);
+
+    return reloc;
+}
+
+/* r0 > r1 is expressed as r1 < r0, so this is bltr() with operands swapped. */
+static jit_reloc_t
+bgtr(jit_state_t * _jit, int32_t r0, int32_t r1)
+{
+    jit_reloc_t reloc = bltr(_jit, r1, r0);
+
+    return reloc;
+}
+
+/* r0 > r1 is expressed as r1 < r0, so this is bltr_u() with operands swapped. */
+static jit_reloc_t
+bgtr_u(jit_state_t * _jit, int32_t r0, int32_t r1)
+{
+    jit_reloc_t reloc = bltr_u(_jit, r1, r0);
+
+    return reloc;
+}
+
+/* Immediate form of bgtr(): i0 is loaded into a temporary register first. */
+static jit_reloc_t
+bgti(jit_state_t * _jit, int32_t r0, jit_word_t i0)
+{
+    jit_reloc_t reloc;
+    jit_gpr_t imm = get_temp_gpr(_jit);
+
+    movi(_jit, rn(imm), i0);
+    reloc = bgtr(_jit, r0, rn(imm));
+    unget_temp_gpr(_jit);
+
+    return reloc;
+}
+
+/* Immediate form of bgtr_u(): i0 is loaded into a temporary register first. */
+static jit_reloc_t
+bgti_u(jit_state_t * _jit, int32_t r0, jit_word_t i0)
+{
+    jit_reloc_t reloc;
+    jit_gpr_t imm = get_temp_gpr(_jit);
+
+    movi(_jit, rn(imm), i0);
+    reloc = bgtr_u(_jit, r0, rn(imm));
+    unget_temp_gpr(_jit);
+
+    return reloc;
+}
+
+/* Branch if r0 != r1: BNE with a NOP in the delay slot. */
+static jit_reloc_t
+bner(jit_state_t * _jit, int32_t r0, int32_t r1)
+{
+    return emit_jump(_jit, _BNE(r0, r1, 0), _NOP(1));
+}
+
+/* Immediate form of bner(): i0 is loaded into a temporary register first. */
+static jit_reloc_t
+bnei(jit_state_t * _jit, int32_t r0, jit_word_t i0)
+{
+    jit_reloc_t reloc;
+    jit_gpr_t imm = get_temp_gpr(_jit);
+
+    movi(_jit, rn(imm), i0);
+    reloc = bner(_jit, r0, rn(imm));
+    unget_temp_gpr(_jit);
+
+    return reloc;
+}
+
+/*
+ * Return the I-format immediate field of the instruction at loc, plus one
+ * (the inverse of the "offset - 1" stored by patch_jmp_offset()).
+ */
+static int32_t
+read_jmp_offset(uint32_t * loc)
+{
+    const instr_t *insn = (const instr_t *) loc;
+
+    return insn->I.i0 + 1;
+}
+
+/* Conditional jumps use the same encoding: defer to read_jmp_offset(). */
+static int32_t
+read_jcc_offset(uint32_t * loc)
+{
+    int32_t offset = read_jmp_offset(loc);
+
+    return offset;
+}
+
+/*
+ * Store offset - 1 in the I-format immediate field of the instruction at
+ * loc.  Asserts that offset - 1 satisfies simm16_p().
+ */
+static void
+patch_jmp_offset(uint32_t * loc, ptrdiff_t offset)
+{
+    instr_t *insn;
+
+    assert(simm16_p(offset - 1));
+    insn = (instr_t *) loc;
+    insn->I.i0 = offset - 1;
+}
+
+/* Conditional jumps are patched exactly like plain jumps. */
+static void
+patch_jcc_offset(uint32_t * loc, ptrdiff_t offset)
+{
+    patch_jmp_offset(loc, offset);
+    return;
+}
+
+/* A jump routed to a veneer is patched exactly like a plain jump. */
+static void
+patch_veneer_jmp_offset(uint32_t * loc, ptrdiff_t offset)
+{
+    patch_jmp_offset(loc, offset);
+    return;
+}
+
+/* Point the veneer at loc to addr by patching its immediate-load sequence. */
+static void
+patch_veneer(uint32_t * loc, jit_pointer_t addr)
+{
+    patch_immediate_reloc(loc, addr);
+    return;
+}
+
+/*
+ * Emit a veneer that reaches target through _AT: a placeholder
+ * immediate-load sequence for _AT (patched to target), then JR _AT with a
+ * NOP in the branch delay slot.
+ */
+static void
+emit_veneer(jit_state_t * _jit, jit_pointer_t target)
+{
+    jit_pointer_t load_seq = jit_address(_jit);
+
+    emit_immediate_reloc(_jit, rn(_AT), 1);
+
+    /*
+     * emit_immediate_reloc can, in rare cases, run into overflow.
+     * patch_veneer goes through patch_immediate_reloc, which assumes every
+     * instruction of the sequence is available, so skip the patch then.
+     */
+    if (!jit_has_overflow(_jit)) {
+        patch_veneer(load_seq, target);
+    }
+
+    emit_u32(_jit, _JR(rn(_AT)));
+    emit_u32(_jit, _NOP(1));    /* branch delay slot */
+}
+
+/* A conditional jump routed to a veneer is patched like any conditional jump. */
+static void
+patch_veneer_jcc_offset(uint32_t * loc, ptrdiff_t offset)
+{
+    patch_jcc_offset(loc, offset);
+    return;
+}
+
+/*
+ * Report whether offset can be encoded by patch_jmp_offset(), i.e. whether
+ * offset - 1 satisfies simm16_p().  flags is ignored.
+ */
+static int
+offset_in_jmp_range(ptrdiff_t offset, int flags)
+{
+    const ptrdiff_t encoded = offset - 1;
+
+    (void) flags;
+    return simm16_p(encoded);
+}
+
+/* Conditional jumps have the same reach as plain jumps. */
+static int
+offset_in_jcc_range(ptrdiff_t offset, int flags)
+{
+    int in_range = offset_in_jmp_range(offset, flags);
+
+    return in_range;
+}
+
+/*
+ * Return a copy of the instruction word inst with its I-format immediate
+ * field set to offset - 1.
+ */
+static uint32_t
+patch_jump(uint32_t inst, int32_t offset)
+{
+    instr_t word = { .w = inst };
+
+    word.I.i0 = offset - 1;
+
+    return word.w;
+}
+
+/*
+ * Emit the branch instruction inst followed by delay_slot, and return a
+ * JIT_RELOC_JMP_WITH_VENEER relocation describing the branch.
+ *
+ * The relocation is registered with add_pending_literal() before anything is
+ * emitted; the loop repeats the whole attempt, re-reading the current
+ * position, until that registration reports success.
+ */
+static jit_reloc_t
+emit_jump(jit_state_t * _jit, uint32_t inst, uint32_t delay_slot)
+{
+ while (1) {
+ uint8_t *pc_base = _jit->pc.uc;
+ /* NOTE(review): presumably jit_address() equals pc_base here, making off 0 - confirm */
+ int32_t off = ((uint8_t *)jit_address(_jit)) - pc_base;
+ jit_reloc_t w =
+ jit_reloc(_jit, JIT_RELOC_JMP_WITH_VENEER, 0, _jit->pc.uc,
+ pc_base,
+ 2);
+ /* width of the branch offset field; one less is passed on below */
+ uint8_t jump_width = 16;
+
+ if (add_pending_literal(_jit, w, jump_width - 1)) {
+ /* the branch goes out with emit_u32, the delay slot with em_wp */
+ emit_u32(_jit, patch_jump(inst, off >> 2));
+ em_wp(_jit, delay_slot);
+ return w;
+ }
+ }
+}
+
+/* Jump to the address held in r0: JR followed by a NOP for the delay slot. */
+static void
+jmpr(jit_state_t * _jit, int32_t r0)
+{
+    const uint32_t jump = _JR(r0);
+    const uint32_t delay = _NOP(1);
+
+    emit_u32(_jit, jump);
+    em_wp(_jit, delay);
+}
+
+/* A linking jump to the absolute address i0 is the same as calli(). */
+static void
+jmpi_with_link(jit_state_t * _jit, jit_word_t i0)
+{
+    calli(_jit, i0);
+    return;
+}
+
+/*
+ * Emit an unconditional jump to the absolute address i0.
+ *
+ * J only supplies the low 28 bits of the destination; the remaining upper
+ * bits come from the address of the instruction in its delay slot.  J is
+ * therefore used only when the delay slot and i0 agree on every bit above
+ * the low 28.  The mask is built at jit_word_t width: a plain 0xf0000000
+ * constant would compare bits 28..31 only and, with 64-bit words, accept
+ * targets whose upper 32 bits differ.
+ *
+ * Otherwise i0 is loaded into a temporary register and reached with jmpr().
+ */
+static void
+jmpi(jit_state_t * _jit, jit_word_t i0)
+{
+    const jit_word_t region_mask = ~(jit_word_t) 0x0fffffff;
+
+    if (((_jit->pc.w + sizeof(int32_t)) & region_mask) == (i0 & region_mask)) {
+        emit_u32(_jit, _J((i0 & 0x0fffffff) >> 2));
+        em_wp(_jit, _NOP(1));
+    } else {
+        jit_gpr_t t0 = get_temp_gpr(_jit);
+        movi(_jit, rn(t0), i0);
+        jmpr(_jit, rn(t0));
+        unget_temp_gpr(_jit);
+    }
+}
+
+/* Emits nothing on this backend. */
+static void
+pop_link_register(jit_state_t * _jit)
+{
+    /* parameter intentionally unused */
+    (void) _jit;
+}
+
+/* Emits nothing on this backend. */
+static void
+push_link_register(jit_state_t * _jit)
+{
+    /* parameter intentionally unused */
+    (void) _jit;
+}
+
+/*
+ * Emit an unconditional, patchable jump and return its relocation.
+ *
+ * BEQ _ZERO, _ZERO is always taken, so it serves as the unconditional jump
+ * here; J and the like aren't PC relative.
+ */
+static jit_reloc_t
+jmp(jit_state_t * _jit)
+{
+    return emit_jump(_jit, _BEQ(rn(_ZERO), rn(_ZERO), 0), _NOP(1));
+}
+
+/*
+ * r0 += r1, branching when the addition overflows.
+ *
+ * sum = r0 + r1; overflow = (r1 < 0) ? (r0 < sum) : (sum < r0).
+ * r0 itself is only written by the add placed in the branch delay slot.
+ */
+static jit_reloc_t
+boaddr(jit_state_t * _jit, int32_t r0, int32_t r1)
+{
+    jit_reloc_t reloc;
+    jit_gpr_t r1_neg = get_temp_gpr(_jit);
+    jit_gpr_t ovf = get_temp_gpr(_jit);
+    jit_gpr_t sum_lt = get_temp_gpr(_jit);
+
+    /* r1_neg = r1 < 0 */
+    em_wp(_jit, _SLT(rn(r1_neg), r1, rn(_ZERO)));
+
+    /* ovf temporarily holds the sum */
+    addr(_jit, rn(ovf), r0, r1);
+
+    /* sum_lt = sum < r0, ovf = r0 < sum */
+    em_wp(_jit, _SLT(rn(sum_lt), rn(ovf), r0));
+    em_wp(_jit, _SLT(rn(ovf), r0, rn(ovf)));
+    /* when r1_neg is zero, take sum_lt as the overflow flag instead */
+    em_wp(_jit, _MOVZ(rn(ovf), rn(sum_lt), rn(r1_neg)));
+
+    reloc = emit_jump(_jit, _BNE(rn(_ZERO), rn(ovf), 0), _WADDR(r0, r0, r1));
+
+    unget_temp_gpr(_jit);
+    unget_temp_gpr(_jit);
+    unget_temp_gpr(_jit);
+
+    return reloc;
+}
+
+/* Immediate form of boaddr(): i0 is loaded into a temporary register first. */
+static jit_reloc_t
+boaddi(jit_state_t * _jit, int32_t r0, jit_word_t i0)
+{
+    jit_reloc_t reloc;
+    jit_gpr_t imm = get_temp_gpr(_jit);
+
+    movi(_jit, rn(imm), i0);
+    reloc = boaddr(_jit, r0, rn(imm));
+    unget_temp_gpr(_jit);
+
+    return reloc;
+}
+
+/*
+ * r0 += r1, branching when the sum compares below r0 under SLTU (i.e. the
+ * addition wrapped).  r0 receives the sum in the branch delay slot.
+ */
+static jit_reloc_t
+boaddr_u(jit_state_t * _jit, int32_t r0, int32_t r1)
+{
+    jit_reloc_t reloc;
+    jit_gpr_t sum = get_temp_gpr(_jit);
+    jit_gpr_t carry = get_temp_gpr(_jit);
+
+    addr(_jit, rn(sum), r0, r1);
+    /* carry = sum < r0 */
+    em_wp(_jit, _SLTU(rn(carry), rn(sum), r0));
+
+    reloc = emit_jump(_jit, _BNE(rn(_ZERO), rn(carry), 0), _MOVR(r0, rn(sum)));
+
+    unget_temp_gpr(_jit);
+    unget_temp_gpr(_jit);
+
+    return reloc;
+}
+
+/* Immediate form of boaddr_u(): i0 is loaded into a temporary register first. */
+static jit_reloc_t
+boaddi_u(jit_state_t * _jit, int32_t r0, jit_word_t i0)
+{
+    jit_reloc_t reloc;
+    jit_gpr_t imm = get_temp_gpr(_jit);
+
+    movi(_jit, rn(imm), i0);
+    reloc = boaddr_u(_jit, r0, rn(imm));
+    unget_temp_gpr(_jit);
+
+    return reloc;
+}
+
+/*
+ * r0 += r1, branching when the addition does not overflow.
+ *
+ * sum = r0 + r1.  The branch is taken when (r1 < 0) equals (sum < r0): the
+ * sum moved in the direction given by the sign of r1.  r0 receives the sum
+ * in the branch delay slot.
+ */
+static jit_reloc_t
+bxaddr(jit_state_t * _jit, int32_t r0, int32_t r1)
+{
+    jit_reloc_t reloc;
+    jit_gpr_t sum = get_temp_gpr(_jit);
+    jit_gpr_t r1_neg = get_temp_gpr(_jit);
+    jit_gpr_t sum_lt = get_temp_gpr(_jit);
+
+    addr(_jit, rn(sum), r0, r1);
+    /* r1_neg = r1 < 0, sum_lt = sum < r0 */
+    em_wp(_jit, _SLTI(rn(r1_neg), r1, 0));
+    em_wp(_jit, _SLT(rn(sum_lt), rn(sum), r0));
+
+    reloc = emit_jump(_jit, _BEQ(rn(r1_neg), rn(sum_lt), 0), _MOVR(r0, rn(sum)));
+
+    unget_temp_gpr(_jit);
+    unget_temp_gpr(_jit);
+    unget_temp_gpr(_jit);
+
+    return reloc;
+}
+
+/* Immediate form of bxaddr(): i0 is loaded into a temporary register first. */
+static jit_reloc_t
+bxaddi(jit_state_t * _jit, int32_t r0, jit_word_t i0)
+{
+    jit_reloc_t reloc;
+    jit_gpr_t imm = get_temp_gpr(_jit);
+
+    movi(_jit, rn(imm), i0);
+    reloc = bxaddr(_jit, r0, rn(imm));
+    unget_temp_gpr(_jit);
+
+    return reloc;
+}
+
+/*
+ * r0 += r1, branching when the sum does not compare below r0 under SLTU
+ * (i.e. the addition did not wrap).  r0 receives the sum in the branch delay
+ * slot.
+ */
+static jit_reloc_t
+bxaddr_u(jit_state_t * _jit, int32_t r0, int32_t r1)
+{
+    jit_reloc_t reloc;
+    jit_gpr_t sum = get_temp_gpr(_jit);
+    jit_gpr_t carry = get_temp_gpr(_jit);
+
+    addr(_jit, rn(sum), r0, r1);
+    /* carry = sum < r0 */
+    em_wp(_jit, _SLTU(rn(carry), rn(sum), r0));
+
+    reloc = emit_jump(_jit, _BEQ(rn(_ZERO), rn(carry), 0), _MOVR(r0, rn(sum)));
+
+    unget_temp_gpr(_jit);
+    unget_temp_gpr(_jit);
+
+    return reloc;
+}
+
+/* Immediate form of bxaddr_u(): i0 is loaded into a temporary register first. */
+static jit_reloc_t
+bxaddi_u(jit_state_t * _jit, int32_t r0, jit_word_t i0)
+{
+    jit_reloc_t reloc;
+    jit_gpr_t imm = get_temp_gpr(_jit);
+
+    movi(_jit, rn(imm), i0);
+    reloc = bxaddr_u(_jit, r0, rn(imm));
+    unget_temp_gpr(_jit);
+
+    return reloc;
+}
+
+/*
+ * r0 -= r1, branching when the subtraction overflows.
+ *
+ * diff = r0 - r1; overflow = (r1 < 0) != (r0 < diff): subtracting a negative
+ * value must increase the result, anything else must not.  r0 receives the
+ * difference in the branch delay slot.
+ */
+static jit_reloc_t
+bosubr(jit_state_t * _jit, int32_t r0, int32_t r1)
+{
+    jit_reloc_t reloc;
+    jit_gpr_t diff = get_temp_gpr(_jit);
+    jit_gpr_t r1_neg = get_temp_gpr(_jit);
+    jit_gpr_t grew = get_temp_gpr(_jit);
+
+    subr(_jit, rn(diff), r0, r1);
+    /* r1_neg = r1 < 0, grew = r0 < diff */
+    em_wp(_jit, _SLTI(rn(r1_neg), r1, 0));
+    em_wp(_jit, _SLT(rn(grew), r0, rn(diff)));
+
+    reloc = emit_jump(_jit, _BNE(rn(r1_neg), rn(grew), 0), _MOVR(r0, rn(diff)));
+
+    unget_temp_gpr(_jit);
+    unget_temp_gpr(_jit);
+    unget_temp_gpr(_jit);
+
+    return reloc;
+}
+
+/* Immediate form of bosubr(): i0 is loaded into a temporary register first. */
+static jit_reloc_t
+bosubi(jit_state_t * _jit, int32_t r0, jit_word_t i0)
+{
+    jit_reloc_t reloc;
+    jit_gpr_t imm = get_temp_gpr(_jit);
+
+    movi(_jit, rn(imm), i0);
+    reloc = bosubr(_jit, r0, rn(imm));
+    unget_temp_gpr(_jit);
+
+    return reloc;
+}
+
+/*
+ * r0 -= r1, branching when r0 compares below the difference under SLTU
+ * (i.e. the subtraction wrapped).  r0 receives the difference in the branch
+ * delay slot.
+ */
+static jit_reloc_t
+bosubr_u(jit_state_t * _jit, int32_t r0, int32_t r1)
+{
+    jit_reloc_t reloc;
+    jit_gpr_t diff = get_temp_gpr(_jit);
+    jit_gpr_t borrow = get_temp_gpr(_jit);
+
+    subr(_jit, rn(diff), r0, r1);
+    /* borrow = r0 < diff */
+    em_wp(_jit, _SLTU(rn(borrow), r0, rn(diff)));
+
+    reloc = emit_jump(_jit, _BNE(rn(_ZERO), rn(borrow), 0), _MOVR(r0, rn(diff)));
+
+    unget_temp_gpr(_jit);
+    unget_temp_gpr(_jit);
+
+    return reloc;
+}
+
+/* Immediate form of bosubr_u(): i0 is loaded into a temporary register first. */
+static jit_reloc_t
+bosubi_u(jit_state_t * _jit, int32_t r0, jit_word_t i0)
+{
+    jit_reloc_t reloc;
+    jit_gpr_t imm = get_temp_gpr(_jit);
+
+    movi(_jit, rn(imm), i0);
+    reloc = bosubr_u(_jit, r0, rn(imm));
+    unget_temp_gpr(_jit);
+
+    return reloc;
+}
+
+/*
+ * r0 -= r1, branching when the subtraction does not overflow.
+ *
+ * diff = r0 - r1.  The branch is taken when (r1 < 0) equals (r0 < diff).
+ * r0 receives the difference in the branch delay slot.
+ */
+static jit_reloc_t
+bxsubr(jit_state_t * _jit, int32_t r0, int32_t r1)
+{
+    jit_reloc_t reloc;
+    jit_gpr_t diff = get_temp_gpr(_jit);
+    jit_gpr_t r1_neg = get_temp_gpr(_jit);
+    jit_gpr_t grew = get_temp_gpr(_jit);
+
+    subr(_jit, rn(diff), r0, r1);
+    /* r1_neg = r1 < 0, grew = r0 < diff */
+    em_wp(_jit, _SLTI(rn(r1_neg), r1, 0));
+    em_wp(_jit, _SLT(rn(grew), r0, rn(diff)));
+
+    reloc = emit_jump(_jit, _BEQ(rn(r1_neg), rn(grew), 0), _MOVR(r0, rn(diff)));
+
+    unget_temp_gpr(_jit);
+    unget_temp_gpr(_jit);
+    unget_temp_gpr(_jit);
+
+    return reloc;
+}
+
+/* Immediate form of bxsubr(): i0 is loaded into a temporary register first. */
+static jit_reloc_t
+bxsubi(jit_state_t * _jit, int32_t r0, jit_word_t i0)
+{
+    jit_reloc_t reloc;
+    jit_gpr_t imm = get_temp_gpr(_jit);
+
+    movi(_jit, rn(imm), i0);
+    reloc = bxsubr(_jit, r0, rn(imm));
+    unget_temp_gpr(_jit);
+
+    return reloc;
+}
+
+/*
+ * r0 -= r1, branching when r0 does not compare below the difference under
+ * SLTU (i.e. the subtraction did not wrap).  r0 receives the difference in
+ * the branch delay slot.
+ */
+static jit_reloc_t
+bxsubr_u(jit_state_t * _jit, int32_t r0, int32_t r1)
+{
+    jit_reloc_t reloc;
+    jit_gpr_t diff = get_temp_gpr(_jit);
+    jit_gpr_t borrow = get_temp_gpr(_jit);
+
+    subr(_jit, rn(diff), r0, r1);
+    /* borrow = r0 < diff */
+    em_wp(_jit, _SLTU(rn(borrow), r0, rn(diff)));
+
+    reloc = emit_jump(_jit, _BEQ(rn(_ZERO), rn(borrow), 0), _MOVR(r0, rn(diff)));
+
+    unget_temp_gpr(_jit);
+    unget_temp_gpr(_jit);
+
+    return reloc;
+}
+
+/* Immediate form of bxsubr_u(): i0 is loaded into a temporary register first. */
+static jit_reloc_t
+bxsubi_u(jit_state_t * _jit, int32_t r0, jit_word_t i0)
+{
+    jit_reloc_t reloc;
+    jit_gpr_t imm = get_temp_gpr(_jit);
+
+    movi(_jit, rn(imm), i0);
+    reloc = bxsubr_u(_jit, r0, rn(imm));
+    unget_temp_gpr(_jit);
+
+    return reloc;
+}
+
+/* Branch if (r0 & r1) != 0; the AND result lives in a temporary register. */
+static jit_reloc_t
+bmsr(jit_state_t * _jit, int32_t r0, int32_t r1)
+{
+    jit_reloc_t reloc;
+    jit_gpr_t masked = get_temp_gpr(_jit);
+
+    andr(_jit, rn(masked), r0, r1);
+    reloc = bner(_jit, rn(masked), rn(_ZERO));
+    unget_temp_gpr(_jit);
+
+    return reloc;
+}
+
+/* Branch if (r0 & i0) != 0; the AND result lives in a temporary register. */
+static jit_reloc_t
+bmsi(jit_state_t * _jit, int32_t r0, jit_word_t i0)
+{
+    jit_reloc_t reloc;
+    jit_gpr_t masked = get_temp_gpr(_jit);
+
+    andi(_jit, rn(masked), r0, i0);
+    reloc = bner(_jit, rn(masked), rn(_ZERO));
+    unget_temp_gpr(_jit);
+
+    return reloc;
+}
+
+/* Branch if (r0 & r1) == 0; the AND result lives in a temporary register. */
+static jit_reloc_t
+bmcr(jit_state_t * _jit, int32_t r0, int32_t r1)
+{
+    jit_reloc_t reloc;
+    jit_gpr_t masked = get_temp_gpr(_jit);
+
+    andr(_jit, rn(masked), r0, r1);
+    reloc = beqr(_jit, rn(masked), rn(_ZERO));
+    unget_temp_gpr(_jit);
+
+    return reloc;
+}
+
+/* Branch if (r0 & i0) == 0; the AND result lives in a temporary register. */
+static jit_reloc_t
+bmci(jit_state_t * _jit, int32_t r0, jit_word_t i0)
+{
+    jit_reloc_t reloc;
+    jit_gpr_t masked = get_temp_gpr(_jit);
+
+    andi(_jit, rn(masked), r0, i0);
+    reloc = beqr(_jit, rn(masked), rn(_ZERO));
+    unget_temp_gpr(_jit);
+
+    return reloc;
+}
+
+/*
+ * Call the function whose address is in r0.  The address is first moved to
+ * _T9 unless it is already there, then JALR links through _RA; a NOP fills
+ * the delay slot.
+ */
+static void
+callr(jit_state_t * _jit, int32_t r0)
+{
+    if (r0 != rn(_T9)) {
+        movr(_jit, rn(_T9), r0);
+    }
+
+    emit_u32(_jit, _JALR(rn(_RA), rn(_T9)));
+    em_wp(_jit, _NOP(1));    /* delay slot */
+}
+
+/*
+ * Call the function at the absolute address i0: load i0 into _T9, then JALR
+ * linking through _RA, with a NOP in the delay slot.
+ */
+static void
+calli(jit_state_t * _jit, jit_word_t i0)
+{
+    movi(_jit, rn(_T9), i0);
+
+    emit_u32(_jit, _JALR(rn(_RA), rn(_T9)));
+    em_wp(_jit, _NOP(1));    /* delay slot */
+}
+
+/* Return to the caller: JR _RA followed by a NOP for the delay slot. */
+static void
+ret(jit_state_t * _jit)
+{
+    const uint32_t jump = _JR(rn(_RA));
+    const uint32_t delay = _NOP(1);
+
+    emit_u32(_jit, jump);
+    em_wp(_jit, delay);
+}
+
+/* Return with the value of r0 in _V0 (the move is skipped if r0 is _V0). */
+static void
+retr(jit_state_t * _jit, int32_t r0)
+{
+    if (r0 != rn(_V0)) {
+        movr(_jit, rn(_V0), r0);
+    }
+
+    ret(_jit);
+}
+
+/* Return with the immediate i0 loaded into _V0. */
+static void
+reti(jit_state_t * _jit, jit_word_t i0)
+{
+    movi(_jit, rn(_V0), i0);
+
+    ret(_jit);
+}
+
+/* Fetch a return value from _V0 into r0 via extr_c(). */
+static void
+retval_c(jit_state_t * _jit, int32_t r0)
+{
+    extr_c(_jit, r0, rn(_V0));
+    return;
+}
+
+/* Fetch a return value from _V0 into r0 via extr_uc(). */
+static void
+retval_uc(jit_state_t * _jit, int32_t r0)
+{
+    extr_uc(_jit, r0, rn(_V0));
+    return;
+}
+
+/* Fetch a return value from _V0 into r0 via extr_s(). */
+static void
+retval_s(jit_state_t * _jit, int32_t r0)
+{
+    extr_s(_jit, r0, rn(_V0));
+    return;
+}
+
+/* Fetch a return value from _V0 into r0 via extr_us(). */
+static void
+retval_us(jit_state_t * _jit, int32_t r0)
+{
+    extr_us(_jit, r0, rn(_V0));
+    return;
+}
+
+/*
+ * Fetch a return value from _V0 into r0.  64-bit builds go through
+ * extr_i(); other builds use a plain register move, skipped when r0 is
+ * already _V0.
+ */
+static void
+retval_i(jit_state_t * _jit, int32_t r0)
+{
+#if __WORDSIZE != 64
+    if (r0 != rn(_V0)) {
+        movr(_jit, r0, rn(_V0));
+    }
+#else
+    extr_i(_jit, r0, rn(_V0));
+#endif
+}
+
+#if __WORDSIZE == 64
+/* Fetch a return value from _V0 into r0 via extr_ui(). */
+static void
+retval_ui(jit_state_t * _jit, int32_t r0)
+{
+    extr_ui(_jit, r0, rn(_V0));
+    return;
+}
+
+/* Copy the return value from _V0 into r0 unless r0 already is _V0. */
+static void
+retval_l(jit_state_t * _jit, int32_t r0)
+{
+    if (r0 == rn(_V0))
+        return;
+
+    movr(_jit, r0, rn(_V0));
+}
+#endif
+
+/*
+ * Word-sized load from the address in loc into dst (ldr_l on 64-bit builds,
+ * ldr_i otherwise), with a SYNC emitted before and after it.
+ */
+static void
+ldr_atomic(jit_state_t * _jit, int32_t dst, int32_t loc)
+{
+    const uint32_t barrier = _SYNC(0x00);
+
+    em_wp(_jit, barrier);
+#if __WORDSIZE == 64
+    ldr_l(_jit, dst, loc);
+#else
+    ldr_i(_jit, dst, loc);
+#endif
+    em_wp(_jit, barrier);
+}
+
+/*
+ * Word-sized store of val to the address in loc (str_l on 64-bit builds,
+ * str_i otherwise), with a SYNC emitted before and after it.
+ */
+static void
+str_atomic(jit_state_t * _jit, int32_t loc, int32_t val)
+{
+    const uint32_t barrier = _SYNC(0x00);
+
+    em_wp(_jit, barrier);
+#if __WORDSIZE == 64
+    str_l(_jit, loc, val);
+#else
+    str_i(_jit, loc, val);
+#endif
+    em_wp(_jit, barrier);
+}
+
+/*
+ * Atomically exchange the word at the address in loc with val, leaving the
+ * previous contents in dst.
+ *
+ * NOTE(review): _WLL/_WSC are presumably the word-sized load-linked and
+ * store-conditional instructions, with _WSC leaving 0 in its register when
+ * the store fails - confirm against the macro definitions.
+ */
+static void
+swap_atomic(jit_state_t * _jit, int32_t dst, int32_t loc, int32_t val)
+{
+ jit_gpr_t t0 = get_temp_gpr(_jit);
+ /*
+  * dst is written by _WLL inside the loop, so an input that shares its
+  * register with dst is copied to a temporary; otherwise the input register
+  * itself is used.
+  */
+ jit_gpr_t t1 = loc == dst ? get_temp_gpr(_jit) : JIT_GPR(loc);
+ jit_gpr_t t2 = val == dst ? get_temp_gpr(_jit) : JIT_GPR(val);
+
+ movr(_jit, rn(t1), loc);
+ movr(_jit, rn(t2), val);
+ em_wp(_jit, _SYNC(0x00));
+
+ /* each attempt restarts here with a fresh copy of val in t0 */
+ void *retry = jit_address(_jit);
+ movr(_jit, rn(t0), rn(t2));
+ em_wp(_jit, _WLL(dst, 0, rn(t1)));
+ em_wp(_jit, _WSC(rn(t0), 0, rn(t1)));
+ /* t0 == 0 after _WSC: go back and try again */
+ jit_patch_there(_jit, beqr(_jit, rn(t0), rn(_ZERO)), retry);
+
+ em_wp(_jit, _SYNC(0x00));
+
+ /* release exactly the temporaries acquired above */
+ if (val == dst) unget_temp_gpr(_jit);
+ if (loc == dst) unget_temp_gpr(_jit);
+ unget_temp_gpr(_jit);
+}
+
+/*
+ * Compare-and-swap on the word at the address in loc: if it equals expected,
+ * try to store desired.  dst receives the value that was loaded from loc.
+ *
+ * NOTE(review): _WLL/_WSC are presumably the word-sized load-linked and
+ * store-conditional instructions, with _WSC leaving 0 in its register when
+ * the store fails - confirm against the macro definitions.
+ */
+static void
+cas_atomic(jit_state_t * _jit, int32_t dst, int32_t loc, int32_t expected,
+ int32_t desired)
+{
+ jit_gpr_t t0 = get_temp_gpr(_jit);
+ jit_gpr_t t1 = get_temp_gpr(_jit);
+
+ em_wp(_jit, _SYNC(0x00));
+ void *retry = jit_address(_jit);
+
+ /* t1 is overwritten by _WSC, so desired is re-copied on every attempt */
+ movr(_jit, rn(t1), desired);
+ em_wp(_jit, _WLL(rn(t0), 0, loc));
+ /* loaded value differs from expected: skip the store */
+ jit_reloc_t fail = bner(_jit, rn(t0), expected);
+ em_wp(_jit, _WSC(rn(t1), 0, loc));
+
+ /* t1 == 0 after _WSC: go back and try again */
+ jit_patch_there(_jit, beqr(_jit, rn(t1), rn(_ZERO)), retry);
+ jit_patch_here(_jit, fail);
+ em_wp(_jit, _SYNC(0x00));
+
+ /* dst is written last, after every read of loc, expected and desired */
+ movr(_jit, dst, rn(t0));
+
+ unget_temp_gpr(_jit);
+ unget_temp_gpr(_jit);
+}
+
+/*
+ * Emit i0 bytes of padding as NOP instructions, one per 4 bytes.  Asserts
+ * that i0 was consumed exactly, so a positive i0 must be a multiple of 4.
+ */
+static void
+nop(jit_state_t * _jit, int32_t i0)
+{
+    while (i0 > 0) {
+        em_wp(_jit, _NOP());
+        i0 -= 4;
+    }
+
+    assert(i0 == 0);
+}
+
+/*
+ * Emit an SDBBP instruction.
+ *
+ * Interestingly, Linux on qemu-system-mips64el 6.1.0 was observed to crash
+ * when executing a breakpoint.
+ */
+static void
+breakpoint(jit_state_t * _jit)
+{
+    const uint32_t insn = _SDBBP();
+
+    em_wp(_jit, insn);
+}
diff --git a/deps/lightening/lightening/mips-fpu.c b/deps/lightening/lightening/mips-fpu.c
new file mode 100644
index 0000000..01c24cb
--- /dev/null
+++ b/deps/lightening/lightening/mips-fpu.c
@@ -0,0 +1,1015 @@
+/*
+ * Copyright (C) 2012-2017 Free Software Foundation, Inc.
+ *
+ * This file is part of GNU lightning.
+ *
+ * GNU lightning is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ *
+ * GNU lightning is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+ * License for more details.
+ *
+ * Authors:
+ * Paulo Cesar Pereira de Andrade
+ */
+#define FMT_S 0x10 /* float32 */
+#define FMT_D 0x11 /* float64 */
+#define FMT_W 0x14 /* int32 */
+#define FMT_L 0x15 /* int64 */
+#define FMT_P 0x16 /* 2 x float32 */
+#define FMT_S_PU 0x20
+#define FMT_S_PL 0x26
+#define FUN_ADD 0x00
+#define FUN_LWXC1 0x00
+#define FUN_SUB 0x01
+#define FUN_LDXC1 0x01
+#define FUN_MUL 0x02
+#define FUN_DIV 0x03
+#define FUN_SQRT 0x04
+#define FUN_ABS 0x05
+#define FUN_LUXC1 0x05
+#define FUN_MOV 0x06
+#define FUN_NEG 0x07
+#define FUN_SWXC1 0x08
+#define FUN_ROUND_L 0x08
+#define FUN_TRUNC_L 0x09
+#define FUN_SDXC1 0x09
+#define FUN_CEIL_L 0x0a
+#define FUN_FLOOR_L 0x0b
+#define FUN_ROUND_W 0x0c
+#define FUN_TRUNC_W 0x0d
+#define FUN_SUXC1 0x0d
+#define FUN_CEIL_W 0x0e
+#define FUN_FLOOR_W 0x0f
+#define FUN_RECIP 0x15
+#define FUN_RSQRT 0x16
+#define FUN_ALNV_P 0x1e
+#define FUN_CVT_S 0x20
+#define FUN_CVT_D 0x21
+#define FUN_CVT_W 0x24
+#define FUN_CVT_L 0x25
+#define FUN_PLL 0x2c
+#define FUN_PLU 0x2d
+#define FUN_PUL 0x2e
+#define FUN_PUU 0x2f
+#define FUN_MADD_S (0x20 | FMT_S)
+#define FUN_MADD_D (0x20 | FMT_D)
+#define FUN_MADD_P (0x20 | FMT_P)
+#define FUN_MSUB_S (0x28 | FMT_S)
+#define FUN_MSUB_D (0x28 | FMT_D)
+#define FUN_MSUB_P (0x28 | FMT_P)
+#define FUN_NMADD_S (0x30 | FMT_S)
+#define FUN_NMADD_D (0x30 | FMT_D)
+#define FUN_NMADD_P (0x30 | FMT_P)
+#define FUN_NMSUB_S (0x38 | FMT_S)
+#define FUN_NMSUB_D (0x38 | FMT_D)
+#define FUN_NMSUB_P (0x38 | FMT_P)
+#define COND_F 0x30
+#define COND_UN 0x31
+#define COND_EQ 0x32
+#define COND_UEQ 0x33
+#define COND_OLT 0x34
+#define COND_ULT 0x35
+#define COND_OLE 0x36
+#define COND_ULE 0x37
+#define COND_SF 0x38
+#define COND_NGLE 0x39
+#define COND_SEQ 0x3a
+#define COND_NGL 0x3b
+#define COND_LT 0x3c
+#define COND_NGE 0x3d
+#define COND_LE 0x3e
+#define COND_UGT 0x3f
+
+#define _ADD_S(fd, fs, ft) Rtype(OP_COP1, FMT_S, ft, fs, fd, FUN_ADD)
+#define _ADD_D(fd, fs, ft) Rtype(OP_COP1, FMT_D, ft, fs, fd, FUN_ADD)
+#define _SUB_S(fd, fs, ft) Rtype(OP_COP1, FMT_S, ft, fs, fd, FUN_SUB)
+#define _SUB_D(fd, fs, ft) Rtype(OP_COP1, FMT_D, ft, fs, fd, FUN_SUB)
+#define _MUL_S(fd, fs, ft) Rtype(OP_COP1, FMT_S, ft, fs, fd, FUN_MUL)
+#define _MUL_D(fd, fs, ft) Rtype(OP_COP1, FMT_D, ft, fs, fd, FUN_MUL)
+#define _DIV_S(fd, fs, ft) Rtype(OP_COP1, FMT_S, ft, fs, fd, FUN_DIV)
+#define _DIV_D(fd, fs, ft) Rtype(OP_COP1, FMT_D, ft, fs, fd, FUN_DIV)
+#define _ABS_S(fd, fs) Rtype(OP_COP1, FMT_S, 00, fs, fd, FUN_ABS)
+#define _ABS_D(fd, fs) Rtype(OP_COP1, FMT_D, 00, fs, fd, FUN_ABS)
+#define _NEG_S(fd, fs) Rtype(OP_COP1, FMT_S, 00, fs, fd, FUN_NEG)
+#define _NEG_D(fd, fs) Rtype(OP_COP1, FMT_D, 00, fs, fd, FUN_NEG)
+#define _SQRT_S(fd, fs) Rtype(OP_COP1, FMT_S, 00, fs, fd, FUN_SQRT)
+#define _SQRT_D(fd, fs) Rtype(OP_COP1, FMT_D, 00, fs, fd, FUN_SQRT)
+#define _MFC1(rt, fs) Rtype(OP_COP1, OP_MF, rt, fs, 00, 00)
+#define _MFHC1(rt, fs) Rtype(OP_COP1, OP_MFH, rt, fs, 00, 00)
+#define _MTC1(rt, fs) Rtype(OP_COP1, OP_MT, rt, fs, 00, 00)
+#define _MTHC1(rt, fs) Rtype(OP_COP1, OP_MTH, rt, fs, 00, 00)
+#define _DMFC1(rt, fs) Rtype(OP_COP1, OP_DMF, rt, fs, 00, 00)
+#define _DMTC1(rt, fs) Rtype(OP_COP1, OP_DMT, rt, fs, 00, 00)
+#define _CVT_D_S(fd, fs) Rtype(OP_COP1, FMT_S, 0, fs, fd, FUN_CVT_D)
+#define _CVT_D_W(fd, fs) Rtype(OP_COP1, FMT_W, 0, fs, fd, FUN_CVT_D)
+#define _CVT_D_L(fd, fs) Rtype(OP_COP1, FMT_L, 0, fs, fd, FUN_CVT_D)
+#define _CVT_L_S(fd, fs) Rtype(OP_COP1, FMT_S, 0, fs, fd, FUN_CVT_L)
+#define _CVT_L_D(fd, fs) Rtype(OP_COP1, FMT_D, 0, fs, fd, FUN_CVT_L)
+/* NOTE(review): FUN_CVT_P is not among the FUN_* constants defined in this file; confirm it exists before using this macro. */
+#define _CVT_P_S(fd, fs) Rtype(OP_COP1, FMT_S, 0, fs, fd, FUN_CVT_P)
+#define _CVT_S_D(fd, fs) Rtype(OP_COP1, FMT_D, 0, fs, fd, FUN_CVT_S)
+#define _CVT_S_W(fd, fs) Rtype(OP_COP1, FMT_W, 0, fs, fd, FUN_CVT_S)
+#define _CVT_S_L(fd, fs) Rtype(OP_COP1, FMT_L, 0, fs, fd, FUN_CVT_S)
+/*
+ * NOTE(review): FUN_CVT_S_PL and FUN_CVT_S_PU are not among the FUN_*
+ * constants defined in this file (only FMT_S_PL / FMT_S_PU are); confirm
+ * the intended names before using these macros.
+ */
+#define _CVT_S_PL(fd, fs) Rtype(OP_COP1, FMT_P, 0, fs, fd, FUN_CVT_S_PL)
+#define _CVT_S_PU(fd, fs) Rtype(OP_COP1, FMT_P, 0, fs, fd, FUN_CVT_S_PU)
+#define _CVT_W_S(fd, fs) Rtype(OP_COP1, FMT_S, 0, fs, fd, FUN_CVT_W)
+#define _CVT_W_D(fd, fs) Rtype(OP_COP1, FMT_D, 0, fs, fd, FUN_CVT_W)
+#define _TRUNC_L_S(fd, fs) Rtype(OP_COP1, FMT_S, 0, fs, fd, FUN_TRUNC_L)
+#define _TRUNC_L_D(fd, fs) Rtype(OP_COP1, FMT_D, 0, fs, fd, FUN_TRUNC_L)
+#define _TRUNC_W_S(fd, fs) Rtype(OP_COP1, FMT_S, 0, fs, fd, FUN_TRUNC_W)
+#define _TRUNC_W_D(fd, fs) Rtype(OP_COP1, FMT_D, 0, fs, fd, FUN_TRUNC_W)
+#define _LWC1(rt, of, rb) Itype(OP_LWC1, rb, rt, of)
+#define _SWC1(rt, of, rb) Itype(OP_SWC1, rb, rt, of)
+#define _LDC1(rt, of, rb) Itype(OP_LDC1, rb, rt, of)
+#define _SDC1(rt, of, rb) Itype(OP_SDC1, rb, rt, of)
+#define _MOV_S(fd, fs) Rtype(OP_COP1, FMT_S, 0, fs, fd, FUN_MOV)
+#define _MOV_D(fd, fs) Rtype(OP_COP1, FMT_D, 0, fs, fd, FUN_MOV)
+#define _BC1F(im) Itype(OP_COP1, OP_BC, OP_BCF, im)
+#define _BC1T(im) Itype(OP_COP1, OP_BC, OP_BCT, im)
+#define _C_F_S(fs, ft) Rtype(OP_COP1, FMT_S, ft, fs, 0, COND_F)
+#define _C_F_D(fs, ft) Rtype(OP_COP1, FMT_D, ft, fs, 0, COND_F)
+#define _C_F_P(fs, ft) Rtype(OP_COP1, FMT_P, ft, fs, 0, COND_F)
+#define _C_UN_S(fs, ft) Rtype(OP_COP1, FMT_S, ft, fs, 0, COND_UN)
+#define _C_UN_D(fs, ft) Rtype(OP_COP1, FMT_D, ft, fs, 0, COND_UN)
+#define _C_UN_P(fs, ft) Rtype(OP_COP1, FMT_P, ft, fs, 0, COND_UN)
+#define _C_EQ_S(fs, ft) Rtype(OP_COP1, FMT_S, ft, fs, 0, COND_EQ)
+#define _C_EQ_D(fs, ft) Rtype(OP_COP1, FMT_D, ft, fs, 0, COND_EQ)
+#define _C_EQ_P(fs, ft) Rtype(OP_COP1, FMT_P, ft, fs, 0, COND_EQ)
+#define _C_UEQ_S(fs, ft) Rtype(OP_COP1, FMT_S, ft, fs, 0, COND_UEQ)
+#define _C_UEQ_D(fs, ft) Rtype(OP_COP1, FMT_D, ft, fs, 0, COND_UEQ)
+#define _C_UEQ_P(fs, ft) Rtype(OP_COP1, FMT_P, ft, fs, 0, COND_UEQ)
+#define _C_OLT_S(fs, ft) Rtype(OP_COP1, FMT_S, ft, fs, 0, COND_OLT)
+#define _C_OLT_D(fs, ft) Rtype(OP_COP1, FMT_D, ft, fs, 0, COND_OLT)
+#define _C_OLT_P(fs, ft) Rtype(OP_COP1, FMT_P, ft, fs, 0, COND_OLT)
+#define _C_ULT_S(fs, ft) Rtype(OP_COP1, FMT_S, ft, fs, 0, COND_ULT)
+#define _C_ULT_D(fs, ft) Rtype(OP_COP1, FMT_D, ft, fs, 0, COND_ULT)
+#define _C_ULT_P(fs, ft) Rtype(OP_COP1, FMT_P, ft, fs, 0, COND_ULT)
+#define _C_OLE_S(fs, ft) Rtype(OP_COP1, FMT_S, ft, fs, 0, COND_OLE)
+#define _C_OLE_D(fs, ft) Rtype(OP_COP1, FMT_D, ft, fs, 0, COND_OLE)
+#define _C_OLE_P(fs, ft) Rtype(OP_COP1, FMT_P, ft, fs, 0, COND_OLE)
+#define _C_ULE_S(fs, ft) Rtype(OP_COP1, FMT_S, ft, fs, 0, COND_ULE)
+#define _C_ULE_D(fs, ft) Rtype(OP_COP1, FMT_D, ft, fs, 0, COND_ULE)
+/* Paired-single compare: uses FMT_P like every other _C_*_P macro. */
+#define _C_ULE_P(fs, ft) Rtype(OP_COP1, FMT_P, ft, fs, 0, COND_ULE)
+#define _C_SF_S(fs, ft) Rtype(OP_COP1, FMT_S, ft, fs, 0, COND_SF)
+#define _C_SF_D(fs, ft) Rtype(OP_COP1, FMT_D, ft, fs, 0, COND_SF)
+#define _C_SF_P(fs, ft) Rtype(OP_COP1, FMT_P, ft, fs, 0, COND_SF)
+#define _C_NGLE_S(fs, ft) Rtype(OP_COP1, FMT_S, ft, fs, 0, COND_NGLE)
+#define _C_NGLE_D(fs, ft) Rtype(OP_COP1, FMT_D, ft, fs, 0, COND_NGLE)
+#define _C_NGLE_P(fs, ft) Rtype(OP_COP1, FMT_P, ft, fs, 0, COND_NGLE)
+#define _C_SEQ_S(fs, ft) Rtype(OP_COP1, FMT_S, ft, fs, 0, COND_SEQ)
+#define _C_SEQ_D(fs, ft) Rtype(OP_COP1, FMT_D, ft, fs, 0, COND_SEQ)
+#define _C_SEQ_P(fs, ft) Rtype(OP_COP1, FMT_P, ft, fs, 0, COND_SEQ)
+#define _C_NGL_S(fs, ft) Rtype(OP_COP1, FMT_S, ft, fs, 0, COND_NGL)
+#define _C_NGL_D(fs, ft) Rtype(OP_COP1, FMT_D, ft, fs, 0, COND_NGL)
+#define _C_NGL_P(fs, ft) Rtype(OP_COP1, FMT_P, ft, fs, 0, COND_NGL)
+/*
+ * The condition table in this file names code 0x3c COND_LT and defines no
+ * COND_NLT, so these macros use COND_LT (the entry at the matching
+ * position in the table).
+ */
+#define _C_NLT_S(fs, ft) Rtype(OP_COP1, FMT_S, ft, fs, 0, COND_LT)
+#define _C_NLT_D(fs, ft) Rtype(OP_COP1, FMT_D, ft, fs, 0, COND_LT)
+#define _C_NLT_P(fs, ft) Rtype(OP_COP1, FMT_P, ft, fs, 0, COND_LT)
+#define _C_NGE_S(fs, ft) Rtype(OP_COP1, FMT_S, ft, fs, 0, COND_NGE)
+#define _C_NGE_D(fs, ft) Rtype(OP_COP1, FMT_D, ft, fs, 0, COND_NGE)
+#define _C_NGE_P(fs, ft) Rtype(OP_COP1, FMT_P, ft, fs, 0, COND_NGE)
+/*
+ * The condition table in this file names code 0x3e COND_LE and defines no
+ * COND_NLE, so these macros use COND_LE (the entry at the matching
+ * position in the table).
+ */
+#define _C_NLE_S(fs, ft) Rtype(OP_COP1, FMT_S, ft, fs, 0, COND_LE)
+#define _C_NLE_D(fs, ft) Rtype(OP_COP1, FMT_D, ft, fs, 0, COND_LE)
+#define _C_NLE_P(fs, ft) Rtype(OP_COP1, FMT_P, ft, fs, 0, COND_LE)
+#define _C_UGT_S(fs, ft) Rtype(OP_COP1, FMT_S, ft, fs, 0, COND_UGT)
+#define _C_UGT_D(fs, ft) Rtype(OP_COP1, FMT_D, ft, fs, 0, COND_UGT)
+#define _C_UGT_P(fs, ft) Rtype(OP_COP1, FMT_P, ft, fs, 0, COND_UGT)
+
+#define fn(x) jit_fpr_regno(x)
+
+static void addr_f(jit_state_t * _jit, int32_t r0, int32_t r1, int32_t r2);
+static void addr_d(jit_state_t * _jit, int32_t r0, int32_t r1, int32_t r2);
+static void subr_f(jit_state_t * _jit, int32_t r0, int32_t r1, int32_t r2);
+static void subr_d(jit_state_t * _jit, int32_t r0, int32_t r1, int32_t r2);
+static void mulr_f(jit_state_t * _jit, int32_t r0, int32_t r1, int32_t r2);
+static void mulr_d(jit_state_t * _jit, int32_t r0, int32_t r1, int32_t r2);
+static void divr_f(jit_state_t * _jit, int32_t r0, int32_t r1, int32_t r2);
+static void divr_d(jit_state_t * _jit, int32_t r0, int32_t r1, int32_t r2);
+
+static void sqrtr_f(jit_state_t * _jit, int32_t r0, int32_t r1);
+static void sqrtr_d(jit_state_t * _jit, int32_t r0, int32_t r1);
+static void negr_f(jit_state_t * _jit, int32_t r0, int32_t r1);
+static void negr_d(jit_state_t * _jit, int32_t r0, int32_t r1);
+static void absr_f(jit_state_t * _jit, int32_t r0, int32_t r1);
+static void absr_d(jit_state_t * _jit, int32_t r0, int32_t r1);
+
+static void movr_f(jit_state_t * _jit, int32_t r0, int32_t r1);
+static void movr_d(jit_state_t * _jit, int32_t r0, int32_t r1);
+#if JIT_PASS_DOUBLES_IN_GPR_PAIRS
+static void movr_d_ww(jit_state_t * _jit, int32_t r0, int32_t r1,
+ int32_t r2);
+static void movr_ww_d(jit_state_t * _jit, int32_t r0, int32_t r1,
+ int32_t r2);
+static void stxi_ww(jit_state_t * _jit, jit_word_t o0, int32_t r0,
+ int32_t r1, int32_t r2);
+static void ldxi_ww(jit_state_t * _jit, int32_t r0, int32_t r1,
+ int32_t r2, jit_word_t o0);
+#endif
+#if JIT_PASS_FLOATS_IN_GPRS
+static void movr_f_w(jit_state_t * _jit, int32_t r0, int32_t r1);
+static void movr_w_f(jit_state_t * _jit, int32_t r0, int32_t r1);
+static void stxi_w(jit_state_t * _jit, jit_word_t o0, int32_t r0,
+ int32_t r1);
+static void ldxi_w(jit_state_t * _jit, int32_t r0, int32_t r1,
+ jit_word_t o0);
+#endif
+
+static void retval_f(jit_state_t * _jit, int32_t r0);
+static void retval_d(jit_state_t * _jit, int32_t r0);
+static void retr_f(jit_state_t * _jit, int32_t u);
+static void retr_d(jit_state_t * _jit, int32_t u);
+
+static void ldr_f(jit_state_t * _jit, int32_t r0, int32_t r1);
+static void ldr_d(jit_state_t * _jit, int32_t r0, int32_t r1);
+static void ldi_f(jit_state_t * _jit, int32_t r0, jit_word_t i0);
+static void ldxr_f(jit_state_t * _jit, int32_t r0, int32_t r1, int32_t r2);
+static void ldxi_f(jit_state_t * _jit, int32_t r0, int32_t r1,
+ jit_word_t i0);
+static void ldi_d(jit_state_t * _jit, int32_t r0, jit_word_t i0);
+static void ldxr_d(jit_state_t * _jit, int32_t r0, int32_t r1, int32_t r2);
+static void ldxi_d(jit_state_t * _jit, int32_t r0, int32_t r1,
+ jit_word_t i0);
+
+static void str_f(jit_state_t * _jit, int32_t r0, int32_t r1);
+static void str_d(jit_state_t * _jit, int32_t r0, int32_t r1);
+static void sti_f(jit_state_t * _jit, jit_word_t i0, int32_t r0);
+static void stxr_f(jit_state_t * _jit, int32_t r0, int32_t r1, int32_t r2);
+static void stxi_f(jit_state_t * _jit, jit_word_t i0, int32_t r0,
+ int32_t r1);
+static void sti_d(jit_state_t * _jit, jit_word_t i0, int32_t r0);
+static void stxr_d(jit_state_t * _jit, int32_t r0, int32_t r1, int32_t r2);
+static void stxi_d(jit_state_t * _jit, jit_word_t i0, int32_t r0,
+ int32_t r1);
+
+static jit_reloc_t bltr_f(jit_state_t * _jit, int32_t r0, int32_t r1);
+static jit_reloc_t bler_f(jit_state_t * _jit, int32_t r0, int32_t r1);
+static jit_reloc_t beqr_f(jit_state_t * _jit, int32_t r0, int32_t r1);
+static jit_reloc_t bger_f(jit_state_t * _jit, int32_t r0, int32_t r1);
+static jit_reloc_t bgtr_f(jit_state_t * _jit, int32_t r0, int32_t r1);
+static jit_reloc_t bner_f(jit_state_t * _jit, int32_t r0, int32_t r1);
+static jit_reloc_t bunltr_f(jit_state_t * _jit, int32_t r0, int32_t r1);
+static jit_reloc_t bunler_f(jit_state_t * _jit, int32_t r0, int32_t r1);
+static jit_reloc_t buneqr_f(jit_state_t * _jit, int32_t r0, int32_t r1);
+static jit_reloc_t bunger_f(jit_state_t * _jit, int32_t r0, int32_t r1);
+static jit_reloc_t bungtr_f(jit_state_t * _jit, int32_t r0, int32_t r1);
+static jit_reloc_t bltgtr_f(jit_state_t * _jit, int32_t r0, int32_t r1);
+static jit_reloc_t bordr_f(jit_state_t * _jit, int32_t r0, int32_t r1);
+static jit_reloc_t bunordr_f(jit_state_t * _jit, int32_t r0, int32_t r1);
+static jit_reloc_t bltr_d(jit_state_t * _jit, int32_t r0, int32_t r1);
+static jit_reloc_t bler_d(jit_state_t * _jit, int32_t r0, int32_t r1);
+static jit_reloc_t beqr_d(jit_state_t * _jit, int32_t r0, int32_t r1);
+static jit_reloc_t bger_d(jit_state_t * _jit, int32_t r0, int32_t r1);
+static jit_reloc_t bgtr_d(jit_state_t * _jit, int32_t r0, int32_t r1);
+static jit_reloc_t bner_d(jit_state_t * _jit, int32_t r0, int32_t r1);
+static jit_reloc_t bunltr_d(jit_state_t * _jit, int32_t r0, int32_t r1);
+static jit_reloc_t bunler_d(jit_state_t * _jit, int32_t r0, int32_t r1);
+static jit_reloc_t buneqr_d(jit_state_t * _jit, int32_t r0, int32_t r1);
+static jit_reloc_t bunger_d(jit_state_t * _jit, int32_t r0, int32_t r1);
+static jit_reloc_t bungtr_d(jit_state_t * _jit, int32_t r0, int32_t r1);
+static jit_reloc_t bltgtr_d(jit_state_t * _jit, int32_t r0, int32_t r1);
+static jit_reloc_t bordr_d(jit_state_t * _jit, int32_t r0, int32_t r1);
+static jit_reloc_t bunordr_d(jit_state_t * _jit, int32_t r0, int32_t r1);
+
+static void truncr_f_i(jit_state_t * _jit, int32_t r0, int32_t i0);
+static void truncr_d_i(jit_state_t * _jit, int32_t r0, int32_t i0);
+#if __WORDSIZE == 64
+static void truncr_f_l(jit_state_t * _jit, int32_t r0, int32_t r1);
+static void truncr_d_l(jit_state_t * _jit, int32_t r0, int32_t r1);
+#endif
+
+static void extr_f(jit_state_t * _jit, int32_t r0, int32_t r1);
+static void extr_d(jit_state_t * _jit, int32_t r0, int32_t r1);
+static void extr_f_d(jit_state_t * _jit, int32_t r0, int32_t r1);
+static void extr_d_f(jit_state_t * _jit, int32_t t0, int32_t r1);
+
+static void movi_f(jit_state_t * _jit, int32_t r0, jit_float32_t i0);
+static void movi_d(jit_state_t * _jit, int32_t r0, jit_float64_t i0);
+
+/* Emit a single-precision ADD with fd = r0, fs = r1, ft = r2. */
+static void
+addr_f(jit_state_t * _jit, int32_t r0, int32_t r1, int32_t r2)
+{
+ em_wp(_jit, _ADD_S(r0, r1, r2));
+}
+
+/* Emit a double-precision ADD with fd = r0, fs = r1, ft = r2. */
+static void
+addr_d(jit_state_t * _jit, int32_t r0, int32_t r1, int32_t r2)
+{
+ em_wp(_jit, _ADD_D(r0, r1, r2));
+}
+
+/* Emit a single-precision SUB with fd = r0, fs = r1, ft = r2. */
+static void
+subr_f(jit_state_t * _jit, int32_t r0, int32_t r1, int32_t r2)
+{
+ em_wp(_jit, _SUB_S(r0, r1, r2));
+}
+
+/* Emit a double-precision SUB with fd = r0, fs = r1, ft = r2. */
+static void
+subr_d(jit_state_t * _jit, int32_t r0, int32_t r1, int32_t r2)
+{
+ em_wp(_jit, _SUB_D(r0, r1, r2));
+}
+
+/* Emit a single-precision MUL with fd = r0, fs = r1, ft = r2. */
+static void
+mulr_f(jit_state_t * _jit, int32_t r0, int32_t r1, int32_t r2)
+{
+ em_wp(_jit, _MUL_S(r0, r1, r2));
+}
+
+/* Emit a double-precision MUL with fd = r0, fs = r1, ft = r2. */
+static void
+mulr_d(jit_state_t * _jit, int32_t r0, int32_t r1, int32_t r2)
+{
+ em_wp(_jit, _MUL_D(r0, r1, r2));
+}
+
+/* Emit a single-precision DIV with fd = r0, fs = r1, ft = r2. */
+static void
+divr_f(jit_state_t * _jit, int32_t r0, int32_t r1, int32_t r2)
+{
+ em_wp(_jit, _DIV_S(r0, r1, r2));
+}
+
+/* Emit a double-precision DIV with fd = r0, fs = r1, ft = r2. */
+static void
+divr_d(jit_state_t * _jit, int32_t r0, int32_t r1, int32_t r2)
+{
+ em_wp(_jit, _DIV_D(r0, r1, r2));
+}
+
+/* Emit a single-precision SQRT with fd = r0, fs = r1. */
+static void
+sqrtr_f(jit_state_t * _jit, int32_t r0, int32_t r1)
+{
+ em_wp(_jit, _SQRT_S(r0, r1));
+}
+
+/* Emit a double-precision SQRT with fd = r0, fs = r1. */
+static void
+sqrtr_d(jit_state_t * _jit, int32_t r0, int32_t r1)
+{
+ em_wp(_jit, _SQRT_D(r0, r1));
+}
+
+/* Emit a single-precision NEG with fd = r0, fs = r1. */
+static void
+negr_f(jit_state_t * _jit, int32_t r0, int32_t r1)
+{
+ em_wp(_jit, _NEG_S(r0, r1));
+}
+
+/* Emit a double-precision NEG with fd = r0, fs = r1. */
+static void
+negr_d(jit_state_t * _jit, int32_t r0, int32_t r1)
+{
+ em_wp(_jit, _NEG_D(r0, r1));
+}
+
+/* Emit a single-precision ABS with fd = r0, fs = r1. */
+static void
+absr_f(jit_state_t * _jit, int32_t r0, int32_t r1)
+{
+ em_wp(_jit, _ABS_S(r0, r1));
+}
+
+/* Emit a double-precision ABS with fd = r0, fs = r1. */
+static void
+absr_d(jit_state_t * _jit, int32_t r0, int32_t r1)
+{
+ em_wp(_jit, _ABS_D(r0, r1));
+}
+
+/*
+ * Convert the integer in GPR r1 to a single-precision float in FPR r0.
+ * The integer is first moved into a temporary FPR (MTC1 on 32-bit builds,
+ * DMTC1 otherwise) and then converted from W or L format respectively.
+ */
+static void
+extr_f(jit_state_t * _jit, int32_t r0, int32_t r1)
+{
+ jit_fpr_t t0 = get_temp_fpr(_jit);
+#if __WORDSIZE == 32
+ em_wp(_jit, _MTC1(r1, fn(t0)));
+ em_wp(_jit, _CVT_S_W(r0, fn(t0)));
+#else
+ em_wp(_jit, _DMTC1(r1, fn(t0)));
+ em_wp(_jit, _CVT_S_L(r0, fn(t0)));
+#endif
+ unget_temp_fpr(_jit);
+}
+
+/*
+ * Truncate the single-precision value in FPR r1 to a 32-bit integer in
+ * GPR r0: TRUNC.W.S into a temporary FPR, then MFC1 into r0.
+ */
+static void
+truncr_f_i(jit_state_t * _jit, int32_t r0, int32_t r1)
+{
+ jit_fpr_t t0 = get_temp_fpr(_jit);
+ em_wp(_jit, _TRUNC_W_S(fn(t0), r1));
+ em_wp(_jit, _MFC1(r0, fn(t0)));
+ unget_temp_fpr(_jit);
+}
+
+#if __WORDSIZE == 64
+/*
+ * Truncate the single-precision value in FPR r1 to a 64-bit integer in
+ * GPR r0: TRUNC.L.S into a temporary FPR, then DMFC1 into r0.
+ * Only built when __WORDSIZE == 64.
+ */
+static void
+truncr_f_l(jit_state_t * _jit, int32_t r0, int32_t r1)
+{
+ jit_fpr_t t0 = get_temp_fpr(_jit);
+ em_wp(_jit, _TRUNC_L_S(fn(t0), r1));
+ em_wp(_jit, _DMFC1(r0, fn(t0)));
+ unget_temp_fpr(_jit);
+}
+#endif
+
+/*
+ * Load a float into FPR r0 from the absolute address i0.
+ * An address that passes can_sign_extend_short_p() is encoded directly as
+ * an LWC1 offset from $zero; anything else is first materialised in a
+ * temporary GPR.
+ */
+static void
+ldi_f(jit_state_t * _jit, int32_t r0, jit_word_t i0)
+{
+    if (can_sign_extend_short_p(i0)) {
+        em_wp(_jit, _LWC1(r0, i0, rn(_ZERO)));
+        return;
+    }
+
+    jit_gpr_t tmp = get_temp_gpr(_jit);
+
+    movi(_jit, rn(tmp), i0);
+    ldr_f(_jit, r0, rn(tmp));
+    unget_temp_gpr(_jit);
+}
+
+/*
+ * Load a float into FPR r0 from the address r1 + r2 (both GPRs).
+ * The sum is computed into a temporary GPR and then loaded through ldr_f().
+ */
+static void
+ldxr_f(jit_state_t * _jit, int32_t r0, int32_t r1, int32_t r2)
+{
+ jit_gpr_t t0 = get_temp_gpr(_jit);
+ addr(_jit, rn(t0), r1, r2);
+ ldr_f(_jit, r0, rn(t0));
+ unget_temp_gpr(_jit);
+}
+
+/*
+ * Load a float into FPR r0 from the address r1 + i0.
+ * An offset that passes can_sign_extend_short_p() goes straight into the
+ * LWC1 encoding; otherwise the address is computed into a temporary GPR.
+ */
+static void
+ldxi_f(jit_state_t * _jit, int32_t r0, int32_t r1, jit_word_t i0)
+{
+    if (can_sign_extend_short_p(i0)) {
+        em_wp(_jit, _LWC1(r0, i0, r1));
+        return;
+    }
+
+    jit_gpr_t tmp = get_temp_gpr(_jit);
+
+    addi(_jit, rn(tmp), r1, i0);
+    ldr_f(_jit, r0, rn(tmp));
+    unget_temp_gpr(_jit);
+}
+
+/* Emit SWC1 storing FPR r1 at offset 0 from the address in GPR r0. */
+static void
+str_f(jit_state_t * _jit, int32_t r0, int32_t r1)
+{
+ em_wp(_jit, _SWC1(r1, 0, r0));
+}
+
+/*
+ * Store the float in FPR r0 at the absolute address i0.
+ * An address that passes can_sign_extend_short_p() is encoded directly as
+ * an SWC1 offset from $zero; anything else is first materialised in a
+ * temporary GPR.
+ */
+static void
+sti_f(jit_state_t * _jit, jit_word_t i0, int32_t r0)
+{
+    if (can_sign_extend_short_p(i0)) {
+        em_wp(_jit, _SWC1(r0, i0, rn(_ZERO)));
+        return;
+    }
+
+    jit_gpr_t tmp = get_temp_gpr(_jit);
+
+    movi(_jit, rn(tmp), i0);
+    str_f(_jit, rn(tmp), r0);
+    unget_temp_gpr(_jit);
+}
+
+/*
+ * Store the float in FPR r2 at the address r0 + r1 (both GPRs).
+ * The sum is computed into a temporary GPR and then stored through str_f().
+ */
+static void
+stxr_f(jit_state_t * _jit, int32_t r0, int32_t r1, int32_t r2)
+{
+ jit_gpr_t t0 = get_temp_gpr(_jit);
+ addr(_jit, rn(t0), r0, r1);
+ str_f(_jit, rn(t0), r2);
+ unget_temp_gpr(_jit);
+}
+
+/*
+ * Store the float in FPR r1 at the address r0 + i0.
+ * An offset that passes can_sign_extend_short_p() goes straight into the
+ * SWC1 encoding; otherwise the address is computed into a temporary GPR.
+ */
+static void
+stxi_f(jit_state_t * _jit, jit_word_t i0, int32_t r0, int32_t r1)
+{
+    if (can_sign_extend_short_p(i0)) {
+        em_wp(_jit, _SWC1(r1, i0, r0));
+        return;
+    }
+
+    jit_gpr_t tmp = get_temp_gpr(_jit);
+
+    addi(_jit, rn(tmp), r0, i0);
+    str_f(_jit, rn(tmp), r1);
+    unget_temp_gpr(_jit);
+}
+
+/* Copy the float in FPR r1 to FPR r0; emits nothing when both are the same register. */
+static void
+movr_f(jit_state_t * _jit, int32_t r0, int32_t r1)
+{
+    if (r0 == r1)
+        return;
+
+    em_wp(_jit, _MOV_S(r0, r1));
+}
+
+/*
+ * Load the float constant i0 into FPR r0.
+ * The constant's bit pattern is read back as an int32_t through a union,
+ * materialised in a temporary GPR and moved across with MTC1.
+ */
+static void
+movi_f(jit_state_t * _jit, int32_t r0, jit_float32_t i0)
+{
+ union {
+ int32_t i;
+ jit_float32_t f;
+ } u = {.f = i0};
+ jit_gpr_t t0 = get_temp_gpr(_jit);
+ movi(_jit, rn(t0), u.i);
+ em_wp(_jit, _MTC1(rn(t0), r0));
+ unget_temp_gpr(_jit);
+}
+
+/* Emit CVT.D.S: convert the single-precision value in FPR r1 to a double in FPR r0. */
+static void
+extr_f_d(jit_state_t * _jit, int32_t r0, int32_t r1)
+{
+ em_wp(_jit, _CVT_D_S(r0, r1));
+}
+
+/* Emit CVT.S.D: convert the double in FPR r1 to a single-precision value in FPR r0. */
+static void
+extr_d_f(jit_state_t * _jit, int32_t r0, int32_t r1)
+{
+ em_wp(_jit, _CVT_S_D(r0, r1));
+}
+
+/*
+ * Convert the integer in GPR r1 to a double in FPR r0.
+ * The integer is first moved into a temporary FPR (MTC1 on 32-bit builds,
+ * DMTC1 otherwise) and then converted from W or L format respectively.
+ */
+static void
+extr_d(jit_state_t * _jit, int32_t r0, int32_t r1)
+{
+ jit_fpr_t t0 = get_temp_fpr(_jit);
+#if __WORDSIZE == 32
+ em_wp(_jit, _MTC1(r1, fn(t0)));
+ em_wp(_jit, _CVT_D_W(r0, fn(t0)));
+#else
+ em_wp(_jit, _DMTC1(r1, fn(t0)));
+ em_wp(_jit, _CVT_D_L(r0, fn(t0)));
+#endif
+ unget_temp_fpr(_jit);
+}
+
+/*
+ * Truncate the double in FPR r1 to a 32-bit integer in GPR r0:
+ * TRUNC.W.D into a temporary FPR, then MFC1 into r0.
+ */
+static void
+truncr_d_i(jit_state_t * _jit, int32_t r0, int32_t r1)
+{
+ jit_fpr_t t0 = get_temp_fpr(_jit);
+ em_wp(_jit, _TRUNC_W_D(fn(t0), r1));
+ em_wp(_jit, _MFC1(r0, fn(t0)));
+ unget_temp_fpr(_jit);
+}
+
+#if __WORDSIZE == 64
+/*
+ * Truncate the double in FPR r1 to a 64-bit integer in GPR r0:
+ * TRUNC.L.D into a temporary FPR, then DMFC1 into r0.
+ * Only built when __WORDSIZE == 64.
+ */
+static void
+truncr_d_l(jit_state_t * _jit, int32_t r0, int32_t r1)
+{
+ jit_fpr_t t0 = get_temp_fpr(_jit);
+ em_wp(_jit, _TRUNC_L_D(fn(t0), r1));
+ em_wp(_jit, _DMFC1(r0, fn(t0)));
+ unget_temp_fpr(_jit);
+}
+#endif
+
+/* Emit LWC1 loading FPR r0 from offset 0 of the address in GPR r1. */
+static void
+ldr_f(jit_state_t * _jit, int32_t r0, int32_t r1)
+{
+ em_wp(_jit, _LWC1(r0, 0, r1));
+}
+
+/* Emit LDC1 loading FPR r0 from offset 0 of the address in GPR r1. */
+static void
+ldr_d(jit_state_t * _jit, int32_t r0, int32_t r1)
+{
+ em_wp(_jit, _LDC1(r0, 0, r1));
+}
+
+/*
+ * Load a double into FPR r0 from the absolute address i0.
+ * An address that passes can_sign_extend_short_p() is encoded directly as
+ * an LDC1 offset from $zero; anything else is first materialised in a
+ * temporary GPR and loaded from offset 0.
+ */
+static void
+ldi_d(jit_state_t * _jit, int32_t r0, jit_word_t i0)
+{
+    if (can_sign_extend_short_p(i0)) {
+        em_wp(_jit, _LDC1(r0, i0, rn(_ZERO)));
+        return;
+    }
+
+    jit_gpr_t tmp = get_temp_gpr(_jit);
+
+    movi(_jit, rn(tmp), i0);
+    em_wp(_jit, _LDC1(r0, 0, rn(tmp)));
+    unget_temp_gpr(_jit);
+}
+
+/*
+ * Load a double into FPR r0 from the address r1 + r2 (both GPRs).
+ * The sum is computed into a temporary GPR and then loaded through ldr_d().
+ */
+static void
+ldxr_d(jit_state_t * _jit, int32_t r0, int32_t r1, int32_t r2)
+{
+ jit_gpr_t t0 = get_temp_gpr(_jit);
+ addr(_jit, rn(t0), r1, r2);
+ ldr_d(_jit, r0, rn(t0));
+ unget_temp_gpr(_jit);
+}
+
+/*
+ * Load a double into FPR r0 from the address r1 + i0.
+ * An offset that passes can_sign_extend_short_p() goes straight into the
+ * LDC1 encoding; otherwise the address is computed into a temporary GPR.
+ */
+static void
+ldxi_d(jit_state_t * _jit, int32_t r0, int32_t r1, jit_word_t i0)
+{
+    if (can_sign_extend_short_p(i0)) {
+        em_wp(_jit, _LDC1(r0, i0, r1));
+        return;
+    }
+
+    jit_gpr_t tmp = get_temp_gpr(_jit);
+
+    addi(_jit, rn(tmp), r1, i0);
+    ldr_d(_jit, r0, rn(tmp));
+    unget_temp_gpr(_jit);
+}
+
+/* Emit SDC1 storing FPR r1 at offset 0 from the address in GPR r0. */
+static void
+str_d(jit_state_t * _jit, int32_t r0, int32_t r1)
+{
+ em_wp(_jit, _SDC1(r1, 0, r0));
+}
+
+/*
+ * Store the double in FPR r0 at the absolute address i0.
+ * An address that passes can_sign_extend_short_p() is encoded directly as
+ * an SDC1 offset from $zero; anything else is first materialised in a
+ * temporary GPR.
+ */
+static void
+sti_d(jit_state_t * _jit, jit_word_t i0, int32_t r0)
+{
+    if (can_sign_extend_short_p(i0)) {
+        em_wp(_jit, _SDC1(r0, i0, rn(_ZERO)));
+        return;
+    }
+
+    jit_gpr_t tmp = get_temp_gpr(_jit);
+
+    movi(_jit, rn(tmp), i0);
+    str_d(_jit, rn(tmp), r0);
+    unget_temp_gpr(_jit);
+}
+
+/*
+ * Store the double in FPR r2 at the address r0 + r1 (both GPRs).
+ * The sum is computed into a temporary GPR and then stored through str_d().
+ */
+static void
+stxr_d(jit_state_t * _jit, int32_t r0, int32_t r1, int32_t r2)
+{
+ jit_gpr_t t0 = get_temp_gpr(_jit);
+ addr(_jit, rn(t0), r0, r1);
+ str_d(_jit, rn(t0), r2);
+ unget_temp_gpr(_jit);
+}
+
+/*
+ * Store the double in FPR r1 at the address r0 + i0.
+ * An offset that passes can_sign_extend_short_p() goes straight into the
+ * SDC1 encoding; otherwise the address is computed into a temporary GPR.
+ */
+static void
+stxi_d(jit_state_t * _jit, jit_word_t i0, int32_t r0, int32_t r1)
+{
+    if (can_sign_extend_short_p(i0)) {
+        em_wp(_jit, _SDC1(r1, i0, r0));
+        return;
+    }
+
+    jit_gpr_t tmp = get_temp_gpr(_jit);
+
+    addi(_jit, rn(tmp), r0, i0);
+    str_d(_jit, rn(tmp), r1);
+    unget_temp_gpr(_jit);
+}
+
+/* Copy the double in FPR r1 to FPR r0; emits nothing when both are the same register. */
+static void
+movr_d(jit_state_t * _jit, int32_t r0, int32_t r1)
+{
+    if (r0 == r1)
+        return;
+
+    em_wp(_jit, _MOV_D(r0, r1));
+}
+
+#if JIT_PASS_DOUBLES_IN_GPR_PAIRS
+/*
+ * Build a double in FPR r0 from two GPRs: r1 is moved with MTC1 and r2
+ * with MTHC1. The two GPRs must be consecutive (r1 == r2 - 1).
+ */
+static void
+movr_d_ww(jit_state_t * _jit, int32_t r0, int32_t r1, int32_t r2)
+{
+ assert(r1 == r2 - 1);
+ em_wp(_jit, _MTC1(r1, r0));
+ em_wp(_jit, _MTHC1(r2, r0));
+}
+
+/*
+ * Split the double in FPR r2 into two GPRs: r0 is filled with MFC1 and r1
+ * with MFHC1. The two GPRs must be consecutive (r0 == r1 - 1).
+ */
+static void
+movr_ww_d(jit_state_t * _jit, int32_t r0, int32_t r1, int32_t r2)
+{
+ assert(r0 == r1 - 1);
+ em_wp(_jit, _MFC1(r0, r2));
+ em_wp(_jit, _MFHC1(r1, r2));
+}
+
+/*
+ * Store the double held in the GPR pair (r1, r2) at the address r0 + o0.
+ * The pair is assembled in a temporary FPR with movr_d_ww() and stored
+ * with stxi_d().
+ */
+static void
+stxi_ww(jit_state_t * _jit, jit_word_t o0, int32_t r0, int32_t r1,
+ int32_t r2)
+{
+ jit_fpr_t t0 = get_temp_fpr(_jit);
+ movr_d_ww(_jit, fn(t0), r1, r2);
+ stxi_d(_jit, o0, r0, fn(t0));
+ unget_temp_fpr(_jit);
+}
+
+/*
+ * Load a double from the address r2 + o0 into the GPR pair (r0, r1).
+ * The value is loaded into a temporary FPR with ldxi_d() and split with
+ * movr_ww_d().
+ */
+static void
+ldxi_ww(jit_state_t * _jit, int32_t r0, int32_t r1, int32_t r2,
+ jit_word_t o0)
+{
+ jit_fpr_t t0 = get_temp_fpr(_jit);
+ ldxi_d(_jit, fn(t0), r2, o0);
+ movr_ww_d(_jit, r0, r1, fn(t0));
+ unget_temp_fpr(_jit);
+}
+#endif
+
+#if JIT_PASS_FLOATS_IN_GPRS
+/* Emit MTC1 moving the word in GPR r1 into FPR r0. */
+static void
+movr_f_w(jit_state_t * _jit, int32_t r0, int32_t r1)
+{
+ em_wp(_jit, _MTC1(r1, r0));
+}
+
+/* Emit MFC1 moving the word in FPR r1 into GPR r0. */
+static void
+movr_w_f(jit_state_t * _jit, int32_t r0, int32_t r1)
+{
+ em_wp(_jit, _MFC1(r0, r1));
+}
+
+/*
+ * Store the float held in GPR r1 at the address r0 + o0.
+ * The word is moved to a temporary FPR with movr_f_w() and stored with
+ * stxi_f().
+ */
+static void
+stxi_w(jit_state_t * _jit, jit_word_t o0, int32_t r0, int32_t r1)
+{
+ jit_fpr_t t0 = get_temp_fpr(_jit);
+ movr_f_w(_jit, fn(t0), r1);
+ stxi_f(_jit, o0, r0, fn(t0));
+ unget_temp_fpr(_jit);
+}
+
+/*
+ * Load a float from the address r1 + o0 into GPR r0.
+ * The value is loaded into a temporary FPR with ldxi_f() and moved out
+ * with movr_w_f().
+ */
+static void
+ldxi_w(jit_state_t * _jit, int32_t r0, int32_t r1, jit_word_t o0)
+{
+ jit_fpr_t t0 = get_temp_fpr(_jit);
+ ldxi_f(_jit, fn(t0), r1, o0);
+ movr_w_f(_jit, r0, fn(t0));
+ unget_temp_fpr(_jit);
+}
+#endif
+
+/*
+ * Load the double constant i0 into FPR r0.
+ *
+ * On 64-bit builds the raw bit pattern is materialised in a temporary GPR
+ * and moved across with DMTC1. On other builds the low and high 32-bit
+ * words are moved separately with MTC1 and MTHC1, using $zero directly for
+ * a word that is 0.
+ *
+ * The two words are extracted with shifts on the 64-bit pattern rather
+ * than through a struct overlay, so the low/high split does not depend on
+ * the byte order of the machine running the JIT.
+ */
+static void
+movi_d(jit_state_t * _jit, int32_t r0, jit_float64_t i0)
+{
+    union {
+        int64_t l;
+        jit_float64_t d;
+    } data = {.d = i0};
+
+    jit_gpr_t t0 = get_temp_gpr(_jit);
+#if __WORDSIZE == 64
+    movi(_jit, rn(t0), data.l);
+    em_wp(_jit, _DMTC1(rn(t0), r0));
+#else
+    const uint64_t bits = (uint64_t)data.l;
+    const int32_t lo = (int32_t)(uint32_t)(bits & 0xffffffffu);
+    const int32_t hi = (int32_t)(uint32_t)(bits >> 32);
+
+    /* Same order as before: low word (MTC1) first, then high word (MTHC1). */
+    if (lo) {
+        movi(_jit, rn(t0), lo);
+        em_wp(_jit, _MTC1(rn(t0), r0));
+    } else
+        em_wp(_jit, _MTC1(rn(_ZERO), r0));
+
+    if (hi) {
+        movi(_jit, rn(t0), hi);
+        em_wp(_jit, _MTHC1(rn(t0), r0));
+    } else
+        em_wp(_jit, _MTHC1(rn(_ZERO), r0));
+#endif
+    unget_temp_gpr(_jit);
+}
+
+/* Copy the float return value from _F0 into FPR r0, unless r0 already is _F0. */
+static void
+retval_f(jit_state_t * _jit, int32_t r0)
+{
+    if (fn(_F0) == r0)
+        return;
+
+    movr_f(_jit, r0, fn(_F0));
+}
+
+/* Copy the double return value from _F0 into FPR r0, unless r0 already is _F0. */
+static void
+retval_d(jit_state_t * _jit, int32_t r0)
+{
+    if (fn(_F0) == r0)
+        return;
+
+    movr_d(_jit, r0, fn(_F0));
+}
+
+/* Move the float in FPR r0 into _F0 (if it is not already there) and emit ret(). */
+static void
+retr_f(jit_state_t * _jit, int32_t r0)
+{
+ if (fn(_F0) != r0)
+ movr_f(_jit, fn(_F0), r0);
+
+ ret(_jit);
+}
+
+/* Move the double in FPR r0 into _F0 (if it is not already there) and emit ret(). */
+static void
+retr_d(jit_state_t * _jit, int32_t r0)
+{
+ if (fn(_F0) != r0)
+ movr_d(_jit, fn(_F0), r0);
+
+ ret(_jit);
+}
+
+/*
+ * Emit a BC1T with a zero displacement, paired with the instruction built
+ * by _NOP(1) (presumably the delay slot), and return the relocation that
+ * emit_jump() hands back so the target can be patched later.
+ */
+static jit_reloc_t
+bct(jit_state_t * _jit)
+{
+ return emit_jump(_jit, _BC1T(0), _NOP(1));
+}
+
+/*
+ * Emit a BC1F with a zero displacement, paired with the instruction built
+ * by _NOP(1) (presumably the delay slot), and return the relocation that
+ * emit_jump() hands back so the target can be patched later.
+ */
+static jit_reloc_t
+bcf(jit_state_t * _jit)
+{
+ return emit_jump(_jit, _BC1F(0), _NOP(1));
+}
+
+/*
+ * Float branch if r1 < r2 (ordered): C.OLT.S followed by a branch on
+ * condition true. Returns the relocation of the branch.
+ */
+static jit_reloc_t
+bltr_f(jit_state_t * _jit, int32_t r1, int32_t r2)
+{
+    em_wp(_jit, _C_OLT_S(r1, r2));
+    return bct(_jit);
+}
+
+/*
+ * Float branch if r1 <= r2 (ordered): C.OLE.S followed by a branch on
+ * condition true. Returns the relocation of the branch.
+ */
+static jit_reloc_t
+bler_f(jit_state_t * _jit, int32_t r1, int32_t r2)
+{
+    em_wp(_jit, _C_OLE_S(r1, r2));
+    return bct(_jit);
+}
+
+/*
+ * Float branch if r1 == r2: C.EQ.S followed by a branch on condition
+ * true. Returns the relocation of the branch.
+ */
+static jit_reloc_t
+beqr_f(jit_state_t * _jit, int32_t r1, int32_t r2)
+{
+    em_wp(_jit, _C_EQ_S(r1, r2));
+    return bct(_jit);
+}
+
+/*
+ * Float branch if r1 >= r2 (ordered): C.ULT.S (unordered or less-than)
+ * followed by a branch on condition false. Returns the relocation of the
+ * branch.
+ */
+static jit_reloc_t
+bger_f(jit_state_t * _jit, int32_t r1, int32_t r2)
+{
+    em_wp(_jit, _C_ULT_S(r1, r2));
+    return bcf(_jit);
+}
+
+/*
+ * Float branch if r1 > r2 (ordered): C.ULE.S (unordered or
+ * less-or-equal) followed by a branch on condition false. Returns the
+ * relocation of the branch.
+ */
+static jit_reloc_t
+bgtr_f(jit_state_t * _jit, int32_t r1, int32_t r2)
+{
+    em_wp(_jit, _C_ULE_S(r1, r2));
+    return bcf(_jit);
+}
+
+/*
+ * Float branch if r1 != r2: C.EQ.S followed by a branch on condition
+ * false. Returns the relocation of the branch.
+ */
+static jit_reloc_t
+bner_f(jit_state_t * _jit, int32_t r1, int32_t r2)
+{
+    em_wp(_jit, _C_EQ_S(r1, r2));
+    return bcf(_jit);
+}
+
+/*
+ * Float branch if r1 < r2 or the operands are unordered: C.ULT.S followed
+ * by a branch on condition true. Returns the relocation of the branch.
+ */
+static jit_reloc_t
+bunltr_f(jit_state_t * _jit, int32_t r1, int32_t r2)
+{
+    em_wp(_jit, _C_ULT_S(r1, r2));
+    return bct(_jit);
+}
+
+/*
+ * Float branch if r1 <= r2 or the operands are unordered: C.ULE.S
+ * followed by a branch on condition true. Returns the relocation of the
+ * branch.
+ */
+static jit_reloc_t
+bunler_f(jit_state_t * _jit, int32_t r1, int32_t r2)
+{
+    em_wp(_jit, _C_ULE_S(r1, r2));
+    return bct(_jit);
+}
+
+/*
+ * Float branch if r1 == r2 or the operands are unordered: C.UEQ.S
+ * followed by a branch on condition true. Returns the relocation of the
+ * branch.
+ */
+static jit_reloc_t
+buneqr_f(jit_state_t * _jit, int32_t r1, int32_t r2)
+{
+    em_wp(_jit, _C_UEQ_S(r1, r2));
+    return bct(_jit);
+}
+
+/*
+ * Float branch if r1 >= r2 or the operands are unordered: C.OLT.S
+ * (ordered less-than) followed by a branch on condition false. Returns
+ * the relocation of the branch.
+ */
+static jit_reloc_t
+bunger_f(jit_state_t * _jit, int32_t r1, int32_t r2)
+{
+    em_wp(_jit, _C_OLT_S(r1, r2));
+    return bcf(_jit);
+}
+
+/*
+ * Float branch if r1 > r2 or the operands are unordered: C.OLE.S
+ * (ordered less-or-equal) followed by a branch on condition false.
+ * Returns the relocation of the branch.
+ */
+static jit_reloc_t
+bungtr_f(jit_state_t * _jit, int32_t r1, int32_t r2)
+{
+    em_wp(_jit, _C_OLE_S(r1, r2));
+    return bcf(_jit);
+}
+
+/*
+ * Float branch if r1 < r2 or r1 > r2 (ordered and not equal): C.UEQ.S
+ * followed by a branch on condition false. Returns the relocation of the
+ * branch.
+ */
+static jit_reloc_t
+bltgtr_f(jit_state_t * _jit, int32_t r1, int32_t r2)
+{
+    em_wp(_jit, _C_UEQ_S(r1, r2));
+    return bcf(_jit);
+}
+
+/*
+ * Float branch if the operands are ordered: C.UN.S followed by a branch
+ * on condition false. Returns the relocation of the branch.
+ */
+static jit_reloc_t
+bordr_f(jit_state_t * _jit, int32_t r1, int32_t r2)
+{
+    em_wp(_jit, _C_UN_S(r1, r2));
+    return bcf(_jit);
+}
+
+/*
+ * Float branch if the operands are unordered: C.UN.S followed by a
+ * branch on condition true. Returns the relocation of the branch.
+ */
+static jit_reloc_t
+bunordr_f(jit_state_t * _jit, int32_t r1, int32_t r2)
+{
+    em_wp(_jit, _C_UN_S(r1, r2));
+    return bct(_jit);
+}
+
+/*
+ * Double branch if r1 < r2 (ordered): C.OLT.D followed by a branch on
+ * condition true. Returns the relocation of the branch.
+ */
+static jit_reloc_t
+bltr_d(jit_state_t * _jit, int32_t r1, int32_t r2)
+{
+    em_wp(_jit, _C_OLT_D(r1, r2));
+    return bct(_jit);
+}
+
+/*
+ * Double branch if r1 <= r2 (ordered): C.OLE.D followed by a branch on
+ * condition true. Returns the relocation of the branch.
+ */
+static jit_reloc_t
+bler_d(jit_state_t * _jit, int32_t r1, int32_t r2)
+{
+    em_wp(_jit, _C_OLE_D(r1, r2));
+    return bct(_jit);
+}
+
+/*
+ * Double branch if r1 == r2: C.EQ.D followed by a branch on condition
+ * true. Returns the relocation of the branch.
+ */
+static jit_reloc_t
+beqr_d(jit_state_t * _jit, int32_t r1, int32_t r2)
+{
+    em_wp(_jit, _C_EQ_D(r1, r2));
+    return bct(_jit);
+}
+
+/*
+ * Double branch if r1 >= r2 (ordered): C.ULT.D (unordered or less-than)
+ * followed by a branch on condition false. Returns the relocation of the
+ * branch.
+ */
+static jit_reloc_t
+bger_d(jit_state_t * _jit, int32_t r1, int32_t r2)
+{
+    em_wp(_jit, _C_ULT_D(r1, r2));
+    return bcf(_jit);
+}
+
+/*
+ * Double branch if r1 > r2 (ordered): C.ULE.D (unordered or
+ * less-or-equal) followed by a branch on condition false. Returns the
+ * relocation of the branch.
+ */
+static jit_reloc_t
+bgtr_d(jit_state_t * _jit, int32_t r1, int32_t r2)
+{
+    em_wp(_jit, _C_ULE_D(r1, r2));
+    return bcf(_jit);
+}
+
+/*
+ * Double branch if r1 != r2: C.EQ.D followed by a branch on condition
+ * false. Returns the relocation of the branch.
+ */
+static jit_reloc_t
+bner_d(jit_state_t * _jit, int32_t r1, int32_t r2)
+{
+    em_wp(_jit, _C_EQ_D(r1, r2));
+    return bcf(_jit);
+}
+
+/*
+ * Double branch if r1 < r2 or the operands are unordered: C.ULT.D
+ * followed by a branch on condition true. Returns the relocation of the
+ * branch.
+ */
+static jit_reloc_t
+bunltr_d(jit_state_t * _jit, int32_t r1, int32_t r2)
+{
+    em_wp(_jit, _C_ULT_D(r1, r2));
+    return bct(_jit);
+}
+
+/*
+ * Double branch if r1 <= r2 or the operands are unordered: C.ULE.D
+ * followed by a branch on condition true. Returns the relocation of the
+ * branch.
+ */
+static jit_reloc_t
+bunler_d(jit_state_t * _jit, int32_t r1, int32_t r2)
+{
+    em_wp(_jit, _C_ULE_D(r1, r2));
+    return bct(_jit);
+}
+
+/*
+ * Double branch if r1 == r2 or the operands are unordered: C.UEQ.D
+ * followed by a branch on condition true. Returns the relocation of the
+ * branch.
+ */
+static jit_reloc_t
+buneqr_d(jit_state_t * _jit, int32_t r1, int32_t r2)
+{
+    em_wp(_jit, _C_UEQ_D(r1, r2));
+    return bct(_jit);
+}
+
+/*
+ * Double branch if r1 >= r2 or the operands are unordered: C.OLT.D
+ * (ordered less-than) followed by a branch on condition false. Returns
+ * the relocation of the branch.
+ */
+static jit_reloc_t
+bunger_d(jit_state_t * _jit, int32_t r1, int32_t r2)
+{
+    em_wp(_jit, _C_OLT_D(r1, r2));
+    return bcf(_jit);
+}
+
+/*
+ * Double branch if r1 > r2 or the operands are unordered: C.OLE.D
+ * (ordered less-or-equal) followed by a branch on condition false.
+ * Returns the relocation of the branch.
+ */
+static jit_reloc_t
+bungtr_d(jit_state_t * _jit, int32_t r1, int32_t r2)
+{
+    em_wp(_jit, _C_OLE_D(r1, r2));
+    return bcf(_jit);
+}
+
+/*
+ * Double branch if r1 < r2 or r1 > r2 (ordered and not equal): C.UEQ.D
+ * followed by a branch on condition false. Returns the relocation of the
+ * branch.
+ */
+static jit_reloc_t
+bltgtr_d(jit_state_t * _jit, int32_t r1, int32_t r2)
+{
+    em_wp(_jit, _C_UEQ_D(r1, r2));
+    return bcf(_jit);
+}
+
+/*
+ * Double branch if the operands are ordered: C.UN.D followed by a branch
+ * on condition false. Returns the relocation of the branch.
+ */
+static jit_reloc_t
+bordr_d(jit_state_t * _jit, int32_t r1, int32_t r2)
+{
+    em_wp(_jit, _C_UN_D(r1, r2));
+    return bcf(_jit);
+}
+
+/*
+ * Double branch if the operands are unordered: C.UN.D followed by a
+ * branch on condition true. Returns the relocation of the branch.
+ */
+static jit_reloc_t
+bunordr_d(jit_state_t * _jit, int32_t r1, int32_t r2)
+{
+    em_wp(_jit, _C_UN_D(r1, r2));
+    return bct(_jit);
+}
diff --git a/deps/lightening/lightening/mips.c b/deps/lightening/lightening/mips.c
new file mode 100644
index 0000000..ac77e2f
--- /dev/null
+++ b/deps/lightening/lightening/mips.c
@@ -0,0 +1,282 @@
+/*
+ * Copyright (C) 2012-2019 Free Software Foundation, Inc.
+ *
+ * This file is part of GNU lightning.
+ *
+ * GNU lightning is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ *
+ * GNU lightning is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+ * License for more details.
+ *
+ * Authors:
+ * Paulo Cesar Pereira de Andrade
+ */
+
+#include "mips-cpu.c"
+#include "mips-fpu.c"
+
+/* GPRs that next_abi_arg() hands out for register-passed operands, in order. */
+static const jit_gpr_t abi_gpr_args[] = {
+  _A0, _A1, _A2, _A3,
+#if NEW_ABI
+  _A4, _A5, _A6, _A7,
+#endif
+};
+
+/* FPRs that next_abi_arg() hands out for floating-point operands, in order. */
+static const jit_fpr_t abi_fpr_args[] = {
+#if NEW_ABI
+  _F12, _F13, _F14, _F15, _F16, _F17, _F18, _F19
+#else
+  _F12, _F14
+#endif
+};
+
+/* Number of entries in each of the two tables above. */
+static const int abi_gpr_arg_count =
+  sizeof abi_gpr_args / sizeof *abi_gpr_args;
+static const int abi_fpr_arg_count =
+  sizeof abi_fpr_args / sizeof *abi_fpr_args;
+
+struct abi_arg_iterator { /* cursor used by next_abi_arg() to place call operands one by one */
+ const jit_operand_t *args; /* operand array being walked */
+ int argc; /* number of entries in args */
+
+ int arg_idx; /* index of the next operand to place */
+#if !NEW_ABI
+ int gpr_used; /* set to 1 once an operand went through the is_gpr_arg() path */
+ int gpr_idx; /* next index into abi_gpr_args[] */
+ int fpr_idx; /* next index into abi_fpr_args[] */
+#endif
+ jit_word_t stack_size; /* JIT_SP offset of the next stack-passed operand */
+ int stack_padding; /* zeroed by reset_abi_arg_iterator(); not otherwise used in mips.c */
+};
+
+static size_t page_size; /* set by jit_get_cpu() from sysconf(_SC_PAGE_SIZE); used as the rounding mask in jit_flush() */
+
+/*
+ * Implementation
+ */
+
+/*
+ * Report whether this build targets a hardware FPU.
+ * Returns 1 when __mips_hard_float is defined to a non-zero value by the
+ * compiler, 0 otherwise.  Declared with (void) so the definition is a
+ * real prototype rather than an old-style unspecified parameter list.
+ */
+static jit_bool_t
+has_fpu(void)
+{
+#if __mips_hard_float
+  return 1;
+#else
+  return 0;
+#endif
+}
+
+/*
+ * Platform probe: caches the system page size in page_size (used later
+ * by jit_flush()) and reports whether the target is usable.
+ * Returns 0 when the page size cannot be determined or has_fpu() is
+ * false, non-zero otherwise.
+ */
+jit_bool_t
+jit_get_cpu(void)
+{
+  long size = sysconf(_SC_PAGE_SIZE);
+  /*
+   * sysconf() signals failure with -1; storing that into the unsigned
+   * page_size would turn the masks computed in jit_flush() into garbage.
+   */
+  if (size <= 0)
+    return 0;
+  page_size = (size_t) size;
+  /*
+   * TODO: extensions?
+   */
+  return has_fpu();
+}
+
+/*
+ * Per-state initialisation hook.  Nothing is stored in _jit; the result
+ * is simply whatever has_fpu() reports.
+ */
+jit_bool_t
+jit_init(jit_state_t * _jit)
+{
+  jit_bool_t usable = has_fpu();
+
+  (void) _jit;
+  return usable;
+}
+
+/* Initial frame size reported by this backend: always zero bytes. */
+static size_t
+jit_initial_frame_size(void)
+{
+  const size_t initial_bytes = 0;
+
+  return initial_bytes;
+}
+
+/*
+ * Prepare iter for walking the argc operands in args: every field is
+ * cleared, then the operand array and count are recorded.
+ */
+static void
+reset_abi_arg_iterator(struct abi_arg_iterator *iter, size_t argc,
+                       const jit_operand_t * args)
+{
+  memset(iter, 0, sizeof *iter);
+  iter->args = args;
+  iter->argc = argc;
+#if !NEW_ABI
+  /*
+   * Stack-passed operands start at JIT_SP + 16 on O32.
+   * NOTE(review): presumably this skips the home area of the four
+   * register argument slots -- confirm against the O32 ABI.
+   */
+  iter->stack_size = 16;
+#endif
+}
+
+#if !NEW_ABI
+/*
+ * Size in bytes of an operand of ABI type abi.  Pointers take
+ * CHOOSE_32_64(4, 8) bytes; an unrecognised type aborts.
+ */
+static int
+jit_operand_abi_sizeof(enum jit_operand_abi abi)
+{
+  switch (abi) {
+  case JIT_OPERAND_ABI_INT8:
+  case JIT_OPERAND_ABI_UINT8:
+    return 1;
+
+  case JIT_OPERAND_ABI_INT16:
+  case JIT_OPERAND_ABI_UINT16:
+    return 2;
+
+  case JIT_OPERAND_ABI_INT32:
+  case JIT_OPERAND_ABI_UINT32:
+  case JIT_OPERAND_ABI_FLOAT:
+    return 4;
+
+  case JIT_OPERAND_ABI_INT64:
+  case JIT_OPERAND_ABI_UINT64:
+  case JIT_OPERAND_ABI_DOUBLE:
+    return 8;
+
+  case JIT_OPERAND_ABI_POINTER:
+    return CHOOSE_32_64(4, 8);
+
+  default:
+    abort();
+  }
+}
+#endif
+
+/*
+ * Work out where the next call operand is passed and store that location
+ * in *arg, advancing the iterator.
+ *
+ * iter: iterator prepared by reset_abi_arg_iterator(); it must still have
+ *       operands left (asserted).
+ * arg:  receives a GPR, FPR, GPR-pair or JIT_SP-relative memory operand.
+ */
+static void
+next_abi_arg(struct abi_arg_iterator *iter, jit_operand_t * arg)
+{
+  ASSERT(iter->arg_idx < iter->argc);
+  enum jit_operand_abi abi = iter->args[iter->arg_idx].abi;
+#if NEW_ABI
+  int idx = iter->arg_idx++;
+  /*
+   * on new abi the first eight arguments of any type are passed in
+   * registers
+   */
+  if (is_gpr_arg(abi) && idx < 8) {
+    *arg = jit_operand_gpr(abi, abi_gpr_args[idx]);
+    return;
+  }
+
+  if (is_fpr_arg(abi) && idx < 8) {
+    *arg = jit_operand_fpr(abi, abi_fpr_args[idx]);
+    return;
+  }
+
+  /* everything else goes to the stack in 8-byte slots */
+  *arg = jit_operand_mem(abi, JIT_SP, iter->stack_size);
+  iter->stack_size += 8;
+#else
+  /*
+   * O32 argument passing is a bit of a mess
+   */
+  iter->arg_idx++;
+  if (is_gpr_arg(abi) && iter->gpr_idx < abi_gpr_arg_count) {
+    *arg = jit_operand_gpr(abi, abi_gpr_args[iter->gpr_idx]);
+    iter->gpr_used = 1;
+    iter->gpr_idx++;
+    return;
+  }
+
+  if (is_fpr_arg(abi) && iter->gpr_idx <= 3) {
+    /* a double starts on an even-numbered argument slot */
+    if (abi == JIT_OPERAND_ABI_DOUBLE && iter->gpr_idx % 2 != 0)
+      iter->gpr_idx++;
+
+    /*
+     * Skipping a slot can move a double past the last register slot
+     * (gpr_idx 3 -> 4).  In that case fall through to the stack path
+     * below instead of indexing abi_gpr_args[] out of bounds.
+     */
+    if (iter->gpr_idx < abi_gpr_arg_count) {
+      if (!iter->gpr_used && iter->fpr_idx < abi_fpr_arg_count)
+        *arg = jit_operand_fpr(abi, abi_fpr_args[iter->fpr_idx]);
+      else if (abi == JIT_OPERAND_ABI_FLOAT) {
+        *arg = jit_operand_gpr(abi, abi_gpr_args[iter->gpr_idx]);
+      } else {
+        *arg = jit_operand_gpr_pair(abi,
+                                    abi_gpr_args[iter->gpr_idx + 0],
+                                    abi_gpr_args[iter->gpr_idx + 1]);
+      }
+
+      /* an FP operand consumes GPR slots even when it lives in an FPR */
+      iter->fpr_idx++;
+      iter->gpr_idx += abi == JIT_OPERAND_ABI_DOUBLE ? 2 : 1;
+      return;
+    }
+  }
+
+  /* stack operand: at least 4 bytes, aligned to its own rounded size */
+  size_t abi_size = jit_operand_abi_sizeof(abi);
+  abi_size = jit_align_up(abi_size, 4);
+
+  iter->stack_size = jit_align_up(iter->stack_size, abi_size);
+  *arg = jit_operand_mem(abi, JIT_SP, iter->stack_size);
+
+  iter->stack_size += abi_size;
+#endif
+
+}
+
+/*
+ * Flush the instruction cache for the range [fptr, tptr).  Both ends are
+ * masked with -page_size first (start rounded down, end rounded up),
+ * which is page rounding as long as page_size is a power of two.
+ */
+static void
+jit_flush(void *fptr, void *tptr)
+{
+  jit_word_t first = (jit_word_t) fptr & -page_size;
+  jit_word_t last = (((jit_word_t) tptr) + page_size - 1) & -page_size;
+
+  /*
+   * libgcc's __clear_cache is apparently a no-op in some situations:
+   * https://gcc.gnu.org/bugzilla/show_bug.cgi?id=90929
+   *
+   * so use the __builtin_ variant, which seems to work on real hardware
+   */
+  __builtin___clear_cache((void *) first, (void *) last);
+}
+
+/* Stack alignment in bytes: 16 when NEW_ABI is set, 8 otherwise. */
+static inline size_t
+jit_stack_alignment(void)
+{
+  const size_t alignment =
+#if NEW_ABI
+    16;
+#else
+    8;
+#endif
+
+  return alignment;
+}
+
+/* Hook with an empty body on this backend; all arguments are ignored. */
+static void
+jit_try_shorten(jit_state_t * _jit, jit_reloc_t reloc, jit_pointer_t addr)
+{
+  (void) addr;
+  (void) reloc;
+  (void) _jit;
+}
+
+/* Identity on this backend: the pointer is returned unchanged. */
+static void *
+bless_function_pointer(void *ptr)
+{
+  void *blessed = ptr;
+
+  return blessed;
+}
+
+/*
+ * Jumps around the veneer
+ */
+
+/*
+ * Patch the jump at loc with the distance, in 32-bit words, from loc to
+ * the current emission pointer.
+ */
+static void
+patch_jmp_without_veneer(jit_state_t * _jit, uint32_t * loc)
+{
+  uint32_t *here = _jit->pc.ui;
+
+  patch_jmp_offset(loc, here - loc);
+}
+
+/*
+ * Emit a _BEQ of _ZERO against _ZERO with offset 0, followed by a _NOP
+ * for the delay slot.  Returns the address of the branch word so it can
+ * be handed to patch_jmp_without_veneer() later.
+ */
+static uint32_t *
+jmp_without_veneer(jit_state_t * _jit)
+{
+  uint32_t *branch = _jit->pc.ui;
+
+  emit_u32(_jit, _BEQ(rn(_ZERO), rn(_ZERO), 0));
+  /* branch delay slot */
+  emit_u32(_jit, _NOP(1));
+
+  return branch;
+}
+
+/*
+ * Load from pool offset
+ */
+/* Not used by this backend: aborts if it is ever reached. */
+static void
+patch_load_from_pool_offset(uint32_t * loc, int32_t v)
+{
+  (void) v;
+  (void) loc;
+  abort();
+}
+
+/*
+ * Not used by this backend: aborts if it is ever reached.  The trailing
+ * return only satisfies the non-void signature.
+ */
+static int32_t
+read_load_from_pool_offset(uint32_t * loc)
+{
+  (void) loc;
+  abort();
+
+  return 0;
+}
diff --git a/deps/lightening/lightening/mips.h b/deps/lightening/lightening/mips.h
new file mode 100644
index 0000000..62950f4
--- /dev/null
+++ b/deps/lightening/lightening/mips.h
@@ -0,0 +1,203 @@
+/*
+ * Copyright (C) 2012-2017 Free Software Foundation, Inc.
+ *
+ * This file is part of GNU lightning.
+ *
+ * GNU lightning is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ *
+ * GNU lightning is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+ * License for more details.
+ *
+ * Authors:
+ * Paulo Cesar Pereira de Andrade
+ */
+
+#ifndef _jit_mips_h
+#define _jit_mips_h
+
+#if _MIPS_SIM != _ABIO32 /* every ABI other than O32 is treated as the "new" ABI */
+#define NEW_ABI 1 /* left undefined (not 0) on O32; "#if !NEW_ABI" relies on undefined names evaluating to 0 */
+#endif
+
+#define JIT_NEEDS_LITERAL_POOL 1
+#define JIT_USE_IMMEDIATE_RELOC 1
+
+#if !NEW_ABI /* O32 only */
+#define JIT_PASS_DOUBLES_IN_GPR_PAIRS 1
+#define JIT_PASS_FLOATS_IN_GPRS 1
+#endif
+
+/*
+ * General purpose register names.  Registers 8-11 are the extra argument
+ * registers a4-a7 on the new ABIs and the temporaries t0-t3 on O32.
+ */
+#define _ZERO JIT_GPR(0)
+#define _AT JIT_GPR(1)
+#define _V0 JIT_GPR(2)
+#define _V1 JIT_GPR(3)
+#define _A0 JIT_GPR(4)
+#define _A1 JIT_GPR(5)
+#define _A2 JIT_GPR(6)
+#define _A3 JIT_GPR(7)
+/* kept visible on every ABI for source compatibility; same register as _T0 on O32 */
+#define _A4 JIT_GPR(8)
+
+#if NEW_ABI
+#define _A5 JIT_GPR(9)
+#define _A6 JIT_GPR(10)
+#define _A7 JIT_GPR(11)
+#else
+/* all four are needed: JIT_R4..JIT_R7 expand to _T0.._T3 on O32 */
+#define _T0 JIT_GPR(8)
+#define _T1 JIT_GPR(9)
+#define _T2 JIT_GPR(10)
+#define _T3 JIT_GPR(11)
+#endif
+
+#define _T4 JIT_GPR(12)
+#define _T5 JIT_GPR(13)
+#define _T6 JIT_GPR(14)
+#define _T7 JIT_GPR(15)
+
+#define _S0 JIT_GPR(16)
+#define _S1 JIT_GPR(17)
+#define _S2 JIT_GPR(18)
+#define _S3 JIT_GPR(19)
+#define _S4 JIT_GPR(20)
+#define _S5 JIT_GPR(21)
+#define _S6 JIT_GPR(22)
+#define _S7 JIT_GPR(23)
+
+#define _T8 JIT_GPR(24)
+#define _T9 JIT_GPR(25)
+
+/*
+ * k0-k1 reserved for kernel usage
+ */
+
+#define _GP JIT_GPR(28)
+#define _SP JIT_GPR(29)
+#define _FP JIT_GPR(30)
+
+#define _RA JIT_GPR(31)
+
+#define _F0 JIT_FPR(0)
+#define _F1 JIT_FPR(1)
+#define _F2 JIT_FPR(2)
+#define _F3 JIT_FPR(3)
+#define _F4 JIT_FPR(4)
+#define _F5 JIT_FPR(5)
+#define _F6 JIT_FPR(6)
+#define _F7 JIT_FPR(7)
+#define _F8 JIT_FPR(8)
+#define _F9 JIT_FPR(9)
+#define _F10 JIT_FPR(10)
+#define _F11 JIT_FPR(11)
+#define _F12 JIT_FPR(12)
+#define _F13 JIT_FPR(13)
+#define _F14 JIT_FPR(14)
+#define _F15 JIT_FPR(15)
+#define _F16 JIT_FPR(16)
+#define _F17 JIT_FPR(17)
+#define _F18 JIT_FPR(18)
+#define _F19 JIT_FPR(19)
+#define _F20 JIT_FPR(20)
+#define _F21 JIT_FPR(21)
+#define _F22 JIT_FPR(22)
+#define _F23 JIT_FPR(23)
+#define _F24 JIT_FPR(24)
+#define _F25 JIT_FPR(25)
+#define _F26 JIT_FPR(26)
+#define _F27 JIT_FPR(27)
+#define _F28 JIT_FPR(28)
+#define _F29 JIT_FPR(29)
+#define _F30 JIT_FPR(30)
+#define _F31 JIT_FPR(31)
+
+#define JIT_LR _RA
+#define JIT_SP _SP
+#define JIT_FP _FP
+
+#define JIT_R0 _A0
+#define JIT_R1 _A1
+#define JIT_R2 _A2
+#define JIT_R3 _A3
+
+#if NEW_ABI
+#define JIT_R4 _A4
+#define JIT_R5 _A5
+#define JIT_R6 _A6
+#define JIT_R7 _A7
+#else
+#define JIT_R4 _T0
+#define JIT_R5 _T1
+#define JIT_R6 _T2
+#define JIT_R7 _T3
+#endif
+
+#define JIT_CARRY _T4
+#define JIT_TMP0 _T5
+#define JIT_TMP1 _T6
+#define JIT_TMP2 _T7
+#define JIT_TMP3 _T8
+/* _T9 is used as a kind of link register and may therefore be overwritten at any time, so it is not exposed as a temporary */
+//#define JIT_TMP4 _T9
+
+#define JIT_V0 _S0
+#define JIT_V1 _S1
+#define JIT_V2 _S2
+#define JIT_V3 _S3
+#define JIT_V4 _S4
+#define JIT_V5 _S5
+#define JIT_V6 _S6
+#define JIT_V7 _S7
+
+#define JIT_F0 _F0
+#define JIT_F1 _F2
+#define JIT_F2 _F6
+#define JIT_F3 _F8
+#define JIT_F4 _F10
+#define JIT_F5 _F12
+#define JIT_F6 _F14
+#define JIT_F7 _F16
+#define JIT_F8 _F18
+#define JIT_FTMP _F4
+
+#if NEW_ABI
+#define JIT_F9 _F20
+#define JIT_F10 _F22
+#else
+#define JIT_VF4 _F20
+#define JIT_VF5 _F22
+#endif
+
+#define JIT_VF0 _F24
+#define JIT_VF1 _F26
+#define JIT_VF2 _F28
+#define JIT_VF3 _F30
+
+#if NEW_ABI
+/*
+ * _RA is in some documents referred to as caller-save, but storing it in
+ * the function stack frame works equally well, which is what we do here
+ * (gcc apparently does this)
+ */
+#define JIT_PLATFORM_CALLEE_SAVE_GPRS _SP, _FP, _GP, _RA
+#else
+#define JIT_PLATFORM_CALLEE_SAVE_GPRS _SP, _FP, _RA
+#endif
+
+#define JIT_PLATFORM_CALLEE_SAVE_FPRS
+
+#define JIT_JMP_MAX_SIZE (sizeof(uint32_t) * 2)
+
+#if __WORDSIZE == 64
+#define JIT_LITERAL_MAX_SIZE ((sizeof(uint32_t) * 6) + JIT_JMP_MAX_SIZE)
+#else
+#define JIT_LITERAL_MAX_SIZE ((sizeof(uint32_t) * 2) + JIT_JMP_MAX_SIZE)
+#endif
+
+#define JIT_INST_MAX_SIZE JIT_JMP_MAX_SIZE
+
+// A little bit overkill, but just to be safe
+#define JIT_EXTRA_SPACE (16 * sizeof(uint32_t))
+
+#endif /* _jit_mips_h */
diff --git a/deps/lightening/lightening/ppc-cpu.c b/deps/lightening/lightening/ppc-cpu.c
new file mode 100644
index 0000000..a56c207
--- /dev/null
+++ b/deps/lightening/lightening/ppc-cpu.c
@@ -0,0 +1,3136 @@
+/*
+ * Copyright (C) 2012-2017, 2019 Free Software Foundation, Inc.
+ *
+ * This file is part of GNU lightning.
+ *
+ * GNU lightning is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ *
+ * GNU lightning is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+ * License for more details.
+ *
+ * Authors:
+ * Paulo Cesar Pereira de Andrade
+ */
+
+/*
+ * Word-size dependent frame layout constants and immediate-range
+ * predicates.  On 32-bit builds every word-sized value trivially fits,
+ * so the *_int_p / fits_uint32_p predicates are constant 1.
+ */
+# if __WORDSIZE == 32
+# define gpr_save_area 72 /* r14~r31 = 18 * 4 */
+# define params_offset 24
+# define can_sign_extend_int_p(im) 1
+# define can_zero_extend_int_p(im) 1
+# define fits_uint32_p(im) 1
+# else
+# define gpr_save_area 144 /* r14~r31 = 18 * 8 */
+# if ABI_ELFv2
+# define params_offset 32
+# else
+# define params_offset 48
+# endif
+# define can_sign_extend_int_p(im) \
+ (((im) >= 0 && (long)(im) <= 0x7fffffffL) || \
+ ((im) < 0 && (long)(im) >= -0x80000000L))
+# define can_zero_extend_int_p(im) \
+ ((im) >= 0 && (im) < 0x80000000L)
+/* argument parenthesised so expressions such as (a | b) are masked as a whole */
+# define fits_uint32_p(im) (((im) & 0xffffffff00000000L) == 0)
+# endif
+# define fpr_save_area 64
+
+/* Range predicates for 16-bit immediates and 26-bit jump displacements. */
+# define can_sign_extend_short_p(im) ((im) >= -32768 && (im) <= 32767)
+# define can_zero_extend_short_p(im) ((im) >= 0 && (im) <= 65535)
+# define can_sign_extend_jump_p(im) ((im) >= -33554432 && (im) <= 33554431)
+
+/* True when i0 fits a signed 14-bit / 24-bit field respectively. */
+#define simm14_p(i0) ((i0) <= 0x1fff && (i0) >= -0x2000)
+#define simm24_p(i0) ((i0) <= 0x7fffff && (i0) >= -0x800000)
+
+# define _FXO(o,d,a,b,e,x) FXO(o,d,a,b,e,x,0) /* wrappers whose name ends in '_' pass 1 as the encoder's last argument, the plain ones pass 0 */
+# define _FXO_(o,d,a,b,e,x) FXO(o,d,a,b,e,x,1)
+# define _FDs(o,d,a,s) FDs(o,d,a,s)
+# define _FDu(o,d,a,s) FDu(o,d,a,s)
+# define _FX(o,d,a,b,x) FX(o,d,a,b,x,0)
+# define _FX_(o,d,a,b,x) FX(o,d,a,b,x,1)
+# define _FI(o,t,a,k) FI(o,t,a,k)
+# define _FB(o,bo,bi,t,a,k) FB(o,bo,bi,t,a,k)
+# define _FXL(o,bo,bi,x) FXL(o,bo,bi,x,0)
+# define _FXL_(o,bo,bi,x) FXL(o,bo,bi,x,1)
+# define _FC(o,d,l,a,b,x) FC(o,d,l,a,b,x)
+# define _FCI(o,d,l,a,s) FCI(o,d,l,a,s)
+# define _FXFX(o,s,x,f) FXFX(o,s,x,f)
+# define _FM(o,s,a,h,b,e,r) FM(o,s,a,h,b,e,r)
+# if __WORDSIZE == 64
+# define _FMDS(o,s,a,b,e,x) FMDS(o,s,a,b,e,x,0)
+# define _FMDS_(o,s,a,b,e,x) FMDS(o,s,a,b,e,x,1)
+# define _FMD(o,s,a,h,b,x,i) FMD(o,s,a,h,b,x,i,0)
+# define _FMD_(o,s,a,h,b,x,i) FMD(o,s,a,h,b,x,i,1)
+# define _FXS(o,d,a,h,x,i) FXS(o,d,a,h,x,i,0)
+# define _FXS_(o,d,a,h,x,i) FXS(o,d,a,h,x,i,1)
+# endif
+
+static uint32_t FXO(int,int,int,int,int,int,int); /* each encoder returns one 32-bit value; argument counts match the wrappers above */
+static uint32_t FDs(int,int,int,int);
+static uint32_t FDu(int,int,int,int);
+static uint32_t FX(int,int,int,int,int,int);
+static uint32_t FI(int,int,int,int);
+static uint32_t FB(int,int,int,int,int,int);
+static uint32_t FXL(int,int,int,int,int);
+static uint32_t FC(int,int,int,int,int,int);
+static uint32_t FCI(int,int,int,int,int);
+static uint32_t FXFX(int,int,int,int);
+static uint32_t FM(int,int,int,int,int,int,int);
+
+# if __WORDSIZE == 64
+static uint32_t FMD(int,int,int,int,int,int,int,int); /* NOTE(review): no prototype for FMDS here although _FMDS/_FMDS_ expand to it -- confirm it is declared before first use */
+static uint32_t FXS(int,int,int,int,int,int,int);
+# endif
+
+# define CR_0 0
+# define CR_1 1
+# define CR_2 2
+# define CR_3 3
+# define CR_4 4
+# define CR_5 5
+# define CR_6 6
+# define CR_7 7
+# define CR_LT 0
+# define CR_GT 1
+# define CR_EQ 2
+# define CR_SO 3
+# define CR_UN 3
+# define BCC_F 4
+# define BCC_T 12
+/*
+ * add / addc / adde families.  Names ending in '_' go through _FXO_
+ * (last encoder argument 1), the others through _FXO (last argument 0);
+ * the 'O' variants pass 1 in the fifth position.  _ADDC uses the plain
+ * wrapper so that it differs from _ADDC_ like every other pair here.
+ */
+# define _ADD(d,a,b) _FXO(31,d,a,b,0,266)
+# define _ADD_(d,a,b) _FXO_(31,d,a,b,0,266)
+# define _ADDO(d,a,b) _FXO(31,d,a,b,1,266)
+# define _ADDO_(d,a,b) _FXO_(31,d,a,b,1,266)
+# define _ADDC(d,a,b) _FXO(31,d,a,b,0,10)
+# define _ADDC_(d,a,b) _FXO_(31,d,a,b,0,10)
+# define _ADDCO(d,a,b) _FXO(31,d,a,b,1,10)
+# define _ADDCO_(d,a,b) _FXO_(31,d,a,b,1,10)
+# define _ADDE(d,a,b) _FXO(31,d,a,b,0,138)
+# define _ADDE_(d,a,b) _FXO_(31,d,a,b,0,138)
+# define _ADDEO(d,a,b) _FXO(31,d,a,b,1,138)
+# define _ADDEO_(d,a,b) _FXO_(31,d,a,b,1,138)
+# define _ADDI(d,a,s) _FDs(14,d,a,s)
+# define _ADDIC(d,a,s) _FDs(12,d,a,s)
+# define _ADDIC_(d,a,s) _FDs(13,d,a,s)
+# define _ADDIS(d,a,s) _FDs(15,d,a,s)
+# define _LIS(d,s) _ADDIS(d,0,s)
+# define _ADDME(d,a) _FXO(31,d,a,0,0,234)
+# define _ADDME_(d,a) _FXO_(31,d,a,0,0,234)
+# define _ADDMEO(d,a) _FXO(31,d,a,0,1,234)
+# define _ADDMEO_(d,a) _FXO_(31,d,a,0,1,234)
+# define _ADDZE(d,a) _FXO(31,d,a,0,0,202)
+# define _ADDZE_(d,a) _FXO_(31,d,a,0,0,202)
+# define _ADDZEO(d,a) _FXO(31,d,a,0,1,202)
+# define _ADDZEO_(d,a) _FXO_(31,d,a,0,1,202)
+/*
+ * and / andc / andi families.  The logical forms pass the operands to
+ * the encoder as (a, d, b); _AND_ must use the same order as _AND and
+ * differ only in going through the '_' wrapper.
+ */
+# define _AND(d,a,b) _FX(31,a,d,b,28)
+# define _ANDC(d,a,b) _FXO(31,a,d,b,0,60)
+# define _ANDC_(d,a,b) _FXO_(31,a,d,b,0,60)
+# define _AND_(d,a,b) _FX_(31,a,d,b,28)
+# define _ANDI_(d,a,u) _FDu(28,a,d,u)
+# define _ANDIS_(d,a,u) _FDu(29,a,d,u)
+# define _B(t) _FI(18,t,0,0)
+# define _BA(t) _FI(18,t,1,0)
+# define _BL(t) _FI(18,t,0,1)
+# define _BLA(t) _FI(18,t,1,1)
+# define _BC(o,i,t) _FB(16,o,i,t,0,0)
+# define _BCA(o,i,t) _FB(16,o,i,t,1,0)
+# define _BCL(o,i,t) _FB(16,o,i,t,0,1)
+# define _BCLA(o,i,t) _FB(16,o,i,t,1,1)
+# define _BLT(t) _BC(BCC_T,CR_LT,t)
+# define _BLE(t) _BC(BCC_F,CR_GT,t)
+# define _BEQ(t) _BC(BCC_T,CR_EQ,t)
+# define _BGE(t) _BC(BCC_F,CR_LT,t)
+# define _BGT(t) _BC(BCC_T,CR_GT,t)
+# define _BNE(t) _BC(BCC_F,CR_EQ,t)
+# define _BUN(t) _BC(BCC_T,CR_UN,t)
+# define _BNU(t) _BC(BCC_F,CR_UN,t)
+# define _BCCTR(o,i) _FXL(19,o,i,528)
+# define _BCCTRL(o,i) _FXL_(19,o,i,528)
+# define _BLTCTR() _BCCTR(BCC_T,CR_LT)
+# define _BLECTR() _BCCTR(BCC_F,CR_GT)
+# define _BEQCTR() _BCCTR(BCC_T,CR_EQ)
+# define _BGECTR() _BCCTR(BCC_F,CR_LT)
+# define _BGTCTR() _BCCTR(BCC_T,CR_GT)
+# define _BNECTR() _BCCTR(BCC_F,CR_EQ)
+# define _BCTR() _BCCTR(20,0)
+# define _BCTRL() _BCCTRL(20,0)
+# define _BCLR(o,i) _FXL(19,o,i,16)
+# define _BCLRL(o,i) _FXL_(19,o,i,16)
+# define _BLTLR() _BCLR(BCC_T,CR_LT)
+# define _BLELR() _BCLR(BCC_F,CR_GT)
+# define _BEQLR() _BCLR(BCC_T,CR_EQ)
+# define _BGELR() _BCLR(BCC_F,CR_LT)
+# define _BGTLR() _BCLR(BCC_T,CR_GT)
+# define _BNELR() _BCLR(BCC_F,CR_EQ)
+# define _BLR() _BCLR(20,0)
+# define _BLRL() _BCLRL(20,0)
+# define _XCMP(cr,l,a,b) _FC(31,cr,l,a,b,0)
+# define _CMPD(a,b) _XCMP(0,1,a,b)
+# define _CMPW(a,b) _XCMP(0,0,a,b)
+# define _XCMPI(cr,l,a,s) _FCI(11,cr,l,a,s)
+# define _CMPDI(a,s) _XCMPI(0,1,a,s)
+# define _CMPWI(a,s) _XCMPI(0,0,a,s)
+# define _XCMPL(cr,l,a,b) _FC(31,cr,l,a,b,32)
+# define _CMPLD(a,b) _XCMPL(0,1,a,b)
+# define _CMPLW(a,b) _XCMPL(0,0,a,b)
+# define _XCMPLI(cr,l,a,u) _FCI(10,cr,l,a,u)
+# define _CMPLDI(a,s) _XCMPLI(0,1,a,s)
+# define _CMPLWI(a,s) _XCMPLI(0,0,a,s)
+# define _CNTLZW(a,s) _FX(31,s,a,0,26)
+# define _CNTLZW_(a,s) _FX_(31,s,a,0,26)
+# define _CRAND(d,a,b) _FX(19,d,a,b,257)
+# define _CRANDC(d,a,b) _FX(19,d,a,b,129)
+# define _CREQV(d,a,b) _FX(19,d,a,b,289)
+# define _CRSET(d) _CREQV(d,d,d)
+# define _CRNAND(d,a,b) _FX(19,d,a,b,225)
+# define _CRNOR(d,a,b) _FX(19,d,a,b,33)
+# define _CRNOT(d,a) _CRNOR(d,a,a)
+# define _CROR(d,a,b) _FX(19,d,a,b,449)
+# define _CRMOVE(d,a) _CROR(d,a,a)
+# define _CRORC(d,a,b) _FX(19,d,a,b,417)
+# define _CRXOR(d,a,b) _FX(19,d,a,b,193)
+# define _CRCLR(d) _CRXOR(d,d,d)
+# define _DCBA(a,b) _FX(31,0,a,b,758)
+# define _DCBF(a,b) _FX(31,0,a,b,86)
+# define _DCBI(a,b) _FX(31,0,a,b,470)
+# define _DCBST(a,b) _FX(31,0,a,b,54)
+# define _DCBT(a,b) _FX(31,0,a,b,278)
+# define _DCBTST(a,b) _FX(31,0,a,b,246)
+# define _DCBZ(a,b) _FX(31,0,a,b,1014)
+# define _DIVW(d,a,b) _FXO(31,d,a,b,0,491)
+# define _DIVW_(d,a,b) _FXO_(31,d,a,b,0,491)
+# define _DIVWO(d,a,b) _FXO(31,d,a,b,1,491)
+# define _DIVWO_(d,a,b) _FXO_(31,d,a,b,1,491)
+# define _DIVWU(d,a,b) _FXO(31,d,a,b,0,459)
+# define _DIVWU_(d,a,b) _FXO_(31,d,a,b,0,459)
+# define _DIVWUO(d,a,b) _FXO(31,d,a,b,1,459)
+# define _DIVWUO_(d,a,b) _FXO_(31,d,a,b,1,459)
+# define _DIVD(d,a,b) _FXO(31,d,a,b,0,489)
+# define _DIVD_(d,a,b) _FXO_(31,d,a,b,0,489)
+# define _DIVDO(d,a,b) _FXO(31,d,a,b,1,489)
+# define _DIVDO_(d,a,b) _FXO_(31,d,a,b,1,489)
+# define _DIVDU(d,a,b) _FXO(31,d,a,b,0,457)
+# define _DIVDU_(d,a,b) _FXO_(31,d,a,b,0,457)
+# define _DIVDUO(d,a,b) _FXO(31,d,a,b,1,457)
+# define _DIVDUO_(d,a,b) _FXO_(31,d,a,b,1,457)
+# define _ECIWX(d,a,b) _FX(31,d,a,b,310)
+# define _ECOWX(s,a,b) _FX(31,s,a,b,438)
+# define _EIEIO() _FX(31,0,0,0,854)
+# define _EQV(d,a,b) _FX(31,a,d,b,284)
+# define _EQV_(d,a,b) _FX_(31,a,d,b,284)
+# define _EXTSB(d,a) _FX(31,a,d,0,954)
+# define _EXTSB_(d,a) _FX_(31,a,d,0,954)
+# define _EXTSH(d,a) _FX(31,a,d,0,922)
+# define _EXTSH_(d,a) _FX_(31,a,d,0,922)
+# define _EXTSW(d,a) _FX(31,a,d,0,986)
+# define _EXTSW_(d,a) _FX_(31,a,d,0,986)
+# define _ICIB(a,b) _FX(31,0,a,b,982)
+# define _ISYNC() _FXL(19,0,0,150)
+# define _LBZ(d,a,s) _FDs(34,d,a,s)
+# define _LBZU(d,a,s) _FDs(35,d,a,s)
+# define _LBZUX(d,a,b) _FX(31,d,a,b,119)
+# define _LBZX(d,a,b) _FX(31,d,a,b,87)
+# define _LHA(d,a,s) _FDs(42,d,a,s)
+# define _LHAU(d,a,s) _FDs(43,d,a,s)
+# define _LHAUX(d,a,b) _FX(31,d,a,b,375)
+# define _LHAX(d,a,b) _FX(31,d,a,b,343)
+# define _LHRBX(d,a,b) _FX(31,d,a,b,790)
+# define _LHZ(d,a,s) _FDs(40,d,a,s)
+# define _LHZU(d,a,s) _FDs(41,d,a,s)
+# define _LHZUX(d,a,b) _FX(31,d,a,b,311)
+# define _LHZX(d,a,b) _FX(31,d,a,b,279)
+# define _LA(d,a,s) _ADDI(d,a,s)
+# define _LI(d,s) _ADDI(d,0,s)
+# define _LMW(d,a,s) _FDs(46,d,a,s)
+# define _LSWI(d,a,n) _FX(31,d,a,n,597)
+# define _LSWX(d,a,b) _FX(31,d,a,b,533)
+# define _LWARX(d,a,b) _FX(31,d,a,b,20)
+# define _LWBRX(d,a,b) _FX(31,d,a,b,534)
+# define _LWA(d,a,s) _FDs(58,d,a,s|2)
+# define _LWAUX(d,a,b) _FX(31,d,a,b,373)
+# define _LWAX(d,a,b) _FX(31,d,a,b,341)
+# define _LWZ(d,a,s) _FDs(32,d,a,s)
+# define _LWZU(d,a,s) _FDs(33,d,a,s)
+# define _LWZUX(d,a,b) _FX(31,d,a,b,55)
+# define _LWZX(d,a,b) _FX(31,d,a,b,23)
+# define _LD(d,a,s) _FDs(58,d,a,s)
+# define _LDX(d,a,b) _FX(31,d,a,b,21)
+# define _MCRF(d,s) _FXL(19,d<<2,(s)<<2,0)
+/* In case the instruction is emulated, check that the kernel can handle it.
+ It will only be generated if DEBUG is enabled.
+"""
+Chapter 6. Optional Facilities and Instructions that are being
+Phased Out of the Architecture
+...
+6.1 Move To Condition Register from XER
+The mcrxr instruction is being phased out of the archi-
+tecture. Its description is included here as an aid to
+constructing operating system code to emulate it.
+
+Move to Condition Register from XER
+X-form
+mcrxr BF
+31 BF // /// /// 512 /
+0 6 9 11 16 21 31
+CR(4xBF:4xBF+3) <- XER(32:35)
+XER(32:35) <- 0b0000
+The contents of XER(32:35) are copied to Condition Reg-
+ister field BF. XER(32:35) are set to zero.
+Special Registers Altered:
+CR field BF XER(32:35)
+
+Programming Note
+Warning: This instruction has been phased out of
+the architecture. Attempting to execute this
+instruction will cause the system illegal instruction
+error handler to be invoked
+"""
+ */
+static void mcrxr(jit_state_t*, int32_t);
+# define _MFCR(d) _FX(31,d,0,0,19)
+# define _MFMSR(d) _FX(31,d,0,0,83)
+# define _MFSPR(d,s) _FXFX(31,d,s<<5,339)
+# define _MFXER(d) _MFSPR(d,1)
+# define _MFLR(d) _MFSPR(d,8)
+# define _MFCTR(d) _MFSPR(d,9)
+# define _MFSR(d,s) _FX(31,d,s,0,595)
+# define _MFSRIN(d,b) _FX(31,d,0,b,659)
+# define _MFTB(d,x,y) _FXFX(31,d,(x)|((y)<<5),371)
+# define _MFTBL(d) _MFTB(d,8,12)
+# define _MFTBU(d) _MFTB(d,8,13)
+# define _MTCRF(c,s) _FXFX(31,s,c<<1,144)
+# define _MTCR(s) _MTCRF(0xff,s)
+# define _MTMSR(s) _FX(31,s,0,0,146)
+# define _MTSPR(d,s) _FXFX(31,d,s<<5,467)
+# define _MTXER(d) _MTSPR(d,1)
+# define _MTLR(d) _MTSPR(d,8)
+# define _MTCTR(d) _MTSPR(d,9)
+# define _MTSR(r,s) _FX(31,s<<1,r,0,210)
+# define _MTSRIN(r,b) _FX(31,r<<1,0,b,242)
+# define _MULLI(d,a,s) _FDs(07,d,a,s)
+# define _MULHW(d,a,b) _FXO(31,d,a,b,0,75)
+# define _MULHW_(d,a,b) _FXO_(31,d,a,b,0,75)
+# define _MULHWU(d,a,b) _FXO(31,d,a,b,0,11)
+# define _MULHWU_(d,a,b) _FXO_(31,d,a,b,0,11)
+# define _MULLW(d,a,b) _FXO(31,d,a,b,0,235)
+# define _MULLW_(d,a,b) _FXO_(31,d,a,b,0,235)
+# define _MULLWO(d,a,b) _FXO(31,d,a,b,1,235)
+# define _MULLWO_(d,a,b) _FXO_(31,d,a,b,1,235)
+# define _MULHD(d,a,b) _FXO(31,d,a,b,0,73)
+# define _MULHD_(d,a,b) _FXO_(31,d,a,b,0,73)
+# define _MULHDU(d,a,b) _FXO(31,d,a,b,0,9)
+# define _MULHDU_(d,a,b) _FXO_(31,d,a,b,0,9)
+# define _MULLD(d,a,b) _FXO(31,d,a,b,0,233)
+# define _MULLD_(d,a,b) _FXO_(31,d,a,b,0,233)
+# define _MULLDO(d,a,b) _FXO(31,d,a,b,1,233)
+# define _MULLDO_(d,a,b) _FXO_(31,d,a,b,1,233)
+# define _NAND(d,a,b) _FX(31,a,d,b,476)
+# define _NAND_(d,a,b) _FX_(31,a,d,b,476)
+# define _NEG(d,a) _FXO(31,d,a,0,0,104)
+# define _NEG_(d,a) _FXO_(31,d,a,0,0,104)
+# define _NEGO(d,a) _FXO(31,d,a,0,1,104)
+# define _NEGO_(d,a) _FXO_(31,d,a,0,1,104)
+# define _NOR(d,a,b) _FX(31,a,d,b,124)
+# define _NOR_(d,a,b) _FX_(31,a,d,b,124)
+# define _NOT(d,s) _NOR(d,s,s)
+# define _OR(d,a,b) _FX(31,a,d,b,444)
+# define _OR_(d,a,b) _FX_(31,a,d,b,444)
+# define _MR(d,a) _OR(d,a,a)
+# define _ORC(d,a,b) _FX(31,a,d,b,412)
+# define _ORC_(d,a,b) _FX_(31,a,d,b,412)
+# define _ORI(d,a,u) _FDu(24,a,d,u)
+# define _NOP() _ORI(0,0,0)
+# define _ORIS(d,a,u) _FDu(25,a,d,u)
+# define _RFI() _FXL(19,0,0,50)
+# define _RLWIMI(d,s,h,b,e) _FM(20,s,d,h,b,e,0)
+# define _RLWIMI_(d,s,h,b,e) _FM(20,s,d,h,b,e,1)
+# define _INSLWI(a,s,n,b) _RLWIMI(a,s,32-b,b,b+n-1)
+# define _INSRWI(a,s,n,b) _RLWIMI(a,s,32-(b+n),b,(b+n)-1)
+# define _RLWINM(a,s,h,b,e) _FM(21,s,a,h,b,e,0)
+# define _RLWINM_(a,s,h,b,e) _FM(21,s,a,h,b,e,1)
+# define _EXTLWI(a,s,n,b) _RLWINM(a,s,b,0,n-1)
+# define _EXTRWI(a,s,n,b) _RLWINM(a,s,b+n,32-n,31)
+# define _ROTLWI(a,s,n) _RLWINM(a,s,n,0,31)
+# define _ROTRWI(a,s,n) _RLWINM(a,s,32-n,0,31)
+# define _SLWI(a,s,n) _RLWINM(a,s,n,0,31-n)
+# define _SRWI(a,s,n) _RLWINM(a,s,32-n,n,31)
+# define _CLRLWI(a,s,n) _RLWINM(a,s,0,n,31)
+# define _CLRRWI(a,s,n) _RLWINM(a,s,0,0,31-n)
+# define _CLRLSWI(a,s,b,n) _RLWINM(a,s,n,b-n,31-n)
+# define _RLWNM(a,s,b,m,e) _FM(23,s,a,b,m,e,0)
+# define _RLWNM_(a,s,b,m,e) _FM(23,s,a,b,m,e,1)
+# define _ROTLW(a,s,b) _RLWNM(a,s,b,0,31)
+# define _SC() _FDu(17,0,0,2)
+# define _SLW(a,s,b) _FX(31,s,a,b,24)
+# define _SLW_(a,s,b) _FX_(31,s,a,b,24)
+# define _SRAW(a,s,b) _FX(31,s,a,b,792)
+# define _SRAW_(a,s,b) _FX_(31,s,a,b,792)
+# define _SRAWI(a,s,h) _FX(31,s,a,h,824)
+# define _SRAWI_(a,s,h) _FX_(31,s,a,h,824)
+# define _SRW(a,s,b) _FX(31,s,a,b,536)
+# define _SRW_(a,s,b) _FX_(31,s,a,b,536)
+# if __WORDSIZE == 64
+# define _RLDICL(a,s,h,b) _FMD(30,s,a,h&~32,b,0,h>>5)
+# define _RLDICL_(a,s,h,b) _FMD_(30,s,a,h&~32,b,0,h>>5)
+# define _EXTRDI(x,y,n,b) _RLDICL(x,y,(b+n),(64-n))
+# define _SRDI(x,y,n) _RLDICL(x,y,(64-n),n)
+# define _CLRLDI(x,y,n) _RLDICL(x,y,0,n)
+# define _RLDICR(a,s,h,e) _FMD(30,s,a,h&~32,e,1,h>>5)
+# define _RLDICR_(a,s,h,e) _FMD_(30,s,a,h&~32,e,1,h>>5)
+# define _EXTRLI(x,y,n,b) _RLDICR(x,y,b,(n-1))
+# define _SLDI(x,y,n) _RLDICR(x,y,n,(63-n))
+# define _CLRRDI(x,y,n) _RLDICR(x,y,0,(63-n))
+# define _RLDIC(a,s,h,b) _FMD(30,s,a,h&~32,b,2,h>>5)
+# define _RLDIC_(a,s,h,b) _FMD_(30,s,a,h&~32,b,2,h>>5)
+# define _CLRLSLDI(x,y,b,n) _RLDIC(x,y,n,(b-n))
+# define _RLDCL(a,s,h,b) _FMDS(30,s,a,h,b,8)
+# define _RLDCL_(a,s,h,b) _FMDS_(30,s,a,h,b,8)
+# define _ROTLD(x,y,z) _RLDCL(x,y,z,0)
+# define _RLDCR(a,s,b,e) _FMDS(30,s,a,b,e,0)
+# define _RLDCR_(a,s,b,e) _FMDS_(30,s,a,b,e,0)
+# define _RLDIMI(a,s,h,b) _FMD(30,s,a,h&~32,b,3,h>>5)
+# define _RLDIMI_(a,s,h,b) _FMD_(30,s,a,h&~32,b,3,h>>5)
+# define _INSRDI(x,y,n,b) _RLDIMI(x,y,(64-(b+n)),b)
+# define _SLD(a,s,b) _FX(31,s,a,b,27)
+# define _SLD_(a,s,b) _FX_(31,s,a,b,27)
+# define _SRD(a,s,b) _FX(31,s,a,b,539)
+# define _SRD_(a,s,b) _FX_(31,s,a,b,539)
+# define _SRADI(a,s,h) _FXS(31,s,a,h&~32,413,h>>5)
+# define _SRADI_(a,s,h) _FXS_(31,s,a,h&~32,413,h>>5)
+# define _SRAD(a,s,b) _FX(31,s,a,b,794)
+# define _SRAD_(a,s,b) _FX_(31,s,a,b,794)
+# endif
+# define _STB(s,a,d) _FDs(38,s,a,d)
+# define _STBU(s,a,d) _FDs(39,s,a,d)
+# define _STBUX(s,a,b) _FX(31,s,a,b,247)
+# define _STBX(s,a,b) _FX(31,s,a,b,215)
+# define _STH(s,a,d) _FDs(44,s,a,d)
+# define _STHBRX(s,a,b) _FX(31,s,a,b,918)
+# define _STHU(s,a,d) _FDs(45,s,a,d)
+# define _STHUX(s,a,b) _FX(31,s,a,b,439)
+# define _STHX(s,a,b) _FX(31,s,a,b,407)
+# define _STMW(s,a,d) _FDs(47,s,a,d)
+# define _STWSI(s,a,nb) _FX(31,s,a,nb,725)
+# define _STSWX(s,a,b) _FX(31,s,a,b,661)
+# define _STW(s,a,d) _FDs(36,s,a,d)
+# define _STWBRX(s,a,b) _FX(31,s,a,b,662)
+# define _STWCX_(s,a,b) _FX_(31,s,a,b,150)
+# define _STWU(s,a,d) _FDs(37,s,a,d)
+# define _STWUX(s,a,b) _FX(31,s,a,b,183)
+# define _STWX(s,a,b) _FX(31,s,a,b,151)
+# define _STD(s,a,d) _FDs(62,s,a,d)
+# define _STDX(s,a,b) _FX(31,s,a,b,149)
+# define _STDCX(s,a,b) _FX_(31,s,a,b,214)
+# define _STDU(s,a,d) _FDs(62,s,a,d|1)
+# define _STDUX(s,a,b) _FX(31,s,a,b,181)
+# define _SUBF(d,a,b) _FXO(31,d,a,b,0,40)
+# define _SUBF_(d,a,b) _FXO_(31,d,a,b,0,40)
+# define _SUBFO(d,a,b) _FXO(31,d,a,b,1,40)
+# define _SUBFO_(d,a,b) _FXO_(31,d,a,b,1,40)
+# define _SUB(d,a,b) _SUBF(d,b,a)
+# define _SUB_(d,a,b) _SUBF_(d,b,a)
+# define _SUBO(d,a,b) _SUBFO(d,b,a)
+# define _SUBO_(d,a,b) _SUBFO_(d,b,a)
+# define _SUBI(d,a,s) _ADDI(d,a,-s)
+# define _SUBIS(d,a,s) _ADDIS(d,a,-s)
+# define _SUBFC(d,a,b) _FXO(31,d,a,b,0,8)
+# define _SUBFC_(d,a,b) _FXO_(31,d,a,b,0,8)
+# define _SUBFCO(d,a,b) _FXO(31,d,a,b,1,8)
+# define _SUBFCO_(d,a,b) _FXO_(31,d,a,b,1,8)
+# define _SUBC(d,a,b) _SUBFC(d,b,a)
+# define _SUBIC(d,a,s) _ADDIC(d,a,-s)
+# define _SUBIC_(d,a,s) _ADDIC_(d,a,-s)
+# define _SUBFE(d,a,b) _FXO(31,d,a,b,0,136)
+# define _SUBFE_(d,a,b) _FXO_(31,d,a,b,0,136)
+# define _SUBFEO(d,a,b) _FXO(31,d,a,b,1,136)
+# define _SUBFEO_(d,a,b) _FXO_(31,d,a,b,1,136)
+# define _SUBE(d,a,b) _SUBFE(d,b,a)
+# define _SUBFIC(d,a,s) _FDs(8,d,a,s)
+# define _SUBFME(d,a) _FXO(31,d,a,0,0,232)
+# define _SUBFME_(d,a) _FXO_(31,d,a,0,0,232)
+# define _SUBFMEO(d,a) _FXO(31,d,a,0,1,232)
+# define _SUBFMEO_(d,a) _FXO_(31,d,a,0,1,232)
+# define _SUBFZE(d,a) _FXO(31,d,a,0,0,200)
+# define _SUBFZE_(d,a) _FXO_(31,d,a,0,0,200)
+# define _SUBFZEO(d,a) _FXO(31,d,a,0,1,200)
+# define _SUBFZEO_(d,a) _FXO_(31,d,a,0,1,200)
+# define _SYNC(l, sc) _FX(31,l,sc,0,598)
+# define _TLBIA() _FX(31,0,0,0,370)
+# define _TLBIE(b) _FX(31,0,0,b,306)
+# define _TLBSYNC() _FX(31,0,0,0,566)
+# define _TW(t,a,b) _FX(31,t,a,b,4)
+# define _TWEQ(a,b) _FX(31,4,a,b,4)
+# define _TWLGE(a,b) _FX(31,5,a,b,4)
+# define _TRAP() _FX(31,31,0,0,4)
+# define _TWI(t,a,s) _FDs(3,t,a,s)
+# define _TWGTI(a,s) _TWI(8,a,s)
+# define _TWLLEI(a,s) _TWI(6,a,s)
+# define _XOR(d,a,b) _FX(31,a,d,b,316)
+# define _XOR_(d,a,b) _FX_(31,a,d,b,316)
+# define _XORI(s,a,u) _FDu(26,a,s,u)
+# define _XORIS(s,a,u) _FDu(27,a,s,u)
+
+// Atomics
+# define _LDARX(rt, ra, rb) _FX(31, rt, ra, rb, 84)
+# define _HWSYNC() _SYNC(0, 0)
+
+/*
+ * Word-size neutral aliases: 64-bit builds map them onto the doubleword
+ * macros, 32-bit builds onto the word macros.  Every target named here
+ * must be a macro that this file actually defines (_LWZ and _STWCX_ for
+ * the 32-bit load and store-conditional).
+ */
+#if __WORDSIZE == 64
+# define _STX(r0, r1, o) _STD(r0, r1, o)
+# define _LXX(r0, r1, o) _LD(r0, r1, o)
+# define _CMPX(r0, r1) _CMPD(r0, r1)
+# define _CMPXI(r0, i0) _CMPDI(r0, i0)
+# define _CMPLX(r0, r1) _CMPLD(r0, r1)
+# define _CMPLXI(r0, u0) _CMPLDI(r0, u0)
+# define _LXARX(r0, r1) _LDARX(r0, 0, r1)
+# define _STXCX(r0, r1) _STDCX(r0, 0, r1)
+#else
+# define _STX(r0, r1, o) _STW(r0, r1, o)
+# define _LXX(r0, r1, o) _LWZ(r0, r1, o)
+# define _CMPX(r0, r1) _CMPW(r0, r1)
+# define _CMPXI(r0, i0) _CMPWI(r0, i0)
+# define _CMPLX(r0, r1) _CMPLW(r0, r1)
+# define _CMPLXI(r0, u0) _CMPLWI(r0, u0)
+# define _LXARX(r0, r1) _LWARX(r0, 0, r1)
+# define _STXCX(r0, r1) _STWCX_(r0, 0, r1)
+#endif
+
+static void nop(jit_state_t*,int32_t);
+static void movr(jit_state_t*,int32_t,int32_t);
+static void movi(jit_state_t*,int32_t,jit_word_t);
+static jit_reloc_t mov_addr(jit_state_t *,int32_t);
+static jit_reloc_t movi_from_immediate(jit_state_t*,int32_t);
+static void emit_immediate_reloc(jit_state_t*,int32_t,jit_bool_t);
+
+static void bswapr_us(jit_state_t*,int32_t,int32_t);
+static void bswapr_ui(jit_state_t*,int32_t,int32_t);
+static void bswapr_ul(jit_state_t*,int32_t,int32_t);
+
+static void addr(jit_state_t*,int32_t,int32_t,int32_t);
+static void addi(jit_state_t*,int32_t,int32_t,jit_word_t);
+
+static void addcr(jit_state_t*,int32_t,int32_t,int32_t);
+static void addci(jit_state_t*,int32_t,int32_t,jit_word_t);
+
+static void addxr(jit_state_t*,int32_t,int32_t,int32_t);
+static void addxi(jit_state_t*,int32_t,int32_t,jit_word_t);
+
+static void subr(jit_state_t*,int32_t,int32_t,int32_t);
+static void subi(jit_state_t*,int32_t,int32_t,jit_word_t);
+
+static void subcr(jit_state_t*,int32_t,int32_t,int32_t);
+static void subci(jit_state_t*,int32_t,int32_t,jit_word_t);
+
+static void subxr(jit_state_t*,int32_t,int32_t,int32_t);
+static void subxi(jit_state_t*,int32_t,int32_t,jit_word_t);
+
+/* Word-size dependent multiply encoders: the W forms on 32-bit builds,
+ * the D forms otherwise. */
+# if __WORDSIZE == 32
+# define _MULR(r0,r1,r2) _MULLW(r0,r1,r2)
+# define _MULLR(r0,r1,r2) _MULLW(r0,r1,r2)
+# define _MULHR(r0,r1,r2) _MULHW(r0,r1,r2)
+# define _MULHR_U(r0,r1,r2) _MULHWU(r0,r1,r2)
+# else
+# define _MULR(r0,r1,r2) _MULLD(r0,r1,r2)
+# define _MULLR(r0,r1,r2) _MULLD(r0,r1,r2)
+# define _MULHR(r0,r1,r2) _MULHD(r0,r1,r2)
+# define _MULHR_U(r0,r1,r2) _MULHDU(r0,r1,r2)
+# endif
+
+static void mulr(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2);
+static void muli(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0);
+
+/* Multiplies producing two results (r0 from _MULLR, r1 from _MULHR*). */
+static void qmulr(jit_state_t *_jit, int32_t r0, int32_t r1,
+                  int32_t r2, int32_t r3);
+static void qmulr_u(jit_state_t *_jit, int32_t r0, int32_t r1,
+                    int32_t r2, int32_t r3);
+static void iqmulr(jit_state_t *_jit, int32_t r0, int32_t r1,
+                   int32_t r2, int32_t r3, jit_bool_t sign);
+
+static void qmuli(jit_state_t *_jit, int32_t r0, int32_t r1,
+                  int32_t r2, jit_word_t i0);
+static void qmuli_u(jit_state_t *_jit, int32_t r0, int32_t r1,
+                    int32_t r2, jit_word_t i0);
+static void iqmuli(jit_state_t *_jit, int32_t r0, int32_t r1,
+                   int32_t r2, jit_word_t i0, jit_bool_t sign);
+/* Word-size dependent divide encoder (signed variant). */
+# if __WORDSIZE == 32
+# define _DIVR(r0,r1,r2) _DIVW(r0,r1,r2)
+# else
+# define _DIVR(r0,r1,r2) _DIVD(r0,r1,r2)
+# endif
+
+static void divr(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2);
+static void divi(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0);
+
+/* Word-size dependent divide encoder (unsigned variant). */
+# if __WORDSIZE == 32
+# define _DIVR_U(r0,r1,r2) _DIVWU(r0,r1,r2)
+# else
+# define _DIVR_U(r0,r1,r2) _DIVDU(r0,r1,r2)
+# endif
+
+static void divr_u(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2);
+static void divi_u(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0);
+
+/* Divisions producing both quotient (r0) and remainder (r1). */
+static void qdivr(jit_state_t *_jit, int32_t r0, int32_t r1,
+                  int32_t r2, int32_t r3);
+static void qdivr_u(jit_state_t *_jit, int32_t r0, int32_t r1,
+                    int32_t r2, int32_t r3);
+static void iqdivr(jit_state_t *_jit, int32_t r0, int32_t r1,
+                   int32_t r2, int32_t r3, jit_bool_t sign);
+
+static void qdivi(jit_state_t *_jit, int32_t r0, int32_t r1,
+                  int32_t r2, jit_word_t i0);
+static void qdivi_u(jit_state_t *_jit, int32_t r0, int32_t r1,
+                    int32_t r2, jit_word_t u0);
+static void iqdivi(jit_state_t *_jit, int32_t r0, int32_t r1,
+                   int32_t r2, jit_word_t i0, jit_bool_t sign);
+
+/* Remainders. */
+static void remr(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2);
+static void remi(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0);
+static void remr_u(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2);
+static void remi_u(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0);
+
+/* Bitwise logic. */
+static void andr(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2);
+static void andi(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0);
+
+static void orr(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2);
+static void ori(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0);
+
+static void xorr(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2);
+static void xori(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0);
+
+/* Left shift: word or doubleword encoder depending on the build. */
+# if __WORDSIZE == 32
+# define _LSHR(r0,r1,r2) _SLW(r0,r1,r2)
+# else
+# define _LSHR(r0,r1,r2) _SLD(r0,r1,r2)
+# endif
+
+static void lshr(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2);
+static void lshi(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0);
+
+/* Right shift built on the SRAW/SRAD encoders. */
+# if __WORDSIZE == 32
+# define _RSHR(r0,r1,r2) _SRAW(r0,r1,r2)
+# else
+# define _RSHR(r0,r1,r2) _SRAD(r0,r1,r2)
+# endif
+
+static void rshr(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2);
+static void rshi(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0);
+
+/* Right shift built on the SRW/SRD encoders. */
+# if __WORDSIZE == 32
+# define _RSHR_U(r0,r1,r2) _SRW(r0,r1,r2)
+# else
+# define _RSHR_U(r0,r1,r2) _SRD(r0,r1,r2)
+# endif
+
+static void rshr_u(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2);
+static void rshi_u(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0);
+
+/* Compare-and-branch emitters; each returns the relocation of the branch. */
+static jit_reloc_t bltr(jit_state_t *_jit, int32_t r0, int32_t r1);
+static jit_reloc_t blti(jit_state_t *_jit, int32_t r0, jit_word_t i1);
+static jit_reloc_t bltr_u(jit_state_t *_jit, int32_t r0, int32_t r1);
+static jit_reloc_t blti_u(jit_state_t *_jit, int32_t r0, jit_word_t i1);
+static jit_reloc_t bler(jit_state_t *_jit, int32_t r0, int32_t r1);
+static jit_reloc_t blei(jit_state_t *_jit, int32_t r0, jit_word_t i1);
+static jit_reloc_t bler_u(jit_state_t *_jit, int32_t r0, int32_t r1);
+static jit_reloc_t blei_u(jit_state_t *_jit, int32_t r0, jit_word_t i1);
+static jit_reloc_t beqr(jit_state_t *_jit, int32_t r0, int32_t r1);
+static jit_reloc_t beqi(jit_state_t *_jit, int32_t r0, jit_word_t i1);
+static jit_reloc_t bger(jit_state_t *_jit, int32_t r0, int32_t r1);
+static jit_reloc_t bgei(jit_state_t *_jit, int32_t r0, jit_word_t i1);
+static jit_reloc_t bger_u(jit_state_t *_jit, int32_t r0, int32_t r1);
+static jit_reloc_t bgei_u(jit_state_t *_jit, int32_t r0, jit_word_t i1);
+static jit_reloc_t bgtr(jit_state_t *_jit, int32_t r0, int32_t r1);
+static jit_reloc_t bgti(jit_state_t *_jit, int32_t r0, jit_word_t i1);
+static jit_reloc_t bgtr_u(jit_state_t *_jit, int32_t r0, int32_t r1);
+static jit_reloc_t bgti_u(jit_state_t *_jit, int32_t r0, jit_word_t i1);
+static jit_reloc_t bner(jit_state_t *_jit, int32_t r0, int32_t r1);
+static jit_reloc_t bnei(jit_state_t *_jit, int32_t r0, jit_word_t i1);
+
+/* Mask-test branches. */
+static jit_reloc_t bmsr(jit_state_t *_jit, int32_t r0, int32_t r1);
+static jit_reloc_t bmsi(jit_state_t *_jit, int32_t r0, jit_word_t i1);
+static jit_reloc_t bmcr(jit_state_t *_jit, int32_t r0, int32_t r1);
+static jit_reloc_t bmci(jit_state_t *_jit, int32_t r0, jit_word_t i1);
+
+/* Arithmetic-and-branch emitters. */
+static jit_reloc_t boaddr(jit_state_t *_jit, int32_t r0, int32_t r1);
+static jit_reloc_t boaddi(jit_state_t *_jit, int32_t r0, jit_word_t i1);
+static jit_reloc_t bxaddr(jit_state_t *_jit, int32_t r0, int32_t r1);
+static jit_reloc_t bxaddi(jit_state_t *_jit, int32_t r0, jit_word_t i1);
+static jit_reloc_t bosubr(jit_state_t *_jit, int32_t r0, int32_t r1);
+static jit_reloc_t bosubi(jit_state_t *_jit, int32_t r0, jit_word_t i1);
+static jit_reloc_t bxsubr(jit_state_t *_jit, int32_t r0, int32_t r1);
+static jit_reloc_t bxsubi(jit_state_t *_jit, int32_t r0, jit_word_t i1);
+static jit_reloc_t boaddr_u(jit_state_t *_jit, int32_t r0, int32_t r1);
+static jit_reloc_t boaddi_u(jit_state_t *_jit, int32_t r0, jit_word_t i1);
+static jit_reloc_t bxaddr_u(jit_state_t *_jit, int32_t r0, int32_t r1);
+static jit_reloc_t bxaddi_u(jit_state_t *_jit, int32_t r0, jit_word_t i1);
+static jit_reloc_t bosubr_u(jit_state_t *_jit, int32_t r0, int32_t r1);
+static jit_reloc_t bosubi_u(jit_state_t *_jit, int32_t r0, jit_word_t i1);
+static jit_reloc_t bxsubr_u(jit_state_t *_jit, int32_t r0, int32_t r1);
+static jit_reloc_t bxsubi_u(jit_state_t *_jit, int32_t r0, jit_word_t i1);
+
+/* Loads: r = register address, i = immediate address, x = indexed. */
+static void ldr_c(jit_state_t *_jit, int32_t r0, int32_t r1);
+static void ldi_c(jit_state_t *_jit, int32_t r0, jit_word_t i0);
+static void ldxr_c(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2);
+static void ldxi_c(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0);
+static void ldr_uc(jit_state_t *_jit, int32_t r0, int32_t r1);
+static void ldi_uc(jit_state_t *_jit, int32_t r0, jit_word_t i0);
+static void ldxr_uc(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2);
+static void ldxi_uc(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0);
+static void ldr_s(jit_state_t *_jit, int32_t r0, int32_t r1);
+static void ldi_s(jit_state_t *_jit, int32_t r0, jit_word_t i0);
+static void ldxr_s(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2);
+static void ldxi_s(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0);
+static void ldr_us(jit_state_t *_jit, int32_t r0, int32_t r1);
+static void ldi_us(jit_state_t *_jit, int32_t r0, jit_word_t i0);
+static void ldxr_us(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2);
+static void ldxi_us(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0);
+
+/* Int load encoder: LWZX on 32-bit builds, LWAX otherwise. */
+# if __WORDSIZE == 32
+# define _LDR_I(r0,r1) _LWZX(r0, rn(_R0), r1)
+# else
+# define _LDR_I(r0,r1) _LWAX(r0, rn(_R0), r1)
+# endif
+
+static void ldr_i(jit_state_t *_jit, int32_t r0, int32_t r1);
+static void ldi_i(jit_state_t *_jit, int32_t r0, jit_word_t i0);
+static void ldxr_i(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2);
+static void ldxi_i(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0);
+
+/* Loads that only exist on 64-bit builds. */
+# if __WORDSIZE == 64
+static void ldr_ui(jit_state_t *_jit, int32_t r0, int32_t r1);
+static void ldi_ui(jit_state_t *_jit, int32_t r0, jit_word_t i0);
+static void ldxr_ui(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2);
+static void ldxi_ui(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0);
+static void ldr_l(jit_state_t *_jit, int32_t r0, int32_t r1);
+static void ldi_l(jit_state_t *_jit, int32_t r0, jit_word_t i0);
+static void ldxr_l(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2);
+static void ldxi_l(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0);
+# endif
+
+/* Stores: r = register address, i = immediate address, x = indexed. */
+static void str_c(jit_state_t *_jit, int32_t r0, int32_t r1);
+static void sti_c(jit_state_t *_jit, jit_word_t i0, int32_t r0);
+static void stxr_c(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2);
+static void stxi_c(jit_state_t *_jit, jit_word_t i0, int32_t r0, int32_t r1);
+static void str_s(jit_state_t *_jit, int32_t r0, int32_t r1);
+static void sti_s(jit_state_t *_jit, jit_word_t i0, int32_t r0);
+static void stxr_s(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2);
+static void stxi_s(jit_state_t *_jit, jit_word_t i0, int32_t r0, int32_t r1);
+static void str_i(jit_state_t *_jit, int32_t r0, int32_t r1);
+static void sti_i(jit_state_t *_jit, jit_word_t i0, int32_t r0);
+static void stxr_i(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2);
+static void stxi_i(jit_state_t *_jit, jit_word_t i0, int32_t r0, int32_t r1);
+
+/* Stores that only exist on 64-bit builds. */
+# if __WORDSIZE == 64
+static void str_l(jit_state_t *_jit, int32_t r0, int32_t r1);
+static void sti_l(jit_state_t *_jit, jit_word_t i0, int32_t r0);
+static void stxr_l(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2);
+static void stxi_l(jit_state_t *_jit, jit_word_t i0, int32_t r0, int32_t r1);
+# endif
+
+/* Jumps and calls. */
+static jit_reloc_t jmp(jit_state_t *_jit);
+static void jmpr(jit_state_t *_jit, int32_t r0);
+static void jmpi(jit_state_t *_jit, jit_word_t i0);
+static void callr(jit_state_t *_jit, int32_t r0);
+static void calli(jit_state_t *_jit, jit_word_t i0);
+
+static void push_link_register(jit_state_t *_jit);
+static void pop_link_register(jit_state_t *_jit);
+
+/* Keep only the low 16 / low 26 bits of v. */
+# define _u16(v) ((v) & 0xffff)
+# define _u26(v) ((v) & 0x3ffffff)
+/* Build an XO-form instruction word.  Each operand is asserted to fit its
+ * field: 6-bit primary opcode o, 5-bit d/a/b, 1-bit e, 9-bit extended
+ * opcode x and 1-bit r. */
+static uint32_t
+FXO(int o, int d, int a, int b, int e, int x, int r)
+{
+  assert((o & ~0x3f) == 0);
+  assert((d & ~0x1f) == 0);
+  assert((a & ~0x1f) == 0);
+  assert((b & ~0x1f) == 0);
+  assert((e & ~0x01) == 0);
+  assert((x & ~0x1ff) == 0);
+  assert((r & ~0x01) == 0);
+
+  instr_t ins = {
+    .XO = {
+      .po = o, .rt = d, .ra = a, .rb = b,
+      .u0 = e, .xo = x, .u1 = r,
+    },
+  };
+  return ins.w;
+}
+
+/* Build a D-form instruction word whose 16-bit immediate s must pass
+ * can_sign_extend_short_p(). */
+static uint32_t
+FDs(int o, int d, int a, int s)
+{
+  assert((o & ~0x3f) == 0);
+  assert((d & ~0x1f) == 0);
+  assert((a & ~0x1f) == 0);
+  assert(can_sign_extend_short_p(s));
+
+  instr_t ins = {
+    .D = { .po = o, .rx = d, .ra = a, .d = _u16(s) },
+  };
+  return ins.w;
+}
+
+/* Build a D-form instruction word whose 16-bit immediate s must pass
+ * can_zero_extend_short_p(). */
+static uint32_t
+FDu(int o, int d, int a, int s)
+{
+  assert((o & ~0x3f) == 0);
+  assert((d & ~0x1f) == 0);
+  assert((a & ~0x1f) == 0);
+  assert(can_zero_extend_short_p(s));
+
+  instr_t ins = {
+    .D = { .po = o, .rx = d, .ra = a, .d = _u16(s) },
+  };
+  return ins.w;
+}
+
+/* Build an X-form instruction word: 6-bit opcode o, 5-bit s/a/b, 10-bit
+ * extended opcode x and a trailing 1-bit field r. */
+static uint32_t
+FX(int o, int s, int a, int b, int x, int r)
+{
+  assert((o & ~0x3f) == 0);
+  assert((s & ~0x1f) == 0);
+  assert((a & ~0x1f) == 0);
+  assert((b & ~0x1f) == 0);
+  assert((x & ~0x3ff) == 0);
+  assert((r & ~0x01) == 0);
+
+  instr_t ins = {
+    .X = { .po = o, .f0 = s, .ra = a, .rb = b, .xo = x, .u0 = r },
+  };
+  return ins.w;
+}
+
+/* Build an I-form branch word.  The byte displacement t must be a multiple
+ * of four and pass can_sign_extend_jump_p(); it is stored divided by four
+ * in the LI field.  a and k are the one-bit AA and LK fields. */
+static uint32_t
+FI(int o, int t, int a, int k)
+{
+  assert((o & ~0x3f) == 0);
+  assert((t & 3) == 0 && can_sign_extend_jump_p(t));
+  assert((a & ~0x01) == 0);
+  assert((k & ~0x01) == 0);
+
+  instr_t ins = {
+    .I = { .po = o, .li = _u26(t) >> 2, .aa = a, .lk = k },
+  };
+  return ins.w;
+}
+
+/* Build a B-form conditional branch word.  The byte displacement t must be
+ * a multiple of four and pass can_sign_extend_short_p(); it is stored
+ * divided by four in the BD field. */
+static uint32_t
+FB(int o, int bo, int bi, int t, int a, int k)
+{
+  assert((o & ~0x3f) == 0);
+  assert((bo & ~0x1f) == 0);
+  assert((bi & ~0x1f) == 0);
+  assert((t & 3) == 0 && can_sign_extend_short_p(t));
+  assert((a & ~0x01) == 0);
+  assert((k & ~0x01) == 0);
+
+  instr_t ins = {
+    .B = {
+      .po = o, .bo = bo, .bi = bi,
+      .bd = _u16(t) >> 2, .aa = a, .lk = k,
+    },
+  };
+  return ins.w;
+}
+
+/* Build an XL-form word; the bb field is always written as zero. */
+static uint32_t
+FXL(int o, int bo, int bi, int x, int k)
+{
+  assert((o & ~0x3f) == 0);
+  assert((bo & ~0x1f) == 0);
+  assert((bi & ~0x1f) == 0);
+  assert((x & ~0x3ff) == 0);
+  assert((k & ~0x01) == 0);
+
+  instr_t ins = {
+    .XL = {
+      .po = o, .bo = bo, .ba = bi, .bb = 0,
+      .xo = x, .lk = k,
+    },
+  };
+  return ins.w;
+}
+
+/* Build a register-register compare word (a variation on the X format).
+ *
+ * o: 6-bit primary opcode, d: 3-bit condition-register field, l: 1-bit
+ * operand-size flag, a/b: 5-bit registers, x: 10-bit extended opcode.
+ *
+ * The 5-bit slot that FX() calls f0 holds d in its top three bits, one
+ * unused bit, and l in its lowest bit, hence d << 2 | l -- the same packing
+ * FCI() uses.  Shifting d by 3 would push its top bit out of the field for
+ * any d above 3 and leave the unused bit set for odd d. */
+static uint32_t
+FC(int o, int d, int l, int a, int b, int x)
+{
+  assert(!(o & ~((1 << 6) - 1)));
+  assert(!(d & ~((1 << 3) - 1)));
+  assert(!(l & ~((1 << 1) - 1)));
+  assert(!(a & ~((1 << 5) - 1)));
+  assert(!(b & ~((1 << 5) - 1)));
+  assert(!(x & ~((1 << 10) - 1)));
+  instr_t ins = {.X = {.po = o, .f0 = d << 2 | l, .ra = a, .rb = b, .xo = x, .u0 = 0}};
+  return ins.w;
+}
+
+/* Build a compare-immediate word (D-form).  Opcode 11 requires an
+ * immediate accepted by can_sign_extend_short_p(), opcode 10 one accepted
+ * by can_zero_extend_short_p(); any other opcode aborts in DEBUG builds. */
+static uint32_t
+FCI(int o, int d, int l, int a, int s)
+{
+  assert((o & ~0x3f) == 0);
+  assert((d & ~0x07) == 0);
+  assert((l & ~0x01) == 0);
+  assert((a & ~0x1f) == 0);
+
+  switch (o) {
+  case 11:
+    assert(can_sign_extend_short_p(s));
+    break;
+  case 10:
+    assert(can_zero_extend_short_p(s));
+    break;
+  default:
+#if DEBUG
+    abort();
+#endif
+    break;
+  }
+
+  instr_t ins = {
+    .D = { .po = o, .rx = d << 2 | l, .ra = a, .d = _u16(s) },
+  };
+  return ins.w;
+}
+
+/* Build an XFX-form word: 6-bit opcode o, 5-bit register d, 10-bit field x
+ * and 10-bit extended opcode f; the final bit is written as zero. */
+static uint32_t
+FXFX(int o, int d, int x, int f)
+{
+  assert((o & ~0x3f) == 0);
+  assert((d & ~0x1f) == 0);
+  assert((x & ~0x3ff) == 0);
+  assert((f & ~0x3ff) == 0);
+
+  instr_t ins = {
+    .XFX = { .po = o, .rs = d, .fx = x, .xo = f, .u0 = 0 },
+  };
+  return ins.w;
+}
+
+/* Build an M-form word: 6-bit opcode o, 5-bit s/a/h/b/e fields and a
+ * 1-bit r flag. */
+static uint32_t
+FM(int o, int s, int a, int h, int b, int e, int r)
+{
+  assert((o & ~0x3f) == 0);
+  assert((s & ~0x1f) == 0);
+  assert((a & ~0x1f) == 0);
+  assert((h & ~0x1f) == 0);
+  assert((b & ~0x1f) == 0);
+  assert((e & ~0x1f) == 0);
+  assert((r & ~0x01) == 0);
+
+  instr_t ins = {
+    .M = {
+      .po = o, .rs = s, .ra = a, .rb = h,
+      .mb = b, .me = e, .rc = r,
+    },
+  };
+  return ins.w;
+}
+
+# if __WORDSIZE == 64
+/* Build an MD-form word.  The 6-bit value e is stored rotated left by one
+ * bit (its top bit ends up in the lowest position of the mx field). */
+static uint32_t
+FMD(int o, int s, int a, int h, int e, int x, int i, int r)
+{
+  assert((o & ~0x3f) == 0);
+  assert((s & ~0x1f) == 0);
+  assert((a & ~0x1f) == 0);
+  assert((h & ~0x1f) == 0);
+  assert((e & ~0x3f) == 0);
+  assert((x & ~0x07) == 0);
+  assert((i & ~0x01) == 0);
+  assert((r & ~0x01) == 0);
+
+  int rotated = (e >> 5) | ((e << 1) & 63);
+  instr_t ins = {
+    .MD = {
+      .po = o, .rs = s, .ra = a, .s0 = h,
+      .mx = rotated, .xo = x, .s1 = i, .rc = r,
+    },
+  };
+  return ins.w;
+}
+
+/* Build an XS-form word: 6-bit opcode o, 5-bit s/a/h, 9-bit extended
+ * opcode x and the 1-bit i and r fields. */
+static uint32_t
+FXS(int o, int s, int a, int h, int x, int i, int r)
+{
+  assert((o & ~0x3f) == 0);
+  assert((s & ~0x1f) == 0);
+  assert((a & ~0x1f) == 0);
+  assert((h & ~0x1f) == 0);
+  assert((x & ~0x1ff) == 0);
+  assert((i & ~0x01) == 0);
+  assert((r & ~0x01) == 0);
+
+  instr_t ins = {
+    .XS = {
+      .po = o, .rs = s, .ra = a, .s0 = h,
+      .xo = x, .s1 = i, .rc = r,
+    },
+  };
+  return ins.w;
+}
+#endif
+
+/*
+ * Emit the equivalent of an mcrxr.  DEBUG builds emit the instruction
+ * itself; other builds use the four-instruction replacement described at
+ * http://tenfourfox.blogspot.com/2011/04/attention-g5-owners-your-javascript-no.html
+ * which goes through a temporary register and does not consult cr.
+ */
+static void
+mcrxr(jit_state_t *_jit, int32_t cr)
+{
+#if DEBUG
+  em_wp(_jit, _FX(31, cr << 2, 0, 0, 512));
+#else
+  int32_t scratch = rn(get_temp_gpr(_jit));
+
+  em_wp(_jit, _MFXER(scratch));
+  em_wp(_jit, _MTCRF(128, scratch));
+  em_wp(_jit, _RLWINM(scratch, scratch, 0, 0, 28));
+  em_wp(_jit, _MTXER(scratch));
+  unget_temp_gpr(_jit);
+#endif
+}
+
+/* Return the raw LI field of the I-form instruction stored at loc.
+ * FIXME: does the field need converting from unsigned to signed? */
+static int32_t
+read_jmp_offset(uint32_t *loc)
+{
+  return ((instr_t *)loc)->I.li;
+}
+
+/* Return the raw BD field of the B-form instruction stored at loc. */
+static int32_t
+read_jcc_offset(uint32_t *loc)
+{
+  return ((instr_t *)loc)->B.bd;
+}
+
+/* Store offset into the LI field of the I-form instruction at loc.  The
+ * offset must satisfy simm24_p(). */
+static void
+patch_jmp_offset(uint32_t *loc, ptrdiff_t offset)
+{
+  instr_t *ins = (instr_t *)loc;
+
+  assert(simm24_p(offset));
+  ins->I.li = offset;
+}
+
+/* Patching a jump so it reaches a veneer is the same as patching any
+ * other unconditional jump: delegate to patch_jmp_offset(). */
+static void
+patch_veneer_jmp_offset(uint32_t *loc, ptrdiff_t offset)
+{
+  patch_jmp_offset(loc, offset);
+  return;
+}
+
+/* Point the veneer at loc to addr by rewriting the immediate-load
+ * sequence it starts with. */
+static void
+patch_veneer(uint32_t *loc, jit_pointer_t addr)
+{
+  patch_immediate_reloc(loc, addr);
+  return;
+}
+
+/* Emit a veneer: a patchable immediate load of target into a temporary
+ * register followed by MTCTR/BCTR. */
+static void
+emit_veneer(jit_state_t *_jit, jit_pointer_t target)
+{
+  jit_pointer_t start = jit_address(_jit);
+  jit_gpr_t tmp = get_temp_gpr(_jit);
+
+  emit_immediate_reloc(_jit, rn(tmp), 1);
+
+  // see mips-cpu.c:emit_veneer()
+  if (!jit_has_overflow(_jit)) {
+    patch_veneer(start, target);
+  }
+
+  emit_u32(_jit, _MTCTR(rn(tmp)));
+  emit_u32(_jit, _BCTR());
+  unget_temp_gpr(_jit);
+}
+
+/* Patching a conditional branch so it reaches a veneer is the same as
+ * patching any other conditional branch: delegate to patch_jcc_offset(). */
+static void
+patch_veneer_jcc_offset(uint32_t *loc, ptrdiff_t offset)
+{
+  patch_jcc_offset(loc, offset);
+  return;
+}
+
+/* Report whether offset satisfies simm24_p(), the check patch_jmp_offset()
+ * asserts.  flags is ignored. */
+static int
+offset_in_jmp_range(ptrdiff_t offset, int flags)
+{
+  int fits = simm24_p(offset);
+
+  (void)flags;
+  return fits;
+}
+
+/* Report whether offset satisfies simm14_p(), the check patch_jcc_offset()
+ * asserts.  flags is ignored. */
+static int
+offset_in_jcc_range(ptrdiff_t offset, int flags)
+{
+  int fits = simm14_p(offset);
+
+  (void)flags;
+  return fits;
+}
+
+/* Store offset into the BD field of the B-form instruction at loc.  The
+ * offset must satisfy simm14_p(). */
+static void
+patch_jcc_offset(uint32_t *loc, ptrdiff_t offset)
+{
+  instr_t *ins = (instr_t *)loc;
+
+  assert(simm14_p(offset));
+  ins->B.bd = offset;
+}
+
+/* Return a copy of the B-form word inst with its BD field set to offset. */
+static uint32_t
+patch_cc_jump(uint32_t inst, int32_t offset)
+{
+  instr_t word;
+
+  word.w = inst;
+  word.B.bd = offset;
+  return word.w;
+}
+
+/* Emit the conditional branch inst and return a JCC_WITH_VENEER relocation
+ * for it.  The relocation is registered as a pending literal first; the
+ * attempt is repeated until add_pending_literal() accepts it. */
+static jit_reloc_t
+emit_cc_jump(jit_state_t *_jit, uint32_t inst)
+{
+  for (;;) {
+    uint8_t *base = _jit->pc.uc;
+    int32_t delta = ((uint8_t *)jit_address(_jit)) - base;
+    jit_reloc_t reloc = jit_reloc(_jit, JIT_RELOC_JCC_WITH_VENEER, 0,
+                                  _jit->pc.uc, base, 2);
+    const uint8_t width = 14;
+
+    if (!add_pending_literal(_jit, reloc, width - 1)) {
+      continue;
+    }
+
+    em_wp(_jit, patch_cc_jump(inst, delta >> 2));
+    return reloc;
+  }
+}
+
+/* Same as emit_cc_jump() except that the branch word is written with
+ * emit_u32() instead of em_wp(). */
+static jit_reloc_t
+emit_atomic_jump(jit_state_t *_jit, uint32_t inst)
+{
+  for (;;) {
+    uint8_t *base = _jit->pc.uc;
+    int32_t delta = ((uint8_t *)jit_address(_jit)) - base;
+    jit_reloc_t reloc = jit_reloc(_jit, JIT_RELOC_JCC_WITH_VENEER, 0,
+                                  _jit->pc.uc, base, 2);
+    const uint8_t width = 14;
+
+    // TODO is JCC_WITH_VENEER fine here?
+    if (!add_pending_literal(_jit, reloc, width - 1)) {
+      continue;
+    }
+
+    emit_u32(_jit, patch_cc_jump(inst, delta >> 2));
+    return reloc;
+  }
+}
+
+/* Return a copy of the I-form word inst with its LI field set to offset. */
+static uint32_t
+patch_jump(uint32_t inst, int32_t offset)
+{
+  instr_t word;
+
+  word.w = inst;
+  word.I.li = offset;
+  return word.w;
+}
+
+/* Emit the unconditional branch inst and return a JMP_WITH_VENEER
+ * relocation for it.  The relocation is registered as a pending literal
+ * first; the attempt is repeated until add_pending_literal() accepts it. */
+static jit_reloc_t
+emit_jump(jit_state_t *_jit, uint32_t inst)
+{
+  for (;;) {
+    uint8_t *base = _jit->pc.uc;
+    int32_t delta = ((uint8_t *)jit_address(_jit)) - base;
+    jit_reloc_t reloc = jit_reloc(_jit, JIT_RELOC_JMP_WITH_VENEER, 0,
+                                  _jit->pc.uc, base, 2);
+    const uint8_t width = 24;
+
+    if (!add_pending_literal(_jit, reloc, width - 1)) {
+      continue;
+    }
+
+    em_wp(_jit, patch_jump(inst, delta >> 2));
+    return reloc;
+  }
+}
+
+/* Emit one NOP per four bytes of i0; i0 is asserted to be a multiple of
+ * four once the loop has finished. */
+static void
+nop(jit_state_t *_jit, int32_t i0)
+{
+  int32_t left = i0;
+
+  while (left > 0) {
+    em_wp(_jit, _NOP());
+    left -= 4;
+  }
+  assert(left == 0);
+}
+
+/* Copy r1 into r0; nothing is emitted when both name the same register. */
+static void
+movr(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  if (r0 == r1) {
+    return;
+  }
+  em_wp(_jit, _MR(r0, r1));
+}
+
+/* Load the constant i0 into r0 using the shortest sequence available:
+ *   - LI when i0 passes can_sign_extend_short_p();
+ *   - LIS (+ ORI) when it passes can_sign_extend_int_p();
+ *   - LI of the upper half + SLWI (+ ORI) when it only passes
+ *     can_zero_extend_int_p();
+ *   - on 64-bit builds, a recursive load of the upper word followed by
+ *     shifts and ORIs for everything else.
+ * The final ORI supplies the low 16 bits when they are non-zero. */
+static void
+movi(jit_state_t *_jit, int32_t r0, jit_word_t i0)
+{
+  if (can_sign_extend_short_p(i0)) {
+    em_wp(_jit, _LI(r0, i0));
+    return;
+  }
+
+  if (can_sign_extend_int_p(i0)) {
+    em_wp(_jit, _LIS(r0, (int16_t)(i0 >> 16)));
+  } else if (can_zero_extend_int_p(i0)) {
+    if (i0 & 0xffff0000) {
+      /* r0 has not been written on this path, so the upper half must be
+       * loaded rather than OR-ed into whatever r0 held before.  The
+       * following SLWI moves the low 16 bits into the upper half of the
+       * word, discarding the sign extension LI introduced. */
+      em_wp(_jit, _LI(r0, (int16_t)(i0 >> 16)));
+      em_wp(_jit, _SLWI(r0, r0, 16));
+    }
+  }
+# if __WORDSIZE == 64
+  else {
+    movi(_jit, r0, (uint32_t)(i0 >> 32));
+    if (i0 & 0xffff0000) {
+      em_wp(_jit, _SLDI(r0, r0, 16));
+      em_wp(_jit, _ORI(r0, r0, (uint16_t)(i0 >> 16)));
+      em_wp(_jit, _SLDI(r0, r0, 16));
+    }
+    else
+      em_wp(_jit, _SLDI(r0, r0, 32));
+  }
+# endif
+  if (i0 & 0xffff)
+    em_wp(_jit, _ORI(r0, r0, (uint16_t)i0));
+}
+
+/* Overlay for the instruction sequence written by emit_immediate_reloc().
+ * The members appear in the order the instructions are emitted, so that
+ * patch_immediate_reloc() can rewrite the 16-bit immediates in place. */
+typedef struct {
+#if __WORDSIZE == 64
+ instr_t lis0;
+ instr_t ori0;
+ instr_t sldi0;
+
+ instr_t ori1;
+ instr_t sldi1;
+ instr_t ori2;
+#else
+ instr_t lis;
+ instr_t ori;
+#endif
+} immediate_t;
+
+/* Rewrite the immediates of the load sequence at loc so that it produces
+ * addr.  Each D.d field receives one 16-bit slice of the address, most
+ * significant slice first. */
+static void
+patch_immediate_reloc(uint32_t *loc, jit_pointer_t addr)
+{
+  immediate_t *seq = (immediate_t *)loc;
+  jit_word_t value = (jit_word_t)addr;
+
+#if __WORDSIZE == 64
+  seq->lis0.D.d = value >> 48;
+  seq->ori0.D.d = value >> 32;
+  seq->ori1.D.d = value >> 16;
+  seq->ori2.D.d = value & 0xffff;
+#else
+  seq->lis.D.d = value >> 16;
+  seq->ori.D.d = value & 0xffff;
+#endif
+}
+
+/* Emit a fixed-length immediate load into r0 with every immediate set to
+ * zero, to be filled in later by patch_immediate_reloc().  Inside a veneer
+ * the words are written with emit_u32(), otherwise with
+ * emit_u32_with_pool(). */
+static void
+emit_immediate_reloc(jit_state_t *_jit, int32_t r0, jit_bool_t in_veneer)
+{
+  void (*put)(jit_state_t * _jit, uint32_t u32) = emit_u32_with_pool;
+
+  if (in_veneer) {
+    put = emit_u32;
+  }
+
+  put(_jit, _LIS(r0, 0));
+  put(_jit, _ORI(r0, r0, 0));
+# if __WORDSIZE == 64
+  put(_jit, _SLDI(r0, r0, 16));
+  put(_jit, _ORI(r0, r0, 0));
+  put(_jit, _SLDI(r0, r0, 16));
+  put(_jit, _ORI(r0, r0, 0));
+# endif
+}
+
+/* Emit a patchable immediate load into r0 and return the
+ * JIT_RELOC_IMMEDIATE relocation created at its first instruction. */
+static jit_reloc_t
+movi_from_immediate(jit_state_t *_jit, int32_t r0)
+{
+  uint8_t *base = _jit->pc.uc;
+  jit_reloc_t reloc =
+    jit_reloc(_jit, JIT_RELOC_IMMEDIATE, 0, _jit->pc.uc, base, 0);
+
+  emit_immediate_reloc(_jit, r0, 0);
+  return reloc;
+}
+
+/* Load a to-be-patched address into r0; thin wrapper around
+ * movi_from_immediate(). */
+static jit_reloc_t
+mov_addr(jit_state_t *_jit, int32_t r0)
+{
+  jit_reloc_t reloc = movi_from_immediate(_jit, r0);
+  return reloc;
+}
+
+/* Emit a single NEG of r1 into r0. */
+static void
+negr(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  uint32_t insn = _NEG(r0, r1);
+  em_wp(_jit, insn);
+}
+
+/* Emit a single NOT of r1 into r0. */
+static void
+comr(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  uint32_t insn = _NOT(r0, r1);
+  em_wp(_jit, insn);
+}
+
+/* Emit a single EXTSB of r1 into r0. */
+static void
+extr_c(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  uint32_t insn = _EXTSB(r0, r1);
+  em_wp(_jit, insn);
+}
+
+/* r0 = r1 masked to its low 8 bits, via ANDI. with 0xff. */
+static void
+extr_uc(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  uint32_t insn = _ANDI_(r0, r1, 0xff);
+  em_wp(_jit, insn);
+}
+
+/* Emit a single EXTSH of r1 into r0. */
+static void
+extr_s(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  uint32_t insn = _EXTSH(r0, r1);
+  em_wp(_jit, insn);
+}
+
+/* r0 = r1 masked to its low 16 bits, via ANDI. with 0xffff. */
+static void
+extr_us(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  uint32_t insn = _ANDI_(r0, r1, 0xffff);
+  em_wp(_jit, insn);
+}
+
+/* Swap the two low bytes of r1 into r0.  The second byte is extracted into
+ * a temporary before r0 is written, so r0 and r1 may be the same
+ * register. */
+static void
+bswapr_us(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  jit_gpr_t tmp = get_temp_gpr(_jit);
+  int32_t hi = rn(tmp);
+
+  rshi(_jit, hi, r1, 8);        /* hi = r1 >> 8          */
+  andi(_jit, r0, r1, 0xff);     /* r0 = low byte of r1   */
+  andi(_jit, hi, hi, 0xff);     /* hi = second byte      */
+  lshi(_jit, r0, r0, 8);        /* low byte moves up     */
+  orr(_jit, r0, r0, hi);
+  unget_temp_gpr(_jit);
+}
+
+/* Byte-swap the low 32 bits of r1 into r0.
+ *
+ * The swapped word is assembled in a temporary with one rotate and two
+ * rotate-and-insert instructions.  On 64-bit builds the result is then
+ * copied to r0 with CLRLDI, which also clears the upper 32 bits.  CLRLDI is
+ * an MD-form instruction whose encoder (FMD) only exists when
+ * __WORDSIZE == 64, so 32-bit builds use a plain register move instead. */
+static void
+bswapr_ui(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  jit_gpr_t reg = get_temp_gpr(_jit);
+  em_wp(_jit, _ROTLWI(rn(reg), r1, 8));
+  em_wp(_jit, _RLWIMI(rn(reg), r1, 24, 0, 7));
+  em_wp(_jit, _RLWIMI(rn(reg), r1, 24, 16, 23));
+#if __WORDSIZE == 64
+  em_wp(_jit, _CLRLDI(r0, rn(reg), 32));
+#else
+  movr(_jit, r0, rn(reg));
+#endif
+  unget_temp_gpr(_jit);
+}
+
+#if __WORDSIZE == 64
+/* Byte-swap all 64 bits of r1 into r0: the upper word is copied to a
+ * temporary first, both halves are swapped with bswapr_ui(), and the
+ * swapped lower half is shifted up and combined with the swapped upper
+ * half. */
+static void
+bswapr_ul(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  jit_gpr_t tmp = get_temp_gpr(_jit);
+  int32_t upper = rn(tmp);
+
+  rshi_u(_jit, upper, r1, 32);
+  bswapr_ui(_jit, r0, r1);
+  bswapr_ui(_jit, upper, upper);
+  lshi(_jit, r0, r0, 32);
+  orr(_jit, r0, r0, upper);
+  unget_temp_gpr(_jit);
+}
+#endif
+
+/* r0 = r1 + r2, as a single ADD. */
+static void
+addr(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+{
+  uint32_t insn = _ADD(r0, r1, r2);
+  em_wp(_jit, insn);
+}
+
+/* r0 = r1 + i0.
+ *
+ * Uses ADDI when i0 passes can_sign_extend_short_p(), ADDIS when i0 passes
+ * can_sign_extend_int_p() and has its low 16 bits clear, and otherwise
+ * loads i0 into a temporary and emits ADD.
+ *
+ * ADDIS takes a signed 16-bit immediate, so its guard has to be the signed
+ * 32-bit predicate: a value that is only zero-extendable would yield an
+ * upper half the instruction cannot encode, and negative multiples of
+ * 0x10000 can use the short form as well. */
+static void
+addi(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0)
+{
+  if (can_sign_extend_short_p(i0))
+    em_wp(_jit, _ADDI(r0, r1, i0));
+  else if (can_sign_extend_int_p(i0) && !(i0 & 0x0000ffff))
+    em_wp(_jit, _ADDIS(r0, r1, i0 >> 16));
+  else {
+    jit_gpr_t reg = get_temp_gpr(_jit);
+    movi(_jit, rn(reg), i0);
+    em_wp(_jit, _ADD(r0, r1, rn(reg)));
+    unget_temp_gpr(_jit);
+  }
+}
+
+/* Emit a single ADDC of r1 and r2 into r0. */
+static void
+addcr(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+{
+  uint32_t insn = _ADDC(r0, r1, r2);
+  em_wp(_jit, insn);
+}
+
+/* Immediate form of addcr(): ADDIC when i0 passes
+ * can_sign_extend_short_p(), otherwise i0 is loaded into a temporary and
+ * addcr() is used. */
+static void
+addci(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0)
+{
+  if (!can_sign_extend_short_p(i0)) {
+    jit_gpr_t tmp = get_temp_gpr(_jit);
+    movi(_jit, rn(tmp), i0);
+    addcr(_jit, r0, r1, rn(tmp));
+    unget_temp_gpr(_jit);
+    return;
+  }
+  em_wp(_jit, _ADDIC(r0, r1, i0));
+}
+
+/* Emit a single ADDE of r1 and r2 into r0. */
+static void
+addxr(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+{
+  uint32_t insn = _ADDE(r0, r1, r2);
+  em_wp(_jit, insn);
+}
+
+/* Immediate form of addxr(): i0 is always loaded into a temporary
+ * register first. */
+static void
+addxi(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0)
+{
+  jit_gpr_t tmp = get_temp_gpr(_jit);
+  int32_t imm_reg = rn(tmp);
+
+  movi(_jit, imm_reg, i0);
+  addxr(_jit, r0, r1, imm_reg);
+  unget_temp_gpr(_jit);
+}
+
+/* Emit a single SUB with operands (r0, r1, r2). */
+static void
+subr(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+{
+  uint32_t insn = _SUB(r0, r1, r2);
+  em_wp(_jit, insn);
+}
+
+/* r0 = r1 - i0.
+ *
+ * Implemented as an addition of -i0 when that value fits ADDI (it passes
+ * can_sign_extend_short_p()) or ADDIS (it passes can_sign_extend_int_p()
+ * and has its low 16 bits clear); otherwise i0 is loaded into a temporary
+ * and subr() is used.
+ *
+ * The negation goes through the unsigned word type so that the most
+ * negative i0 does not overflow, and the ADDIS guard is the signed
+ * predicate because ADDIS takes a signed 16-bit immediate. */
+static void
+subi(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0)
+{
+  jit_word_t ni0 = (jit_word_t)(0 - (jit_uword_t)i0);
+  if (can_sign_extend_short_p(ni0)) {
+    em_wp(_jit, _ADDI(r0, r1, ni0));
+  } else if (can_sign_extend_int_p(ni0) && !(ni0 & 0x0000ffff)) {
+    em_wp(_jit, _ADDIS(r0, r1, ni0 >> 16));
+  } else {
+    jit_gpr_t reg = get_temp_gpr(_jit);
+    movi(_jit, rn(reg), i0);
+    subr(_jit, r0, r1, rn(reg));
+    unget_temp_gpr(_jit);
+  }
+}
+
+/* Emit a single SUBC with operands (r0, r1, r2). */
+static void
+subcr(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+{
+  uint32_t insn = _SUBC(r0, r1, r2);
+  em_wp(_jit, insn);
+}
+
+/* Immediate form of subcr(): i0 is always loaded into a temporary
+ * register first. */
+static void
+subci(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0)
+{
+  jit_gpr_t tmp = get_temp_gpr(_jit);
+  int32_t imm_reg = rn(tmp);
+
+  movi(_jit, imm_reg, i0);
+  subcr(_jit, r0, r1, imm_reg);
+  unget_temp_gpr(_jit);
+}
+
+/* Emit a single SUBFE; note that the encoder receives r2 before r1. */
+static void
+subxr(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+{
+  uint32_t insn = _SUBFE(r0, r2, r1);
+  em_wp(_jit, insn);
+}
+
+/* Immediate form of subxr(): r0 = r1 - i0, with the same carry handling
+ * as subxr().
+ *
+ * i0 is loaded into a temporary, which is then passed as the *subtrahend*
+ * (third operand), matching subci() and every other immediate helper in
+ * this file.  Passing it as the minuend would compute i0 - r1. */
+static void
+subxi(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0)
+{
+  jit_gpr_t reg = get_temp_gpr(_jit);
+  movi(_jit, rn(reg), i0);
+  subxr(_jit, r0, r1, rn(reg));
+  unget_temp_gpr(_jit);
+}
+
+/* r0 = r1 * r2 using the word-size specific _MULR encoder. */
+static void
+mulr(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+{
+  uint32_t insn = _MULR(r0, r1, r2);
+  em_wp(_jit, insn);
+}
+
+/* r0 = r1 * i0: MULLI when i0 passes can_sign_extend_short_p(), otherwise
+ * i0 is loaded into a temporary and mulr() is used. */
+static void
+muli(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0)
+{
+  if (!can_sign_extend_short_p(i0)) {
+    jit_gpr_t tmp = get_temp_gpr(_jit);
+    movi(_jit, rn(tmp), i0);
+    mulr(_jit, r0, r1, rn(tmp));
+    unget_temp_gpr(_jit);
+    return;
+  }
+  em_wp(_jit, _MULLI(r0, r1, i0));
+}
+
+/* Multiply r2 by r3, writing the _MULLR result to r0 and the _MULHR
+ * (sign != 0) or _MULHR_U (sign == 0) result to r1.  When r0 is also one of
+ * the inputs, the first result is kept in a temporary until the second
+ * multiply has read its operands. */
+static void
+iqmulr(jit_state_t *_jit, int32_t r0, int32_t r1,
+       int32_t r2, int32_t r3, jit_bool_t sign)
+{
+  const int r0_is_input = (r0 == r2 || r0 == r3);
+  int32_t low = r0;
+
+  if (r0_is_input) {
+    low = rn(get_temp_gpr(_jit));
+  }
+  em_wp(_jit, _MULLR(low, r2, r3));
+
+  if (sign) {
+    em_wp(_jit, _MULHR(r1, r2, r3));
+  } else {
+    em_wp(_jit, _MULHR_U(r1, r2, r3));
+  }
+
+  if (r0_is_input) {
+    movr(_jit, r0, low);
+    unget_temp_gpr(_jit);
+  }
+}
+
+/* iqmulr() with sign = 1. */
+static void
+qmulr(jit_state_t *_jit, int32_t r0, int32_t r1,
+      int32_t r2, int32_t r3)
+{
+  const jit_bool_t is_signed = 1;
+  iqmulr(_jit, r0, r1, r2, r3, is_signed);
+}
+
+/* iqmulr() with sign = 0. */
+static void
+qmulr_u(jit_state_t *_jit, int32_t r0, int32_t r1,
+        int32_t r2, int32_t r3)
+{
+  const jit_bool_t is_signed = 0;
+  iqmulr(_jit, r0, r1, r2, r3, is_signed);
+}
+
+/* Immediate form of iqmulr(): i0 is loaded into a temporary that takes
+ * the place of r3. */
+static void
+iqmuli(jit_state_t *_jit, int32_t r0, int32_t r1,
+       int32_t r2, jit_word_t i0, jit_bool_t sign)
+{
+  jit_gpr_t tmp = get_temp_gpr(_jit);
+  int32_t imm_reg = rn(tmp);
+
+  movi(_jit, imm_reg, i0);
+  iqmulr(_jit, r0, r1, r2, imm_reg, sign);
+  unget_temp_gpr(_jit);
+}
+
+/* iqmuli() with sign = 1. */
+static void
+qmuli(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2,
+      jit_word_t i0)
+{
+  const jit_bool_t is_signed = 1;
+  iqmuli(_jit, r0, r1, r2, i0, is_signed);
+}
+
+/* iqmuli() with sign = 0. */
+static void
+qmuli_u(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2,
+        jit_word_t i0)
+{
+  const jit_bool_t is_signed = 0;
+  iqmuli(_jit, r0, r1, r2, i0, is_signed);
+}
+
+/* r0 = r1 / r2 using the word-size specific _DIVR encoder. */
+static void
+divr(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+{
+  uint32_t insn = _DIVR(r0, r1, r2);
+  em_wp(_jit, insn);
+}
+
+/* Immediate form of divr(): i0 is loaded into a temporary first. */
+static void
+divi(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0)
+{
+  jit_gpr_t tmp = get_temp_gpr(_jit);
+  int32_t divisor = rn(tmp);
+
+  movi(_jit, divisor, i0);
+  divr(_jit, r0, r1, divisor);
+  unget_temp_gpr(_jit);
+}
+
+/* r0 = r1 / r2 using the word-size specific _DIVR_U encoder. */
+static void
+divr_u(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+{
+  uint32_t insn = _DIVR_U(r0, r1, r2);
+  em_wp(_jit, insn);
+}
+
+/* Immediate form of divr_u(): i0 is loaded into a temporary first. */
+static void
+divi_u(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0)
+{
+  jit_gpr_t tmp = get_temp_gpr(_jit);
+  int32_t divisor = rn(tmp);
+
+  movi(_jit, divisor, i0);
+  divr_u(_jit, r0, r1, divisor);
+  unget_temp_gpr(_jit);
+}
+
+/* Divide r2 by r3, leaving the quotient in r0 and the remainder in r1.
+ * sign selects divr() (non-zero) or divr_u() (zero).  The remainder is
+ * computed as r2 - r3 * quotient.  Whenever an output register is also an
+ * input, the value is built in a temporary and copied over at the end. */
+static void
+iqdivr(jit_state_t *_jit, int32_t r0, int32_t r1,
+       int32_t r2, int32_t r3, jit_bool_t sign)
+{
+  int32_t quot = r0;
+  int32_t rest = r1;
+
+  if (r0 == r2 || r0 == r3) {
+    quot = rn(get_temp_gpr(_jit));
+  }
+  if (r1 == r2 || r1 == r3) {
+    rest = rn(get_temp_gpr(_jit));
+  }
+
+  if (sign) {
+    divr(_jit, quot, r2, r3);
+  } else {
+    divr_u(_jit, quot, r2, r3);
+  }
+
+  mulr(_jit, rest, r3, quot);
+  subr(_jit, rest, r2, rest);
+
+  if (quot != r0) {
+    movr(_jit, r0, quot);
+    unget_temp_gpr(_jit);
+  }
+  if (rest != r1) {
+    movr(_jit, r1, rest);
+    unget_temp_gpr(_jit);
+  }
+}
+
+/* iqdivr() with sign = 1. */
+static void
+qdivr(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2, int32_t r3)
+{
+  const jit_bool_t is_signed = 1;
+  iqdivr(_jit, r0, r1, r2, r3, is_signed);
+}
+
+/* iqdivr() with sign = 0. */
+static void
+qdivr_u(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2, int32_t r3)
+{
+  const jit_bool_t is_signed = 0;
+  iqdivr(_jit, r0, r1, r2, r3, is_signed);
+}
+
+/* Immediate form of iqdivr(): i0 is loaded into a temporary that takes
+ * the place of r3. */
+static void
+iqdivi(jit_state_t *_jit, int32_t r0, int32_t r1,
+       int32_t r2, jit_word_t i0, jit_bool_t sign)
+{
+  jit_gpr_t tmp = get_temp_gpr(_jit);
+  int32_t divisor = rn(tmp);
+
+  movi(_jit, divisor, i0);
+  iqdivr(_jit, r0, r1, r2, divisor, sign);
+  unget_temp_gpr(_jit);
+}
+
+/* iqdivi() with sign = 1. */
+static void
+qdivi(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2, jit_word_t i0)
+{
+  const jit_bool_t is_signed = 1;
+  iqdivi(_jit, r0, r1, r2, i0, is_signed);
+}
+
+/* iqdivi() with sign = 0. */
+static void
+qdivi_u(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2, jit_word_t u0)
+{
+  const jit_bool_t is_signed = 0;
+  iqdivi(_jit, r0, r1, r2, u0, is_signed);
+}
+
+/* r0 = r1 - r2 * (r1 / r2), using divr().  When r0 is also an input the
+ * quotient and product go through a temporary so the inputs stay intact
+ * until the final subtraction. */
+static void
+remr(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+{
+  if (r0 != r1 && r0 != r2) {
+    divr(_jit, r0, r1, r2);
+    mulr(_jit, r0, r2, r0);
+    subr(_jit, r0, r1, r0);
+    return;
+  }
+
+  jit_gpr_t tmp = get_temp_gpr(_jit);
+  int32_t scratch = rn(tmp);
+
+  divr(_jit, scratch, r1, r2);
+  mulr(_jit, scratch, r2, scratch);
+  subr(_jit, r0, r1, scratch);
+  unget_temp_gpr(_jit);
+}
+
+/* Immediate form of remr(): i0 is loaded into a temporary first. */
+static void
+remi(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0)
+{
+  jit_gpr_t tmp = get_temp_gpr(_jit);
+  int32_t divisor = rn(tmp);
+
+  movi(_jit, divisor, i0);
+  remr(_jit, r0, r1, divisor);
+  unget_temp_gpr(_jit);
+}
+
+/* r0 = r1 - r2 * (r1 / r2), using divr_u().  When r0 is also an input the
+ * quotient and product go through a temporary so the inputs stay intact
+ * until the final subtraction. */
+static void
+remr_u(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+{
+  if (r0 != r1 && r0 != r2) {
+    divr_u(_jit, r0, r1, r2);
+    mulr(_jit, r0, r2, r0);
+    subr(_jit, r0, r1, r0);
+    return;
+  }
+
+  jit_gpr_t tmp = get_temp_gpr(_jit);
+  int32_t scratch = rn(tmp);
+
+  divr_u(_jit, scratch, r1, r2);
+  mulr(_jit, scratch, r2, scratch);
+  subr(_jit, r0, r1, scratch);
+  unget_temp_gpr(_jit);
+}
+
+/* Immediate form of remr_u(): i0 is loaded into a temporary first. */
+static void
+remi_u(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0)
+{
+  jit_gpr_t tmp = get_temp_gpr(_jit);
+  int32_t divisor = rn(tmp);
+
+  movi(_jit, divisor, i0);
+  remr_u(_jit, r0, r1, divisor);
+  unget_temp_gpr(_jit);
+}
+
+/* r0 = r1 & r2, as a single AND. */
+static void
+andr(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+{
+  uint32_t insn = _AND(r0, r1, r2);
+  em_wp(_jit, insn);
+}
+
+/* r0 = r1 & i0.  ANDI. is used when i0 passes can_zero_extend_short_p(),
+ * ANDIS. when i0 passes can_zero_extend_int_p() with its low 16 bits
+ * clear; anything else is loaded into a temporary and AND-ed. */
+static void
+andi(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0)
+{
+  if (can_zero_extend_short_p(i0)) {
+    em_wp(_jit, _ANDI_(r0, r1, i0));
+    return;
+  }
+  if (can_zero_extend_int_p(i0) && !(i0 & 0x0000ffff)) {
+    em_wp(_jit, _ANDIS_(r0, r1, (jit_uword_t)i0 >> 16));
+    return;
+  }
+
+  jit_gpr_t tmp = get_temp_gpr(_jit);
+  movi(_jit, rn(tmp), i0);
+  em_wp(_jit, _AND(r0, r1, rn(tmp)));
+  unget_temp_gpr(_jit);
+}
+
+/* r0 = r1 | r2, as a single OR. */
+static void
+orr(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+{
+  uint32_t insn = _OR(r0, r1, r2);
+  em_wp(_jit, insn);
+}
+
+/* r0 = r1 | i0.  ORI is used when i0 passes can_zero_extend_short_p(),
+ * ORIS when i0 passes can_zero_extend_int_p() with its low 16 bits clear;
+ * anything else is loaded into a temporary and handed to orr(). */
+static void
+ori(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0)
+{
+  if (can_zero_extend_short_p(i0)) {
+    em_wp(_jit, _ORI(r0, r1, i0));
+    return;
+  }
+  if (can_zero_extend_int_p(i0) && !(i0 & 0x0000ffff)) {
+    em_wp(_jit, _ORIS(r0, r1, (jit_uword_t)i0 >> 16));
+    return;
+  }
+
+  jit_gpr_t tmp = get_temp_gpr(_jit);
+  movi(_jit, rn(tmp), i0);
+  orr(_jit, r0, r1, rn(tmp));
+  unget_temp_gpr(_jit);
+}
+
+/* r0 = r1 ^ r2, as a single XOR. */
+static void
+xorr(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+{
+  uint32_t insn = _XOR(r0, r1, r2);
+  em_wp(_jit, insn);
+}
+
+/* r0 = r1 ^ i0.  XORI is used when i0 passes can_zero_extend_short_p(),
+ * XORIS when i0 passes can_zero_extend_int_p() with its low 16 bits clear;
+ * anything else is loaded into a temporary and handed to xorr(). */
+static void
+xori(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0)
+{
+  if (can_zero_extend_short_p(i0)) {
+    em_wp(_jit, _XORI(r0, r1, i0));
+    return;
+  }
+  if (can_zero_extend_int_p(i0) && !(i0 & 0x0000ffff)) {
+    em_wp(_jit, _XORIS(r0, r1, (jit_uword_t)i0 >> 16));
+    return;
+  }
+
+  jit_gpr_t tmp = get_temp_gpr(_jit);
+  movi(_jit, rn(tmp), i0);
+  xorr(_jit, r0, r1, rn(tmp));
+  unget_temp_gpr(_jit);
+}
+
+/* r0 = r1 << r2 using the word-size specific _LSHR encoder. */
+static void
+lshr(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+{
+  uint32_t insn = _LSHR(r0, r1, r2);
+  em_wp(_jit, insn);
+}
+
+/* r0 = r1 << i0.  A zero shift degenerates to a register move; otherwise
+ * SLWI (32-bit builds) or SLDI is emitted. */
+static void
+lshi(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0)
+{
+  if (i0 == 0) {
+    movr(_jit, r0, r1);
+    return;
+  }
+# if __WORDSIZE == 32
+  em_wp(_jit, _SLWI(r0, r1, i0));
+# else
+  em_wp(_jit, _SLDI(r0, r1, i0));
+# endif
+}
+
+/* r0 = r1 >> r2 using the word-size specific _RSHR encoder. */
+static void
+rshr(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+{
+  uint32_t insn = _RSHR(r0, r1, r2);
+  em_wp(_jit, insn);
+}
+
+/* r0 = r1 >> i0.  A zero shift degenerates to a register move; otherwise
+ * SRAWI (32-bit builds) or SRADI is emitted. */
+static void
+rshi(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0)
+{
+  if (i0 == 0) {
+    movr(_jit, r0, r1);
+    return;
+  }
+# if __WORDSIZE == 32
+  em_wp(_jit, _SRAWI(r0, r1, i0));
+# else
+  em_wp(_jit, _SRADI(r0, r1, i0));
+# endif
+}
+
+/* r0 = r1 >> r2 using the word-size specific _RSHR_U encoder. */
+static void
+rshr_u(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+{
+  uint32_t insn = _RSHR_U(r0, r1, r2);
+  em_wp(_jit, insn);
+}
+
+/* r0 = r1 >> i0.  A zero shift degenerates to a register move; otherwise
+ * SRWI (32-bit builds) or SRDI is emitted. */
+static void
+rshi_u(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0)
+{
+  if (i0 == 0) {
+    movr(_jit, r0, r1);
+    return;
+  }
+# if __WORDSIZE == 32
+  em_wp(_jit, _SRWI(r0, r1, i0));
+# else
+  em_wp(_jit, _SRDI(r0, r1, i0));
+# endif
+}
+
+/* Compare r0 with r1 using _CMPX, then emit a BLT branch; returns the
+ * branch relocation. */
+static jit_reloc_t
+bltr(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  em_wp(_jit, _CMPX(r0, r1));
+  jit_reloc_t reloc = emit_cc_jump(_jit, _BLT(0));
+  return reloc;
+}
+
+/* Compare r0 with the constant i1, then emit a BLT branch.  _CMPXI is used
+ * when i1 passes can_sign_extend_short_p(); otherwise i1 is loaded into a
+ * temporary and compared with _CMPX. */
+static jit_reloc_t
+blti(jit_state_t *_jit, int32_t r0, jit_word_t i1)
+{
+  if (!can_sign_extend_short_p(i1)) {
+    jit_gpr_t tmp = get_temp_gpr(_jit);
+    movi(_jit, rn(tmp), i1);
+    em_wp(_jit, _CMPX(r0, rn(tmp)));
+    unget_temp_gpr(_jit);
+  } else {
+    em_wp(_jit, _CMPXI(r0, i1));
+  }
+
+  return emit_cc_jump(_jit, _BLT(0));
+}
+
+/* Compare r0 with r1 using _CMPLX, then emit a BLT branch; returns the
+ * branch relocation. */
+static jit_reloc_t
+bltr_u(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  em_wp(_jit, _CMPLX(r0, r1));
+  jit_reloc_t reloc = emit_cc_jump(_jit, _BLT(0));
+  return reloc;
+}
+
+/* Compare r0 with the constant i1, then emit a BLT branch.  _CMPLXI is
+ * used when i1 passes can_zero_extend_short_p(); otherwise i1 is loaded
+ * into a temporary and compared with _CMPLX. */
+static jit_reloc_t
+blti_u(jit_state_t *_jit, int32_t r0, jit_word_t i1)
+{
+  if (!can_zero_extend_short_p(i1)) {
+    jit_gpr_t tmp = get_temp_gpr(_jit);
+    movi(_jit, rn(tmp), i1);
+    em_wp(_jit, _CMPLX(r0, rn(tmp)));
+    unget_temp_gpr(_jit);
+  } else {
+    em_wp(_jit, _CMPLXI(r0, i1));
+  }
+
+  return emit_cc_jump(_jit, _BLT(0));
+}
+
+/* Compare r0 with r1 using _CMPX, then emit a BLE branch; returns the
+ * branch relocation. */
+static jit_reloc_t
+bler(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  em_wp(_jit, _CMPX(r0, r1));
+  jit_reloc_t reloc = emit_cc_jump(_jit, _BLE(0));
+  return reloc;
+}
+
+/* Compare r0 with the constant i1, then emit a BLE branch.  _CMPXI is used
+ * when i1 passes can_sign_extend_short_p(); otherwise i1 is loaded into a
+ * temporary and compared with _CMPX. */
+static jit_reloc_t
+blei(jit_state_t *_jit, int32_t r0, jit_word_t i1)
+{
+  if (!can_sign_extend_short_p(i1)) {
+    jit_gpr_t tmp = get_temp_gpr(_jit);
+    movi(_jit, rn(tmp), i1);
+    em_wp(_jit, _CMPX(r0, rn(tmp)));
+    unget_temp_gpr(_jit);
+  } else {
+    em_wp(_jit, _CMPXI(r0, i1));
+  }
+
+  return emit_cc_jump(_jit, _BLE(0));
+}
+
+/* Compare r0 with r1 using _CMPLX, then emit a BLE branch; returns the
+ * branch relocation. */
+static jit_reloc_t
+bler_u(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  em_wp(_jit, _CMPLX(r0, r1));
+  jit_reloc_t reloc = emit_cc_jump(_jit, _BLE(0));
+  return reloc;
+}
+
+/* Compare r0 with the constant i1, then emit a BLE branch.  _CMPLXI is
+ * used when i1 passes can_zero_extend_short_p(); otherwise i1 is loaded
+ * into a temporary and compared with _CMPLX. */
+static jit_reloc_t
+blei_u(jit_state_t *_jit, int32_t r0, jit_word_t i1)
+{
+  if (!can_zero_extend_short_p(i1)) {
+    jit_gpr_t tmp = get_temp_gpr(_jit);
+    movi(_jit, rn(tmp), i1);
+    em_wp(_jit, _CMPLX(r0, rn(tmp)));
+    unget_temp_gpr(_jit);
+  } else {
+    em_wp(_jit, _CMPLXI(r0, i1));
+  }
+
+  return emit_cc_jump(_jit, _BLE(0));
+}
+
+/* beqr: emit _CMPX(r0, r1) followed by a conditional jump built from
+ * _BEQ(0).  Returns the relocation produced by emit_cc_jump(). */
+static jit_reloc_t
+beqr(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+ em_wp(_jit, _CMPX(r0, r1));
+ return emit_cc_jump(_jit, _BEQ(0));
+}
+
+/*
+ * beqi: compare r0 against the immediate i1 and emit a conditional jump
+ * built from _BEQ(0).  The immediate forms are tried in order: _CMPXI
+ * when i1 passes can_sign_extend_short_p(), _CMPLXI when it passes
+ * can_zero_extend_short_p(); if neither applies, i1 is loaded into a
+ * temporary GPR and compared with _CMPX.  Returns the jump's relocation.
+ */
+static jit_reloc_t
+beqi(jit_state_t *_jit, int32_t r0, jit_word_t i1)
+{
+  if (!can_sign_extend_short_p(i1) && !can_zero_extend_short_p(i1)) {
+    jit_gpr_t tmp = get_temp_gpr(_jit);
+    movi(_jit, rn(tmp), i1);
+    em_wp(_jit, _CMPX(r0, rn(tmp)));
+    unget_temp_gpr(_jit);
+  } else if (can_sign_extend_short_p(i1)) {
+    em_wp(_jit, _CMPXI(r0, i1));
+  } else {
+    em_wp(_jit, _CMPLXI(r0, i1));
+  }
+
+  return emit_cc_jump(_jit, _BEQ(0));
+}
+
+/* bger: emit _CMPX(r0, r1) followed by a conditional jump built from
+ * _BGE(0).  Returns the relocation produced by emit_cc_jump(). */
+static jit_reloc_t
+bger(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+ em_wp(_jit, _CMPX(r0, r1));
+ return emit_cc_jump(_jit, _BGE(0));
+}
+
+/*
+ * bgei: compare r0 against the immediate i1 and emit a conditional jump
+ * built from _BGE(0).  _CMPXI is used when i1 passes
+ * can_sign_extend_short_p(); otherwise i1 is loaded into a temporary GPR
+ * and compared with _CMPX.  Returns the jump's relocation.
+ */
+static jit_reloc_t
+bgei(jit_state_t *_jit, int32_t r0, jit_word_t i1)
+{
+  if (!can_sign_extend_short_p(i1)) {
+    jit_gpr_t tmp = get_temp_gpr(_jit);
+    movi(_jit, rn(tmp), i1);
+    em_wp(_jit, _CMPX(r0, rn(tmp)));
+    unget_temp_gpr(_jit);
+  } else {
+    em_wp(_jit, _CMPXI(r0, i1));
+  }
+
+  return emit_cc_jump(_jit, _BGE(0));
+}
+
+/* bger_u: emit _CMPLX(r0, r1) followed by a conditional jump built from
+ * _BGE(0).  Returns the relocation produced by emit_cc_jump(). */
+static jit_reloc_t
+bger_u(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+ em_wp(_jit, _CMPLX(r0, r1));
+ return emit_cc_jump(_jit, _BGE(0));
+}
+
+/*
+ * bgei_u: compare r0 against the immediate i1 with the _CMPL* family and
+ * emit a conditional jump built from _BGE(0).  _CMPLXI is used when i1
+ * passes can_zero_extend_short_p(); otherwise i1 goes through a temporary
+ * GPR and _CMPLX.  Returns the jump's relocation.
+ */
+static jit_reloc_t
+bgei_u(jit_state_t *_jit, int32_t r0, jit_word_t i1)
+{
+  if (!can_zero_extend_short_p(i1)) {
+    jit_gpr_t tmp = get_temp_gpr(_jit);
+    movi(_jit, rn(tmp), i1);
+    em_wp(_jit, _CMPLX(r0, rn(tmp)));
+    unget_temp_gpr(_jit);
+  } else {
+    em_wp(_jit, _CMPLXI(r0, i1));
+  }
+
+  return emit_cc_jump(_jit, _BGE(0));
+}
+
+/* bgtr: emit _CMPX(r0, r1) followed by a conditional jump built from
+ * _BGT(0).  Returns the relocation produced by emit_cc_jump(). */
+static jit_reloc_t
+bgtr(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+ em_wp(_jit, _CMPX(r0, r1));
+ return emit_cc_jump(_jit, _BGT(0));
+}
+
+/*
+ * bgti: compare r0 against the immediate i1 and emit a conditional jump
+ * built from _BGT(0).  _CMPXI is used when i1 passes
+ * can_sign_extend_short_p(); otherwise i1 is loaded into a temporary GPR
+ * and compared with _CMPX.  Returns the jump's relocation.
+ */
+static jit_reloc_t
+bgti(jit_state_t *_jit, int32_t r0, jit_word_t i1)
+{
+  if (!can_sign_extend_short_p(i1)) {
+    jit_gpr_t tmp = get_temp_gpr(_jit);
+    movi(_jit, rn(tmp), i1);
+    em_wp(_jit, _CMPX(r0, rn(tmp)));
+    unget_temp_gpr(_jit);
+  } else {
+    em_wp(_jit, _CMPXI(r0, i1));
+  }
+
+  return emit_cc_jump(_jit, _BGT(0));
+}
+
+/* bgtr_u: emit _CMPLX(r0, r1) followed by a conditional jump built from
+ * _BGT(0).  Returns the relocation produced by emit_cc_jump(). */
+static jit_reloc_t
+bgtr_u(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+ em_wp(_jit, _CMPLX(r0, r1));
+ return emit_cc_jump(_jit, _BGT(0));
+}
+
+/*
+ * bgti_u: compare r0 against the immediate i1 with the _CMPL* family and
+ * emit a conditional jump built from _BGT(0).  _CMPLXI is used when i1
+ * passes can_zero_extend_short_p(); otherwise i1 goes through a temporary
+ * GPR and _CMPLX.  Returns the jump's relocation.
+ */
+static jit_reloc_t
+bgti_u(jit_state_t *_jit, int32_t r0, jit_word_t i1)
+{
+  if (!can_zero_extend_short_p(i1)) {
+    jit_gpr_t tmp = get_temp_gpr(_jit);
+    movi(_jit, rn(tmp), i1);
+    em_wp(_jit, _CMPLX(r0, rn(tmp)));
+    unget_temp_gpr(_jit);
+  } else {
+    em_wp(_jit, _CMPLXI(r0, i1));
+  }
+
+  return emit_cc_jump(_jit, _BGT(0));
+}
+
+/* bner: emit _CMPX(r0, r1) followed by a conditional jump built from
+ * _BNE(0).  Returns the relocation produced by emit_cc_jump(). */
+static jit_reloc_t
+bner(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+ em_wp(_jit, _CMPX(r0, r1));
+ return emit_cc_jump(_jit, _BNE(0));
+}
+
+/*
+ * bnei: compare r0 against the immediate i1 and emit a conditional jump
+ * built from _BNE(0).  The immediate forms are tried in order: _CMPXI
+ * when i1 passes can_sign_extend_short_p(), _CMPLXI when it passes
+ * can_zero_extend_short_p(); if neither applies, i1 is loaded into a
+ * temporary GPR and compared with _CMPX.  Returns the jump's relocation.
+ */
+static jit_reloc_t
+bnei(jit_state_t *_jit, int32_t r0, jit_word_t i1)
+{
+  if (!can_sign_extend_short_p(i1) && !can_zero_extend_short_p(i1)) {
+    jit_gpr_t tmp = get_temp_gpr(_jit);
+    movi(_jit, rn(tmp), i1);
+    em_wp(_jit, _CMPX(r0, rn(tmp)));
+    unget_temp_gpr(_jit);
+  } else if (can_sign_extend_short_p(i1)) {
+    em_wp(_jit, _CMPXI(r0, i1));
+  } else {
+    em_wp(_jit, _CMPLXI(r0, i1));
+  }
+
+  return emit_cc_jump(_jit, _BNE(0));
+}
+
+/*
+ * bmsr: AND r0 with r1 into a temporary GPR and branch (via bnei against
+ * 0) when the result is non-zero.  Returns the jump's relocation.
+ */
+static jit_reloc_t
+bmsr(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  jit_reloc_t jump;
+  jit_gpr_t masked = get_temp_gpr(_jit);
+
+  andr(_jit, rn(masked), r0, r1);
+  jump = bnei(_jit, rn(masked), 0);
+  unget_temp_gpr(_jit);
+
+  return jump;
+}
+
+/*
+ * bmsi: AND r0 with the immediate i1 into a temporary GPR and branch (via
+ * bnei against 0) when the result is non-zero.  Returns the jump's
+ * relocation.
+ */
+static jit_reloc_t
+bmsi(jit_state_t *_jit, int32_t r0, jit_word_t i1)
+{
+  jit_reloc_t jump;
+  jit_gpr_t masked = get_temp_gpr(_jit);
+
+  andi(_jit, rn(masked), r0, i1);
+  jump = bnei(_jit, rn(masked), 0);
+  unget_temp_gpr(_jit);
+
+  return jump;
+}
+
+/*
+ * bmcr: AND r0 with r1 into a temporary GPR and branch (via beqi against
+ * 0) when the result is zero.  Returns the jump's relocation.
+ */
+static jit_reloc_t
+bmcr(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  jit_reloc_t jump;
+  jit_gpr_t masked = get_temp_gpr(_jit);
+
+  andr(_jit, rn(masked), r0, r1);
+  jump = beqi(_jit, rn(masked), 0);
+  unget_temp_gpr(_jit);
+
+  return jump;
+}
+
+/*
+ * bmci: AND r0 with the immediate i1 into a temporary GPR and branch (via
+ * beqi against 0) when the result is zero.  Returns the jump's relocation.
+ */
+static jit_reloc_t
+bmci(jit_state_t *_jit, int32_t r0, jit_word_t i1)
+{
+  jit_reloc_t jump;
+  jit_gpr_t masked = get_temp_gpr(_jit);
+
+  andi(_jit, rn(masked), r0, i1);
+  jump = beqi(_jit, rn(masked), 0);
+  unget_temp_gpr(_jit);
+
+  return jump;
+}
+
+/* boaddr: emit _ADDO(r0, r0, r1) (r0 is both source and destination),
+ * then mcrxr() on CR_0 and a conditional jump built from _BGT(0).
+ * Per the comment below, the GT test corresponds to the XER OV bit.
+ * Returns the jump's relocation. */
+static jit_reloc_t
+boaddr(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+ em_wp(_jit, _ADDO(r0, r0, r1));
+ mcrxr(_jit, CR_0);
+ return emit_cc_jump(_jit, _BGT(0)); /* GT = bit 1 of XER = OV */
+}
+
+/*
+ * boaddi: immediate form of boaddr.  The immediate i1 is always loaded
+ * into a temporary GPR and the work is delegated to boaddr().
+ */
+static jit_reloc_t
+boaddi(jit_state_t *_jit, int32_t r0, jit_word_t i1)
+{
+  jit_reloc_t jump;
+  jit_gpr_t operand = get_temp_gpr(_jit);
+
+  movi(_jit, rn(operand), i1);
+  jump = boaddr(_jit, r0, rn(operand));
+  unget_temp_gpr(_jit);
+
+  return jump;
+}
+
+/* bxaddr: same instruction sequence as boaddr (_ADDO into r0, then
+ * mcrxr() on CR_0) but the conditional jump is built from _BLE(0),
+ * i.e. the opposite of boaddr's _BGT test.  Returns the jump's
+ * relocation. */
+static jit_reloc_t
+bxaddr(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+ em_wp(_jit, _ADDO(r0, r0, r1));
+ mcrxr(_jit, CR_0);
+ return emit_cc_jump(_jit, _BLE(0));
+}
+
+/*
+ * bxaddi: immediate form of bxaddr.  The immediate i1 is always loaded
+ * into a temporary GPR and the work is delegated to bxaddr().
+ */
+static jit_reloc_t
+bxaddi(jit_state_t *_jit, int32_t r0, jit_word_t i1)
+{
+  jit_reloc_t jump;
+  jit_gpr_t operand = get_temp_gpr(_jit);
+
+  movi(_jit, rn(operand), i1);
+  jump = bxaddr(_jit, r0, rn(operand));
+  unget_temp_gpr(_jit);
+
+  return jump;
+}
+
+/* bosubr: emit _SUBO(r0, r0, r1) (r0 is both source and destination),
+ * then mcrxr() on CR_0 and a conditional jump built from _BGT(0).
+ * Returns the jump's relocation. */
+static jit_reloc_t
+bosubr(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+ em_wp(_jit, _SUBO(r0, r0, r1));
+ mcrxr(_jit, CR_0);
+ return emit_cc_jump(_jit, _BGT(0));
+}
+
+/*
+ * bosubi: immediate form of bosubr.  The immediate i1 is always loaded
+ * into a temporary GPR and the work is delegated to bosubr().
+ */
+static jit_reloc_t
+bosubi(jit_state_t *_jit, int32_t r0, jit_word_t i1)
+{
+  jit_reloc_t jump;
+  jit_gpr_t operand = get_temp_gpr(_jit);
+
+  movi(_jit, rn(operand), i1);
+  jump = bosubr(_jit, r0, rn(operand));
+  unget_temp_gpr(_jit);
+
+  return jump;
+}
+
+/* bxsubr: same instruction sequence as bosubr (_SUBO into r0, then
+ * mcrxr() on CR_0) but the conditional jump is built from _BLE(0).
+ * Returns the jump's relocation. */
+static jit_reloc_t
+bxsubr(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+ em_wp(_jit, _SUBO(r0, r0, r1));
+ mcrxr(_jit, CR_0);
+ return emit_cc_jump(_jit, _BLE(0));
+}
+
+/*
+ * bxsubi: immediate form of bxsubr.  The immediate i1 is always loaded
+ * into a temporary GPR and the work is delegated to bxsubr().
+ */
+static jit_reloc_t
+bxsubi(jit_state_t *_jit, int32_t r0, jit_word_t i1)
+{
+  jit_reloc_t jump;
+  jit_gpr_t operand = get_temp_gpr(_jit);
+
+  movi(_jit, rn(operand), i1);
+  jump = bxsubr(_jit, r0, rn(operand));
+  unget_temp_gpr(_jit);
+
+  return jump;
+}
+
+/* boaddr_u: emit _ADDC(r0, r0, r1) (r0 is both source and destination),
+ * then mcrxr() on CR_0 and a conditional jump built from _BEQ(0).
+ * Per the comment below, the EQ test corresponds to the XER CA bit.
+ * Returns the jump's relocation. */
+static jit_reloc_t
+boaddr_u(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+ em_wp(_jit, _ADDC(r0, r0, r1));
+ mcrxr(_jit, CR_0);
+ return emit_cc_jump(_jit, _BEQ(0)); /* EQ = bit 2 of XER = CA */
+}
+
+/*
+ * boaddi_u: immediate form of boaddr_u.  When i1 passes
+ * can_sign_extend_short_p() the add is emitted directly with _ADDIC and
+ * the same mcrxr()/_BEQ sequence; otherwise i1 is loaded into a temporary
+ * GPR and boaddr_u() does the work.  Returns the jump's relocation.
+ */
+static jit_reloc_t
+boaddi_u(jit_state_t *_jit, int32_t r0, jit_word_t i1)
+{
+  jit_reloc_t jump;
+
+  if (can_sign_extend_short_p(i1)) {
+    em_wp(_jit, _ADDIC(r0, r0, i1));
+    mcrxr(_jit, CR_0);
+    jump = emit_cc_jump(_jit, _BEQ(0));
+  } else {
+    jit_gpr_t operand = get_temp_gpr(_jit);
+    movi(_jit, rn(operand), i1);
+    jump = boaddr_u(_jit, r0, rn(operand));
+    unget_temp_gpr(_jit);
+  }
+
+  return jump;
+}
+
+/* bxaddr_u: same instruction sequence as boaddr_u (_ADDC into r0, then
+ * mcrxr() on CR_0) but the conditional jump is built from _BNE(0).
+ * Returns the jump's relocation. */
+static jit_reloc_t
+bxaddr_u(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+ em_wp(_jit, _ADDC(r0, r0, r1));
+ mcrxr(_jit, CR_0);
+ return emit_cc_jump(_jit, _BNE(0));
+}
+
+/*
+ * bxaddi_u: immediate form of bxaddr_u.  When i1 passes
+ * can_sign_extend_short_p() the add is emitted directly with _ADDIC and
+ * the same mcrxr()/_BNE sequence; otherwise i1 is loaded into a temporary
+ * GPR and bxaddr_u() does the work.  Returns the jump's relocation.
+ */
+static jit_reloc_t
+bxaddi_u(jit_state_t *_jit, int32_t r0, jit_word_t i1)
+{
+  jit_reloc_t jump;
+
+  if (can_sign_extend_short_p(i1)) {
+    em_wp(_jit, _ADDIC(r0, r0, i1));
+    mcrxr(_jit, CR_0);
+    jump = emit_cc_jump(_jit, _BNE(0));
+  } else {
+    jit_gpr_t operand = get_temp_gpr(_jit);
+    movi(_jit, rn(operand), i1);
+    jump = bxaddr_u(_jit, r0, rn(operand));
+    unget_temp_gpr(_jit);
+  }
+
+  return jump;
+}
+
+/* bosubr_u: emit _SUBC(r0, r0, r1) (r0 is both source and destination),
+ * then mcrxr() on CR_0 and a conditional jump built from _BNE(0).
+ * Note the test is inverted relative to boaddr_u; the comment below
+ * attributes this to PPC's carry-versus-borrow convention.
+ * Returns the jump's relocation. */
+static jit_reloc_t
+bosubr_u(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+ em_wp(_jit, _SUBC(r0, r0, r1));
+ mcrxr(_jit, CR_0);
+ return emit_cc_jump(_jit, _BNE(0)); /* PPC uses "carry" not "borrow" */
+}
+
+/*
+ * bosubi_u: immediate form of bosubr_u.  The immediate i1 is always
+ * loaded into a temporary GPR and the work is delegated to bosubr_u().
+ */
+static jit_reloc_t
+bosubi_u(jit_state_t *_jit, int32_t r0, jit_word_t i1)
+{
+  jit_reloc_t jump;
+  jit_gpr_t operand = get_temp_gpr(_jit);
+
+  movi(_jit, rn(operand), i1);
+  jump = bosubr_u(_jit, r0, rn(operand));
+  unget_temp_gpr(_jit);
+
+  return jump;
+}
+
+/* bxsubr_u: same instruction sequence as bosubr_u (_SUBC into r0, then
+ * mcrxr() on CR_0) but the conditional jump is built from _BEQ(0).
+ * Returns the jump's relocation. */
+static jit_reloc_t
+bxsubr_u(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+ em_wp(_jit, _SUBC(r0, r0, r1));
+ mcrxr(_jit, CR_0);
+ return emit_cc_jump(_jit, _BEQ(0));
+}
+
+/*
+ * bxsubi_u: immediate form of bxsubr_u.  The immediate i1 is always
+ * loaded into a temporary GPR and the work is delegated to bxsubr_u().
+ */
+static jit_reloc_t
+bxsubi_u(jit_state_t *_jit, int32_t r0, jit_word_t i1)
+{
+  jit_reloc_t jump;
+  jit_gpr_t operand = get_temp_gpr(_jit);
+
+  movi(_jit, rn(operand), i1);
+  jump = bxsubr_u(_jit, r0, rn(operand));
+  unget_temp_gpr(_jit);
+
+  return jump;
+}
+
+#if __WORDSIZE == 64
+/* extr_i (64-bit builds only): emit _EXTSW(r0, r1). */
+static void
+extr_i(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+ em_wp(_jit, _EXTSW(r0, r1));
+}
+
+/* extr_ui (64-bit builds only): emit _CLRLDI(r0, r1, 32), presumably
+ * clearing the upper 32 bits of r1 into r0. */
+static void
+extr_ui(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+ em_wp(_jit, _CLRLDI(r0, r1, 32));
+}
+#endif
+
+/* ldr_c: load via ldr_uc() from the address in r1, then apply extr_c()
+ * to r0 in place. */
+static void
+ldr_c(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+ ldr_uc(_jit, r0, r1);
+ extr_c(_jit, r0, r0);
+}
+
+/* ldi_c: load via ldi_uc() from the absolute address i0, then apply
+ * extr_c() to r0 in place. */
+static void
+ldi_c(jit_state_t *_jit, int32_t r0, jit_word_t i0)
+{
+ ldi_uc(_jit, r0, i0);
+ extr_c(_jit, r0, r0);
+}
+
+/* ldxr_c: load via ldxr_uc() using r1 and r2 as address operands, then
+ * apply extr_c() to r0 in place. */
+static void
+ldxr_c(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+{
+ ldxr_uc(_jit, r0, r1, r2);
+ extr_c(_jit, r0, r0);
+}
+
+/* ldxi_c: load via ldxi_uc() using base r1 and displacement i0, then
+ * apply extr_c() to r0 in place. */
+static void
+ldxi_c(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0)
+{
+ ldxi_uc(_jit, r0, r1, i0);
+ extr_c(_jit, r0, r0);
+}
+
+/* ldr_uc: emit _LBZX with rn(_R0) in the first address slot and r1 in
+ * the second, loading into r0. */
+static void
+ldr_uc(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+ em_wp(_jit, _LBZX(r0, rn(_R0), r1));
+}
+
+/*
+ * ldi_uc: load into r0 from the absolute address i0 using _LBZ.
+ * Three encodings are tried in order: a direct displacement off rn(_R0),
+ * a _LIS of the upper half into a temporary plus a 16-bit displacement,
+ * and finally a full movi() of the address followed by ldr_uc().
+ */
+static void
+ldi_uc(jit_state_t *_jit, int32_t r0, jit_word_t i0)
+{
+  if (can_sign_extend_short_p(i0)) {
+    em_wp(_jit, _LBZ(r0, rn(_R0), i0));
+    return;
+  }
+
+  jit_gpr_t base = get_temp_gpr(_jit);
+  if (can_sign_extend_int_p(i0)) {
+    /* upper is rounded so that the signed lower half reconstructs i0. */
+    jit_word_t upper = (int16_t)((i0 >> 16) + ((uint16_t)i0 >> 15));
+    jit_word_t lower = (int16_t)(i0 - (upper << 16));
+    em_wp(_jit, _LIS(rn(base), upper));
+    em_wp(_jit, _LBZ(r0, rn(base), lower));
+  } else {
+    movi(_jit, rn(base), i0);
+    ldr_uc(_jit, r0, rn(base));
+  }
+  unget_temp_gpr(_jit);
+}
+
+/*
+ * ldxr_uc: load into r0 with _LBZX using r1 and r2 as address operands.
+ * rn(_R0) is kept out of the first address slot: the operands are swapped
+ * when only r1 is rn(_R0), and r1 is copied to a temporary when both are.
+ */
+static void
+ldxr_uc(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+{
+  if (r1 != rn(_R0)) {
+    em_wp(_jit, _LBZX(r0, r1, r2));
+    return;
+  }
+
+  if (r2 != rn(_R0)) {
+    em_wp(_jit, _LBZX(r0, r2, r1));
+    return;
+  }
+
+  jit_gpr_t base = get_temp_gpr(_jit);
+  movr(_jit, rn(base), r1);
+  em_wp(_jit, _LBZX(r0, rn(base), r2));
+  unget_temp_gpr(_jit);
+}
+
+/*
+ * ldxi_uc: load into r0 from base r1 plus displacement i0.
+ * A zero displacement defers to ldr_uc(); a displacement that does not
+ * pass can_sign_extend_short_p() is loaded into a temporary and handled
+ * by ldxr_uc(); otherwise _LBZ is emitted, copying r1 to a temporary
+ * first when r1 is rn(_R0).
+ */
+static void
+ldxi_uc(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0)
+{
+  if (i0 == 0) {
+    ldr_uc(_jit, r0, r1);
+    return;
+  }
+
+  if (!can_sign_extend_short_p(i0)) {
+    jit_gpr_t offset = get_temp_gpr(_jit);
+    movi(_jit, rn(offset), i0);
+    ldxr_uc(_jit, r0, r1, rn(offset));
+    unget_temp_gpr(_jit);
+    return;
+  }
+
+  if (r1 != rn(_R0)) {
+    em_wp(_jit, _LBZ(r0, r1, i0));
+    return;
+  }
+
+  jit_gpr_t base = get_temp_gpr(_jit);
+  movr(_jit, rn(base), r1);
+  em_wp(_jit, _LBZ(r0, rn(base), i0));
+  unget_temp_gpr(_jit);
+}
+
+/* ldr_s: emit _LHAX with rn(_R0) in the first address slot and r1 in
+ * the second, loading into r0. */
+static void
+ldr_s(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+ em_wp(_jit, _LHAX(r0, rn(_R0), r1));
+}
+
+/*
+ * ldi_s: load into r0 from the absolute address i0 using _LHA.
+ * Three encodings are tried in order: a direct displacement off rn(_R0),
+ * a _LIS of the upper half into a temporary plus a 16-bit displacement,
+ * and finally a full movi() of the address followed by ldr_s().
+ */
+static void
+ldi_s(jit_state_t *_jit, int32_t r0, jit_word_t i0)
+{
+  if (can_sign_extend_short_p(i0)) {
+    em_wp(_jit, _LHA(r0, rn(_R0), i0));
+    return;
+  }
+
+  jit_gpr_t base = get_temp_gpr(_jit);
+  if (can_sign_extend_int_p(i0)) {
+    /* upper is rounded so that the signed lower half reconstructs i0. */
+    jit_word_t upper = (int16_t)((i0 >> 16) + ((uint16_t)i0 >> 15));
+    jit_word_t lower = (int16_t)(i0 - (upper << 16));
+    em_wp(_jit, _LIS(rn(base), upper));
+    em_wp(_jit, _LHA(r0, rn(base), lower));
+  } else {
+    movi(_jit, rn(base), i0);
+    ldr_s(_jit, r0, rn(base));
+  }
+  unget_temp_gpr(_jit);
+}
+
+/*
+ * ldxr_s: load into r0 with _LHAX using r1 and r2 as address operands.
+ * rn(_R0) is kept out of the first address slot: the operands are swapped
+ * when only r1 is rn(_R0), and r1 is copied to a temporary when both are.
+ */
+static void
+ldxr_s(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+{
+  if (r1 != rn(_R0)) {
+    em_wp(_jit, _LHAX(r0, r1, r2));
+    return;
+  }
+
+  if (r2 != rn(_R0)) {
+    em_wp(_jit, _LHAX(r0, r2, r1));
+    return;
+  }
+
+  jit_gpr_t base = get_temp_gpr(_jit);
+  movr(_jit, rn(base), r1);
+  em_wp(_jit, _LHAX(r0, rn(base), r2));
+  unget_temp_gpr(_jit);
+}
+
+/*
+ * ldxi_s: load into r0 from base r1 plus displacement i0.
+ * A zero displacement defers to ldr_s(); a displacement that does not
+ * pass can_sign_extend_short_p() is loaded into a temporary and handled
+ * by ldxr_s(); otherwise _LHA is emitted, copying r1 to a temporary
+ * first when r1 is rn(_R0).
+ */
+static void
+ldxi_s(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0)
+{
+  if (i0 == 0) {
+    ldr_s(_jit, r0, r1);
+    return;
+  }
+
+  if (!can_sign_extend_short_p(i0)) {
+    jit_gpr_t offset = get_temp_gpr(_jit);
+    movi(_jit, rn(offset), i0);
+    ldxr_s(_jit, r0, r1, rn(offset));
+    unget_temp_gpr(_jit);
+    return;
+  }
+
+  if (r1 != rn(_R0)) {
+    em_wp(_jit, _LHA(r0, r1, i0));
+    return;
+  }
+
+  jit_gpr_t base = get_temp_gpr(_jit);
+  movr(_jit, rn(base), r1);
+  em_wp(_jit, _LHA(r0, rn(base), i0));
+  unget_temp_gpr(_jit);
+}
+
+/* ldr_us: emit _LHZX with rn(_R0) in the first address slot and r1 in
+ * the second, loading into r0. */
+static void
+ldr_us(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+ em_wp(_jit, _LHZX(r0, rn(_R0), r1));
+}
+
+/*
+ * ldi_us: load into r0 from the absolute address i0 using _LHZ.
+ * Three encodings are tried in order: a direct displacement off rn(_R0),
+ * a _LIS of the upper half into a temporary plus a 16-bit displacement,
+ * and finally a full movi() of the address followed by ldr_us().
+ */
+static void
+ldi_us(jit_state_t *_jit, int32_t r0, jit_word_t i0)
+{
+  if (can_sign_extend_short_p(i0)) {
+    em_wp(_jit, _LHZ(r0, rn(_R0), i0));
+    return;
+  }
+
+  jit_gpr_t base = get_temp_gpr(_jit);
+  if (can_sign_extend_int_p(i0)) {
+    /* upper is rounded so that the signed lower half reconstructs i0. */
+    jit_word_t upper = (int16_t)((i0 >> 16) + ((uint16_t)i0 >> 15));
+    jit_word_t lower = (int16_t)(i0 - (upper << 16));
+    em_wp(_jit, _LIS(rn(base), upper));
+    em_wp(_jit, _LHZ(r0, rn(base), lower));
+  } else {
+    movi(_jit, rn(base), i0);
+    ldr_us(_jit, r0, rn(base));
+  }
+  unget_temp_gpr(_jit);
+}
+
+/*
+ * ldxr_us: load into r0 with _LHZX using r1 and r2 as address operands.
+ * rn(_R0) is kept out of the first address slot: the operands are swapped
+ * when only r1 is rn(_R0), and r1 is copied to a temporary when both are.
+ */
+static void
+ldxr_us(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+{
+  if (r1 != rn(_R0)) {
+    em_wp(_jit, _LHZX(r0, r1, r2));
+    return;
+  }
+
+  if (r2 != rn(_R0)) {
+    em_wp(_jit, _LHZX(r0, r2, r1));
+    return;
+  }
+
+  jit_gpr_t base = get_temp_gpr(_jit);
+  movr(_jit, rn(base), r1);
+  em_wp(_jit, _LHZX(r0, rn(base), r2));
+  unget_temp_gpr(_jit);
+}
+
+/*
+ * ldxi_us: load into r0 from base r1 plus displacement i0.
+ * A zero displacement defers to ldr_us(); a displacement that does not
+ * pass can_sign_extend_short_p() is loaded into a temporary and handled
+ * by ldxr_us(); otherwise _LHZ is emitted, copying r1 to a temporary
+ * first when r1 is rn(_R0).
+ */
+static void
+ldxi_us(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0)
+{
+  if (i0 == 0) {
+    ldr_us(_jit, r0, r1);
+    return;
+  }
+
+  if (!can_sign_extend_short_p(i0)) {
+    jit_gpr_t offset = get_temp_gpr(_jit);
+    movi(_jit, rn(offset), i0);
+    ldxr_us(_jit, r0, r1, rn(offset));
+    unget_temp_gpr(_jit);
+    return;
+  }
+
+  if (r1 != rn(_R0)) {
+    em_wp(_jit, _LHZ(r0, r1, i0));
+    return;
+  }
+
+  jit_gpr_t base = get_temp_gpr(_jit);
+  movr(_jit, rn(base), r1);
+  em_wp(_jit, _LHZ(r0, rn(base), i0));
+  unget_temp_gpr(_jit);
+}
+
+/* ldr_i: emit the _LDR_I(r0, r1) encoding; the macro is defined outside
+ * this view (presumably it selects the word load matching __WORDSIZE). */
+static void
+ldr_i(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+ em_wp(_jit, _LDR_I(r0, r1));
+}
+
+# if __WORDSIZE == 32
+/*
+ * ldi_i (32-bit build): load into r0 from the absolute address i0 using
+ * _LWZ.  Three encodings are tried in order: a direct displacement off
+ * rn(_R0), a _LIS of the upper half into a temporary plus a 16-bit
+ * displacement, and finally a full movi() of the address followed by
+ * ldr_i().
+ */
+static void
+ldi_i(jit_state_t *_jit, int32_t r0, jit_word_t i0)
+{
+  if (can_sign_extend_short_p(i0)) {
+    /* The encoding has to go through em_wp() like every other path;
+     * a bare LWZ(...) here never reached the instruction stream. */
+    em_wp(_jit, _LWZ(r0, rn(_R0), i0));
+  } else if (can_sign_extend_int_p(i0)) {
+    /* hi is rounded so that the signed lo half reconstructs i0. */
+    jit_word_t hi = (int16_t)((i0 >> 16) + ((uint16_t)i0 >> 15));
+    jit_word_t lo = (int16_t)(i0 - (hi << 16));
+    jit_gpr_t reg = get_temp_gpr(_jit);
+    em_wp(_jit, _LIS(rn(reg), hi));
+    em_wp(_jit, _LWZ(r0, rn(reg), lo));
+    unget_temp_gpr(_jit);
+  } else {
+    jit_gpr_t reg = get_temp_gpr(_jit);
+    movi(_jit, rn(reg), i0);
+    ldr_i(_jit, r0, rn(reg));
+    unget_temp_gpr(_jit);
+  }
+}
+
+/*
+ * ldxr_i (32-bit build): load into r0 with _LWZX using r1 and r2 as
+ * address operands.  rn(_R0) is kept out of the first address slot: the
+ * operands are swapped when only r1 is rn(_R0), and r1 is copied to a
+ * temporary when both are.
+ */
+static void
+ldxr_i(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+{
+  if (r1 != rn(_R0)) {
+    em_wp(_jit, _LWZX(r0, r1, r2));
+    return;
+  }
+
+  if (r2 != rn(_R0)) {
+    em_wp(_jit, _LWZX(r0, r2, r1));
+    return;
+  }
+
+  jit_gpr_t base = get_temp_gpr(_jit);
+  movr(_jit, rn(base), r1);
+  em_wp(_jit, _LWZX(r0, rn(base), r2));
+  unget_temp_gpr(_jit);
+}
+
+/*
+ * ldxi_i (32-bit build): load into r0 from base r1 plus displacement i0.
+ * A zero displacement defers to ldr_i(); a displacement that passes
+ * can_sign_extend_short_p() is encoded directly with _LWZ (copying r1 to
+ * a temporary first when r1 is rn(_R0)); anything larger is loaded into a
+ * temporary and handled by ldxr_i().
+ */
+static void
+ldxi_i(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0)
+{
+  if (i0 == 0) {
+    ldr_i(_jit, r0, r1);
+  } else if (can_sign_extend_short_p(i0)) {
+    /* Use _LWZ, the displacement-form word load used by ldi_i and
+     * ldxi_ui; the previous _LW name matches no other encoder here. */
+    if (r1 == rn(_R0)) {
+      jit_gpr_t reg = get_temp_gpr(_jit);
+      movr(_jit, rn(reg), r1);
+      em_wp(_jit, _LWZ(r0, rn(reg), i0));
+      unget_temp_gpr(_jit);
+    } else {
+      em_wp(_jit, _LWZ(r0, r1, i0));
+    }
+  } else {
+    jit_gpr_t reg = get_temp_gpr(_jit);
+    movi(_jit, rn(reg), i0);
+    ldxr_i(_jit, r0, r1, rn(reg));
+    unget_temp_gpr(_jit);
+  }
+}
+
+# else
+/*
+ * ldi_i (64-bit build): load into r0 from the absolute address i0 using
+ * _LWA.  Three encodings are tried in order: a direct displacement off
+ * rn(_R0), a _LIS of the upper half into a temporary plus a 16-bit
+ * displacement, and finally a full movi() of the address followed by
+ * ldr_i().
+ */
+static void
+ldi_i(jit_state_t *_jit, int32_t r0, jit_word_t i0)
+{
+  if (can_sign_extend_short_p(i0)) {
+    em_wp(_jit, _LWA(r0, rn(_R0), i0));
+    return;
+  }
+
+  jit_gpr_t base = get_temp_gpr(_jit);
+  if (can_sign_extend_int_p(i0)) {
+    /* upper is rounded so that the signed lower half reconstructs i0. */
+    jit_word_t upper = (int16_t)((i0 >> 16) + ((uint16_t)i0 >> 15));
+    jit_word_t lower = (int16_t)(i0 - (upper << 16));
+    em_wp(_jit, _LIS(rn(base), upper));
+    em_wp(_jit, _LWA(r0, rn(base), lower));
+  } else {
+    movi(_jit, rn(base), i0);
+    ldr_i(_jit, r0, rn(base));
+  }
+  unget_temp_gpr(_jit);
+}
+
+/*
+ * ldxr_i (64-bit build): load into r0 with _LWAX using r1 and r2 as
+ * address operands.  rn(_R0) is kept out of the first address slot: the
+ * operands are swapped when only r1 is rn(_R0), and r1 is copied to a
+ * temporary when both are.
+ */
+static void
+ldxr_i(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+{
+  if (r1 != rn(_R0)) {
+    em_wp(_jit, _LWAX(r0, r1, r2));
+    return;
+  }
+
+  if (r2 != rn(_R0)) {
+    em_wp(_jit, _LWAX(r0, r2, r1));
+    return;
+  }
+
+  jit_gpr_t base = get_temp_gpr(_jit);
+  movr(_jit, rn(base), r1);
+  em_wp(_jit, _LWAX(r0, rn(base), r2));
+  unget_temp_gpr(_jit);
+}
+
+/*
+ * ldxi_i (64-bit build): load into r0 from base r1 plus displacement i0.
+ * A zero displacement defers to ldr_i(); a displacement that does not
+ * pass can_sign_extend_short_p() is loaded into a temporary and handled
+ * by ldxr_i(); otherwise _LWA is emitted, copying r1 to a temporary
+ * first when r1 is rn(_R0).
+ */
+static void
+ldxi_i(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0)
+{
+  if (i0 == 0) {
+    ldr_i(_jit, r0, r1);
+    return;
+  }
+
+  if (!can_sign_extend_short_p(i0)) {
+    jit_gpr_t offset = get_temp_gpr(_jit);
+    movi(_jit, rn(offset), i0);
+    ldxr_i(_jit, r0, r1, rn(offset));
+    unget_temp_gpr(_jit);
+    return;
+  }
+
+  if (r1 != rn(_R0)) {
+    em_wp(_jit, _LWA(r0, r1, i0));
+    return;
+  }
+
+  jit_gpr_t base = get_temp_gpr(_jit);
+  movr(_jit, rn(base), r1);
+  em_wp(_jit, _LWA(r0, rn(base), i0));
+  unget_temp_gpr(_jit);
+}
+
+/* ldr_ui (64-bit build): emit _LWZX with rn(_R0) in the first address
+ * slot and r1 in the second, loading into r0. */
+static void
+ldr_ui(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+ em_wp(_jit, _LWZX(r0, rn(_R0), r1));
+}
+
+/*
+ * ldi_ui (64-bit build): load into r0 from the absolute address i0 using
+ * _LWZ.  Three encodings are tried in order: a direct displacement off
+ * rn(_R0), a _LIS of the upper half into a temporary plus a 16-bit
+ * displacement, and finally a full movi() of the address followed by
+ * ldr_ui().
+ */
+static void
+ldi_ui(jit_state_t *_jit, int32_t r0, jit_word_t i0)
+{
+  if (can_sign_extend_short_p(i0)) {
+    em_wp(_jit, _LWZ(r0, rn(_R0), i0));
+    return;
+  }
+
+  jit_gpr_t base = get_temp_gpr(_jit);
+  if (can_sign_extend_int_p(i0)) {
+    /* upper is rounded so that the signed lower half reconstructs i0. */
+    jit_word_t upper = (int16_t)((i0 >> 16) + ((uint16_t)i0 >> 15));
+    jit_word_t lower = (int16_t)(i0 - (upper << 16));
+    em_wp(_jit, _LIS(rn(base), upper));
+    em_wp(_jit, _LWZ(r0, rn(base), lower));
+  } else {
+    movi(_jit, rn(base), i0);
+    ldr_ui(_jit, r0, rn(base));
+  }
+  unget_temp_gpr(_jit);
+}
+
+/*
+ * ldxr_ui (64-bit build): load into r0 with _LWZX using r1 and r2 as
+ * address operands.  rn(_R0) is kept out of the first address slot: the
+ * operands are swapped when only r1 is rn(_R0), and r1 is copied to a
+ * temporary when both are.
+ */
+static void
+ldxr_ui(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+{
+  if (r1 != rn(_R0)) {
+    em_wp(_jit, _LWZX(r0, r1, r2));
+    return;
+  }
+
+  if (r2 != rn(_R0)) {
+    em_wp(_jit, _LWZX(r0, r2, r1));
+    return;
+  }
+
+  jit_gpr_t base = get_temp_gpr(_jit);
+  movr(_jit, rn(base), r1);
+  em_wp(_jit, _LWZX(r0, rn(base), r2));
+  unget_temp_gpr(_jit);
+}
+
+/*
+ * ldxi_ui (64-bit build): load into r0 from base r1 plus displacement i0
+ * using the _LWZ family.  A zero displacement defers to ldr_ui(); a
+ * displacement that passes can_sign_extend_short_p() is encoded directly
+ * with _LWZ (copying r1 to a temporary first when r1 is rn(_R0));
+ * anything larger is loaded into a temporary and handled by ldxr_ui().
+ */
+static void
+ldxi_ui(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0)
+{
+  if (i0 == 0) {
+    /* Must be the _ui loader: ldr_i() emits a different encoding than
+     * the _LWZ/_LWZX used by every other path of this function, so the
+     * zero-offset case would otherwise behave differently. */
+    ldr_ui(_jit, r0, r1);
+  } else if (can_sign_extend_short_p(i0)) {
+    if (r1 == rn(_R0)) {
+      jit_gpr_t reg = get_temp_gpr(_jit);
+      movr(_jit, rn(reg), r1);
+      em_wp(_jit, _LWZ(r0, rn(reg), i0));
+      unget_temp_gpr(_jit);
+    } else {
+      em_wp(_jit, _LWZ(r0, r1, i0));
+    }
+  } else {
+    jit_gpr_t reg = get_temp_gpr(_jit);
+    movi(_jit, rn(reg), i0);
+    ldxr_ui(_jit, r0, r1, rn(reg));
+    unget_temp_gpr(_jit);
+  }
+}
+
+/* ldr_l (64-bit build): emit _LDX with rn(_R0) in the first address slot
+ * and r1 in the second, loading into r0. */
+static void
+ldr_l(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+ em_wp(_jit, _LDX(r0, rn(_R0), r1));
+}
+
+/*
+ * ldi_l (64-bit build): load into r0 from the absolute address i0 using
+ * _LD.  Three encodings are tried in order: a direct displacement off
+ * rn(_R0), a _LIS of the upper half into a temporary plus a 16-bit
+ * displacement, and finally a full movi() of the address followed by
+ * ldr_l().
+ */
+static void
+ldi_l(jit_state_t *_jit, int32_t r0, jit_word_t i0)
+{
+  if (can_sign_extend_short_p(i0)) {
+    em_wp(_jit, _LD(r0, rn(_R0), i0));
+    return;
+  }
+
+  jit_gpr_t base = get_temp_gpr(_jit);
+  if (can_sign_extend_int_p(i0)) {
+    /* upper is rounded so that the signed lower half reconstructs i0. */
+    jit_word_t upper = (int16_t)((i0 >> 16) + ((uint16_t)i0 >> 15));
+    jit_word_t lower = (int16_t)(i0 - (upper << 16));
+    em_wp(_jit, _LIS(rn(base), upper));
+    em_wp(_jit, _LD(r0, rn(base), lower));
+  } else {
+    movi(_jit, rn(base), i0);
+    ldr_l(_jit, r0, rn(base));
+  }
+  unget_temp_gpr(_jit);
+}
+
+/*
+ * ldxr_l (64-bit build): load into r0 with _LDX using r1 and r2 as
+ * address operands.  rn(_R0) is kept out of the first address slot: the
+ * operands are swapped when only r1 is rn(_R0), and r1 is copied to a
+ * temporary when both are.
+ */
+static void
+ldxr_l(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+{
+  if (r1 != rn(_R0)) {
+    em_wp(_jit, _LDX(r0, r1, r2));
+    return;
+  }
+
+  if (r2 != rn(_R0)) {
+    em_wp(_jit, _LDX(r0, r2, r1));
+    return;
+  }
+
+  jit_gpr_t base = get_temp_gpr(_jit);
+  movr(_jit, rn(base), r1);
+  em_wp(_jit, _LDX(r0, rn(base), r2));
+  unget_temp_gpr(_jit);
+}
+
+/*
+ * ldxi_l (64-bit build): load into r0 from base r1 plus displacement i0.
+ * A zero displacement defers to ldr_l(); a displacement that does not
+ * pass can_sign_extend_short_p() is loaded into a temporary and handled
+ * by ldxr_l(); otherwise _LD is emitted, copying r1 to a temporary first
+ * when r1 is rn(_R0).
+ */
+static void
+ldxi_l(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0)
+{
+  if (i0 == 0) {
+    ldr_l(_jit, r0, r1);
+    return;
+  }
+
+  if (!can_sign_extend_short_p(i0)) {
+    jit_gpr_t offset = get_temp_gpr(_jit);
+    movi(_jit, rn(offset), i0);
+    ldxr_l(_jit, r0, r1, rn(offset));
+    unget_temp_gpr(_jit);
+    return;
+  }
+
+  if (r1 != rn(_R0)) {
+    em_wp(_jit, _LD(r0, r1, i0));
+    return;
+  }
+
+  jit_gpr_t base = get_temp_gpr(_jit);
+  movr(_jit, rn(base), r1);
+  em_wp(_jit, _LD(r0, rn(base), i0));
+  unget_temp_gpr(_jit);
+}
+# endif
+
+/* str_c: emit _STBX storing r1, with rn(_R0) in the first address slot
+ * and r0 (the address register) in the second. */
+static void
+str_c(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+ em_wp(_jit, _STBX(r1, rn(_R0), r0));
+}
+
+/*
+ * sti_c: store r0 to the absolute address i0 using _STB.
+ * Three encodings are tried in order: a direct displacement off rn(_R0),
+ * a _LIS of the upper half into a temporary plus a 16-bit displacement,
+ * and finally a full movi() of the address followed by str_c().
+ */
+static void
+sti_c(jit_state_t *_jit, jit_word_t i0, int32_t r0)
+{
+  if (can_sign_extend_short_p(i0)) {
+    em_wp(_jit, _STB(r0, rn(_R0), i0));
+    return;
+  }
+
+  jit_gpr_t base = get_temp_gpr(_jit);
+  if (can_sign_extend_int_p(i0)) {
+    /* upper is rounded so that the signed lower half reconstructs i0. */
+    jit_word_t upper = (int16_t)((i0 >> 16) + ((uint16_t)i0 >> 15));
+    jit_word_t lower = (int16_t)(i0 - (upper << 16));
+    em_wp(_jit, _LIS(rn(base), upper));
+    em_wp(_jit, _STB(r0, rn(base), lower));
+  } else {
+    movi(_jit, rn(base), i0);
+    str_c(_jit, rn(base), r0);
+  }
+  unget_temp_gpr(_jit);
+}
+
+/*
+ * stxr_c: store r2 with _STBX using r0 and r1 as address operands.
+ * rn(_R0) is kept out of the first address slot: the operands are swapped
+ * when only r0 is rn(_R0), and r0 is copied to a temporary when both are.
+ */
+static void
+stxr_c(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+{
+  if (r0 != rn(_R0)) {
+    em_wp(_jit, _STBX(r2, r0, r1));
+    return;
+  }
+
+  if (r1 != rn(_R0)) {
+    em_wp(_jit, _STBX(r2, r1, r0));
+    return;
+  }
+
+  jit_gpr_t base = get_temp_gpr(_jit);
+  movr(_jit, rn(base), r0);
+  em_wp(_jit, _STBX(r2, rn(base), r1));
+  unget_temp_gpr(_jit);
+}
+
+/*
+ * stxi_c: store r1 to base r0 plus displacement i0.
+ * A zero displacement defers to str_c(); a displacement that passes
+ * can_sign_extend_short_p() is encoded directly with _STB (copying the
+ * base to a temporary first when r0 is rn(_R0)); anything larger is
+ * loaded into a temporary and handled by stxr_c().
+ */
+static void
+stxi_c(jit_state_t *_jit, jit_word_t i0, int32_t r0, int32_t r1)
+{
+  if (i0 == 0) {
+    str_c(_jit, r0, r1);
+  } else if (can_sign_extend_short_p(i0)) {
+    if (r0 == rn(_R0)) {
+      /* Copy the base register out of rn(_R0).  The move source is r0;
+       * passing the displacement i0 here would treat an offset as a
+       * register number. */
+      jit_gpr_t reg = get_temp_gpr(_jit);
+      movr(_jit, rn(reg), r0);
+      em_wp(_jit, _STB(r1, rn(reg), i0));
+      unget_temp_gpr(_jit);
+    } else {
+      em_wp(_jit, _STB(r1, r0, i0));
+    }
+  } else {
+    jit_gpr_t reg = get_temp_gpr(_jit);
+    movi(_jit, rn(reg), i0);
+    stxr_c(_jit, rn(reg), r0, r1);
+    unget_temp_gpr(_jit);
+  }
+}
+
+/* str_s: emit _STHX storing r1, with rn(_R0) in the first address slot
+ * and r0 (the address register) in the second. */
+static void
+str_s(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+ em_wp(_jit, _STHX(r1, rn(_R0), r0));
+}
+
+/*
+ * sti_s: store r0 to the absolute address i0 using _STH.
+ * Three encodings are tried in order: a direct displacement off rn(_R0),
+ * a _LIS of the upper half into a temporary plus a 16-bit displacement,
+ * and finally a full movi() of the address followed by str_s().
+ */
+static void
+sti_s(jit_state_t *_jit, jit_word_t i0, int32_t r0)
+{
+  if (can_sign_extend_short_p(i0)) {
+    em_wp(_jit, _STH(r0, rn(_R0), i0));
+    return;
+  }
+
+  jit_gpr_t base = get_temp_gpr(_jit);
+  if (can_sign_extend_int_p(i0)) {
+    /* upper is rounded so that the signed lower half reconstructs i0. */
+    jit_word_t upper = (int16_t)((i0 >> 16) + ((uint16_t)i0 >> 15));
+    jit_word_t lower = (int16_t)(i0 - (upper << 16));
+    em_wp(_jit, _LIS(rn(base), upper));
+    em_wp(_jit, _STH(r0, rn(base), lower));
+  } else {
+    movi(_jit, rn(base), i0);
+    str_s(_jit, rn(base), r0);
+  }
+  unget_temp_gpr(_jit);
+}
+
+/*
+ * stxr_s: store r2 with _STHX using r0 and r1 as address operands.
+ * rn(_R0) is kept out of the first address slot: the operands are swapped
+ * when only r0 is rn(_R0), and r0 is copied to a temporary when both are.
+ */
+static void
+stxr_s(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+{
+  if (r0 != rn(_R0)) {
+    em_wp(_jit, _STHX(r2, r0, r1));
+    return;
+  }
+
+  if (r1 != rn(_R0)) {
+    em_wp(_jit, _STHX(r2, r1, r0));
+    return;
+  }
+
+  jit_gpr_t base = get_temp_gpr(_jit);
+  movr(_jit, rn(base), r0);
+  em_wp(_jit, _STHX(r2, rn(base), r1));
+  unget_temp_gpr(_jit);
+}
+
+/*
+ * stxi_s: store r1 to base r0 plus displacement i0.
+ * A zero displacement defers to str_s(); a displacement that passes
+ * can_sign_extend_short_p() is encoded directly with _STH (copying the
+ * base to a temporary first when r0 is rn(_R0)); anything larger is
+ * loaded into a temporary and handled by stxr_s().
+ */
+static void
+stxi_s(jit_state_t *_jit, jit_word_t i0, int32_t r0, int32_t r1)
+{
+  if (i0 == 0) {
+    str_s(_jit, r0, r1);
+  } else if (can_sign_extend_short_p(i0)) {
+    if (r0 == rn(_R0)) {
+      /* Copy the base register out of rn(_R0).  The move source is r0;
+       * passing the displacement i0 here would treat an offset as a
+       * register number. */
+      jit_gpr_t reg = get_temp_gpr(_jit);
+      movr(_jit, rn(reg), r0);
+      em_wp(_jit, _STH(r1, rn(reg), i0));
+      unget_temp_gpr(_jit);
+    } else {
+      em_wp(_jit, _STH(r1, r0, i0));
+    }
+  } else {
+    jit_gpr_t reg = get_temp_gpr(_jit);
+    movi(_jit, rn(reg), i0);
+    stxr_s(_jit, rn(reg), r0, r1);
+    unget_temp_gpr(_jit);
+  }
+}
+
+/* str_i: emit _STWX storing r1, with rn(_R0) in the first address slot
+ * and r0 (the address register) in the second. */
+static void
+str_i(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+ em_wp(_jit, _STWX(r1, rn(_R0), r0));
+}
+
+/*
+ * sti_i: store r0 to the absolute address i0 using _STW.
+ * Three encodings are tried in order: a direct displacement off rn(_R0),
+ * a _LIS of the upper half into a temporary plus a 16-bit displacement,
+ * and finally a full movi() of the address followed by str_i().
+ */
+static void
+sti_i(jit_state_t *_jit, jit_word_t i0, int32_t r0)
+{
+  if (can_sign_extend_short_p(i0)) {
+    em_wp(_jit, _STW(r0, rn(_R0), i0));
+    return;
+  }
+
+  jit_gpr_t base = get_temp_gpr(_jit);
+  if (can_sign_extend_int_p(i0)) {
+    /* upper is rounded so that the signed lower half reconstructs i0. */
+    jit_word_t upper = (int16_t)((i0 >> 16) + ((uint16_t)i0 >> 15));
+    jit_word_t lower = (int16_t)(i0 - (upper << 16));
+    em_wp(_jit, _LIS(rn(base), upper));
+    em_wp(_jit, _STW(r0, rn(base), lower));
+  } else {
+    movi(_jit, rn(base), i0);
+    str_i(_jit, rn(base), r0);
+  }
+  unget_temp_gpr(_jit);
+}
+
+/*
+ * stxr_i: store r2 with _STWX using r0 and r1 as address operands.
+ * rn(_R0) is kept out of the first address slot: the operands are swapped
+ * when only r0 is rn(_R0), and r0 is copied to a temporary when both are.
+ */
+static void
+stxr_i(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+{
+  if (r0 != rn(_R0)) {
+    em_wp(_jit, _STWX(r2, r0, r1));
+    return;
+  }
+
+  if (r1 != rn(_R0)) {
+    em_wp(_jit, _STWX(r2, r1, r0));
+    return;
+  }
+
+  jit_gpr_t base = get_temp_gpr(_jit);
+  movr(_jit, rn(base), r0);
+  em_wp(_jit, _STWX(r2, rn(base), r1));
+  unget_temp_gpr(_jit);
+}
+
+/*
+ * stxi_i: store r1 to base r0 plus displacement i0.
+ * A zero displacement defers to str_i(); a displacement that passes
+ * can_sign_extend_short_p() is encoded directly with _STW (copying the
+ * base to a temporary first when r0 is rn(_R0)); anything larger is
+ * loaded into a temporary and handled by stxr_i().
+ */
+static void
+stxi_i(jit_state_t *_jit, jit_word_t i0, int32_t r0, int32_t r1)
+{
+  if (i0 == 0) {
+    str_i(_jit, r0, r1);
+  } else if (can_sign_extend_short_p(i0)) {
+    if (r0 == rn(_R0)) {
+      /* Copy the base register out of rn(_R0).  The move source is r0;
+       * passing the displacement i0 here would treat an offset as a
+       * register number. */
+      jit_gpr_t reg = get_temp_gpr(_jit);
+      movr(_jit, rn(reg), r0);
+      em_wp(_jit, _STW(r1, rn(reg), i0));
+      unget_temp_gpr(_jit);
+    } else {
+      em_wp(_jit, _STW(r1, r0, i0));
+    }
+  } else {
+    jit_gpr_t reg = get_temp_gpr(_jit);
+    movi(_jit, rn(reg), i0);
+    stxr_i(_jit, rn(reg), r0, r1);
+    unget_temp_gpr(_jit);
+  }
+}
+
+# if __WORDSIZE == 64
+/* str_l (64-bit build): emit _STDX storing r1, with rn(_R0) in the first
+ * address slot and r0 (the address register) in the second. */
+static void
+str_l(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+ em_wp(_jit, _STDX(r1, rn(_R0), r0));
+}
+
+/*
+ * sti_l (64-bit build): store r0 to the absolute address i0 using _STD.
+ * Three encodings are tried in order: a direct displacement off rn(_R0),
+ * a _LIS of the upper half into a temporary plus a 16-bit displacement,
+ * and finally a full movi() of the address followed by str_l().
+ */
+static void
+sti_l(jit_state_t *_jit, jit_word_t i0, int32_t r0)
+{
+  if (can_sign_extend_short_p(i0)) {
+    em_wp(_jit, _STD(r0, rn(_R0), i0));
+    return;
+  }
+
+  jit_gpr_t base = get_temp_gpr(_jit);
+  if (can_sign_extend_int_p(i0)) {
+    /* upper is rounded so that the signed lower half reconstructs i0. */
+    jit_word_t upper = (int16_t)((i0 >> 16) + ((uint16_t)i0 >> 15));
+    jit_word_t lower = (int16_t)(i0 - (upper << 16));
+    em_wp(_jit, _LIS(rn(base), upper));
+    em_wp(_jit, _STD(r0, rn(base), lower));
+  } else {
+    movi(_jit, rn(base), i0);
+    str_l(_jit, rn(base), r0);
+  }
+  unget_temp_gpr(_jit);
+}
+
+/*
+ * stxr_l (64-bit build): store r2 with _STDX using r0 and r1 as address
+ * operands.  rn(_R0) is kept out of the first address slot: the operands
+ * are swapped when only r0 is rn(_R0), and r0 is copied to a temporary
+ * when both are.
+ */
+static void
+stxr_l(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+{
+  if (r0 != rn(_R0)) {
+    em_wp(_jit, _STDX(r2, r0, r1));
+    return;
+  }
+
+  if (r1 != rn(_R0)) {
+    em_wp(_jit, _STDX(r2, r1, r0));
+    return;
+  }
+
+  jit_gpr_t base = get_temp_gpr(_jit);
+  movr(_jit, rn(base), r0);
+  em_wp(_jit, _STDX(r2, rn(base), r1));
+  unget_temp_gpr(_jit);
+}
+
+/*
+ * stxi_l (64-bit build): store r1 to base r0 plus displacement i0.
+ * A zero displacement defers to str_l(); a displacement that passes
+ * can_sign_extend_short_p() is encoded directly with _STD (copying the
+ * base to a temporary first when r0 is rn(_R0)); anything larger is
+ * loaded into a temporary and handled by stxr_l().
+ */
+static void
+stxi_l(jit_state_t *_jit, jit_word_t i0, int32_t r0, int32_t r1)
+{
+  if (i0 == 0) {
+    str_l(_jit, r0, r1);
+  } else if (can_sign_extend_short_p(i0)) {
+    if (r0 == rn(_R0)) {
+      /* Copy the base register out of rn(_R0).  The move source is r0;
+       * passing the displacement i0 here would treat an offset as a
+       * register number. */
+      jit_gpr_t reg = get_temp_gpr(_jit);
+      movr(_jit, rn(reg), r0);
+      em_wp(_jit, _STD(r1, rn(reg), i0));
+      unget_temp_gpr(_jit);
+    } else {
+      em_wp(_jit, _STD(r1, r0, i0));
+    }
+  } else {
+    jit_gpr_t reg = get_temp_gpr(_jit);
+    movi(_jit, rn(reg), i0);
+    stxr_l(_jit, rn(reg), r0, r1);
+    unget_temp_gpr(_jit);
+  }
+}
+# endif
+
+/* jmpr: indirect jump to the address in r0, emitted as _MTCTR(r0)
+ * followed by _BCTR().  Uses emit_u32() rather than em_wp(). */
+static void
+jmpr(jit_state_t *_jit, int32_t r0)
+{
+ emit_u32(_jit, _MTCTR(r0));
+ emit_u32(_jit, _BCTR());
+}
+
+/* jmpr_with_link: copy r0 into rn(_R12) (unless it is already there) and
+ * into rn(JIT_LR), then emit _MTCTR(rn(_R12)) and _BCTRL().
+ * NOTE(review): the purpose of the _MR into JIT_LR is not evident from
+ * this function alone; confirm against the callers. */
+static void
+jmpr_with_link(jit_state_t *_jit, int32_t r0)
+{
+ // Some kind of linking stuff?
+ if (r0 != rn(_R12))
+ emit_u32(_jit, _MR(rn(_R12), r0));
+
+ emit_u32(_jit, _MR(rn(JIT_LR), r0));
+ emit_u32(_jit, _MTCTR(rn(_R12)));
+ emit_u32(_jit, _BCTRL());
+}
+
+/* jmpi_with_link: load the immediate address i0 into rn(_R12) and
+ * delegate to jmpr_with_link(). */
+static void
+jmpi_with_link(jit_state_t *_jit, jit_word_t i0)
+{
+ movi(_jit, rn(_R12), i0);
+ jmpr_with_link(_jit, rn(_R12));
+}
+
+/*
+ * jmpi: jump to the immediate address i0 by loading it into a temporary
+ * GPR and delegating to jmpr().
+ */
+static void
+jmpi(jit_state_t *_jit, jit_word_t i0)
+{
+  jit_gpr_t target = get_temp_gpr(_jit);
+
+  movi(_jit, rn(target), i0);
+  jmpr(_jit, rn(target));
+
+  unget_temp_gpr(_jit);
+}
+
+/* jmp: emit an unconditional branch built from _B(0) and return the
+ * relocation produced by emit_jump(). */
+static jit_reloc_t
+jmp(jit_state_t *_jit)
+{
+ return emit_jump(_jit, _B(0));
+}
+
+/* build_tmp_frame: store JIT_FP, JIT_LR and _R2 at offsets 0, 16 and 24
+ * from JIT_SP (via _STX), then copy JIT_SP into JIT_FP.
+ * NOTE(review): the offsets look like an ABI-defined frame layout;
+ * confirm which ABI this is meant to follow. */
+static void
+build_tmp_frame(jit_state_t *_jit)
+{
+ emit_u32(_jit, _STX(rn(JIT_FP), rn(JIT_SP), 0));
+ emit_u32(_jit, _STX(rn(JIT_LR), rn(JIT_SP), 16));
+ emit_u32(_jit, _STX(rn(_R2), rn(JIT_SP), 24));
+ emit_u32(_jit, _MR(rn(JIT_FP), rn(JIT_SP)));
+}
+
+/* destroy_tmp_frame: reload _R2 and JIT_FP from offsets 24 and 0 of
+ * JIT_SP (via _LXX), the slots written by build_tmp_frame().  The slot
+ * at offset 16 (JIT_LR) is not reloaded here. */
+static void
+destroy_tmp_frame(jit_state_t *_jit)
+{
+ emit_u32(_jit, _LXX(rn(_R2), rn(JIT_SP), 24));
+ emit_u32(_jit, _LXX(rn(JIT_FP), rn(JIT_SP), 0));
+}
+
+// Heavily assumes prepare_call_args() has been called beforehand
+//
+// callr: call the function whose address is in r0.  The call is wrapped
+// in build_tmp_frame()/destroy_tmp_frame(); the target is moved into
+// rn(_R12) if it is not already there and reached through _MTCTR/_BCTRL.
+static void
+callr(jit_state_t *_jit, int32_t r0)
+{
+  build_tmp_frame(_jit);
+
+  if (rn(_R12) != r0) {
+    emit_u32(_jit, _MR(rn(_R12), r0));
+  }
+  emit_u32(_jit, _MTCTR(rn(_R12)));
+  emit_u32(_jit, _BCTRL());
+
+  destroy_tmp_frame(_jit);
+}
+
+/* assume fixed address or reachable address */
+/* calli: load the immediate address i0 into rn(_R12) and delegate to
+ * callr(). */
+static void
+calli(jit_state_t *_jit, jit_word_t i0)
+{
+ movi(_jit, rn(_R12), i0);
+ callr(_jit, rn(_R12));
+}
+
+/* ret: emit _BLR(). */
+static void
+ret(jit_state_t *_jit)
+{
+ em_wp(_jit, _BLR());
+}
+
+/*
+ * retr: return the value held in register u.  The value is moved into
+ * JIT_RET unless it is already there, then ret() is emitted.
+ */
+static void
+retr(jit_state_t *_jit, int32_t u)
+{
+  if (u != rn(JIT_RET)) {
+    movr(_jit, rn(JIT_RET), u);
+  }
+
+  ret(_jit);
+}
+
+/* reti: load the immediate u into JIT_RET and emit ret(). */
+static void
+reti(jit_state_t *_jit, jit_word_t u)
+{
+ movi(_jit, rn(JIT_RET), u);
+ ret(_jit);
+}
+
+
+/* retval_c: copy JIT_RET into r0 through extr_c(). */
+static void
+retval_c(jit_state_t *_jit, int32_t r0)
+{
+ extr_c(_jit, r0, rn(JIT_RET));
+}
+
+/* retval_uc: copy JIT_RET into r0 through extr_uc(). */
+static void
+retval_uc(jit_state_t *_jit, int32_t r0)
+{
+ extr_uc(_jit, r0, rn(JIT_RET));
+}
+
+/* retval_s: copy JIT_RET into r0 through extr_s(). */
+static void
+retval_s(jit_state_t *_jit, int32_t r0)
+{
+ extr_s(_jit, r0, rn(JIT_RET));
+}
+
+/* retval_us: copy JIT_RET into r0 through extr_us(). */
+static void
+retval_us(jit_state_t *_jit, int32_t r0)
+{
+ extr_us(_jit, r0, rn(JIT_RET));
+}
+
+/*
+ * retval_i: fetch the return value from JIT_RET into r0.  On 64-bit
+ * builds the value goes through extr_i(); on 32-bit builds it is a plain
+ * move, skipped when r0 already is JIT_RET.
+ */
+static void
+retval_i(jit_state_t *_jit, int32_t r0)
+{
+#if __WORDSIZE != 32
+  extr_i(_jit, r0, rn(JIT_RET));
+#else
+  if (rn(JIT_RET) != r0) {
+    movr(_jit, r0, rn(JIT_RET));
+  }
+#endif
+}
+
+#if __WORDSIZE == 64
+/* retval_ui (64-bit build): copy JIT_RET into r0 through extr_ui(). */
+static void
+retval_ui(jit_state_t *_jit, int32_t r0)
+{
+ extr_ui(_jit, r0, rn(JIT_RET));
+}
+
+/*
+ * retval_l (64-bit build): move JIT_RET into r0, skipping the move when
+ * r0 already is JIT_RET.
+ */
+static void
+retval_l(jit_state_t *_jit, int32_t r0)
+{
+  if (rn(JIT_RET) != r0) {
+    movr(_jit, r0, rn(JIT_RET));
+  }
+}
+#endif
+
+/* ldr_atomic: load from the address in r1 into r0, bracketed by
+ * _HWSYNC() before and _ISYNC() after.  Between the load and _ISYNC a
+ * compare of r0 with itself feeds a _BNE that is patched to the very
+ * next instruction, so the branch never changes control flow
+ * (presumably it only creates a dependency on the loaded value, as in
+ * the usual PowerPC load-acquire sequence; confirm). */
+static void
+ldr_atomic(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+ emit_u32(_jit, _HWSYNC());
+ emit_u32(_jit, _LXX(r0, r1, 0));
+ emit_u32(_jit, _CMPX(r0, r0));
+ jit_reloc_t w = emit_atomic_jump(_jit, _BNE(0));
+ jit_patch_here(_jit, w);
+ emit_u32(_jit, _ISYNC());
+}
+
+/* str_atomic: emit _HWSYNC() and then store r1 at offset 0 from the
+ * address in r0 (via _STX). */
+static void
+str_atomic(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+ emit_u32(_jit, _HWSYNC());
+ emit_u32(_jit, _STX(r1, r0, 0));
+}
+
+/* swap_atomic: exchange the word at the address in r1 with r2 and leave
+ * the previous memory contents in r0.  The sequence is _HWSYNC, a
+ * _LXARX/_STXCX pair retried (via _BNE back to the load) until the
+ * conditional store succeeds, then _ISYNC. */
+static void
+swap_atomic(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+{
+ // if r0 == r1, we might overwrite something if we didn't use temporaries
+ jit_gpr_t t0 = get_temp_gpr(_jit);
+
+ emit_u32(_jit, _HWSYNC());
+ /* Retry target: address of the reserved load. */
+ jit_pointer_t a = jit_address(_jit);
+ emit_u32(_jit, _LXARX(rn(t0), r1));
+ emit_u32(_jit, _STXCX(r2, r1));
+ jit_reloc_t w = emit_atomic_jump(_jit, _BNE(0));
+ jit_patch_there(_jit, w, a);
+ emit_u32(_jit, _ISYNC());
+ /* Publish the old value only after the loop has completed. */
+ movr(_jit, r0, rn(t0));
+
+ unget_temp_gpr(_jit);
+}
+
+/*
+ * cas_atomic: compare-and-swap on the word at the address in r1.
+ * The current contents are loaded with _LXARX and compared with r2; when
+ * they match, r3 is stored with _STXCX, retrying from the load if the
+ * conditional store fails.  r0 receives the value that was loaded from
+ * memory, mirroring swap_atomic().
+ */
+static void
+cas_atomic(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2, int32_t r3)
+{
+  /* The loaded value lives in a temporary so r0 may alias r1, r2 or r3. */
+  jit_gpr_t t0 = get_temp_gpr(_jit);
+
+  emit_u32(_jit, _HWSYNC());
+  jit_pointer_t loop = jit_address(_jit);
+  emit_u32(_jit, _LXARX(rn(t0), r1));
+  emit_u32(_jit, _CMPX(r2, rn(t0)));
+
+  /* Contents differ from r2: skip the store. */
+  jit_reloc_t s = emit_atomic_jump(_jit, _BNE(0));
+
+  emit_u32(_jit, _STXCX(r3, r1));
+
+  /* Conditional store failed: go back to the reserved load. */
+  jit_reloc_t w = emit_atomic_jump(_jit, _BNE(0));
+
+  jit_patch_here(_jit, s);
+
+  jit_patch_there(_jit, w, loop);
+
+  emit_u32(_jit, _ISYNC());
+  /* Return the observed memory value, not the address held in r1;
+   * previously t0 was loaded but its contents were discarded. */
+  movr(_jit, r0, rn(t0));
+  unget_temp_gpr(_jit);
+}
+
+/* pop_link_register: emit _MFLR(rn(_R0)), i.e. copy the link register
+ * into rn(_R0).
+ * NOTE(review): no memory access happens here despite the "pop" name;
+ * confirm the intended pairing with push_link_register(). */
+static void
+pop_link_register(jit_state_t *_jit)
+{
+ em_wp(_jit, _MFLR(rn(_R0)));
+}
+
+/* push_link_register: emit _MTLR(rn(_R0)), i.e. copy rn(_R0) into the
+ * link register.
+ * NOTE(review): no memory access happens here despite the "push" name;
+ * confirm the intended pairing with pop_link_register(). */
+static void
+push_link_register(jit_state_t *_jit)
+{
+ em_wp(_jit, _MTLR(rn(_R0)));
+}
+
+/* breakpoint: emits nothing on this backend; _jit is only cast to void
+ * to silence the unused-parameter warning. */
+static void
+breakpoint(jit_state_t *_jit)
+{
+ (void)_jit;
+}
diff --git a/deps/lightening/lightening/ppc-fpu.c b/deps/lightening/lightening/ppc-fpu.c
new file mode 100644
index 0000000..392e9ea
--- /dev/null
+++ b/deps/lightening/lightening/ppc-fpu.c
@@ -0,0 +1,935 @@
+/*
+ * Copyright (C) 2012-2017 Free Software Foundation, Inc.
+ *
+ * This file is part of GNU lightning.
+ *
+ * GNU lightning is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ *
+ * GNU lightning is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+ * License for more details.
+ *
+ * Authors:
+ * Paulo Cesar Pereira de Andrade
+ */
+
+# define fn(x) jit_fpr_regno(x)
+
+#define _FA(o,d,a,b,c,x) FA(o,d,a,b,c,x,0)
+#define _FA_(o,d,a,b,c,x) FA(o,d,a,b,c,x,1)
+static uint32_t FA(int,int,int,int,int,int,int);
+
+#define _FXFL(o,m,b,x) FXFL(o,m,b,x,0)
+#define _FXFL_(o,m,b,x) FXFL(o,m,b,x,1)
+static uint32_t FXFL(int,int,int,int,int) maybe_unused;
+
+# define _FABS(d,b) _FX(63,d,0,b,264)
+# define _FABS_(d,b) _FX_(63,d,0,b,264)
+# define _FADD(d,a,b) _FA(63,d,a,b,0,21)
+# define _FADD_(d,a,b) _FA_(63,d,a,b,0,21)
+# define _FADDS(d,a,b) _FA(59,d,a,b,0,21)
+# define _FADDS_(d,a,b) _FA_(59,d,a,b,0,21)
+# define _FCFID(d,b) _FX(63,d,0,b,846)
+# define _FCMPO(cr,a,b) _FC(63,cr,0,a,b,32)
+# define _FCMPU(cr,a,b) _FC(63,cr,0,a,b,0)
+# define _FCTIW(d,b) _FX(63,d,0,b,14)
+# define _FCTIW_(d,b) _FX_(63,d,0,b,14)
+# define _FCTIWZ(d,b) _FX(63,d,0,b,15)
+# define _FCTIWZ_(d,b) _FX_(63,d,0,b,15)
+# define _FCTID(d,b) _FX(63,d,0,b,814)
+# define _FCTID_(d,b) _FX_(63,d,0,b,814)
+# define _FCTIDZ(d,b) _FX(63,d,0,b,815)
+# define _FCTIDZ_(d,b) _FX_(63,d,0,b,815)
+# define _FDIV(d,a,b) _FA(63,d,a,b,0,18)
+# define _FDIV_(d,a,b) _FA_(63,d,a,b,0,18)
+# define _FDIVS(d,a,b) _FA(59,d,a,b,0,18)
+# define _FDIVS_(d,a,b) _FA_(59,d,a,b,0,18)
+/* Names with a trailing underscore are the record forms and must go
+ * through _FA_/_FX_, which pass 1 as the final (rc) argument. */
+# define _FMADD(d,a,b,c) _FA(63,d,a,b,c,29)
+# define _FMADD_(d,a,b,c) _FA_(63,d,a,b,c,29)
+# define _FMADDS(d,a,b,c) _FA(59,d,a,b,c,29)
+# define _FMADDS_(d,a,b,c) _FA_(59,d,a,b,c,29)
+# define _FMR(d,b) _FX(63,d,0,b,72)
+# define _FMR_(d,b) _FX_(63,d,0,b,72)
+# define _FMSUB(d,a,b,c) _FA(63,d,a,b,c,28)
+# define _FMSUB_(d,a,b,c) _FA_(63,d,a,b,c,28)
+# define _FMSUBS(d,a,b,c) _FA(59,d,a,b,c,28)
+# define _FMSUBS_(d,a,b,c) _FA_(59,d,a,b,c,28)
+# define _FMUL(d,a,c) _FA(63,d,a,0,c,25)
+# define _FMUL_(d,a,c) _FA_(63,d,a,0,c,25)
+# define _FMULS(d,a,c) _FA(59,d,a,0,c,25)
+# define _FMULS_(d,a,c) _FA_(59,d,a,0,c,25)
+# define _FNABS(d,b) _FX(63,d,0,b,136)
+# define _FNABS_(d,b) _FX_(63,d,0,b,136)
+# define _FNEG(d,b) _FX(63,d,0,b,40)
+# define _FNEG_(d,b) _FX_(63,d,0,b,40)
+# define _FNMADD(d,a,b,c) _FA(63,d,a,b,c,31)
+# define _FNMADD_(d,a,b,c) _FA_(63,d,a,b,c,31)
+# define _FNMADDS(d,a,b,c) _FA(59,d,a,b,c,31)
+# define _FNMADDS_(d,a,b,c) _FA_(59,d,a,b,c,31)
+# define _FNMSUB(d,a,b,c) _FA(63,d,a,b,c,30)
+# define _FNMSUB_(d,a,b,c) _FA_(63,d,a,b,c,30)
+# define _FNMSUBS(d,a,b,c) _FA(59,d,a,b,c,30)
+# define _FNMSUBS_(d,a,b,c) _FA_(59,d,a,b,c,30)
+# define _FRES(d,b) _FA(59,d,0,b,0,24)
+# define _FRES_(d,b) _FA_(59,d,0,b,0,24)
+# define _FRSP(d,b) _FA(63,d,0,b,0,12)
+# define _FRSP_(d,b) _FA_(63,d,0,b,0,12)
+# define _FRSQTRE(d,b) _FA(63,d,0,b,0,26)
+# define _FRSQTRE_(d,b) _FA_(63,d,0,b,0,26)
+# define _FSEL(d,a,b,c) _FA(63,d,a,b,c,23)
+# define _FSEL_(d,a,b,c) _FA_(63,d,a,b,c,23)
+# define _FSQRT(d,b) _FA(63,d,0,b,0,22)
+# define _FSQRT_(d,b) _FA_(63,d,0,b,0,22)
+# define _FSQRTS(d,b) _FA(59,d,0,b,0,22)
+# define _FSQRTS_(d,b) _FA_(59,d,0,b,0,22)
+/* The trailing-underscore names are the record forms, so they expand
+ * through _FA_ (rc = 1) like the other *_ macros in this table. */
+# define _FSUB(d,a,b) _FA(63,d,a,b,0,20)
+# define _FSUB_(d,a,b) _FA_(63,d,a,b,0,20)
+# define _FSUBS(d,a,b) _FA(59,d,a,b,0,20)
+# define _FSUBS_(d,a,b) _FA_(59,d,a,b,0,20)
+# define _LFD(d,a,s) _FDs(50,d,a,s)
+# define _LFDU(d,a,s) _FDs(51,d,a,s)
+# define _LFDUX(d,a,b) _FX(31,d,a,b,631)
+# define _LFDX(d,a,b) _FX(31,d,a,b,599)
+# define _LFS(d,a,s) _FDs(48,d,a,s)
+# define _LFSU(d,a,s) _FDs(49,d,a,s)
+# define _LFSUX(d,a,b) _FX(31,d,a,b,567)
+# define _LFSX(d,a,b) _FX(31,d,a,b,535)
+/* Both arguments are parenthesized so an expression argument is shifted as a whole. */
+# define _MCRFS(d,s) _FXL(63,(d)<<2,(s)<<2,64)
+# define _MFFS(d) _FX(63,d,0,0,583)
+# define _MFFS_(d) _FX_(63,d,0,0,583)
+# define _MTFSB0(d) _FX(63,d,0,0,70)
+# define _MTFSB0_(d) _FX_(63,d,0,0,70)
+# define _MTFSB1(d) _FX(63,d,0,0,38)
+# define _MTFSB1_(d) _FX_(63,d,0,0,38)
+# define _MTFSF(m,b) _FXFL(63,m,b,711)
+# define _MTFSF_(m,b) _FXFL_(63,m,b,711)
+/* Arguments are parenthesized so an expression argument is shifted as a whole. */
+# define _MTFSFI(d,i) _FX(63,(d)<<2,0,(i)<<1,134)
+# define _MTFSFI_(d,i) _FX_(63,(d)<<2,0,(i)<<1,134)
+# define _STFD(s,a,d) _FDs(54,s,a,d)
+# define _STFDU(s,a,d) _FDs(55,s,a,d)
+# define _STFDUX(s,a,b) _FX(31,s,a,b,759)
+# define _STFDX(s,a,b) _FX(31,s,a,b,727)
+# define _STFIWX(s,a,b) _FX(31,s,a,b,983)
+# define _STFS(s,a,d) _FDs(52,s,a,d)
+# define _STFSU(s,a,d) _FDs(53,s,a,d)
+# define _STFSUX(s,a,b) _FX(31,s,a,b,695)
+# define _STFSX(s,a,b) _FX(31,s,a,b,663)
+
+static void movr_f(jit_state_t*,int32_t,int32_t);
+static void movr_d(jit_state_t*,int32_t,int32_t);
+
+static void movi_f(jit_state_t*,int32_t,jit_float32_t);
+static void movi_d(jit_state_t*,int32_t,jit_float64_t);
+
+static void extr_f(jit_state_t*,int32_t,int32_t);
+static void extr_d(jit_state_t*,int32_t,int32_t);
+
+static void truncr_f_i(jit_state_t*,int32_t,int32_t);
+static void truncr_d_i(jit_state_t*,int32_t,int32_t);
+
+# if __WORDSIZE == 64
+static void truncr_f_l(jit_state_t*,int32_t,int32_t);
+static void truncr_d_l(jit_state_t*,int32_t,int32_t);
+# endif
+
+static void extr_d_f(jit_state_t*,int32_t,int32_t);
+static void extr_f_d(jit_state_t*,int32_t,int32_t);
+
+static void absr_f(jit_state_t*,int32_t,int32_t);
+static void absr_d(jit_state_t*,int32_t,int32_t);
+
+static void negr_f(jit_state_t*,int32_t,int32_t);
+static void negr_d(jit_state_t*,int32_t,int32_t);
+
+static void sqrtr_f(jit_state_t*,int32_t,int32_t);
+static void sqrtr_d(jit_state_t*,int32_t,int32_t);
+
+static void addr_f(jit_state_t*,int32_t,int32_t,int32_t);
+static void addr_d(jit_state_t*,int32_t,int32_t,int32_t);
+
+static void subr_f(jit_state_t*,int32_t,int32_t,int32_t);
+static void subr_d(jit_state_t*,int32_t,int32_t,int32_t);
+
+static void mulr_f(jit_state_t*,int32_t,int32_t,int32_t);
+static void mulr_d(jit_state_t*,int32_t,int32_t,int32_t);
+
+static void divr_f(jit_state_t*,int32_t,int32_t,int32_t);
+static void divr_d(jit_state_t*,int32_t,int32_t,int32_t);
+
+static jit_reloc_t bltr_f(jit_state_t*,int32_t,int32_t);
+static jit_reloc_t bltr_d(jit_state_t*,int32_t,int32_t);
+static jit_reloc_t bler_f(jit_state_t*,int32_t,int32_t);
+static jit_reloc_t bler_d(jit_state_t*,int32_t,int32_t);
+static jit_reloc_t beqr_f(jit_state_t*,int32_t,int32_t);
+static jit_reloc_t beqr_d(jit_state_t*,int32_t,int32_t);
+static jit_reloc_t bger_f(jit_state_t*,int32_t,int32_t);
+static jit_reloc_t bger_d(jit_state_t*,int32_t,int32_t);
+static jit_reloc_t bgtr_f(jit_state_t*,int32_t,int32_t);
+static jit_reloc_t bgtr_d(jit_state_t*,int32_t,int32_t);
+static jit_reloc_t bner_f(jit_state_t*,int32_t,int32_t);
+static jit_reloc_t bner_d(jit_state_t*,int32_t,int32_t);
+static jit_reloc_t bunltr_f(jit_state_t*,int32_t,int32_t);
+static jit_reloc_t bunltr_d(jit_state_t*,int32_t,int32_t);
+static jit_reloc_t bunler_f(jit_state_t*,int32_t,int32_t);
+static jit_reloc_t bunler_d(jit_state_t*,int32_t,int32_t);
+static jit_reloc_t buneqr_f(jit_state_t*,int32_t,int32_t);
+static jit_reloc_t buneqr_d(jit_state_t*,int32_t,int32_t);
+static jit_reloc_t bunger_f(jit_state_t*,int32_t,int32_t);
+static jit_reloc_t bunger_d(jit_state_t*,int32_t,int32_t);
+static jit_reloc_t bungtr_f(jit_state_t*,int32_t,int32_t);
+static jit_reloc_t bungtr_d(jit_state_t*,int32_t,int32_t);
+static jit_reloc_t bltgtr_f(jit_state_t*,int32_t,int32_t);
+static jit_reloc_t bltgtr_d(jit_state_t*,int32_t,int32_t);
+static jit_reloc_t bordr_f(jit_state_t*,int32_t,int32_t);
+static jit_reloc_t bordr_d(jit_state_t*,int32_t,int32_t);
+static jit_reloc_t bunordr_f(jit_state_t*,int32_t,int32_t);
+static jit_reloc_t bunordr_d(jit_state_t*,int32_t,int32_t);
+
+static void ldr_f(jit_state_t*,int32_t,int32_t);
+static void ldi_f(jit_state_t*,int32_t,jit_word_t);
+
+static void ldxr_f(jit_state_t*,int32_t,int32_t,int32_t);
+static void ldxi_f(jit_state_t*,int32_t,int32_t,jit_word_t);
+
+static void str_f(jit_state_t*,int32_t,int32_t);
+static void sti_f(jit_state_t*,jit_word_t,int32_t);
+
+static void stxr_f(jit_state_t*,int32_t,int32_t,int32_t);
+static void stxi_f(jit_state_t*,jit_word_t,int32_t,int32_t);
+
+static void ldr_d(jit_state_t*,int32_t,int32_t);
+static void ldi_d(jit_state_t*,int32_t,jit_word_t);
+
+static void ldxr_d(jit_state_t*,int32_t,int32_t,int32_t);
+static void ldxi_d(jit_state_t*,int32_t,int32_t,jit_word_t);
+
+static void str_d(jit_state_t*,int32_t,int32_t);
+static void sti_d(jit_state_t*,jit_word_t,int32_t);
+
+static void stxr_d(jit_state_t*,int32_t,int32_t,int32_t);
+static void stxi_d(jit_state_t*,jit_word_t,int32_t,int32_t);
+
+# define _u16(v) ((v) & 0xffff)
+/*
+ * Builds the instruction word for the A layout of instr_t.
+ *
+ *   o        6-bit value for the po field
+ *   d,a,b,c  5-bit values for the ft, fa, fb and fc fields
+ *   x        5-bit value for the xo field
+ *   r        1-bit value for the rc field
+ *
+ * Every argument is asserted to fit its field.
+ */
+static uint32_t
+FA(int o, int d, int a, int b, int c, int x, int r)
+{
+  assert((o & ~0x3f) == 0);
+  assert((d & ~0x1f) == 0);
+  assert((a & ~0x1f) == 0);
+  assert((b & ~0x1f) == 0);
+  assert((c & ~0x1f) == 0);
+  assert((x & ~0x1f) == 0);
+  assert((r & ~0x01) == 0);
+
+  instr_t insn = {
+    .A = {
+      .po = o,
+      .ft = d,
+      .fa = a,
+      .fb = b,
+      .fc = c,
+      .xo = x,
+      .rc = r,
+    },
+  };
+  return insn.w;
+}
+
+/*
+ * Builds the instruction word for the XFL layout of instr_t.
+ *
+ *   o  6-bit value for the po field
+ *   m  8-bit value for the fm field
+ *   b  5-bit value for the fb field
+ *   x  10-bit value for the xo field
+ *   r  1-bit value for the rc field
+ *
+ * The l and w fields are always encoded as zero.
+ */
+static uint32_t
+FXFL(int o, int m, int b, int x, int r)
+{
+  assert((o & ~0x3f) == 0);
+  assert((m & ~0xff) == 0);
+  assert((b & ~0x1f) == 0);
+  assert((x & ~0x3ff) == 0);
+  assert((r & ~0x01) == 0);
+
+  instr_t insn = {
+    .XFL = {
+      .po = o,
+      .l = 0,
+      .fm = m,
+      .w = 0,
+      .fb = b,
+      .xo = x,
+      .rc = r,
+    },
+  };
+  return insn.w;
+}
+
+/* Floating-point register move r0 <- r1; emits nothing when both are the same register. */
+static void
+movr_d(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  if (r0 == r1)
+    return;
+
+  em_wp(_jit, _FMR(r0, r1));
+}
+
+/* Single-precision register move; forwards to movr_d unchanged. */
+static void
+movr_f(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+ movr_d(_jit, r0, r1);
+}
+
+/*
+ * Loads the single-precision constant i0 into FPR r0.
+ *
+ * The bit pattern of the constant is materialized in a general-purpose
+ * temporary, stored at FP-8, and then loaded from there with ldxi_f.
+ */
+static void
+movi_f(jit_state_t *_jit, int32_t r0, jit_float32_t i0)
+{
+  union {
+    int32_t i;
+    jit_float32_t f;
+  } data;
+
+  data.f = i0;
+  /* movi and stxi_i work on GPRs, so the temporary must be a GPR;
+   * the previous code reserved an FPR and used its number as a GPR. */
+  jit_gpr_t reg = get_temp_gpr(_jit);
+  movi(_jit, rn(reg), data.i & 0xffffffff);
+  stxi_i(_jit, -8, rn(_FP), rn(reg));
+  unget_temp_gpr(_jit);
+
+  ldxi_f(_jit, r0, rn(_FP), -8);
+}
+
+/*
+ * Loads the double-precision constant i0 into FPR r0.
+ *
+ * The bit pattern is written to the 8 bytes at FP-8 through a
+ * general-purpose temporary and then loaded with ldxi_d.
+ */
+static void
+movi_d(jit_state_t *_jit, int32_t r0, jit_float64_t i0)
+{
+  union {
+    int32_t i[2];
+    jit_word_t w;
+    jit_float64_t d;
+  } data;
+
+  data.d = i0;
+  jit_gpr_t reg = get_temp_gpr(_jit);
+# if __WORDSIZE == 32
+  /* Reproduce the host's memory image of the double: i[0] occupies the
+   * lower address, so it goes to FP-8 and i[1] to FP-4.  The previous
+   * code had the two offsets exchanged, which swapped the halves. */
+  movi(_jit, rn(reg), data.i[0]);
+  stxi_i(_jit, -8, rn(_FP), rn(reg));
+  movi(_jit, rn(reg), data.i[1]);
+  stxi_i(_jit, -4, rn(_FP), rn(reg));
+# else
+  movi(_jit, rn(reg), data.w);
+  stxi_l(_jit, -8, rn(_FP), rn(reg));
+# endif
+  unget_temp_gpr(_jit);
+  ldxi_d(_jit, r0, rn(_FP), -8);
+}
+
+/*
+ * Converts the integer in GPR r1 to a double in FPR r0.
+ *
+ * The integer is written as a 64-bit value to the reserved 8 bytes at
+ * FP-8, loaded into r0 as raw bits and converted in place with fcfid.
+ * Should only work on newer ppc (fcfid is a ppc64 instruction).
+ */
+static void
+extr_d(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+# if __WORDSIZE == 32
+  /* Build the sign-extended 64-bit image from two words: the temporary
+   * receives r1 >> 31 (the high word).  Which word sits at the lower
+   * address depends on byte order; the previous code always used the
+   * little-endian placement. */
+  jit_gpr_t reg = get_temp_gpr(_jit);
+  rshi(_jit, rn(reg), r1, 31);
+#  if __BYTE_ORDER == __BIG_ENDIAN
+  stxi_i(_jit, -4, rn(_FP), r1);
+  stxi_i(_jit, -8, rn(_FP), rn(reg));
+#  else
+  stxi_i(_jit, -8, rn(_FP), r1);
+  stxi_i(_jit, -4, rn(_FP), rn(reg));
+#  endif
+  unget_temp_gpr(_jit);
+# else
+  stxi_l(_jit, -8, rn(_FP), r1);
+# endif
+  ldxi_d(_jit, r0, rn(_FP), -8);
+  em_wp(_jit, _FCFID(r0, r0));
+}
+
+/*
+ * Integer to single-precision conversion; forwards to extr_d.
+ * NOTE(review): no explicit rounding to single precision is emitted
+ * here -- confirm that callers do not rely on one.
+ */
+static void
+extr_f(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+ extr_d(_jit, r0, r1);
+}
+
+/* Emits frsp (round to single precision) from r1 into r0. */
+static void
+extr_d_f(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  const uint32_t insn = _FRSP(r0, r1);
+  em_wp(_jit, insn);
+}
+
+/* Implemented as a plain register move via movr_d. */
+static void
+extr_f_d(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+ movr_d(_jit, r0, r1);
+}
+
+/* Emits fabs: r0 receives the absolute value of r1. */
+static void
+absr_d(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  const uint32_t insn = _FABS(r0, r1);
+  em_wp(_jit, insn);
+}
+
+/* Single-precision absolute value; forwards to absr_d. */
+static void
+absr_f(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+ absr_d(_jit, r0, r1);
+}
+
+/* Emits fsqrt: r0 receives the square root of r1. */
+static void
+sqrtr_d(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  const uint32_t insn = _FSQRT(r0, r1);
+  em_wp(_jit, insn);
+}
+
+/* Emits fsqrts, the single-precision square root of r1 into r0. */
+static void
+sqrtr_f(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  const uint32_t insn = _FSQRTS(r0, r1);
+  em_wp(_jit, insn);
+}
+
+/* Emits fadd: r0 = r1 + r2. */
+static void
+addr_d(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+{
+  const uint32_t insn = _FADD(r0, r1, r2);
+  em_wp(_jit, insn);
+}
+
+/* Emits fadds: r0 = r1 + r2 in single precision. */
+static void
+addr_f(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+{
+  const uint32_t insn = _FADDS(r0, r1, r2);
+  em_wp(_jit, insn);
+}
+
+/* Emits fsubs: r0 = r1 - r2 in single precision. */
+static void
+subr_f(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+{
+  const uint32_t insn = _FSUBS(r0, r1, r2);
+  em_wp(_jit, insn);
+}
+
+/* Emits fsub: r0 = r1 - r2. */
+static void
+subr_d(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+{
+  const uint32_t insn = _FSUB(r0, r1, r2);
+  em_wp(_jit, insn);
+}
+
+/* Emits fneg: r0 = -r1. */
+static void
+negr_d(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  const uint32_t insn = _FNEG(r0, r1);
+  em_wp(_jit, insn);
+}
+
+/* Single-precision negate; forwards to negr_d. */
+static void
+negr_f(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+ negr_d(_jit, r0, r1);
+}
+
+/* Emits fmuls: r0 = r1 * r2 in single precision. */
+static void
+mulr_f(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+{
+  const uint32_t insn = _FMULS(r0, r1, r2);
+  em_wp(_jit, insn);
+}
+
+/* Emits fmul: r0 = r1 * r2. */
+static void
+mulr_d(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+{
+  const uint32_t insn = _FMUL(r0, r1, r2);
+  em_wp(_jit, insn);
+}
+
+/* Emits fdivs: r0 = r1 / r2 in single precision. */
+static void
+divr_f(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+{
+  const uint32_t insn = _FDIVS(r0, r1, r2);
+  em_wp(_jit, insn);
+}
+
+/* Emits fdiv: r0 = r1 / r2. */
+static void
+divr_d(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+{
+  const uint32_t insn = _FDIV(r0, r1, r2);
+  em_wp(_jit, insn);
+}
+
+/*
+ * Truncates the double in FPR r1 to a 32-bit integer in GPR r0.
+ *
+ * fctiwz leaves its result in an FPR temporary, which is spilled as a
+ * double to the reserved 8 bytes at FP-8; the integer is then read back
+ * from the word selected by the byte order.
+ */
+static void
+truncr_d_i(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+#if __BYTE_ORDER == __BIG_ENDIAN
+  const jit_word_t result_offset = -4;
+#else
+  const jit_word_t result_offset = -8;
+#endif
+  jit_fpr_t converted = get_temp_fpr(_jit);
+
+  em_wp(_jit, _FCTIWZ(fn(converted), r1));
+  stxi_d(_jit, -8, rn(_FP), fn(converted));
+  ldxi_i(_jit, r0, rn(_FP), result_offset);
+
+  unget_temp_fpr(_jit);
+}
+
+/* Single-precision truncation to a 32-bit integer; forwards to truncr_d_i. */
+static void
+truncr_f_i(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+ truncr_d_i(_jit, r0, r1);
+}
+
+# if __WORDSIZE == 64
+/*
+ * Truncates the double in FPR r1 to a 64-bit integer in GPR r0: fctidz
+ * into an FPR temporary, spill to the reserved 8 bytes at FP-8, reload
+ * as a long.
+ */
+static void
+truncr_d_l(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  jit_fpr_t converted = get_temp_fpr(_jit);
+  const uint32_t convert = _FCTIDZ(fn(converted), r1);
+
+  em_wp(_jit, convert);
+  stxi_d(_jit, -8, rn(_FP), fn(converted));
+  ldxi_l(_jit, r0, rn(_FP), -8);
+
+  unget_temp_fpr(_jit);
+}
+
+/* Single-precision truncation to a 64-bit integer; forwards to truncr_d_l. */
+static void
+truncr_f_l(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+ truncr_d_l(_jit, r0, r1);
+}
+# endif
+
+
+/*
+ * Branch if r0 < r1: ordered compare (fcmpo) followed by a branch on LT.
+ * Returns the relocation produced by emit_cc_jump.
+ */
+static jit_reloc_t
+bltr_d(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  const uint32_t compare = _FCMPO(CR_0, r0, r1);
+
+  em_wp(_jit, compare);
+  return emit_cc_jump(_jit, _BLT(0));
+}
+
+/* Single-precision variant; forwards to bltr_d and returns its relocation. */
+static jit_reloc_t
+bltr_f(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+ return bltr_d(_jit, r0, r1);
+}
+
+/*
+ * Branch if r0 <= r1.  After the ordered compare, the GT bit is replaced
+ * by (GT eqv UN), i.e. it is set when the two bits agree, and the branch
+ * tests that bit.  Returns the relocation produced by emit_cc_jump.
+ */
+static jit_reloc_t
+bler_d(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  const uint32_t compare = _FCMPO(CR_0, r0, r1);
+  const uint32_t fold = _CREQV(CR_GT, CR_GT, CR_UN);
+
+  em_wp(_jit, compare);
+  em_wp(_jit, fold);
+  return emit_cc_jump(_jit, _BGT(0));
+}
+
+/* Single-precision variant; forwards to bler_d and returns its relocation. */
+static jit_reloc_t
+bler_f(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+ return bler_d(_jit, r0, r1);
+}
+
+/*
+ * Branch if r0 == r1: ordered compare followed by a branch on EQ.
+ * Returns the relocation produced by emit_cc_jump.
+ */
+static jit_reloc_t
+beqr_d(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  const uint32_t compare = _FCMPO(CR_0, r0, r1);
+
+  em_wp(_jit, compare);
+  return emit_cc_jump(_jit, _BEQ(0));
+}
+
+/* Single-precision variant; forwards to beqr_d and returns its relocation. */
+static jit_reloc_t
+beqr_f(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+ return beqr_d(_jit, r0, r1);
+}
+
+/*
+ * Branch if r0 >= r1.  After the ordered compare, the LT bit is replaced
+ * by (LT eqv UN), i.e. it is set when the two bits agree, and the branch
+ * tests that bit.  Returns the relocation produced by emit_cc_jump.
+ */
+static jit_reloc_t
+bger_d(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  const uint32_t compare = _FCMPO(CR_0, r0, r1);
+  const uint32_t fold = _CREQV(CR_LT, CR_LT, CR_UN);
+
+  em_wp(_jit, compare);
+  em_wp(_jit, fold);
+  return emit_cc_jump(_jit, _BLT(0));
+}
+
+/* Single-precision variant; forwards to bger_d and returns its relocation. */
+static jit_reloc_t
+bger_f(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+ return bger_d(_jit, r0, r1);
+}
+
+/*
+ * Branch if r0 > r1: ordered compare followed by a branch on GT.
+ * Returns the relocation produced by emit_cc_jump.
+ */
+static jit_reloc_t
+bgtr_d(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  const uint32_t compare = _FCMPO(CR_0, r0, r1);
+
+  em_wp(_jit, compare);
+  return emit_cc_jump(_jit, _BGT(0));
+}
+
+/* Single-precision variant; forwards to bgtr_d and returns its relocation. */
+static jit_reloc_t
+bgtr_f(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+ return bgtr_d(_jit, r0, r1);
+}
+
+/*
+ * Branch if r0 != r1: ordered compare followed by a branch taken when
+ * EQ is clear.  Returns the relocation produced by emit_cc_jump.
+ */
+static jit_reloc_t
+bner_d(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  const uint32_t compare = _FCMPO(CR_0, r0, r1);
+
+  em_wp(_jit, compare);
+  return emit_cc_jump(_jit, _BNE(0));
+}
+
+/* Single-precision variant; forwards to bner_d and returns its relocation. */
+static jit_reloc_t
+bner_f(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+ return bner_d(_jit, r0, r1);
+}
+
+/*
+ * Branch if r0 < r1 or the operands are unordered.  After the unordered
+ * compare (fcmpu), UN is OR-ed into LT and the branch tests LT.
+ * Returns the relocation produced by emit_cc_jump.
+ */
+static jit_reloc_t
+bunltr_d(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  const uint32_t compare = _FCMPU(CR_0, r0, r1);
+  const uint32_t merge = _CROR(CR_LT, CR_LT, CR_UN);
+
+  em_wp(_jit, compare);
+  em_wp(_jit, merge);
+  return emit_cc_jump(_jit, _BLT(0));
+}
+
+/* Single-precision variant; forwards to bunltr_d and returns its relocation. */
+static jit_reloc_t
+bunltr_f(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+ return bunltr_d(_jit, r0, r1);
+}
+
+/*
+ * Branch if r0 <= r1 or the operands are unordered: unordered compare
+ * followed by _BLE.  Returns the relocation produced by emit_cc_jump.
+ */
+static jit_reloc_t
+bunler_d(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  const uint32_t compare = _FCMPU(CR_0, r0, r1);
+
+  em_wp(_jit, compare);
+  return emit_cc_jump(_jit, _BLE(0));
+}
+
+/* Single-precision variant; forwards to bunler_d and returns its relocation. */
+static jit_reloc_t
+bunler_f(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+ return bunler_d(_jit, r0, r1);
+}
+
+/*
+ * Branch if r0 == r1 or the operands are unordered.  After the unordered
+ * compare, UN is OR-ed into EQ and the branch tests EQ.
+ * Returns the relocation produced by emit_cc_jump.
+ */
+static jit_reloc_t
+buneqr_d(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  const uint32_t compare = _FCMPU(CR_0, r0, r1);
+  const uint32_t merge = _CROR(CR_EQ, CR_EQ, CR_UN);
+
+  em_wp(_jit, compare);
+  em_wp(_jit, merge);
+  return emit_cc_jump(_jit, _BEQ(0));
+}
+
+/* Single-precision variant; forwards to buneqr_d and returns its relocation. */
+static jit_reloc_t
+buneqr_f(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+ return buneqr_d(_jit, r0, r1);
+}
+
+/*
+ * Branch if r0 >= r1 or the operands are unordered: unordered compare
+ * followed by _BGE.  Returns the relocation produced by emit_cc_jump.
+ */
+static jit_reloc_t
+bunger_d(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  const uint32_t compare = _FCMPU(CR_0, r0, r1);
+
+  em_wp(_jit, compare);
+  return emit_cc_jump(_jit, _BGE(0));
+}
+
+/* Single-precision variant; forwards to bunger_d and returns its relocation. */
+static jit_reloc_t
+bunger_f(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+ return bunger_d(_jit, r0, r1);
+}
+
+/*
+ * Branch if r0 > r1 or the operands are unordered.  After the unordered
+ * compare, UN is OR-ed into GT and the branch tests GT.
+ * Returns the relocation produced by emit_cc_jump.
+ */
+static jit_reloc_t
+bungtr_d(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  const uint32_t compare = _FCMPU(CR_0, r0, r1);
+  const uint32_t merge = _CROR(CR_GT, CR_GT, CR_UN);
+
+  em_wp(_jit, compare);
+  em_wp(_jit, merge);
+  return emit_cc_jump(_jit, _BGT(0));
+}
+
+/* Single-precision variant; forwards to bungtr_d and returns its relocation. */
+static jit_reloc_t
+bungtr_f(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+ return bungtr_d(_jit, r0, r1);
+}
+
+/*
+ * Branch if r0 < r1 or r0 > r1.  After the unordered compare, EQ is
+ * overwritten with (LT or GT) and the branch tests EQ.
+ * Returns the relocation produced by emit_cc_jump.
+ */
+static jit_reloc_t
+bltgtr_d(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  const uint32_t compare = _FCMPU(CR_0, r0, r1);
+  const uint32_t merge = _CROR(CR_EQ, CR_LT, CR_GT);
+
+  em_wp(_jit, compare);
+  em_wp(_jit, merge);
+  return emit_cc_jump(_jit, _BEQ(0));
+}
+
+/* Single-precision variant; forwards to bltgtr_d and returns its relocation. */
+static jit_reloc_t
+bltgtr_f(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+ return bltgtr_d(_jit, r0, r1);
+}
+
+/*
+ * Branch if the operands are ordered: unordered compare followed by
+ * _BNU.  Returns the relocation produced by emit_cc_jump.
+ */
+static jit_reloc_t
+bordr_d(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  const uint32_t compare = _FCMPU(CR_0, r0, r1);
+
+  em_wp(_jit, compare);
+  return emit_cc_jump(_jit, _BNU(0));
+}
+
+/* Single-precision variant; forwards to bordr_d and returns its relocation. */
+static jit_reloc_t
+bordr_f(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+ return bordr_d(_jit, r0, r1);
+}
+
+/*
+ * Branch if the operands are unordered: unordered compare followed by
+ * _BUN.  Returns the relocation produced by emit_cc_jump.
+ */
+static jit_reloc_t
+bunordr_d(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  const uint32_t compare = _FCMPU(CR_0, r0, r1);
+
+  em_wp(_jit, compare);
+  return emit_cc_jump(_jit, _BUN(0));
+}
+
+/* Single-precision variant; forwards to bunordr_d and returns its relocation. */
+static jit_reloc_t
+bunordr_f(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+ return bunordr_d(_jit, r0, r1);
+}
+
+/* Loads a float from the address in GPR r1 into FPR r0 (lfsx with _R0 in the first address slot). */
+static void
+ldr_f(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  const uint32_t insn = _LFSX(r0, rn(_R0), r1);
+  em_wp(_jit, insn);
+}
+
+/*
+ * Loads a float from the absolute address i0 into FPR r0.
+ *
+ * Three encodings, chosen by the size of the address:
+ *   - fits a signed 16-bit displacement: a single lfs;
+ *   - fits a signed 32-bit value: lis of the adjusted high half into a
+ *     temporary, then lfs with the signed low half as displacement;
+ *   - otherwise: movi of the full address into a temporary and ldr_f.
+ */
+static void
+ldi_f(jit_state_t *_jit, int32_t r0, jit_word_t i0)
+{
+  if (can_sign_extend_short_p(i0)) {
+    em_wp(_jit, _LFS(r0, rn(_R0), i0));
+    return;
+  }
+
+  if (can_sign_extend_int_p(i0)) {
+    /* The high half is bumped by one when the low half is negative as a
+     * signed 16-bit value, so that hi << 16 plus lo gives i0 back. */
+    jit_word_t hi = (int16_t)((i0 >> 16) + ((uint16_t)i0 >> 15));
+    jit_word_t lo = (int16_t)(i0 - (hi << 16));
+    jit_gpr_t base = get_temp_gpr(_jit);
+
+    em_wp(_jit, _LIS(rn(base), hi));
+    em_wp(_jit, _LFS(r0, rn(base), lo));
+    unget_temp_gpr(_jit);
+    return;
+  }
+
+  jit_gpr_t address = get_temp_gpr(_jit);
+  movi(_jit, rn(address), i0);
+  ldr_f(_jit, r0, rn(address));
+  unget_temp_gpr(_jit);
+}
+
+/* Loads a double from the address in GPR r1 into FPR r0 (lfdx with _R0 in the first address slot). */
+static void
+ldr_d(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  const uint32_t insn = _LFDX(r0, rn(_R0), r1);
+  em_wp(_jit, insn);
+}
+
+/*
+ * Loads a double from the absolute address i0 into FPR r0.
+ *
+ * Three encodings, chosen by the size of the address:
+ *   - fits a signed 16-bit displacement: a single lfd;
+ *   - fits a signed 32-bit value: lis of the adjusted high half into a
+ *     temporary, then lfd with the signed low half as displacement;
+ *   - otherwise: movi of the full address into a temporary and ldr_d.
+ */
+static void
+ldi_d(jit_state_t *_jit, int32_t r0, jit_word_t i0)
+{
+  if (can_sign_extend_short_p(i0)) {
+    em_wp(_jit, _LFD(r0, rn(_R0), i0));
+    return;
+  }
+
+  if (can_sign_extend_int_p(i0)) {
+    /* The high half is bumped by one when the low half is negative as a
+     * signed 16-bit value, so that hi << 16 plus lo gives i0 back. */
+    jit_word_t hi = (int16_t)((i0 >> 16) + ((uint16_t)i0 >> 15));
+    jit_word_t lo = (int16_t)(i0 - (hi << 16));
+    jit_gpr_t base = get_temp_gpr(_jit);
+
+    em_wp(_jit, _LIS(rn(base), hi));
+    em_wp(_jit, _LFD(r0, rn(base), lo));
+    unget_temp_gpr(_jit);
+    return;
+  }
+
+  jit_gpr_t address = get_temp_gpr(_jit);
+  movi(_jit, rn(address), i0);
+  ldr_d(_jit, r0, rn(address));
+  unget_temp_gpr(_jit);
+}
+
+/*
+ * Loads a float from address r1 + r2 into FPR r0.
+ *
+ * The code avoids placing _R0 in the first address slot of lfsx: when
+ * r1 is _R0 the operands are swapped, and when both are _R0 the base is
+ * first copied into a temporary.
+ */
+static void
+ldxr_f(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+{
+  if (r1 != rn(_R0)) {
+    em_wp(_jit, _LFSX(r0, r1, r2));
+    return;
+  }
+
+  if (r2 != rn(_R0)) {
+    em_wp(_jit, _LFSX(r0, r2, r1));
+    return;
+  }
+
+  jit_gpr_t base = get_temp_gpr(_jit);
+  movr(_jit, rn(base), r1);
+  em_wp(_jit, _LFSX(r0, rn(base), r2));
+  unget_temp_gpr(_jit);
+}
+
+/*
+ * Loads a double from address r1 + r2 into FPR r0.
+ *
+ * The code avoids placing _R0 in the first address slot of lfdx: when
+ * r1 is _R0 the operands are swapped, and when both are _R0 the base is
+ * first copied into a temporary.
+ */
+static void
+ldxr_d(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+{
+  if (r1 != rn(_R0)) {
+    em_wp(_jit, _LFDX(r0, r1, r2));
+    return;
+  }
+
+  if (r2 != rn(_R0)) {
+    em_wp(_jit, _LFDX(r0, r2, r1));
+    return;
+  }
+
+  jit_gpr_t base = get_temp_gpr(_jit);
+  movr(_jit, rn(base), r1);
+  em_wp(_jit, _LFDX(r0, rn(base), r2));
+  unget_temp_gpr(_jit);
+}
+
+/*
+ * Loads a float from address r1 + i0 into FPR r0.
+ *
+ *   - i0 == 0: plain ldr_f;
+ *   - i0 does not fit a signed 16-bit displacement: the offset is moved
+ *     into a temporary and ldxr_f is used;
+ *   - otherwise: lfs with the displacement, copying the base into a
+ *     temporary first when the base is _R0.
+ */
+static void
+ldxi_f(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0)
+{
+  if (i0 == 0) {
+    ldr_f(_jit, r0, r1);
+    return;
+  }
+
+  if (!can_sign_extend_short_p(i0)) {
+    jit_gpr_t offset = get_temp_gpr(_jit);
+    movi(_jit, rn(offset), i0);
+    ldxr_f(_jit, r0, r1, rn(offset));
+    unget_temp_gpr(_jit);
+    return;
+  }
+
+  if (r1 != rn(_R0)) {
+    em_wp(_jit, _LFS(r0, r1, i0));
+    return;
+  }
+
+  jit_gpr_t base = get_temp_gpr(_jit);
+  movr(_jit, rn(base), r1);
+  em_wp(_jit, _LFS(r0, rn(base), i0));
+  unget_temp_gpr(_jit);
+}
+
+/*
+ * Loads a double from address r1 + i0 into FPR r0.
+ *
+ *   - i0 == 0: plain ldr_d;
+ *   - i0 does not fit a signed 16-bit displacement: the offset is moved
+ *     into a temporary and ldxr_d is used;
+ *   - otherwise: lfd with the displacement, copying the base into a
+ *     temporary first when the base is _R0.
+ */
+static void
+ldxi_d(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0)
+{
+  if (i0 == 0) {
+    ldr_d(_jit, r0, r1);
+    return;
+  }
+
+  if (!can_sign_extend_short_p(i0)) {
+    jit_gpr_t offset = get_temp_gpr(_jit);
+    movi(_jit, rn(offset), i0);
+    ldxr_d(_jit, r0, r1, rn(offset));
+    unget_temp_gpr(_jit);
+    return;
+  }
+
+  if (r1 != rn(_R0)) {
+    em_wp(_jit, _LFD(r0, r1, i0));
+    return;
+  }
+
+  jit_gpr_t base = get_temp_gpr(_jit);
+  movr(_jit, rn(base), r1);
+  em_wp(_jit, _LFD(r0, rn(base), i0));
+  unget_temp_gpr(_jit);
+}
+
+/* Stores the float in FPR r1 to the address in GPR r0 (stfsx with _R0 in the first address slot). */
+static void
+str_f(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  const uint32_t insn = _STFSX(r1, rn(_R0), r0);
+  em_wp(_jit, insn);
+}
+
+/*
+ * Stores the float in FPR r0 to the absolute address i0.
+ *
+ * Three encodings, chosen by the size of the address:
+ *   - fits a signed 16-bit displacement: a single stfs;
+ *   - fits a signed 32-bit value: lis of the adjusted high half into a
+ *     temporary, then stfs with the signed low half as displacement;
+ *   - otherwise: movi of the full address into a temporary and str_f.
+ */
+static void
+sti_f(jit_state_t *_jit, jit_word_t i0, int32_t r0)
+{
+  if (can_sign_extend_short_p(i0)) {
+    em_wp(_jit, _STFS(r0, rn(_R0), i0));
+    return;
+  }
+
+  if (can_sign_extend_int_p(i0)) {
+    /* The high half is bumped by one when the low half is negative as a
+     * signed 16-bit value, so that hi << 16 plus lo gives i0 back. */
+    jit_word_t hi = (int16_t)((i0 >> 16) + ((uint16_t)i0 >> 15));
+    jit_word_t lo = (int16_t)(i0 - (hi << 16));
+    jit_gpr_t base = get_temp_gpr(_jit);
+
+    em_wp(_jit, _LIS(rn(base), hi));
+    em_wp(_jit, _STFS(r0, rn(base), lo));
+    unget_temp_gpr(_jit);
+    return;
+  }
+
+  jit_gpr_t address = get_temp_gpr(_jit);
+  movi(_jit, rn(address), i0);
+  str_f(_jit, rn(address), r0);
+  unget_temp_gpr(_jit);
+}
+
+/* Stores the double in FPR r1 to the address in GPR r0 (stfdx with _R0 in the first address slot). */
+static void
+str_d(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  const uint32_t insn = _STFDX(r1, rn(_R0), r0);
+  em_wp(_jit, insn);
+}
+
+/*
+ * Stores the double in FPR r0 to the absolute address i0.
+ *
+ * Three encodings, chosen by the size of the address:
+ *   - fits a signed 16-bit displacement: a single stfd;
+ *   - fits a signed 32-bit value: lis of the adjusted high half into a
+ *     temporary, then stfd with the signed low half as displacement;
+ *   - otherwise: movi of the full address into a temporary and str_d.
+ */
+static void
+sti_d(jit_state_t *_jit, jit_word_t i0, int32_t r0)
+{
+  if (can_sign_extend_short_p(i0)) {
+    em_wp(_jit, _STFD(r0, rn(_R0), i0));
+    return;
+  }
+
+  if (can_sign_extend_int_p(i0)) {
+    /* The high half is bumped by one when the low half is negative as a
+     * signed 16-bit value, so that hi << 16 plus lo gives i0 back. */
+    jit_word_t hi = (int16_t)((i0 >> 16) + ((uint16_t)i0 >> 15));
+    jit_word_t lo = (int16_t)(i0 - (hi << 16));
+    jit_gpr_t base = get_temp_gpr(_jit);
+
+    em_wp(_jit, _LIS(rn(base), hi));
+    em_wp(_jit, _STFD(r0, rn(base), lo));
+    unget_temp_gpr(_jit);
+    return;
+  }
+
+  jit_gpr_t address = get_temp_gpr(_jit);
+  movi(_jit, rn(address), i0);
+  str_d(_jit, rn(address), r0);
+  unget_temp_gpr(_jit);
+}
+
+/*
+ * Stores the float in FPR r2 to address r0 + r1.
+ *
+ * The code avoids placing _R0 in the first address slot of stfsx: when
+ * r0 is _R0 the operands are swapped, and when both are _R0 the value is
+ * first copied into a temporary.
+ */
+static void
+stxr_f(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+{
+  if (r0 != rn(_R0)) {
+    em_wp(_jit, _STFSX(r2, r0, r1));
+    return;
+  }
+
+  if (r1 != rn(_R0)) {
+    em_wp(_jit, _STFSX(r2, r1, r0));
+    return;
+  }
+
+  jit_gpr_t base = get_temp_gpr(_jit);
+  movr(_jit, rn(base), r1);
+  em_wp(_jit, _STFSX(r2, rn(base), r0));
+  unget_temp_gpr(_jit);
+}
+
+/*
+ * Stores the double in FPR r2 to address r0 + r1.
+ *
+ * The code avoids placing _R0 in the first address slot of stfdx: when
+ * r0 is _R0 the operands are swapped, and when both are _R0 the value is
+ * first copied into a temporary.
+ */
+static void
+stxr_d(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+{
+  if (r0 != rn(_R0)) {
+    em_wp(_jit, _STFDX(r2, r0, r1));
+    return;
+  }
+
+  if (r1 != rn(_R0)) {
+    em_wp(_jit, _STFDX(r2, r1, r0));
+    return;
+  }
+
+  jit_gpr_t base = get_temp_gpr(_jit);
+  movr(_jit, rn(base), r0);
+  em_wp(_jit, _STFDX(r2, rn(base), r1));
+  unget_temp_gpr(_jit);
+}
+
+/*
+ * Stores the float in FPR r1 to address r0 + i0.
+ *
+ *   - i0 == 0: plain str_f;
+ *   - i0 fits a signed 16-bit displacement: stfs with the displacement,
+ *     copying the base into a temporary first when the base is _R0;
+ *   - otherwise: the offset is moved into a temporary and stxr_f is used.
+ */
+static void
+stxi_f(jit_state_t *_jit, jit_word_t i0, int32_t r0, int32_t r1)
+{
+  if (i0 == 0) {
+    str_f(_jit, r0, r1);
+  } else if (can_sign_extend_short_p(i0)) {
+    if (r0 == rn(_R0)) {
+      jit_gpr_t reg = get_temp_gpr(_jit);
+      /* Copy the base register, as ldxi_f does; the previous code
+       * passed the immediate i0 where a register number is expected. */
+      movr(_jit, rn(reg), r0);
+      em_wp(_jit, _STFS(r1, rn(reg), i0));
+      unget_temp_gpr(_jit);
+    } else {
+      em_wp(_jit, _STFS(r1, r0, i0));
+    }
+  } else {
+    jit_gpr_t reg = get_temp_gpr(_jit);
+    movi(_jit, rn(reg), i0);
+    stxr_f(_jit, rn(reg), r0, r1);
+    unget_temp_gpr(_jit);
+  }
+}
+
+/*
+ * Stores the double in FPR r1 to address r0 + i0.
+ *
+ *   - i0 == 0: plain str_d;
+ *   - i0 fits a signed 16-bit displacement: stfd with the displacement,
+ *     copying the base into a temporary first when the base is _R0;
+ *   - otherwise: the offset is moved into a temporary and stxr_d is used.
+ */
+static void
+stxi_d(jit_state_t *_jit, jit_word_t i0, int32_t r0, int32_t r1)
+{
+  if (i0 == 0) {
+    str_d(_jit, r0, r1);
+  } else if (can_sign_extend_short_p(i0)) {
+    if (r0 == rn(_R0)) {
+      jit_gpr_t reg = get_temp_gpr(_jit);
+      /* Copy the base register, as ldxi_d does; the previous code
+       * passed the immediate i0 where a register number is expected. */
+      movr(_jit, rn(reg), r0);
+      em_wp(_jit, _STFD(r1, rn(reg), i0));
+      unget_temp_gpr(_jit);
+    } else {
+      em_wp(_jit, _STFD(r1, r0, i0));
+    }
+  } else {
+    jit_gpr_t reg = get_temp_gpr(_jit);
+    movi(_jit, rn(reg), i0);
+    stxr_d(_jit, rn(reg), r0, r1);
+    unget_temp_gpr(_jit);
+  }
+}
+
+/*
+ * Returns the float held in FPR r0: moves it into JIT_FRET unless it is
+ * already there, then emits the return.
+ */
+static void
+retr_f(jit_state_t *_jit, int32_t r0)
+{
+  /* r0 is an FPR number, so it is compared with the FPR return register
+   * (as retr_d does); the previous code compared it with the GPR return
+   * register number and could skip a move that was needed. */
+  if (fn(JIT_FRET) != r0)
+    movr_f(_jit, fn(JIT_FRET), r0);
+
+  ret(_jit);
+}
+
+/*
+ * Returns the double held in FPR r0: moves it into JIT_FRET unless it is
+ * already there, then emits the return.
+ */
+static void
+retr_d(jit_state_t *_jit, int32_t r0)
+{
+  const int already_in_place = (fn(JIT_FRET) == r0);
+
+  if (!already_in_place)
+    movr_d(_jit, fn(JIT_FRET), r0);
+
+  ret(_jit);
+}
+
+/* Copies the floating-point return register JIT_FRET into r0, unless r0 already is that register. */
+static void
+retval_d(jit_state_t *_jit, int32_t r0)
+{
+  if (r0 == fn(JIT_FRET))
+    return;
+
+  movr_d(_jit, r0, fn(JIT_FRET));
+}
+
+/* Single-precision variant; forwards to retval_d. */
+static void
+retval_f(jit_state_t *_jit, int32_t r0)
+{
+ retval_d(_jit, r0);
+}
diff --git a/deps/lightening/lightening/ppc.c b/deps/lightening/lightening/ppc.c
new file mode 100644
index 0000000..7c8ec5d
--- /dev/null
+++ b/deps/lightening/lightening/ppc.c
@@ -0,0 +1,476 @@
+/*
+ * Copyright (C) 2012-2019 Free Software Foundation, Inc.
+ *
+ * This file is part of GNU lightning.
+ *
+ * GNU lightning is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ *
+ * GNU lightning is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+ * License for more details.
+ *
+ * Authors:
+ * Paulo Cesar Pereira de Andrade
+ */
+
+# define JIT_RA0 _R3
+# define JIT_FA0 _F1
+# define JIT_RET _R3
+# define JIT_FRET _F1
+
+# define rn(x) jit_gpr_regno(x)
+
+/*
+ * Byte displacement of an 8-, 16- or 32-bit integer, or of a float,
+ * inside a word-sized slot: zero on little-endian, word size minus the
+ * object size on big-endian.  The expansions are fully parenthesized so
+ * the macros can be used inside larger expressions.
+ */
+#if __BYTE_ORDER == __LITTLE_ENDIAN
+# define C_DISP 0
+# define S_DISP 0
+# define I_DISP 0
+# define F_DISP 0
+#else
+# define C_DISP ((__WORDSIZE >> 3) - sizeof(int8_t))
+# define S_DISP ((__WORDSIZE >> 3) - sizeof(int16_t))
+# define I_DISP ((__WORDSIZE >> 3) - sizeof(int32_t))
+# define F_DISP ((__WORDSIZE >> 3) - sizeof(jit_float32_t))
+#endif
+
+static const jit_gpr_t abi_gpr_args[] = {
+ _R3, _R4, _R5, _R6, _R7, _R8, _R9, _R10
+};
+
+static const jit_fpr_t abi_fpr_args[] = {
+ _F1, _F2, _F3, _F4, _F5, _F6, _F7, _F8, _F9, _F10, _F11, _F12, _F13
+};
+
+static const int abi_gpr_arg_count = sizeof(abi_gpr_args) / sizeof(abi_gpr_args[0]);
+static const int abi_fpr_arg_count = sizeof(abi_fpr_args) / sizeof(abi_fpr_args[0]);
+
+/*
+ * Types
+ */
+typedef jit_pointer_t jit_va_list_t;
+
+/* libgcc */
+extern void __clear_cache(void *, void *);
+
+#define em_wp(_jit, x) emit_u32_with_pool(_jit, (x))
+
+/*
+ * One 32-bit instruction word, viewed either as the raw value `w` or
+ * through one of several bit-field layouts.  Each layout totals 32 bits.
+ * The big-endian branch lists the fields starting with `po`; the other
+ * branch lists the same fields in reverse order.
+ * NOTE(review): the layout names look like the Power ISA instruction
+ * form names, and the mirroring assumes the compiler allocates
+ * bit-fields in the usual order for each byte order -- confirm.
+ */
+typedef union {
+#if __BYTE_ORDER == __BIG_ENDIAN
+ /* used by FA() */
+ struct {
+ uint32_t po:6;
+ uint32_t ft:5;
+ uint32_t fa:5;
+ uint32_t fb:5;
+ uint32_t fc:5;
+ uint32_t xo:5;
+ uint32_t rc:1;
+ } A;
+
+ struct {
+ uint32_t po:6;
+ uint32_t rt:5;
+ uint32_t ra:5;
+ uint32_t rb:5;
+ uint32_t u0:1;
+ uint32_t xo:9;
+ uint32_t u1:1;
+ } XO;
+
+ struct {
+ uint32_t po:6;
+ uint32_t rx:5;
+ uint32_t ra:5;
+ uint32_t d:16;
+ } D;
+
+ struct {
+ uint32_t po:6;
+ uint32_t f0:5;
+ uint32_t ra:5;
+ uint32_t rb:5;
+ uint32_t xo:10;
+ uint32_t u0:1;
+ } X;
+
+ /* li is a signed field */
+ struct {
+ uint32_t po:6;
+ int32_t li:24;
+ uint32_t aa:1;
+ uint32_t lk:1;
+ } I;
+
+ /* bd is a signed field */
+ struct {
+ uint32_t po:6;
+ uint32_t bo:5;
+ uint32_t bi:5;
+ int32_t bd:14;
+ uint32_t aa:1;
+ uint32_t lk:1;
+ } B;
+
+ struct {
+ uint32_t po:6;
+ uint32_t bo:5;
+ uint32_t ba:5;
+ uint32_t bb:5;
+ uint32_t xo:10;
+ uint32_t lk:1;
+ } XL;
+
+ struct {
+ uint32_t po:6;
+ uint32_t rs:5;
+ uint32_t fx:10;
+ uint32_t xo:10;
+ uint32_t u0:1;
+ } XFX;
+
+ /* used by FXFL() */
+ struct {
+ uint32_t po:6;
+ uint32_t l:1;
+ uint32_t fm:8;
+ uint32_t w:1;
+ uint32_t fb:5;
+ uint32_t xo:10;
+ uint32_t rc:1;
+ } XFL;
+
+ struct {
+ uint32_t po:6;
+ uint32_t rs:5;
+ uint32_t ra:5;
+ uint32_t rb:5;
+ uint32_t mb:5;
+ uint32_t me:5;
+ uint32_t rc:1;
+ } M;
+
+ /* the following layouts exist only for 64-bit words */
+#if __WORDSIZE == 64
+ struct {
+ uint32_t po:6;
+ uint32_t rs:5;
+ uint32_t ra:5;
+ uint32_t rb:5;
+ uint32_t mx:6;
+ uint32_t xo:4;
+ uint32_t rc:1;
+ } MDS;
+
+ struct {
+ uint32_t po:6;
+ uint32_t rs:5;
+ uint32_t ra:5;
+ uint32_t s0:5;
+ uint32_t mx:6;
+ uint32_t xo:3;
+ uint32_t s1:1;
+ uint32_t rc:1;
+ } MD;
+
+ struct {
+ uint32_t po:6;
+ uint32_t rs:5;
+ uint32_t ra:5;
+ uint32_t s0:5;
+ uint32_t xo:9;
+ uint32_t s1:1;
+ uint32_t rc:1;
+ } XS;
+#endif
+#else
+ /* same layouts as above with the field order reversed */
+ struct {
+ uint32_t rc:1;
+ uint32_t xo:5;
+ uint32_t fc:5;
+ uint32_t fb:5;
+ uint32_t fa:5;
+ uint32_t ft:5;
+ uint32_t po:6;
+ } A;
+
+ struct {
+ uint32_t u1:1;
+ uint32_t xo:9;
+ uint32_t u0:1;
+ uint32_t rb:5;
+ uint32_t ra:5;
+ uint32_t rt:5;
+ uint32_t po:6;
+ } XO;
+
+ struct {
+ uint32_t d:16;
+ uint32_t ra:5;
+ uint32_t rx:5;
+ uint32_t po:6;
+ } D;
+
+ struct {
+ uint32_t u0:1;
+ uint32_t xo:10;
+ uint32_t rb:5;
+ uint32_t ra:5;
+ uint32_t f0:5;
+ uint32_t po:6;
+ } X;
+
+ struct {
+ uint32_t lk:1;
+ uint32_t aa:1;
+ int32_t li:24;
+ uint32_t po:6;
+ } I;
+
+ struct {
+ uint32_t lk:1;
+ uint32_t aa:1;
+ int32_t bd:14;
+ uint32_t bi:5;
+ uint32_t bo:5;
+ uint32_t po:6;
+ } B;
+
+ struct {
+ uint32_t lk:1;
+ uint32_t xo:10;
+ uint32_t bb:5;
+ uint32_t ba:5;
+ uint32_t bo:5;
+ uint32_t po:6;
+ } XL;
+
+ struct {
+ uint32_t u0:1;
+ uint32_t xo:10;
+ uint32_t fx:10;
+ uint32_t rs:5;
+ uint32_t po:6;
+ } XFX;
+
+ struct {
+ uint32_t rc:1;
+ uint32_t xo:10;
+ uint32_t fb:5;
+ uint32_t w:1;
+ uint32_t fm:8;
+ uint32_t l:1;
+ uint32_t po:6;
+ } XFL;
+
+ struct {
+ uint32_t rc:1;
+ uint32_t me:5;
+ uint32_t mb:5;
+ uint32_t rb:5;
+ uint32_t ra:5;
+ uint32_t rs:5;
+ uint32_t po:6;
+ } M;
+
+#if __WORDSIZE == 64
+ struct {
+ uint32_t rc:1;
+ uint32_t xo:4;
+ uint32_t mx:6;
+ uint32_t rb:5;
+ uint32_t ra:5;
+ uint32_t rs:5;
+ uint32_t po:6;
+ } MDS;
+
+ struct {
+ uint32_t rc:1;
+ uint32_t s1:1;
+ uint32_t xo:3;
+ uint32_t mx:6;
+ uint32_t s0:5;
+ uint32_t ra:5;
+ uint32_t rs:5;
+ uint32_t po:6;
+ } MD;
+
+ struct {
+ uint32_t rc:1;
+ uint32_t s1:1;
+ uint32_t xo:9;
+ uint32_t s0:5;
+ uint32_t ra:5;
+ uint32_t rs:5;
+ uint32_t po:6;
+ } XS;
+#endif
+#endif
+ /* the whole instruction word */
+ uint32_t w;
+} instr_t;
+
+#include "ppc-cpu.c"
+#include "ppc-fpu.c"
+
+/*
+ * Cursor used by next_abi_arg() to assign a location to each argument
+ * in turn.  Zeroed and seeded by reset_call_arg_iterator() or
+ * reset_load_arg_iterator().
+ */
+struct abi_arg_iterator
+{
+ /* operand array being walked and its length */
+ const jit_operand_t *args;
+ size_t argc;
+
+ /* set to 1 by next_abi_arg() once the first stack argument is placed */
+ size_t flags;
+
+ /* index of the next operand, and counts of argument registers consumed */
+ size_t arg_idx;
+ size_t gpr_idx;
+ size_t fpr_idx;
+
+ /* offset from JIT_SP given to the next stack argument */
+ size_t stack_size;
+ /* not touched by the functions visible in this file section */
+ size_t stack_padding;
+};
+
+static size_t page_size;
+
+/*
+ * Implementation
+ */
+/*
+ * One-time CPU/environment probe: caches the system page size in
+ * page_size and reports success unconditionally.
+ */
+static jit_bool_t
+jit_get_cpu(void)
+{
+  const long reported = sysconf(_SC_PAGE_SIZE);
+
+  page_size = reported;
+  // FIXME check hardware fp support?
+  return 1;
+}
+
+/* Per-state initialization hook; this backend has nothing to set up and always returns 1. */
+static jit_bool_t
+jit_init(jit_state_t *_jit)
+{
+  (void)_jit;
+  return 1;
+}
+
+/* Size in bytes of the initial frame; the same value seeds stack_size in reset_call_arg_iterator(). */
+static size_t
+jit_initial_frame_size(void)
+{
+ return 32;
+}
+
+/*
+ * Stack alignment, in bytes, reported by this backend.
+ * Declared with (void) so the definition is a proper prototype, like
+ * jit_initial_frame_size().
+ */
+static size_t
+jit_stack_alignment(void)
+{
+  return 16;
+}
+
+/*
+ * Flushes the instruction cache for the range [fptr, tptr) through
+ * libgcc's __clear_cache; compiles to nothing without __GNUC__.
+ */
+static void
+jit_flush(void *fptr, void *tptr)
+{
+#if defined(__GNUC__)
+  __clear_cache(fptr, tptr);
+#endif
+}
+
+/*
+ * Patches the jump at loc so that it targets the current emission
+ * point; the offset passed on is the distance in 32-bit words.
+ */
+static void
+patch_jmp_without_veneer(jit_state_t *_jit, uint32_t *loc)
+{
+ patch_jmp_offset(loc, _jit->pc.ui - loc);
+}
+
+/*
+ * Emits an unconditional branch with a zero offset and returns the
+ * address of that instruction so it can be patched later.
+ */
+static uint32_t *
+jmp_without_veneer(jit_state_t *_jit)
+{
+  uint32_t *const branch_at = _jit->pc.ui;
+
+  emit_u32(_jit, _B(0));
+  return branch_at;
+}
+
+/* Not used by this backend: aborts if it is ever called. */
+static void
+patch_load_from_pool_offset(uint32_t *loc, int32_t v)
+{
+  (void)v;
+  (void)loc;
+  abort();
+}
+
+/* Not used by this backend: aborts if it is ever called. */
+static int32_t
+read_load_from_pool_offset(uint32_t *loc)
+{
+  (void)loc;
+  abort();
+  /* kept after abort() so the function still has a return statement */
+  return 0;
+}
+
+/* Jump-shortening hook: this backend does nothing here. */
+static void
+jit_try_shorten(jit_state_t *_jit, jit_reloc_t reloc, jit_pointer_t addr)
+{
+ /* silence unused-parameter warnings */
+ (void)_jit;
+ (void)reloc;
+ (void)addr;
+}
+
+/* Returns ptr unchanged; this backend applies no adjustment to function pointers. */
+static void *
+bless_function_pointer(void *ptr)
+{
+ return ptr;
+}
+
+/*
+ * Prepares iter for walking the argc operands in args on the calling
+ * side: every field is cleared and the stack offset starts at 32.
+ */
+static void
+reset_call_arg_iterator(struct abi_arg_iterator *iter, size_t argc,
+                        const jit_operand_t *args)
+{
+  memset(iter, 0, sizeof *iter);
+
+  iter->args = args;
+  iter->argc = argc;
+  iter->stack_size = 32;
+}
+
+/*
+ * Prepares iter for walking the argc operands in args on the receiving
+ * side: every field is cleared and the stack offset starts at 0.
+ * NOTE(review): the earlier comment read "Skip over initial frame"
+ * although the offset starts at zero -- confirm which is intended.
+ */
+static void
+reset_load_arg_iterator(struct abi_arg_iterator *iter, size_t argc,
+                        const jit_operand_t *args)
+{
+  memset(iter, 0, sizeof *iter);
+
+  iter->args = args;
+  iter->argc = argc;
+  iter->stack_size = 0;
+}
+
+/*
+ * Assigns a location to the next argument of iter and writes it to *arg.
+ *
+ * GPR-class arguments take the next register of abi_gpr_args while any
+ * remain; FPR-class arguments take the next register of abi_fpr_args
+ * and also consume one GPR slot.  Everything else goes to the stack at
+ * JIT_SP + stack_size in 8-byte slots; the first stack argument first
+ * advances the offset by 8 bytes for every argument that preceded it.
+ */
+static void
+next_abi_arg(struct abi_arg_iterator *iter, jit_operand_t *arg)
+{
+  ASSERT(iter->arg_idx < iter->argc);
+
+  const size_t current = iter->arg_idx;
+  enum jit_operand_abi abi = iter->args[current].abi;
+  iter->arg_idx = current + 1;
+
+  if (is_gpr_arg(abi) && iter->gpr_idx < abi_gpr_arg_count) {
+    const size_t slot = iter->gpr_idx;
+    iter->gpr_idx = slot + 1;
+    *arg = jit_operand_gpr(abi, abi_gpr_args[slot]);
+    return;
+  }
+
+  if (is_fpr_arg(abi) && iter->fpr_idx < abi_fpr_arg_count) {
+    const size_t slot = iter->fpr_idx;
+    iter->fpr_idx = slot + 1;
+    *arg = jit_operand_fpr(abi, abi_fpr_args[slot]);
+    iter->gpr_idx++;
+    return;
+  }
+
+  /* First stack argument: append the register save area. */
+  if (iter->flags == 0) {
+    iter->stack_size += current * 8;
+    iter->flags = 1;
+  }
+
+  *arg = jit_operand_mem(abi, JIT_SP, iter->stack_size);
+  iter->stack_size += 8;
+}
+
+// Prepare _R0 to be saved to stack. Slightly hacky?
+// Delegates to pop_link_register(), which emits mflr into _R0.
+static void
+jit_prolog(jit_state_t *_jit)
+{
+ pop_link_register(_jit);
+}
+
+// Counterpart of jit_prolog(): delegates to push_link_register(),
+// which emits mtlr from _R0.
+static void
+jit_epilog(jit_state_t *_jit)
+{
+ push_link_register(_jit);
+}
diff --git a/deps/lightening/lightening/ppc.h b/deps/lightening/lightening/ppc.h
new file mode 100644
index 0000000..a6c8675
--- /dev/null
+++ b/deps/lightening/lightening/ppc.h
@@ -0,0 +1,193 @@
+/*
+ * Copyright (C) 2012-2017 Free Software Foundation, Inc.
+ *
+ * This file is part of GNU lightning.
+ *
+ * GNU lightning is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ *
+ * GNU lightning is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+ * License for more details.
+ *
+ * Authors:
+ * Paulo Cesar Pereira de Andrade
+ */
+
+#ifndef _jit_ppc_h
+#define _jit_ppc_h
+
+#define JIT_NEEDS_LITERAL_POOL 1
+#define JIT_USE_IMMEDIATE_RELOC 1
+#define JIT_NEEDS_PROLOG 1
+#define JIT_ASYMMETRIC_STACK 1
+
+#if __powerpc__
+# if _CALL_ELF == 2
+/* _CALL_ELF == 2 selects the ELFv2 ABI; original note: __BYTE_ORDER == __LITTLE_ENDIAN (presumably the usual ELFv2 byte order - confirm) */
+# define ABI_ELFv2 1
+# endif
+#endif
+
+#define _R0 JIT_GPR(0)
+#define _R1 JIT_GPR(1)
+#define _R2 JIT_GPR(2)
+#define _R3 JIT_GPR(3)
+#define _R4 JIT_GPR(4)
+#define _R5 JIT_GPR(5)
+#define _R6 JIT_GPR(6)
+#define _R7 JIT_GPR(7)
+#define _R8 JIT_GPR(8)
+#define _R9 JIT_GPR(9)
+#define _R10 JIT_GPR(10)
+#define _R11 JIT_GPR(11)
+#define _R12 JIT_GPR(12)
+#define _R13 JIT_GPR(13)
+#define _R14 JIT_GPR(14)
+#define _R15 JIT_GPR(15)
+#define _R16 JIT_GPR(16)
+#define _R17 JIT_GPR(17)
+#define _R18 JIT_GPR(18)
+#define _R19 JIT_GPR(19)
+#define _R20 JIT_GPR(20)
+#define _R21 JIT_GPR(21)
+#define _R22 JIT_GPR(22)
+#define _R23 JIT_GPR(23)
+#define _R24 JIT_GPR(24)
+#define _R25 JIT_GPR(25)
+#define _R26 JIT_GPR(26)
+#define _R27 JIT_GPR(27)
+#define _R28 JIT_GPR(28)
+#define _R29 JIT_GPR(29)
+#define _R30 JIT_GPR(30)
+#define _R31 JIT_GPR(31)
+
+#define _FP _R31
+
+#define _F0 JIT_FPR(0)
+#define _F1 JIT_FPR(1)
+#define _F2 JIT_FPR(2)
+#define _F3 JIT_FPR(3)
+#define _F4 JIT_FPR(4)
+#define _F5 JIT_FPR(5)
+#define _F6 JIT_FPR(6)
+#define _F7 JIT_FPR(7)
+#define _F8 JIT_FPR(8)
+#define _F9 JIT_FPR(9)
+#define _F10 JIT_FPR(10)
+#define _F11 JIT_FPR(11)
+#define _F12 JIT_FPR(12)
+#define _F13 JIT_FPR(13)
+#define _F14 JIT_FPR(14)
+#define _F15 JIT_FPR(15)
+#define _F16 JIT_FPR(16)
+#define _F17 JIT_FPR(17)
+#define _F18 JIT_FPR(18)
+#define _F19 JIT_FPR(19)
+#define _F20 JIT_FPR(20)
+#define _F21 JIT_FPR(21)
+#define _F22 JIT_FPR(22)
+#define _F23 JIT_FPR(23)
+#define _F24 JIT_FPR(24)
+#define _F25 JIT_FPR(25)
+#define _F26 JIT_FPR(26)
+#define _F27 JIT_FPR(27)
+#define _F28 JIT_FPR(28)
+#define _F29 JIT_FPR(29)
+#define _F30 JIT_FPR(30)
+#define _F31 JIT_FPR(31)
+
+#define JIT_R0 _R3
+#define JIT_R1 _R4
+#define JIT_R2 _R5
+#define JIT_R3 _R6
+#define JIT_R4 _R7
+#define JIT_R5 _R8
+#define JIT_R6 _R9
+#define JIT_R7 _R10
+
+#define JIT_V0 _R14
+#define JIT_V1 _R15
+#define JIT_V2 _R16
+#define JIT_V3 _R17
+#define JIT_V4 _R18
+#define JIT_V5 _R19
+#define JIT_V6 _R20
+#define JIT_V7 _R21
+#define JIT_V8 _R22
+#define JIT_V9 _R23
+#define JIT_V10 _R24
+#define JIT_V11 _R25
+#define JIT_V12 _R26
+#define JIT_V13 _R27
+#define JIT_TMP0 _R28
+#define JIT_TMP1 _R29
+#define JIT_TMP2 _R30
+
+#define JIT_FP _R31
+#define JIT_SP _R1
+
+// TODO shouldn't these be one-to-one?
+#define JIT_F0 _F1
+#define JIT_F1 _F2
+#define JIT_F2 _F3
+#define JIT_F3 _F4
+#define JIT_F4 _F5
+#define JIT_F5 _F6
+#define JIT_F6 _F7
+#define JIT_F7 _F8
+#define JIT_F8 _F9
+#define JIT_F9 _F10
+#define JIT_F10 _F11
+#define JIT_F11 _F12
+#define JIT_F12 _F13
+
+#define JIT_VF0 _F14
+#define JIT_VF1 _F15
+#define JIT_VF2 _F16
+#define JIT_VF3 _F17
+#define JIT_VF4 _F18
+#define JIT_VF5 _F19
+#define JIT_VF6 _F20
+#define JIT_VF7 _F21
+#define JIT_VF8 _F22
+#define JIT_VF9 _F23
+#define JIT_VF10 _F24
+#define JIT_VF11 _F25
+#define JIT_VF12 _F26
+#define JIT_VF13 _F27
+#define JIT_VF14 _F28
+#define JIT_VF15 _F29
+#define JIT_VF16 _F30
+#define JIT_VF17 _F31
+
+#define JIT_FTMP _F0
+
+#define JIT_LR _R0
+
+#define JIT_PLATFORM_CALLEE_SAVE_GPRS _R0, JIT_SP, JIT_FP, JIT_TMP0, JIT_TMP1, JIT_TMP2
+#define JIT_PLATFORM_CALLEE_SAVE_FPRS
+
+// At most, we need MTCTR & BCTR, so two instructions per jump.
+#define JIT_JMP_MAX_SIZE (sizeof(uint32_t) * 2)
+
+#if __WORDSIZE == 64
+#define JIT_LITERAL_MAX_SIZE ((sizeof(uint32_t) * 6) + JIT_JMP_MAX_SIZE) /* six 32-bit words plus one maximal jump */
+#else
+#define JIT_LITERAL_MAX_SIZE ((sizeof(uint32_t) * 2) + JIT_JMP_MAX_SIZE) /* two 32-bit words plus one maximal jump */
+#endif
+
+#define JIT_INST_MAX_SIZE JIT_JMP_MAX_SIZE /* same bound as a jump: two 32-bit words */
+
+// For example atomics are fairly long unbreakable sequences
+// that can cause the pretty tight literal pool limits to run over,
+// so make sure we give ourselves enough space to emit at least one
+// uninterrupted sequence.
+//
+// TODO: check if this is sound reasoning or just a hack.
+#define JIT_EXTRA_SPACE (16 * sizeof(uint32_t))
+
+#endif /* _jit_ppc_h */
diff --git a/deps/lightening/lightening/s390-cpu.c b/deps/lightening/lightening/s390-cpu.c
new file mode 100644
index 0000000..02f2675
--- /dev/null
+++ b/deps/lightening/lightening/s390-cpu.c
@@ -0,0 +1,3848 @@
+/*
+ * Copyright (C) 2013-2017 Free Software Foundation, Inc.
+ *
+ * This file is part of GNU lightning.
+ *
+ * GNU lightning is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ *
+ * GNU lightning is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+ * License for more details.
+ *
+ * Authors:
+ * Paulo Cesar Pereira de Andrade
+ */
+
+#if PROTO
+# if __WORDSIZE == 32
+# define ldr(r0,r1) ldr_i(r0,r1)
+# define ldxr(r0,r1,r2) ldxr_i(r0,r1,r2)
+# define ldxi(r0,r1,i0) ldxi_i(r0,r1,i0)
+# define stxi(i0,r0,r1) stxi_i(i0,r0,r1)
+# else
+# define ldr(r0,r1) ldr_l(r0,r1)
+# define ldxr(r0,r1,r2) ldxr_l(r0,r1,r2)
+# define ldxi(r0,r1,i0) ldxi_l(r0,r1,i0)
+# define stxi(i0,r0,r1) stxi_l(i0,r0,r1)
+# endif
+# define is(i) (*_jit->pc.us++ = (i)) /* store i through the pc.us cursor and advance it; parenthesized so it is safe inside larger expressions */
+# if __WORDSIZE == 32
+# define stack_framesize 96
+# else
+# define stack_framesize 160
+# endif
+# define _R0_REGNO 0
+# define _R1_REGNO 1
+# define _R7_REGNO 7
+# define _R13_REGNO 13
+# define _FP_REGNO _R13_REGNO
+# define _R14_REGNO 14
+# define _R15_REGNO 15
+# define u12_p(i0) ((i0) >= 0 && (i0) <= 4095)
+# define s16_p(i0) ((i0) >= -32768 && (i0) <= 32767)
+# define x16(i0) ((i0) & 0xffff)
+# define s20_p(i0) ((i0) >= -524288 && (i0) <= 524287)
+# define x20(i0) ((i0) & 0xfffff)
+# if __WORDSIZE == 32
+# define s32_p(i0) 1
+# else
+/* True when i0 fits a signed 32-bit immediate, i.e. lies in
+ * [-2147483648, 2147483647]; the upper bound is inclusive. */
+# define s32_p(i0) \
+ ((i0) >= -2147483648L && (i0) <= 2147483647L)
+# endif
+
+/*
+ Condition Code Instruction (Mask) Bit Mask Value
+ 0 8 8
+ 1 9 4
+ 2 10 2
+ 3 11 1
+
+AGR:
+ 0 Zero
+ 1 < zero
+ 2 > zero
+ 3 Overflow
+--
+1 -> overflow CC_O
+14 -> no overflow CC_NO
+
+ALGR:
+ 0 Zero, no carry
+ 1 Not zero, no carry
+ 2 Zero, carry
+ 3 Not zero, carry
+--
+2|1 -> carry CC_NLE
+8|4 -> no carry CC_LE
+
+SGR:
+ 0 Zero
+ 1 < zero
+ 2 > zero
+ 3 Overflow
+--
+1 -> overflow CC_O
+14 -> no overflow CC_NO
+
+SLGR:
+ 0 --
+ 1 Not zero, borrow
+ 2 Zero, no borrow
+ 3 Not zero, no borrow
+--
+4 -> borrow CC_L
+11 -> no borrow CC_NL
+ */
+
+# define CC_NV 0x0
+# define CC_O 0x1
+# define CC_H 0x2
+# define CC_NLE 0x3
+# define CC_L 0x4
+# define CC_NHE 0x5
+# define CC_LH 0x6
+# define CC_NE 0x7
+# define CC_E 0x8
+# define CC_NLH 0x9
+# define CC_HE 0xA
+# define CC_NL 0xB
+# define CC_LE 0xC
+# define CC_NH 0xD
+# define CC_NO 0xE
+# define CC_AL 0xF
+# define _us uint16_t
+# define _ui uint32_t
+# define E_(Op) _E(_jit,Op)
+static void _E(jit_state_t*,_ui);
+# define I_(Op,I) _I(_jit,Op,I)
+static void _I(jit_state_t*,_ui,_ui);
+# define RR_(Op,R1,R2) _RR(_jit,Op,R1,R2)
+static void _RR(jit_state_t*,_ui,_ui,_ui);
+# define RRE_(Op,R1,R2) _RRE(_jit,Op,R1,R2)
+static void _RRE(jit_state_t*,_ui,_ui,_ui);
+# define RRF_(Op,R3,M4,R1,R2) _RRF(_jit,Op,R3,M4,R1,R2)
+static void _RRF(jit_state_t*,_ui,_ui,_ui,_ui,_ui);
+# define RX_(Op,R1,X2,B2,D2) _RX(_jit,Op,R1,X2,B2,D2)
+static void _RX(jit_state_t*,_ui,_ui,_ui,_ui,_ui);
+# define RXE_(Op,R1,X2,B2,D2,Op2) _RXE(_jit,Op,R1,X2,B2,D2,Op2)
+static void _RXE(jit_state_t*,_ui,_ui,_ui,_ui,_ui,_ui);
+# define RXF_(Op,R3,X2,B2,D2,R1,Op2) _RXF(_jit,Op,R3,X2,B2,D2,R1,Op2)
+static void _RXF(jit_state_t*,_ui,_ui,_ui,_ui,_ui,_ui,_ui);
+# define RXY_(Op,R1,X2,B2,D2,Op2) _RXY(_jit,Op,R1,X2,B2,D2,Op2)
+static void _RXY(jit_state_t*,_ui,_ui,_ui,_ui,_ui,_ui);
+# define RS_(Op,R1,R3,B2,D2) _RS(_jit,Op,R1,R3,B2,D2)
+static void _RS(jit_state_t*,_ui,_ui,_ui,_ui,_ui);
+# define RSY_(Op,R1,R3,B2,D2,Op2) RXY_(Op,R1,R3,B2,D2,Op2)
+# define RSL_(Op,L1,B1,D1,Op2) _RSL(_jit,Op,L1,B1,D1,Op2)
+static void _RSL(jit_state_t*,_ui,_ui,_ui,_ui,_ui);
+# define RSI_(Op,R1,R3,I2) _RSI(_jit,Op,R1,R3,I2)
+static void _RSI(jit_state_t*,_ui,_ui,_ui,_ui);
+# define RI_(Op,R1,Op2,I2) RSI_(Op,R1,Op2,I2)
+# define RIE_(Op,R1,R3,I2,Op2) _RIE(_jit,Op,R1,R3,I2,Op2)
+static void _RIE(jit_state_t*,_ui,_ui,_ui,_ui,_ui);
+# define RIL_(Op,R1,Op2,I2) _RIL(_jit,Op,R1,Op2,I2)
+static void _RIL(jit_state_t*,_ui,_ui,_ui,_ui);
+# define SI_(Op,I2,B1,D1) _SI(_jit,Op,I2,B1,D1)
+static void _SI(jit_state_t*,_ui,_ui,_ui,_ui);
+# define SIY_(Op,I2,B1,D1,Op2) _SIY(_jit,Op,I2,B1,D1,Op2)
+static void _SIY(jit_state_t*,_ui,_ui,_ui,_ui,_ui);
+# define S_(Op,B2,D2) _S(_jit,Op,B2,D2)
+static void _S(jit_state_t*,_ui,_ui,_ui);
+# define SSL_(Op,L,B1,D1,B2,D2) SS_(Op,(L)>>4,(L)&0xF,B1,D1,B2,D2)
+# define SS_(Op,LL,LH,B1,D1,B2,D2) _SS(_jit,Op,LL,LH,B1,D1,B2,D2)
+static void _SS(jit_state_t*,_ui,_ui,_ui,_ui,_ui,_ui,_ui);
+# define SSE_(Op,B1,D1,B2,D2) _SSE(_jit,Op,B1,D1,B2,D2)
+static void _SSE(jit_state_t*,_ui,_ui,_ui,_ui,_ui);
+# undef _us
+# undef _ui
+# define nop(c) _nop(_jit,c)
+static void _nop(jit_state_t*,int32_t);
+# if __WORDSIZE == 32
+# define ADD_(r0,r1) AR(r0,r1)
+# define ADDI_(r0,i0) AHI(r0,i0)
+# define ADDC_(r0,r1) ALR(r0,r1)
+# define ADDX_(r0,r1) ALCR(r0,r1)
+# define AND_(r0,r1) NR(r0,r1)
+# define CMP_(r0,r1) CR(r0,r1)
+# define CMPU_(r0,r1) CLR(r0,r1)
+# define DIVREM_(r0,r1) DR(r0,r1)
+# define DIVREMU_(r0,r1) DLR(r0,r1)
+# define OR_(r0,r1) OR(r0,r1)
+# define MUL_(r0,r1) MSR(r0,r1)
+# define MULI_(r0,i0) MHI(r0,i0)
+# define MULU_(r0,r1) MLR(r0,r1)
+# define SUB_(r0,r1) SR(r0,r1)
+# define SUBC_(r0,r1) SLR(r0,r1)
+# define SUBX_(r0,r1) SLBR(r0,r1)
+# define TEST_(r0,r1) LTR(r0,r1)
+# define XOR_(r0,r1) XR(r0,r1)
+# else
+# define ADD_(r0,r1) AGR(r0,r1)
+# define ADDI_(r0,i0) AGHI(r0,i0)
+# define ADDC_(r0,r1) ALGR(r0,r1)
+# define ADDX_(r0,r1) ALCGR(r0,r1)
+# define AND_(r0,r1) NGR(r0,r1)
+# define CMP_(r0,r1) CGR(r0,r1)
+# define CMPU_(r0,r1) CLGR(r0,r1)
+# define DIVREM_(r0,r1) DSGR(r0,r1)
+# define DIVREMU_(r0,r1) DLGR(r0,r1)
+# define MUL_(r0,r1) MSGR(r0,r1)
+# define MULI_(r0,i0) MGHI(r0,i0)
+# define MULU_(r0,r1) MLGR(r0,r1)
+# define OR_(r0,r1) OGR(r0,r1)
+# define SUB_(r0,r1) SGR(r0,r1)
+# define SUBC_(r0,r1) SLGR(r0,r1)
+# define SUBX_(r0,r1) SLBGR(r0,r1)
+# define TEST_(r0,r1) LTGR(r0,r1)
+# define XOR_(r0,r1) XGR(r0,r1)
+# endif
+/****************************************************************
+ * General Instructions *
+ ****************************************************************/
+/* ADD */
+# define AR(R1,R2) RR_(0x1A,R1,R2)
+# define AGR(R1,R2) RRE_(0xB908,R1,R2)
+# define AGFR(R1,R2) RRE_(0xB918,R1,R2)
+# define A(R1,D2,X2,B2) RX_(0x5A,R1,X2,B2,D2)
+# define AY(R1,D2,X2,B2) RXY_(0xE3,R1,X2,B2,D2,0x5A)
+# define AG(R1,D2,X2,B2) RXY_(0xE3,R1,X2,B2,D2,0x08)
+# define AGF(R1,D2,X2,B2) RXY_(0xE3,R1,X2,B2,D2,0x18)
+/* ADD HALFWORD */
+# define AH(R1,D2,X2,B2) RX_(0x4A,R1,X2,B2,D2)
+# define AHY(R1,D2,X2,B2) RXY_(0xE3,R1,X2,B2,D2,0x7A)
+/* ADD HALFWORD IMMEDIATE */
+# define AHI(R1,I2) RI_(0xA7,R1,0xA,I2)
+# define AGHI(R1,I2) RI_(0xA7,R1,0xB,I2)
+/* ADD LOGICAL */
+# define ALR(R1,R2) RR_(0x1E,R1,R2)
+# define ALGR(R1,R2) RRE_(0xB90A,R1,R2)
+# define ALGFR(R1,R2) RRE_(0xB91A,R1,R2)
+# define AL(R1,D2,X2,B2) RX_(0x5E,R1,X2,B2,D2)
+# define ALY(R1,D2,X2,B2) RXY_(0xE3,R1,X2,B2,D2,0x5E)
+# define ALG(R1,D2,X2,B2) RXY_(0xE3,R1,X2,B2,D2,0x0A)
+# define ALGF(R1,D2,X2,B2) RXY_(0xE3,R1,X2,B2,D2,0x1A)
+/* ADD LOGICAL WITH CARRY */
+# define ALCR(R1,R2) RRE_(0xB998,R1,R2)
+# define ALCGR(R1,R2) RRE_(0xB988,R1,R2)
+# define ALC(R1,D2,X2,B2) RXY_(0xE3,R1,X2,B2,D2,0x98)
+# define ALCG(R1,D2,X2,B2) RXY_(0xE3,R1,X2,B2,D2,0x88)
+/* AND */
+# define NR(R1,R2) RR_(0x14,R1,R2)
+# define NGR(R1,R2) RRE_(0xB980,R1,R2)
+# define N(R1,D2,X2,B2) RX_(0x54,R1,X2,B2,D2)
+# define NY(R1,D2,X2,B2) RXY_(0xE3,R1,X2,B2,D2,0x54)
+# define NG(R1,D2,X2,B2) RXY_(0xE3,R1,X2,B2,D2,0x80)
+# define NI(D1,B1,I2) SI_(0x94,I2,B1,D1)
+# define NIY(D1,B1,I2) SIY_(0xEB,I2,B1,D1,0x54)
+# define NC(D1,L,B1,D2,B2) SSL_(0xD4,L,B1,D1,B2,D2)
+/* AND IMMEDIATE */
+# define NIHH(R1,I2) RI_(0xA5,R1,0x4,I2)
+# define NIHL(R1,I2) RI_(0xA5,R1,0x5,I2)
+# define NILH(R1,I2) RI_(0xA5,R1,0x6,I2)
+# define NILL(R1,I2) RI_(0xA5,R1,0x7,I2)
+/* BRANCH AND LINK */
+# define BALR(R1,R2) RR_(0x05,R1,R2)
+# define BAL(R1,D2,X2,B2) RX_(0x45,R1,X2,B2,D2)
+/* BRANCH AND SAVE */
+# define BASR(R1,R2) RR_(0x0D,R1,R2)
+# define BAS(R1,D2,X2,B2) RX_(0x4D,R1,X2,B2,D2)
+/* BRANCH AND SAVE AND SET MODE */
+# define BASSM(R1,R2) RR_(0x0C,R1,R2)
+/* BRANCH AND SET MODE */
+# define BSM(R1,R2) RR_(0x0B,R1,R2)
+/* BRANCH ON CONDITION */
+# define BCR(M1,R2) RR_(0x07,M1,R2)
+# define BR(R2) BCR(CC_AL,R2)
+# define NOPR(R2) BCR(CC_NV,R2)
+# define BC(M1,D2,X2,B2) RX_(0x47,M1,X2,B2,D2)
+/* BRANCH ON COUNT */
+# define BCTR(R1,R2) RR_(0x06,R1,R2)
+# define BCTGR(R1,R2) RRE_(0xB946,R1,R2)
+# define BCT(R1,D2,X2,B2) RX_(0x46,R1,X2,B2,D2)
+# define BCTG(R1,D2,X2,B2) RXY_(0xE3,R1,X2,B2,D2,0x46)
+/* BRANCH ON INDEX HIGH */
+# define BXH(R1,R3,D2,B2) RS_(0x86,R1,R3,B2,D2)
+# define BXHG(R1,R3,B2,D2) RSY_(0xEB,R1,R3,B2,D2,0x44)
+/* BRANCH ON INDEX LOW OR EQUAL */
+# define BXLE(R1,R3,D2,B2) RS_(0x87,R1,R3,B2,D2)
+# define BXLEG(R1,R3,B2,D2) RSY_(0xEB,R1,R3,B2,D2,0x45)
+/* BRANCH RELATIVE AND SAVE */
+# define BRAS(R1,I2) RI_(0xA7,R1,0x5,I2)
+/* BRANCH RELATIVE AND SAVE LONG */
+# define BRASL(R1,I2) RIL_(0xC0,R1,0x5,I2)
+/* BRANCH RELATIVE ON CONDITION */
+# define BRC(M1,I2) RI_(0xA7,M1,0x4,I2)
+# define J(I2) BRC(CC_AL,I2)
+/* BRANCH RELATIVE ON CONDITION LONG */
+# define BRCL(M1,I2) RIL_(0xC0,M1,0x4,I2)
+# define BRL(I2) BRCL(CC_AL,I2)
+/* BRANCH RELATIVE ON COUNT */
+# define BRCT(M1,I2) RI_(0xA7,M1,0x6,I2)
+# define BRCTG(M1,I2) RI_(0xA7,M1,0x7,I2)
+/* BRANCH RELATIVE ON INDEX HIGH */
+# define BRXH(R1,R3,I2) RSI_(0x84,R1,R3,I2)
+# define BRXHG(R1,R3,I2) RIE_(0xEC,R1,R3,I2,0x44)
+/* BRANCH RELATIVE ON INDEX LOW OR EQUAL */
+# define BRXLE(R1,R3,I2) RSI_(0x85,R1,R3,I2)
+# define BRXLEG(R1,R3,I2) RIE_(0xEC,R1,R3,I2,0x45)
+/* CHECKSUM */
+# define CKSUM(R1,R2) RRE_(0xB241,R1,R2)
+/* CIPHER MESSAGE (KM) */
+# define KM(R1,R2) RRE_(0xB92E,R1,R2)
+/* CIPHER MESSAGE WITH CHAINING (KMC) */
+# define KMC(R1,R2) RRE_(0xB92F,R1,R2)
+/* COMPARE */
+# define CR(R1,R2) RR_(0x19,R1,R2)
+# define CGR(R1,R2) RRE_(0xB920,R1,R2)
+# define CGFR(R1,R2) RRE_(0xB930,R1,R2)
+# define C(R1,D2,X2,B2) RX_(0x59,R1,X2,B2,D2)
+# define CY(R1,D2,X2,B2) RXY_(0xE3,R1,X2,B2,D2,0x59)
+# define CG(R1,D2,X2,B2) RXY_(0xE3,R1,X2,B2,D2,0x20)
+# define CGF(R1,D2,X2,B2) RXY_(0xE3,R1,X2,B2,D2,0x30)
+/* COMPARE AND FORM CODEWORD */
+# define CFC(D2,B2) S_(0xB21A,B2,D2)
+/* COMPARE AND SWAP */
+# define CS(R1,R3,D2,B2) RS_(0xBA,R1,R3,B2,D2)
+# define CSY(R1,R3,D2,B2) RSY_(0xEB,R1,R3,B2,D2,0x14)
+# define CSG(R1,R3,D2,B2) RSY_(0xEB,R1,R3,B2,D2,0x30)
+/* COMPARE DOUBLE AND SWAP */
+# define CDS(R1,R3,D2,B2) RS_(0xBB,R1,R3,B2,D2)
+# define CSDY(R1,R3,D2,B2) RSY_(0xEB,R1,R3,B2,D2,0x31) /* NOTE(review): architecture mnemonic appears to be CDSY; name kept for callers - confirm */
+# define CSDG(R1,R3,D2,B2) RSY_(0xEB,R1,R3,B2,D2,0x3E) /* NOTE(review): architecture mnemonic appears to be CDSG; name kept for callers - confirm */
+/* COMPARE HALFWORD */
+# define CH(R1,D2,X2,B2) RX_(0x49,R1,X2,B2,D2)
+# define CHY(R1,D2,X2,B2) RXY_(0xE3,R1,X2,B2,D2,0x79)
+/* COMPARE HALFWORD IMMEDIATE */
+# define CHI(R1,I2) RI_(0xA7,R1,0xE,I2)
+# define CGHI(R1,I2) RI_(0xA7,R1,0xF,I2)
+/* COMPARE LOGICAL */
+# define CLR(R1,R2) RR_(0x15,R1,R2)
+# define CLGR(R1,R2) RRE_(0xB921,R1,R2)
+# define CLGFR(R1,R2) RRE_(0xB931,R1,R2)
+# define CL(R1,D2,X2,B2) RX_(0x55,R1,X2,B2,D2)
+# define CLY(R1,D2,X2,B2) RXY_(0xE3,R1,X2,B2,D2,0x55)
+# define CLG(R1,D2,X2,B2) RXY_(0xE3,R1,X2,B2,D2,0x21)
+# define CLGF(R1,D2,X2,B2) RXY_(0xE3,R1,X2,B2,D2,0x31)
+# define CLI(D1,B1,I2) SI_(0x95,I2,B1,D1)
+# define CLIY(D1,B1,I2) SIY_(0xEB,I2,B1,D1,0x55)
+# define CLC(D1,L,B1,D2,B2) SSL_(0xD5,L,B1,D1,B2,D2)
+/* COMPARE LOGICAL CHARACTERS UNDER MASK */
+# define CLM(R1,M3,D2,B2) RS_(0xBD,R1,M3,B2,D2)
+# define CLMY(R1,M3,D2,B2) RSY_(0xEB,R1,M3,B2,D2,0x21)
+# define CLMH(R1,M3,D2,B2) RSY_(0xEB,R1,M3,B2,D2,0x20)
+/* COMPARE LOGICAL LONG */
+# define CLCL(R1,R2) RR_(0x0F,R1,R2)
+/* COMPARE LOGICAL LONG EXTENDED */
+# define CLCLE(R1,R3,D2,B2) RS_(0xA9,R1,R3,B2,D2)
+/* COMPARE LOGICAL LONG UNICODE */
+# define CLCLU(R1,R3,D2,B2) RSY_(0xEB,R1,R3,B2,D2,0x8F)
+/* COMPARE LOGICAL STRING */
+# define CLST(R1,R2) RRE_(0xB25D,R1,R2)
+/* COMPARE UNTIL SUBSTRING EQUAL */
+# define CUSE(R1,R2) RRE_(0xB257,R1,R2)
+/* COMPRESSION CALL */
+# define CMPSC(R1,R2) RRE_(0xB263,R1,R2)
+/* COMPUTE INTERMEDIATE MESSAGE DIGEST (KIMD) */
+# define KIMD(R1,R2) RRE_(0xB93E,R1,R2)
+/* COMPUTE LAST MESSAGE DIGEST (KLMD) */
+# define KLMD(R1,R2) RRE_(0xB93F,R1,R2)
+/* COMPUTE MESSAGE AUTHENTICATION CODE (KMAC) */
+# define KMAC(R1,R2) RRE_(0xB91E,R1,R2)
+/* CONVERT TO BINARY */
+# define CVB(R1,D2,X2,B2) RX_(0x4F,R1,X2,B2,D2)
+# define CVBY(R1,D2,X2,B2) RXY_(0xE3,R1,X2,B2,D2,0x06)
+# define CVBG(R1,D2,X2,B2) RXY_(0xE3,R1,X2,B2,D2,0x0e)
+/* CONVERT TO DECIMAL */
+# define CVD(R1,D2,X2,B2) RX_(0x4E,R1,X2,B2,D2)
+# define CVDY(R1,D2,X2,B2) RXY_(0xE3,R1,X2,B2,D2,0x26)
+# define CVDG(R1,D2,X2,B2) RXY_(0xE3,R1,X2,B2,D2,0x2E)
+/* CONVERT UNICODE TO UTF-8 */
+# define CUUTF(R1,R2) RRE_(0xB2A6,R1,R2)
+/* CONVERT UTF-8 TO UNICODE */
+# define CUTFU(R1,R2) RRE_(0xB2A7,R1,R2)
+/* COPY ACCESS */
+# define CPYA(R1,R2) RRE_(0xB24D,R1,R2)
+/* DIVIDE */
+# define DR(R1,R2) RR_(0x1D,R1,R2)
+# define D(R1,D2,X2,B2) RX_(0x5D,R1,X2,B2,D2)
+/* DIVIDE LOGICAL */
+# define DLR(R1,R2) RRE_(0xB997,R1,R2)
+# define DLGR(R1,R2) RRE_(0xB987,R1,R2)
+# define DL(R1,D2,X2,B2) RXY_(0xE3,R1,X2,B2,D2,0x97)
+# define DLG(R1,D2,X2,B2) RXY_(0xE3,R1,X2,B2,D2,0x87)
+/* DIVIDE SINGLE */
+# define DSGR(R1,R2) RRE_(0xB90D,R1,R2)
+# define DSGFR(R1,R2) RRE_(0xB91D,R1,R2)
+# define DSG(R1,D2,X2,B2) RXY_(0xE3,R1,X2,B2,D2,0x0D)
+# define DSGF(R1,D2,X2,B2) RXY_(0xE3,R1,X2,B2,D2,0x1D)
+/* EXCLUSIVE OR */
+# define XR(R1,R2) RR_(0x17,R1,R2)
+# define XGR(R1,R2) RRE_(0xB982,R1,R2)
+# define X(R1,D2,X2,B2) RX_(0x57,R1,X2,B2,D2)
+# define XY(R1,D2,X2,B2) RXY_(0xE3,R1,X2,B2,D2,0x57)
+# define XG(R1,D2,X2,B2) RXY_(0xE3,R1,X2,B2,D2,0x82)
+# define XI(D1,B1,I2) SI_(0x97,I2,B1,D1)
+# define XIY(D1,B1,I2) SIY_(0xEB,I2,B1,D1,0x57)
+# define XC(D1,L,B1,D2,B2) SSL_(0xD7,L,B1,D1,B2,D2)
+/* EXECUTE */
+# define EX(R1,D2,X2,B2) RX_(0x44,R1,X2,B2,D2)
+/* EXTRACT ACCESS */
+# define EAR(R1,R2) RRE_(0xB24F,R1,R2)
+/* EXTRACT PSW */
+# define EPSW(R1,R2) RRE_(0xB98D,R1,R2)
+/* INSERT CHARACTER */
+# define IC(R1,D2,X2,B2) RX_(0x43,R1,X2,B2,D2)
+# define ICY(R1,D2,X2,B2) RXY_(0xE3,R1,X2,B2,D2,0x73)
+/* INSERT CHARACTERS UNDER MASK */
+# define ICM(R1,M3,D2,B2) RS_(0xBF,R1,M3,B2,D2)
+# define ICMY(R1,M3,D2,B2) RSY_(0xEB,R1,M3,B2,D2,0x81)
+# define ICMH(R1,M3,D2,B2) RSY_(0xEB,R1,M3,B2,D2,0x80)
+/* INSERT IMMEDIATE */
+# define IIHH(R1,I2) RI_(0xA5,R1,0x0,I2)
+# define IIHL(R1,I2) RI_(0xA5,R1,0x1,I2)
+# define IILH(R1,I2) RI_(0xA5,R1,0x2,I2)
+# define IILL(R1,I2) RI_(0xA5,R1,0x3,I2)
+/* INSERT PROGRAM MASK */
+# define IPM(R1) RRE_(0xB222,R1,0)
+/* LOAD */
+# define LR(R1,R2) RR_(0x18,R1,R2)
+# define LGR(R1,R2) RRE_(0xB904,R1,R2)
+# define LGFR(R1,R2) RRE_(0xB914,R1,R2)
+# define L(R1,D2,X2,B2) RX_(0x58,R1,X2,B2,D2)
+# define LY(R1,D2,X2,B2) RXY_(0xE3,R1,X2,B2,D2,0x58)
+# define LG(R1,D2,X2,B2) RXY_(0xE3,R1,X2,B2,D2,0x04)
+# define LGF(R1,D2,X2,B2) RXY_(0xE3,R1,X2,B2,D2,0x14)
+/* LOAD ACCESS MULTIPLE */
+# define LAM(R1,R3,D2,B2) RS_(0x9A,R1,R3,B2,D2)
+# define LAMY(R1,R3,D2,B2) RSY_(0xEB,R1,R3,B2,D2,0x9A)
+/* LOAD ADDRESS */
+# define LA(R1,D2,X2,B2) RX_(0x41,R1,X2,B2,D2)
+# define LAY(R1,D2,X2,B2) RXY_(0xE3,R1,X2,B2,D2,0x71)
+/* LOAD ADDRESS EXTENDED */
+# define LAE(R1,D2,X2,B2) RX_(0x51,R1,X2,B2,D2)
+/* LOAD ADDRESS RELATIVE LONG */
+# define LARL(R1,I2) RIL_(0xC0,R1,0x0,I2)
+/* LOAD AND TEST */
+# define LTR(R1,R2) RR_(0x12,R1,R2)
+# define LTGR(R1,R2) RRE_(0xB902,R1,R2)
+# define LTGFR(R1,R2) RRE_(0xB912,R1,R2)
+/* LOAD BYTE */
+# define LGBR(R1,R2) RRE_(0xB906,R1,R2) /* disasm */
+# define LB(R1,D2,X2,B2) RXY_(0xE3,R1,X2,B2,D2,0x76)
+# define LGB(R1,D2,X2,B2) RXY_(0xE3,R1,X2,B2,D2,0x77)
+/* LOAD COMPLEMENT */
+# define LCR(R1,R2) RR_(0x13,R1,R2)
+# define LCGR(R1,R2) RRE_(0xB903,R1,R2)
+# define LCGFR(R1,R2) RRE_(0xB913,R1,R2)
+/* LOAD HALFWORD */
+# define LH(R1,D2,X2,B2) RX_(0x48,R1,X2,B2,D2)
+# define LHY(R1,D2,X2,B2) RXY_(0xE3,R1,X2,B2,D2,0x78)
+# define LGHR(R1,R2) RRE_(0xB907,R1,R2) /* disasm */
+# define LGH(R1,D2,X2,B2) RXY_(0xE3,R1,X2,B2,D2,0x15)
+/* LOAD HALFWORD IMMEDIATE */
+# define LHI(R1,I2) RI_(0xA7,R1,0x8,I2)
+# define LGHI(R1,I2) RI_(0xA7,R1,0x9,I2)
+/* LOAD LOGICAL */
+# define LLGFR(R1,R2) RRE_(0xB916,R1,R2)
+# define LLGF(R1,D2,X2,B2) RXY_(0xE3,R1,X2,B2,D2,0x16)
+/* LOAD LOGICAL CHARACTER */
+# define LLGCR(R1,R2) RRE_(0xB984,R1,R2) /* disasm */
+# define LLGC(R1,D2,X2,B2) RXY_(0xE3,R1,X2,B2,D2,0x90)
+/* LOAD LOGICAL HALFWORD */
+# define LLGHR(R1,R2) RRE_(0xB985,R1,R2) /* disasm */
+# define LLGH(R1,D2,X2,B2) RXY_(0xE3,R1,X2,B2,D2,0x91)
+/* LOAD LOGICAL IMMEDIATE */
+# define LLIHH(R1,I2) RI_(0xA5,R1,0xC,I2)
+# define LLIHL(R1,I2) RI_(0xA5,R1,0xD,I2)
+# define LLILH(R1,I2) RI_(0xA5,R1,0xE,I2)
+# define LLILL(R1,I2) RI_(0xA5,R1,0xF,I2)
+/* LOAD LOGICAL THIRTY ONE BITS */
+# define LLGTR(R1,R2) RRE_(0xB917,R1,R2)
+# define LLGT(R1,D2,X2,B2) RXY_(0xE3,R1,X2,B2,D2,0x17)
+/* LOAD MULTIPLE */
+# define LM(R1,R3,D2,B2) RS_(0x98,R1,R3,B2,D2)
+# define LMY(R1,R3,D2,B2) RSY_(0xEB,R1,R3,B2,D2,0x98)
+# define LMG(R1,R3,D2,B2) RSY_(0xEB,R1,R3,B2,D2,0x04)
+/* LOAD MULTIPLE DISJOINT */
+# define LMD(R1,R3,D2,B2,D4,B4) SS_(0xEF,R1,R3,B2,D2,B4,D4)
+/* LOAD MULTIPLE HIGH */
+# define LMH(R1,R3,D2,B2) RSY_(0xEB,R1,R3,B2,D2,0x96)
+/* LOAD NEGATIVE */
+# define LNR(R1,R2) RR_(0x11,R1,R2)
+# define LNGR(R1,R2) RRE_(0xB901,R1,R2)
+# define LNGFR(R1,R2) RRE_(0xB911,R1,R2)
+/* LOAD PAIR FROM QUADWORD */
+# define LPQ(R1,D2,X2,B2) RXY_(0xE3,R1,X2,B2,D2,0x8F)
+/* LOAD POSITIVE */
+# define LPR(R1,R2) RR_(0x10,R1,R2)
+# define LPGR(R1,R2) RRE_(0xB900,R1,R2)
+# define LPGFR(R1,R2) RRE_(0xB910,R1,R2)
+/* LOAD REVERSED */
+# define LRVR(R1,R2) RRE_(0xB91F,R1,R2)
+# define LRVGR(R1,R2) RRE_(0xB90F,R1,R2)
+# define LRVH(R1,D2,X2,B2) RXY_(0xE3,R1,X2,B2,D2,0x1F)
+# define LRV(R1,D2,X2,B2) RXY_(0xE3,R1,X2,B2,D2,0x1E)
+# define LRVG(R1,D2,X2,B2) RXY_(0xE3,R1,X2,B2,D2,0x0F)
+/* MONITOR CALL */
+# define MC(D1,B1,I2) SI_(0xAF,I2,B1,D1)
+/* MOVE */
+# define MVI(D1,B1,I2) SI_(0x92,I2,B1,D1)
+# define MVIY(D1,B1,I2) SIY_(0xEB,I2,B1,D1,0x52)
+# define MVC(D1,L,B1,D2,B2) SSL_(0xD2,L,B1,D1,B2,D2)
+/* MOVE INVERSE */
+# define MVCIN(D1,L,B1,D2,B2) SSL_(0xE8,L,B1,D1,B2,D2)
+/* MOVE LONG */
+# define MVCL(R1,R2) RR_(0x0E,R1,R2)
+/* MOVE LONG EXTENDED */
+# define MVCLE(R1,R3,D2,B2) RS_(0xA8,R1,R3,B2,D2)
+/* MOVE LONG UNICODE */
+# define MVCLU(R1,R3,D2,B2) RSY_(0xEB,R1,R3,B2,D2,0x8E)
+/* MOVE NUMERICS */
+# define MVN(D1,L,B1,D2,B2) SSL_(0xD1,L,B1,D1,B2,D2)
+/* MOVE STRING */
+# define MVST(R1,R2) RRE_(0xB255,R1,R2)
+/* MOVE WITH OFFSET */
+# define MVO(D1,L1,B1,D2,L2,B2) SS_(0xF1,L1,L2,B1,D1,B2,D2)
+/* MOVE ZONES */
+# define MVZ(D1,L,B1,D2,B2) SSL_(0xD3,L,B1,D1,B2,D2)
+/* MULTIPLY */
+# define MR(R1,R2) RR_(0x1C,R1,R2)
+# define M(R1,D2,X2,B2) RX_(0x5C,R1,X2,B2,D2)
+/* MULTIPLY HALFWORD */
+# define MH(R1,D2,X2,B2) RX_(0x4C,R1,X2,B2,D2)
+/* MULTIPLY HALFWORD IMMEDIATE */
+# define MHI(R1,I2) RI_(0xA7,R1,0xC,I2)
+# define MGHI(R1,I2) RI_(0xA7,R1,0xD,I2)
+/* MULTIPLY LOGICAL */
+# define MLR(R1,R2) RRE_(0xB996,R1,R2)
+# define MLGR(R1,R2) RRE_(0xB986,R1,R2)
+# define ML(R1,D2,X2,B2) RXY_(0xE3,R1,X2,B2,D2,0x96)
+# define MLG(R1,D2,X2,B2) RXY_(0xE3,R1,X2,B2,D2,0x86)
+/* MULTIPLY SINGLE */
+# define MSR(R1,R2) RRE_(0xB252,R1,R2)
+# define MSGR(R1,R2) RRE_(0xB90C,R1,R2)
+# define MSGFR(R1,R2) RRE_(0xB91C,R1,R2)
+# define MS(R1,D2,X2,B2) RX_(0x71,R1,X2,B2,D2)
+# define MSY(R1,D2,X2,B2) RXY_(0xE3,R1,X2,B2,D2,0x51)
+# define MSG(R1,D2,X2,B2) RXY_(0xE3,R1,X2,B2,D2,0x0C)
+# define MSGF(R1,D2,X2,B2) RXY_(0xE3,R1,X2,B2,D2,0x1C)
+/* OR */
+# define OR(R1,R2) RR_(0x16,R1,R2)
+# define OGR(R1,R2) RRE_(0xB981,R1,R2)
+# define O(R1,D2,X2,B2) RX_(0x56,R1,X2,B2,D2)
+# define OY(R1,D2,X2,B2) RXY_(0xE3,R1,X2,B2,D2,0x56)
+# define OG(R1,D2,X2,B2) RXY_(0xE3,R1,X2,B2,D2,0x81)
+# define OI(D1,B1,I2) SI_(0x96,I2,B1,D1)
+# define OIY(D1,B1,I2) SIY_(0xEB,I2,B1,D1,0x56)
+# define OC(D1,L,B1,D2,B2) SSL_(0xD6,L,B1,D1,B2,D2)
+/* OR IMMEDIATE */
+# define OIHH(R1,I2) RI_(0xA5,R1,0x8,I2)
+# define OIHL(R1,I2) RI_(0xA5,R1,0x9,I2)
+# define OILH(R1,I2) RI_(0xA5,R1,0xA,I2)
+# define OILL(R1,I2) RI_(0xA5,R1,0xB,I2)
+/* PACK */
+# define PACK(D1,L1,B1,D2,L2,B2) SS_(0xF2,L1,L2,B1,D1,B2,D2)
+/* PACK ASCII */
+# define PKA(D1,B1,D2,L2,B2) SSL_(0xE9,L2,B1,D1,B2,D2)
+/* PACK UNICODE */
+# define PKU(D1,B1,D2,L2,B2) SSL_(0xE1,L2,B1,D1,B2,D2)
+/* PERFORM LOCKED OPERATION */
+# define PLO(R1,D2,B2,R3,D4,B4) SS_(0xEE,R1,R3,B2,D2,B4,D4)
+/* ROTATE LEFT SINGLE LOGICAL */
+# define RLL(R1,R3,D2,B2) RSY_(0xEB,R1,R3,B2,D2,0x1D)
+# define RLLG(R1,R3,D2,B2) RSY_(0xEB,R1,R3,B2,D2,0x1C)
+/* SEARCH STRING */
+# define SRST(R1,R2) RRE_(0xB25E,R1,R2)
+/* SET ACCESS */
+# define SAR(R1,R2) RRE_(0xB24E,R1,R2)
+/* SET ADDRESSING MODE */
+# define SAM24() E_(0x10C)
+# define SAM31() E_(0x10D)
+# define SAM64() E_(0x10E)
+/* SET PROGRAM MASK */
+# define SPM(R1) RR_(0x04,R1,0)
+/* SHIFT LEFT DOUBLE */
+# define SLDA(R1,D2,B2) RS_(0x8F,R1,0,B2,D2)
+/* SHIFT LEFT DOUBLE LOGICAL */
+# define SLDL(R1,D2,B2) RS_(0x8D,R1,0,B2,D2)
+/* SHIFT LEFT SINGLE */
+# define SLA(R1,D2,B2) RS_(0x8B,R1,0,B2,D2)
+# define SLAG(R1,R3,D2,B2) RSY_(0xEB,R1,R3,B2,D2,0x0B)
+/* SHIFT LEFT SINGLE LOGICAL */
+# define SLL(R1,D2,B2) RS_(0x89,R1,0,B2,D2)
+# define SLLG(R1,R3,D2,B2) RSY_(0xEB,R1,R3,B2,D2,0x0D)
+/* SHIFT RIGHT DOUBLE */
+# define SRDA(R1,D2,B2) RS_(0x8E,R1,0,B2,D2)
+/* SHIFT RIGHT DOUBLE LOGICAL */
+# define SRDL(R1,D2,B2) RS_(0x8C,R1,0,B2,D2)
+/* SHIFT RIGHT SINGLE */
+# define SRA(R1,D2,B2) RS_(0x8A,R1,0,B2,D2)
+# define SRAG(R1,R3,D2,B2) RSY_(0xEB,R1,R3,B2,D2,0x0A)
+/* SHIFT RIGHT SINGLE LOGICAL */
+# define SRL(R1,D2,B2) RS_(0x88,R1,0,B2,D2)
+# define SRLG(R1,R3,D2,B2) RSY_(0xEB,R1,R3,B2,D2,0x0C)
+/* STORE */
+# define ST(R1,D2,X2,B2) RX_(0x50,R1,X2,B2,D2)
+# define STY(R1,D2,X2,B2) RXY_(0xE3,R1,X2,B2,D2,0x50)
+# define STG(R1,D2,X2,B2) RXY_(0xE3,R1,X2,B2,D2,0x24)
+/* STORE ACCESS MULTIPLE */
+# define STAM(R1,R3,D2,B2) RS_(0x9B,R1,R3,B2,D2)
+# define STAMY(R1,R3,D2,B2) RSY_(0xEB,R1,R3,B2,D2,0x9B)
+/* STORE CHARACTER */
+# define STC(R1,D2,X2,B2) RX_(0x42,R1,X2,B2,D2)
+# define STCY(R1,D2,X2,B2) RXY_(0xE3,R1,X2,B2,D2,0x72)
+/* STORE CHARACTERS UNDER MASK */
+# define STCM(R1,M3,D2,B2) RS_(0xBE,R1,M3,B2,D2)
+# define STCMY(R1,M3,D2,B2) RSY_(0xEB,R1,M3,B2,D2,0x2D)
+# define STCMH(R1,M3,D2,B2) RSY_(0xEB,R1,M3,B2,D2,0x2C)
+/* STORE CLOCK */
+# define STCK(D2,B2) S_(0xB205,B2,D2)
+/* STORE CLOCK EXTENDED */
+# define STCKE(D2,B2) S_(0xB278,B2,D2)
+/* STORE HALFWORD */
+# define STH(R1,D2,X2,B2) RX_(0x40,R1,X2,B2,D2)
+# define STHY(R1,D2,X2,B2) RXY_(0xE3,R1,X2,B2,D2,0x70)
+/* STORE MULTIPLE */
+# define STM(R1,R3,D2,B2) RS_(0x90,R1,R3,B2,D2)
+# define STMY(R1,R3,D2,B2) RSY_(0xEB,R1,R3,B2,D2,0x90)
+# define STMG(R1,R3,D2,B2) RSY_(0xEB,R1,R3,B2,D2,0x24)
+/* STORE MULTIPLE HIGH */
+# define STMH(R1,R3,D2,B2) RSY_(0xEB,R1,R3,B2,D2,0x26)
+/* STORE PAIR TO QUADWORD */
+# define STPQ(R1,D2,X2,B2) RXY_(0xE3,R1,X2,B2,D2,0x8E)
+/* STORE REVERSED */
+# define STRVH(R1,D2,X2,B2) RXY_(0xE3,R1,X2,B2,D2,0x3F)
+# define STRV(R1,D2,X2,B2) RXY_(0xE3,R1,X2,B2,D2,0x3E)
+# define STRVG(R1,D2,X2,B2) RXY_(0xE3,R1,X2,B2,D2,0x2F)
+/* SUBTRACT */
+# define SR(R1,R2) RR_(0x1B,R1,R2)
+# define SGR(R1,R2) RRE_(0xB909,R1,R2)
+# define SGFR(R1,R2) RRE_(0xB919,R1,R2)
+# define S(R1,D2,X2,B2) RX_(0x5B,R1,X2,B2,D2)
+# define SY(R1,D2,X2,B2) RXY_(0xE3,R1,X2,B2,D2,0x5B)
+# define SG(R1,D2,X2,B2) RXY_(0xE3,R1,X2,B2,D2,0x09)
+# define SGF(R1,D2,X2,B2) RXY_(0xE3,R1,X2,B2,D2,0x19)
+/* SUBTRACT HALFWORD */
+# define SH(R1,D2,X2,B2) RX_(0x4B,R1,X2,B2,D2)
+# define SHY(R1,D2,X2,B2) RXY_(0xE3,R1,X2,B2,D2,0x7B)
+/* SUBTRACT LOGICAL */
+# define SLR(R1,R2) RR_(0x1F,R1,R2)
+# define SLGR(R1,R2) RRE_(0xB90B,R1,R2)
+# define SLGFR(R1,R2) RRE_(0xB91B,R1,R2)
+# define SL(R1,D2,X2,B2) RX_(0x5F,R1,X2,B2,D2)
+# define SLY(R1,D2,X2,B2) RXY_(0xE3,R1,X2,B2,D2,0x5F)
+# define SLG(R1,D2,X2,B2) RXY_(0xE3,R1,X2,B2,D2,0x0B)
+# define SLGF(R1,D2,X2,B2) RXY_(0xE3,R1,X2,B2,D2,0x1B)
+/* SUBTRACT LOGICAL WITH BORROW */
+# define SLBR(R1,R2) RRE_(0xB999,R1,R2)
+# define SLBGR(R1,R2) RRE_(0xB989,R1,R2)
+# define SLB(R1,D2,X2,B2) RXY_(0xE3,R1,X2,B2,D2,0x99)
+# define SLBG(R1,D2,X2,B2) RXY_(0xE3,R1,X2,B2,D2,0x89)
+/* SUPERVISOR CALL */
+# define SVC(I) I_(0xA,I)
+/* TEST ADDRESSING MODE */
+# define TAM() E_(0x10B)
+/* TEST AND SET */
+# define TS(D2,B2) RS_(0x93,0,0,B2,D2)
+/* TEST UNDER MASK (TEST UNDER MASK HIGH, TEST UNDER MASK LOW) */
+# define TM(D1,B1,I2) SI_(0x91,I2,B1,D1)
+# define TMY(D1,B1,I2) SIY_(0xEB,I2,B1,D1,0x51)
+# define TMHH(R1,I2) RI_(0xA7,R1,0x2,I2)
+# define TMHL(R1,I2) RI_(0xA7,R1,0x3,I2)
+# define TMLH(R1,I2) RI_(0xA7,R1,0x0,I2)
+# define TMH(R1,I2) TMLH(R1,I2)
+# define TMLL(R1,I2) RI_(0xA7,R1,0x1,I2)
+# define TML(R1,I2) TMLL(R1,I2)
+/* TRANSLATE */
+# define TR(D1,L,B1,D2,B2) SSL_(0xDC,L,B1,D1,B2,D2)
+/* TRANSLATE AND TEST */
+# define TRT(D1,L,B1,D2,B2) SSL_(0xDD,L,B1,D1,B2,D2)
+/* TRANSLATE EXTENDED */
+# define TRE(R1,R2) RRE_(0xB2A5,R1,R2)
+/* TRANSLATE ONE TO ONE */
+# define TROO(R1,R2) RRE_(0xB993,R1,R2)
+/* TRANSLATE ONE TO TWO */
+# define TROT(R1,R2) RRE_(0xB992,R1,R2)
+/* TRANSLATE TWO TO ONE */
+# define TRTO(R1,R2) RRE_(0xB991,R1,R2)
+/* TRANSLATE TWO TO TWO */
+# define TRTT(R1,R2) RRE_(0xB990,R1,R2)
+/* UNPACK */
+# define UNPK(D1,L1,B1,D2,L2,B2) SS_(0xF3,L1,L2,B1,D1,B2,D2)
+/* UNPACK ASCII */
+# define UNPKA(D1,L1,B1,D2,L2,B2) SS_(0xEA,L1,L2,B1,D1,B2,D2)
+/* UNPACK UNICODE */
+# define UNPKU(D1,L1,B1,D2,L2,B2) SS_(0xE2,L1,L2,B1,D1,B2,D2)
+/* UPDATE TREE */
+# define UPT() E_(0x0102)
+/****************************************************************
+ * Decimal Instructions *
+ ****************************************************************/
+/* ADD DECIMAL */
+# define AP(D1,L1,B1,D2,L2,B2) SS_(0xFA,L1,L2,B1,D1,B2,D2)
+/* COMPARE DECIMAL */
+# define CP(D1,L1,B1,D2,L2,B2) SS_(0xF9,L1,L2,B1,D1,B2,D2)
+/* DIVIDE DECIMAL */
+# define DP(D1,L1,B1,D2,L2,B2) SS_(0xFD,L1,L2,B1,D1,B2,D2)
+/* EDIT */
+# define ED(D1,L,B1,D2,B2) SSL_(0xDE,L,B1,D1,B2,D2)
+/* EDIT AND MARK */
+# define EDMK(D1,L,B1,D2,B2) SSL_(0xDF,L,B1,D1,B2,D2) /* SS-format opcode 0xDF; 0xDE is ED (EDIT) */
+/* MULTIPLY DECIMAL */
+# define MP(D1,L1,B1,D2,L2,B2) SS_(0xFC,L1,L2,B1,D1,B2,D2)
+/* SHIFT AND ROUND DECIMAL */
+# define SRP(D1,L1,B1,D2,L2,B2) SS_(0xF0,L1,L2,B1,D1,B2,D2)
+/* SUBTRACT DECIMAL */
+# define SP(D1,L1,B1,D2,L2,B2) SS_(0xFB,L1,L2,B1,D1,B2,D2)
+/* TEST DECIMAL */
+# define TP(D1,L1,B1) RSL_(0xEB,L1,B1,D1,0xC0)
+/* ZERO AND ADD */
+# define ZAP(D1,L1,B1,D2,L2,B2) SS_(0xF8,L1,L2,B1,D1,B2,D2)
+/****************************************************************
+ * Control Instructions *
+ ****************************************************************/
+/* BRANCH AND SET AUTHORITY */
+# define BSA(R1,R2) RRE_(0xB25A,R1,R2)
+/* BRANCH AND STACK */
+# define BAKR(R1,R2) RRE_(0xB240,R1,R2)
+/* BRANCH IN SUBSPACE GROUP */
+# define BSG(R1,R2) RRE_(0xB258,R1,R2)
+/* COMPARE AND SWAP AND PURGE */
+# define CSP(R1,R2) RRE_(0xB250,R1,R2)
+# define CSPG(R1,R2) RRE_(0xB98A,R1,R2)
+/* DIAGNOSE (all three operand arguments are fixed at 0) */
+# define DIAG() SI_(0x83,0,0,0)
+/* EXTRACT AND SET EXTENDED AUTHORITY
+ * (single-register form: the second register argument is passed as 0) */
+# define ESEA(R1) RRE_(0xB99D,R1,0)
+/* EXTRACT PRIMARY ASN */
+# define EPAR(R1) RRE_(0xB226,R1,0)
+/* EXTRACT SECONDARY ASN */
+# define ESAR(R1) RRE_(0xB227,R1,0)
+/* EXTRACT STACKED REGISTERS */
+# define EREG(R1,R2) RRE_(0xB249,R1,R2)
+# define EREGG(R1,R2) RRE_(0xB90E,R1,R2)
+/* EXTRACT STACKED STATE */
+# define ESTA(R1,R2) RRE_(0xB24A,R1,R2)
+/* INSERT ADDRESS SPACE CONTROL */
+# define IAC(R1) RRE_(0xB224,R1,0)
+/* INSERT PSW KEY */
+# define IPK() S_(0xB20B,0,0)
+/* INSERT STORAGE KEY EXTENDED */
+# define ISKE(R1,R2) RRE_(0xB229,R1,R2)
+/* INSERT VIRTUAL STORAGE KEY */
+# define IVSK(R1,R2) RRE_(0xB223,R1,R2)
+/* INVALIDATE DAT TABLE ENTRY
+ * (R3 is forwarded first; the argument after it is fixed at 0) */
+# define IDTE(R1,R2,R3) RRF_(0xB98E,R3,0,R1,R2)
+/* INVALIDATE PAGE TABLE ENTRY */
+# define IPTE(R1,R2) RRE_(0xB221,R1,R2)
+/* LOAD ADDRESS SPACE PARAMETER */
+# define LASP(D1,B1,D2,B2) SSE_(0xE500,B1,D1,B2,D2)
+/* LOAD CONTROL
+ * (the RSY_ form passes 0xEB first and a second opcode byte last) */
+# define LCTL(R1,R3,D2,B2) RS_(0xB7,R1,R3,B2,D2)
+# define LCTLG(R1,R3,D2,B2) RSY_(0xEB,R1,R3,B2,D2,0x2F)
+/* LOAD PSW */
+# define LPSW(D2,B2) SI_(0x82,0,B2,D2)
+/* LOAD PSW EXTENDED */
+# define LPSWE(D2,B2) S_(0xB2B2,B2,D2)
+/* LOAD REAL ADDRESS */
+# define LRA(R1,D2,X2,B2) RX_(0xB1,R1,X2,B2,D2)
+# define LRAY(R1,D2,X2,B2) RXY_(0xE3,R1,X2,B2,D2,0x13)
+# define LRAG(R1,D2,X2,B2) RXY_(0xE3,R1,X2,B2,D2,0x03)
+/* LOAD USING REAL ADDRESS */
+# define LURA(R1,R2) RRE_(0xB24B,R1,R2)
+# define LURAG(R1,R2) RRE_(0xB905,R1,R2)
+/* MODIFY STACKED STATE */
+# define MSTA(R1) RRE_(0xB247,R1,0)
+/* MOVE PAGE */
+# define MVPG(R1,R2) RRE_(0xB254,R1,R2)
+/* MOVE TO PRIMARY
+ * (R1 and R3 are forwarded in the two argument positions that the
+ * decimal macros use for L1 and L2) */
+# define MVCP(D1,R1,B1,D2,B2,R3) SS_(0xDA,R1,R3,B1,D1,B2,D2)
+/* MOVE TO SECONDARY */
+# define MVCS(D1,R1,B1,D2,B2,R3) SS_(0xDB,R1,R3,B1,D1,B2,D2)
+/* MOVE WITH DESTINATION KEY */
+# define MVCDK(D1,B1,D2,B2) SSE_(0xE50F,B1,D1,B2,D2)
+/* MOVE WITH KEY */
+# define MVCK(D1,R1,B1,D2,B2,R3) SS_(0xD9,R1,R3,B1,D1,B2,D2)
+/* MOVE WITH SOURCE KEY */
+# define MVCSK(D1,B1,D2,B2) SSE_(0xE50E,B1,D1,B2,D2)
+/* PAGE IN */
+# define PGIN(R1,R2) RRE_(0xB22E,R1,R2)
+/* PAGE OUT */
+# define PGOUT(R1,R2) RRE_(0xB22F,R1,R2)
+/* PROGRAM CALL */
+# define PC(D2,B2) S_(0xB218,B2,D2)
+/* PROGRAM RETURN */
+# define PR() E_(0x0101)
+/* PROGRAM TRANSFER */
+# define PT(R1,R2) RRE_(0xB228,R1,R2)
+/* PURGE ALB (both register arguments fixed at 0) */
+# define PALB() RRE_(0xB248,0,0)
+/* PURGE TLB (base and displacement fixed at 0) */
+# define PTLB() S_(0xB20D,0,0)
+/* RESET REFERENCE BIT EXTENDED */
+# define RRBE(R1,R2) RRE_(0xB22A,R1,R2)
+/* RESUME PROGRAM */
+# define RP(D2,B2) S_(0xB277,B2,D2)
+/* SET ADDRESS SPACE CONTROL */
+# define SAC(D2,B2) S_(0xB219,B2,D2)
+/* SET ADDRESS SPACE CONTROL FAST */
+# define SACF(D2,B2) S_(0xB279,B2,D2)
+/* SET CLOCK */
+# define SCK(D2,B2) S_(0xB204,B2,D2)
+/* SET CLOCK COMPARATOR */
+# define SCKC(D2,B2) S_(0xB206,B2,D2)
+/* SET CLOCK PROGRAMMABLE FIELD */
+# define SCKPF() E_(0x0107)
+/* SET CPU TIMER */
+# define SPT(D2,B2) S_(0xB208,B2,D2)
+/* SET PREFIX */
+# define SPX(D2,B2) S_(0xB210,B2,D2)
+/* SET PSW KEY FROM ADDRESS */
+# define SPKA(D2,B2) S_(0xB20A,B2,D2)
+/* SET SECONDARY ASN */
+# define SSAR(R1) RRE_(0xB225,R1,0)
+/* SET STORAGE KEY EXTENDED */
+# define SSKE(R1,R2) RRE_(0xB22B,R1,R2)
+/* SET SYSTEM MASK (the immediate argument of SI_ is fixed at 0) */
+# define SSM(D2,B2) SI_(0x80,0,B2,D2)
+/* SIGNAL PROCESSOR */
+# define SIGP(R1,R3,D2,B2) RS_(0xAE,R1,R3,B2,D2)
+/* STORE CLOCK COMPARATOR */
+# define STCKC(D2,B2) S_(0xB207,B2,D2)
+/* STORE CONTROL */
+# define STCTL(R1,R3,D2,B2) RS_(0xB6,R1,R3,B2,D2)
+# define STCTG(R1,R3,D2,B2) RSY_(0xEB,R1,R3,B2,D2,0x25)
+/* STORE CPU ADDRESS */
+# define STAP(D2,B2) S_(0xB212,B2,D2)
+/* STORE CPU ID */
+# define STIDP(D2,B2) S_(0xB202,B2,D2)
+/* STORE CPU TIMER */
+# define STPT(D2,B2) S_(0xB209,B2,D2)
+/* STORE FACILITY LIST */
+# define STFL(D2,B2) S_(0xB2B1,B2,D2)
+/* STORE PREFIX */
+# define STPX(D2,B2) S_(0xB211,B2,D2)
+/* STORE REAL ADDRESS */
+# define STRAG(D1,B1,D2,B2) SSE_(0xE502,B1,D1,B2,D2)
+/* STORE SYSTEM INFORMATION */
+# define STSI(D2,B2) S_(0xB27D,B2,D2)
+/* STORE THEN AND SYSTEM MASK
+ * (I2 is the last macro argument but is forwarded right after the opcode) */
+# define STNSM(D1,B1,I2) SI_(0xAC,I2,B1,D1)
+/* STORE THEN OR SYSTEM MASK */
+# define STOSM(D1,B1,I2) SI_(0xAD,I2,B1,D1)
+/* STORE USING REAL ADDRESS */
+# define STURA(R1,R2) RRE_(0xB246,R1,R2)
+# define STURG(R1,R2) RRE_(0xB925,R1,R2)
+/* TEST ACCESS */
+# define TAR(R1,R2) RRE_(0xB24C,R1,R2)
+/* TEST BLOCK */
+# define TB(R1,R2) RRE_(0xB22C,R1,R2)
+/* TEST PROTECTION */
+# define TPROT(D1,B1,D2,B2) SSE_(0xE501,B1,D1,B2,D2)
+/* TRACE */
+# define TRACE(R1,R3,D2,B2) RS_(0x99,R1,R3,B2,D2)
+# define TRACG(R1,R3,D2,B2) RSY_(0xEB,R1,R3,B2,D2,0x0F)
+/* TRAP (TRAP2 has no operands; TRAP4 takes a base/displacement pair) */
+# define TRAP2() E_(0x01FF)
+# define TRAP4(D2,B2) S_(0xB2FF,B2,D2)
+/****************************************************************
+ * I/O Instructions *
+ ****************************************************************/
+/* All I/O macros forward a 16-bit opcode to S_.  The operand-less ones
+ * pass 0 for both base and displacement; the others take (D2,B2) and
+ * forward them as (B2,D2). */
+/* CANCEL SUBCHANNEL */
+# define XSCH() S_(0xB276,0,0)
+/* CLEAR SUBCHANNEL */
+# define CSCH() S_(0xB230,0,0)
+/* HALT SUBCHANNEL */
+# define HSCH() S_(0xB231,0,0)
+/* MODIFY SUBCHANNEL */
+# define MSCH(D2,B2) S_(0xB232,B2,D2)
+/* RESET CHANNEL PATH */
+# define RCHP() S_(0xB23B,0,0)
+/* RESUME SUBCHANNEL */
+# define RSCH() S_(0xB238,0,0)
+/* SET ADDRESS LIMIT */
+# define SAL() S_(0xB237,0,0)
+/* SET CHANNEL MONITOR */
+# define SCHM() S_(0xB23C,0,0)
+/* START SUBCHANNEL */
+# define SSCH(D2,B2) S_(0xB233,B2,D2)
+/* STORE CHANNEL PATH STATUS */
+# define STCPS(D2,B2) S_(0xB23A,B2,D2)
+/* STORE CHANNEL REPORT WORD */
+# define STCRW(D2,B2) S_(0xB239,B2,D2)
+/* STORE SUBCHANNEL */
+# define STSCH(D2,B2) S_(0xB234,B2,D2)
+/* TEST PENDING INTERRUPTION */
+# define TPI(D2,B2) S_(0xB236,B2,D2)
+/* TEST SUBCHANNEL */
+# define TSCH(D2,B2) S_(0xB235,B2,D2)
+/*
+ * Internal helpers.  Each lower-case name is a macro that inserts the
+ * implicit `_jit` state as first argument and calls the static function
+ * of the same name with a leading underscore.
+ *
+ *  - xdiv*: division helpers that return an int32_t.
+ *  - crr/cri (and *_u): take a condition code `cc` first, then three
+ *    registers, or two registers and an immediate.
+ *  - brr/bri, badd*, bsub*, bmx*: take a condition code and a
+ *    jit_word_t `i0` ahead of the operands; the `_p` variants return a
+ *    jit_word_t where the plain variants return void (presumably an
+ *    address for later patching -- confirm against the definitions).
+ *  - badd*/bsub* additionally take a jit_bool_t `s`.
+ */
+# define xdivr(r0,r1) _xdivr(_jit,r0,r1)
+static int32_t _xdivr(jit_state_t*,int32_t,int32_t);
+# define xdivr_u(r0,r1) _xdivr_u(_jit,r0,r1)
+static int32_t _xdivr_u(jit_state_t*,int32_t,int32_t);
+# define xdivi(r0,i0) _xdivi(_jit,r0,i0)
+static int32_t _xdivi(jit_state_t*,int32_t,jit_word_t);
+# define xdivi_u(r0,i0) _xdivi_u(_jit,r0,i0)
+static int32_t _xdivi_u(jit_state_t*,int32_t,jit_word_t);
+# define crr(cc,r0,r1,r2) _crr(_jit,cc,r0,r1,r2)
+static void _crr(jit_state_t*,
+ int32_t,int32_t,int32_t,int32_t);
+# define cri(cc,r0,r1,i0) _cri(_jit,cc,r0,r1,i0)
+static void _cri(jit_state_t*,
+ int32_t,int32_t,int32_t,jit_word_t);
+# define crr_u(cc,r0,r1,r2) _crr_u(_jit,cc,r0,r1,r2)
+static void _crr_u(jit_state_t*,
+ int32_t,int32_t,int32_t,int32_t);
+# define cri_u(cc,r0,r1,i0) _cri_u(_jit,cc,r0,r1,i0)
+static void _cri_u(jit_state_t*,
+ int32_t,int32_t,int32_t,jit_word_t);
+# define brr(cc,i0,r0,r1) _brr(_jit,cc,i0,r0,r1)
+static void _brr(jit_state_t*,int32_t,
+ jit_word_t,int32_t,int32_t);
+# define brr_p(cc,i0,r0,r1) _brr_p(_jit,cc,i0,r0,r1)
+static jit_word_t _brr_p(jit_state_t*,int32_t,
+ jit_word_t,int32_t,int32_t);
+# define bri(cc,i0,r0,i1) _bri(_jit,cc,i0,r0,i1)
+static void _bri(jit_state_t*,int32_t,
+ jit_word_t,int32_t,jit_word_t);
+# define bri_p(cc,i0,r0,i1) _bri_p(_jit,cc,i0,r0,i1)
+static jit_word_t _bri_p(jit_state_t*,int32_t,
+ jit_word_t,int32_t,jit_word_t);
+# define brr_u(cc,i0,r0,r1) _brr_u(_jit,cc,i0,r0,r1)
+static void _brr_u(jit_state_t*,int32_t,
+ jit_word_t,int32_t,int32_t);
+# define brr_u_p(cc,i0,r0,r1) _brr_u_p(_jit,cc,i0,r0,r1)
+static jit_word_t _brr_u_p(jit_state_t*,int32_t,
+ jit_word_t,int32_t,int32_t);
+# define bri_u(cc,i0,r0,i1) _bri_u(_jit,cc,i0,r0,i1)
+static void _bri_u(jit_state_t*,int32_t,
+ jit_word_t,int32_t,jit_word_t);
+# define bri_u_p(cc,i0,r0,i1) _bri_u_p(_jit,cc,i0,r0,i1)
+static jit_word_t _bri_u_p(jit_state_t*,int32_t,
+ jit_word_t,int32_t,jit_word_t);
+# define baddr(c,s,i0,r0,r1) _baddr(_jit,c,s,i0,r0,r1)
+static void _baddr(jit_state_t*,int32_t,jit_bool_t,
+ jit_word_t,int32_t,int32_t);
+# define baddr_p(c,s,i0,r0,r1) _baddr_p(_jit,c,s,i0,r0,r1)
+static jit_word_t _baddr_p(jit_state_t*,int32_t,jit_bool_t,
+ jit_word_t,int32_t,int32_t);
+# define baddi(c,s,i0,r0,i1) _baddi(_jit,c,s,i0,r0,i1)
+static void _baddi(jit_state_t*,int32_t,jit_bool_t,
+ jit_word_t,int32_t,jit_word_t);
+# define baddi_p(c,s,i0,r0,i1) _baddi_p(_jit,c,s,i0,r0,i1)
+static jit_word_t _baddi_p(jit_state_t*,int32_t,jit_bool_t,
+ jit_word_t,int32_t,jit_word_t);
+# define bsubr(c,s,i0,r0,r1) _bsubr(_jit,c,s,i0,r0,r1)
+static void _bsubr(jit_state_t*,int32_t,jit_bool_t,
+ jit_word_t,int32_t,int32_t);
+# define bsubr_p(c,s,i0,r0,r1) _bsubr_p(_jit,c,s,i0,r0,r1)
+static jit_word_t _bsubr_p(jit_state_t*,int32_t,jit_bool_t,
+ jit_word_t,int32_t,int32_t);
+# define bsubi(c,s,i0,r0,i1) _bsubi(_jit,c,s,i0,r0,i1)
+static void _bsubi(jit_state_t*,int32_t,jit_bool_t,
+ jit_word_t,int32_t,jit_word_t);
+# define bsubi_p(c,s,i0,r0,i1) _bsubi_p(_jit,c,s,i0,r0,i1)
+static jit_word_t _bsubi_p(jit_state_t*,int32_t,jit_bool_t,
+ jit_word_t,int32_t,jit_word_t);
+# define bmxr(cc,i0,r0,r1) _bmxr(_jit,cc,i0,r0,r1)
+static void _bmxr(jit_state_t*,int32_t,
+ jit_word_t,int32_t,int32_t);
+# define bmxr_p(cc,i0,r0,r1) _bmxr_p(_jit,cc,i0,r0,r1)
+static jit_word_t _bmxr_p(jit_state_t*,int32_t,
+ jit_word_t,int32_t,int32_t);
+# define bmxi(cc,i0,r0,i1) _bmxi(_jit,cc,i0,r0,i1)
+static void _bmxi(jit_state_t*,int32_t,
+ jit_word_t,int32_t,jit_word_t);
+# define bmxi_p(cc,i0,r0,i1) _bmxi_p(_jit,cc,i0,r0,i1)
+static jit_word_t _bmxi_p(jit_state_t*,int32_t,
+ jit_word_t,int32_t,jit_word_t);
+/*
+ * Move and integer arithmetic operations.  Naming visible in the
+ * signatures: a trailing `r` form takes only registers (int32_t), a
+ * trailing `i` form takes a jit_word_t immediate as its last operand.
+ * The q* forms take four operands instead of three.  movi_p is the only
+ * entry here that returns a value (jit_word_t).
+ */
+# define movr(r0,r1) _movr(_jit,r0,r1)
+static void _movr(jit_state_t*,int32_t,int32_t);
+# define movi(r0,i0) _movi(_jit,r0,i0)
+static void _movi(jit_state_t*,int32_t,jit_word_t);
+# define movi_p(r0,i0) _movi_p(_jit,r0,i0)
+static jit_word_t _movi_p(jit_state_t*,int32_t,jit_word_t);
+/* Addition: plain, and the addc / addx variants. */
+# define addr(r0,r1,r2) _addr(_jit,r0,r1,r2)
+static void _addr(jit_state_t*,int32_t,int32_t,int32_t);
+# define addi(r0,r1,i0) _addi(_jit,r0,r1,i0)
+static void _addi(jit_state_t*,int32_t,int32_t,jit_word_t);
+# define addcr(r0,r1,r2) _addcr(_jit,r0,r1,r2)
+static void _addcr(jit_state_t*,int32_t,int32_t,int32_t);
+# define addci(r0,r1,i0) _addci(_jit,r0,r1,i0)
+static void _addci(jit_state_t*,int32_t,int32_t,jit_word_t);
+# define addxr(r0,r1,r2) _addxr(_jit,r0,r1,r2)
+static void _addxr(jit_state_t*,int32_t,int32_t,int32_t);
+# define addxi(r0,r1,i0) _addxi(_jit,r0,r1,i0)
+static void _addxi(jit_state_t*,int32_t,int32_t,jit_word_t);
+/* Subtraction: plain, subc / subx variants, and rsbi. */
+# define subr(r0,r1,r2) _subr(_jit,r0,r1,r2)
+static void _subr(jit_state_t*,int32_t,int32_t,int32_t);
+# define subi(r0,r1,i0) _subi(_jit,r0,r1,i0)
+static void _subi(jit_state_t*,int32_t,int32_t,jit_word_t);
+# define subcr(r0,r1,r2) _subcr(_jit,r0,r1,r2)
+static void _subcr(jit_state_t*,int32_t,int32_t,int32_t);
+# define subci(r0,r1,i0) _subci(_jit,r0,r1,i0)
+static void _subci(jit_state_t*,int32_t,int32_t,jit_word_t);
+# define subxr(r0,r1,r2) _subxr(_jit,r0,r1,r2)
+static void _subxr(jit_state_t*,int32_t,int32_t,int32_t);
+# define subxi(r0,r1,i0) _subxi(_jit,r0,r1,i0)
+static void _subxi(jit_state_t*,int32_t,int32_t,jit_word_t);
+# define rsbi(r0, r1, i0) _rsbi(_jit, r0, r1, i0)
+static void _rsbi(jit_state_t*,int32_t,int32_t,jit_word_t);
+/* Multiplication, including the four-operand qmul* forms. */
+# define mulr(r0,r1,r2) _mulr(_jit,r0,r1,r2)
+static void _mulr(jit_state_t*,int32_t,int32_t,int32_t);
+# define muli(r0,r1,i0) _muli(_jit,r0,r1,i0)
+static void _muli(jit_state_t*,int32_t,int32_t,jit_word_t);
+# define qmulr(r0,r1,r2,r3) _qmulr(_jit,r0,r1,r2,r3)
+static void _qmulr(jit_state_t*,int32_t,
+ int32_t,int32_t,int32_t);
+# define qmuli(r0,r1,r2,i0) _qmuli(_jit,r0,r1,r2,i0)
+static void _qmuli(jit_state_t*,int32_t,
+ int32_t,int32_t,jit_word_t);
+# define qmulr_u(r0,r1,r2,r3) _qmulr_u(_jit,r0,r1,r2,r3)
+static void _qmulr_u(jit_state_t*,int32_t,
+ int32_t,int32_t,int32_t);
+# define qmuli_u(r0,r1,r2,i0) _qmuli_u(_jit,r0,r1,r2,i0)
+static void _qmuli_u(jit_state_t*,int32_t,
+ int32_t,int32_t,jit_word_t);
+/* Division and remainder, including the four-operand qdiv* forms. */
+# define divr(r0,r1,r2) _divr(_jit,r0,r1,r2)
+static void _divr(jit_state_t*,int32_t,int32_t,int32_t);
+# define divi(r0,r1,i0) _divi(_jit,r0,r1,i0)
+static void _divi(jit_state_t*,int32_t,int32_t,jit_word_t);
+# define divr_u(r0,r1,r2) _divr_u(_jit,r0,r1,r2)
+static void _divr_u(jit_state_t*,int32_t,int32_t,int32_t);
+# define divi_u(r0,r1,i0) _divi_u(_jit,r0,r1,i0)
+static void _divi_u(jit_state_t*,int32_t,int32_t,jit_word_t);
+# define remr(r0,r1,r2) _remr(_jit,r0,r1,r2)
+static void _remr(jit_state_t*,int32_t,int32_t,int32_t);
+# define remi(r0,r1,i0) _remi(_jit,r0,r1,i0)
+static void _remi(jit_state_t*,int32_t,int32_t,jit_word_t);
+# define remr_u(r0,r1,r2) _remr_u(_jit,r0,r1,r2)
+static void _remr_u(jit_state_t*,int32_t,int32_t,int32_t);
+# define remi_u(r0,r1,i0) _remi_u(_jit,r0,r1,i0)
+static void _remi_u(jit_state_t*,int32_t,int32_t,jit_word_t);
+# define qdivr(r0,r1,r2,r3) _qdivr(_jit,r0,r1,r2,r3)
+static void _qdivr(jit_state_t*,int32_t,
+ int32_t,int32_t,int32_t);
+# define qdivi(r0,r1,r2,i0) _qdivi(_jit,r0,r1,r2,i0)
+static void _qdivi(jit_state_t*,int32_t,
+ int32_t,int32_t,jit_word_t);
+# define qdivr_u(r0,r1,r2,r3) _qdivr_u(_jit,r0,r1,r2,r3)
+static void _qdivr_u(jit_state_t*,int32_t,
+ int32_t,int32_t,int32_t);
+# define qdivi_u(r0,r1,r2,i0) _qdivi_u(_jit,r0,r1,r2,i0)
+static void _qdivi_u(jit_state_t*,int32_t,
+ int32_t,int32_t,jit_word_t);
+/*
+ * Shifts, negation, complement and bitwise logic.
+ * For the register-count shifts (lshr, rshr, rshr_u) the 32-bit build
+ * uses an out-of-line function, while the 64-bit build expands directly
+ * to SLLG / SRAG / SRLG with the third argument fixed at 0.  The
+ * immediate-count shifts are functions in both builds.
+ */
+# if __WORDSIZE == 32
+# define lshr(r0,r1,r2) _lshr(_jit,r0,r1,r2)
+static void _lshr(jit_state_t*,int32_t,int32_t,int32_t);
+# else
+# define lshr(r0,r1,r2) SLLG(r0,r1,0,r2)
+# endif
+# define lshi(r0,r1,i0) _lshi(_jit,r0,r1,i0)
+static void _lshi(jit_state_t*,int32_t,int32_t,jit_word_t);
+# if __WORDSIZE == 32
+# define rshr(r0,r1,r2) _rshr(_jit,r0,r1,r2)
+static void _rshr(jit_state_t*,int32_t,int32_t,int32_t);
+# else
+# define rshr(r0,r1,r2) SRAG(r0,r1,0,r2)
+# endif
+# define rshi(r0,r1,i0) _rshi(_jit,r0,r1,i0)
+static void _rshi(jit_state_t*,int32_t,int32_t,jit_word_t);
+# if __WORDSIZE == 32
+# define rshr_u(r0,r1,r2) _rshr_u(_jit,r0,r1,r2)
+static void _rshr_u(jit_state_t*,int32_t,int32_t,int32_t);
+# else
+# define rshr_u(r0,r1,r2) SRLG(r0,r1,0,r2)
+# endif
+# define rshi_u(r0,r1,i0) _rshi_u(_jit,r0,r1,i0)
+static void _rshi_u(jit_state_t*,int32_t,int32_t,jit_word_t);
+/* negr expands to a single instruction macro: LCR (32-bit) or LCGR. */
+# if __WORDSIZE == 32
+# define negr(r0,r1) LCR(r0,r1)
+# else
+# define negr(r0,r1) LCGR(r0,r1)
+# endif
+# define comr(r0,r1) _comr(_jit,r0,r1)
+static void _comr(jit_state_t*,int32_t,int32_t);
+# define andr(r0,r1,r2) _andr(_jit,r0,r1,r2)
+static void _andr(jit_state_t*,int32_t,int32_t,int32_t);
+# define andi(r0,r1,i0) _andi(_jit,r0,r1,i0)
+static void _andi(jit_state_t*,int32_t,int32_t,jit_word_t);
+# define orr(r0,r1,r2) _orr(_jit,r0,r1,r2)
+static void _orr(jit_state_t*,int32_t,int32_t,int32_t);
+# define ori(r0,r1,i0) _ori(_jit,r0,r1,i0)
+static void _ori(jit_state_t*,int32_t,int32_t,jit_word_t);
+# define xorr(r0,r1,r2) _xorr(_jit,r0,r1,r2)
+static void _xorr(jit_state_t*,int32_t,int32_t,int32_t);
+# define xori(r0,r1,i0) _xori(_jit,r0,r1,i0)
+static void _xori(jit_state_t*,int32_t,int32_t,jit_word_t);
+/* Byte-swap operations.  They map onto the extr_* / movr operations
+ * rather than onto a byte-reversing instruction (presumably because the
+ * target is big-endian -- confirm).  bswapr_ul is defined only when
+ * __WORDSIZE is not 32. */
+# define bswapr_us(r0,r1) extr_us(r0,r1)
+# if __WORDSIZE == 32
+# define bswapr_ui(r0,r1) movr(r0,r1)
+# else
+# define bswapr_ui(r0,r1) extr_ui(r0,r1)
+# define bswapr_ul(r0,r1) movr(r0,r1)
+# endif
+/* Register-to-register extensions, each a direct alias for one
+ * instruction macro.  The _i/_ui forms are defined only for 64-bit
+ * words. */
+# define extr_c(r0,r1) LGBR(r0,r1)
+# define extr_uc(r0,r1) LLGCR(r0,r1)
+# define extr_s(r0,r1) LGHR(r0,r1)
+# define extr_us(r0,r1) LLGHR(r0,r1)
+# if __WORDSIZE == 64
+# define extr_i(r0,r1) LGFR(r0,r1)
+# define extr_ui(r0,r1) LLGFR(r0,r1)
+# endif
+/*
+ * Loads.  For every width there are four forms:
+ *   ldr_*   direct alias for a load instruction macro, with the two
+ *           middle arguments fixed at 0 and r1 passed last;
+ *   ldi_*   function taking a register and a jit_word_t;
+ *   ldxr_*  function taking three registers;
+ *   ldxi_*  function taking two registers and a jit_word_t.
+ * ldr_s and ldr_i pick a different instruction macro depending on
+ * __WORDSIZE; the *_ui and *_l forms exist only for 64-bit words.
+ */
+# define ldr_c(r0,r1) LGB(r0,0,0,r1)
+# define ldi_c(r0,i0) _ldi_c(_jit,r0,i0)
+static void _ldi_c(jit_state_t*,int32_t,jit_word_t);
+# define ldxr_c(r0,r1,r2) _ldxr_c(_jit,r0,r1,r2)
+static void _ldxr_c(jit_state_t*,int32_t,int32_t,int32_t);
+# define ldxi_c(r0,r1,i0) _ldxi_c(_jit,r0,r1,i0)
+static void _ldxi_c(jit_state_t*,int32_t,int32_t,jit_word_t);
+# define ldr_uc(r0,r1) LLGC(r0,0,0,r1)
+# define ldi_uc(r0,i0) _ldi_uc(_jit,r0,i0)
+static void _ldi_uc(jit_state_t*,int32_t,jit_word_t);
+# define ldxr_uc(r0,r1,r2) _ldxr_uc(_jit,r0,r1,r2)
+static void _ldxr_uc(jit_state_t*,int32_t,int32_t,int32_t);
+# define ldxi_uc(r0,r1,i0) _ldxi_uc(_jit,r0,r1,i0)
+static void _ldxi_uc(jit_state_t*,int32_t,int32_t,jit_word_t);
+# if __WORDSIZE == 32
+# define ldr_s(r0,r1) LH(r0,0,0,r1)
+# else
+# define ldr_s(r0,r1) LGH(r0,0,0,r1)
+# endif
+# define ldi_s(r0,i0) _ldi_s(_jit,r0,i0)
+static void _ldi_s(jit_state_t*,int32_t,jit_word_t);
+# define ldxr_s(r0,r1,r2) _ldxr_s(_jit,r0,r1,r2)
+static void _ldxr_s(jit_state_t*,int32_t,int32_t,int32_t);
+# define ldxi_s(r0,r1,i0) _ldxi_s(_jit,r0,r1,i0)
+static void _ldxi_s(jit_state_t*,int32_t,int32_t,jit_word_t);
+# define ldr_us(r0,r1) LLGH(r0,0,0,r1)
+# define ldi_us(r0,i0) _ldi_us(_jit,r0,i0)
+static void _ldi_us(jit_state_t*,int32_t,jit_word_t);
+# define ldxr_us(r0,r1,r2) _ldxr_us(_jit,r0,r1,r2)
+static void _ldxr_us(jit_state_t*,int32_t,int32_t,int32_t);
+# define ldxi_us(r0,r1,i0) _ldxi_us(_jit,r0,r1,i0)
+static void _ldxi_us(jit_state_t*,int32_t,int32_t,jit_word_t);
+# if __WORDSIZE == 32
+# define ldr_i(r0,r1) LLGF(r0,0,0,r1)
+# else
+# define ldr_i(r0,r1) LGF(r0,0,0,r1)
+# endif
+# define ldi_i(r0,i0) _ldi_i(_jit,r0,i0)
+static void _ldi_i(jit_state_t*,int32_t,jit_word_t);
+# define ldxr_i(r0,r1,r2) _ldxr_i(_jit,r0,r1,r2)
+static void _ldxr_i(jit_state_t*,int32_t,int32_t,int32_t);
+# define ldxi_i(r0,r1,i0) _ldxi_i(_jit,r0,r1,i0)
+static void _ldxi_i(jit_state_t*,int32_t,int32_t,jit_word_t);
+# if __WORDSIZE == 64
+# define ldr_ui(r0,r1) LLGF(r0,0,0,r1)
+# define ldi_ui(r0,i0) _ldi_ui(_jit,r0,i0)
+static void _ldi_ui(jit_state_t*,int32_t,jit_word_t);
+# define ldxr_ui(r0,r1,r2) _ldxr_ui(_jit,r0,r1,r2)
+static void _ldxr_ui(jit_state_t*,int32_t,int32_t,int32_t);
+# define ldxi_ui(r0,r1,i0) _ldxi_ui(_jit,r0,r1,i0)
+static void _ldxi_ui(jit_state_t*,int32_t,int32_t,jit_word_t);
+# define ldr_l(r0,r1) LG(r0,0,0,r1)
+# define ldi_l(r0,i0) _ldi_l(_jit,r0,i0)
+static void _ldi_l(jit_state_t*,int32_t,jit_word_t);
+# define ldxr_l(r0,r1,r2) _ldxr_l(_jit,r0,r1,r2)
+static void _ldxr_l(jit_state_t*,int32_t,int32_t,int32_t);
+# define ldxi_l(r0,r1,i0) _ldxi_l(_jit,r0,r1,i0)
+static void _ldxi_l(jit_state_t*,int32_t,int32_t,jit_word_t);
+# endif
+/*
+ * Stores.  str_* is a direct alias for a store instruction macro; note
+ * that its two register arguments are swapped on the way through
+ * (str_x(r0,r1) passes r1 first and r0 last).  sti_* and stxi_* take the
+ * jit_word_t as their FIRST operand, unlike the load counterparts.
+ * The *_l forms exist only for 64-bit words.
+ */
+# define str_c(r0,r1) STC(r1,0,0,r0)
+# define sti_c(i0,r0) _sti_c(_jit,i0,r0)
+static void _sti_c(jit_state_t*,jit_word_t,int32_t);
+# define stxr_c(r0,r1,r2) _stxr_c(_jit,r0,r1,r2)
+static void _stxr_c(jit_state_t*,int32_t,int32_t,int32_t);
+# define stxi_c(i0,r0,r1) _stxi_c(_jit,i0,r0,r1)
+static void _stxi_c(jit_state_t*,jit_word_t,int32_t,int32_t);
+# define str_s(r0,r1) STH(r1,0,0,r0)
+# define sti_s(i0,r0) _sti_s(_jit,i0,r0)
+static void _sti_s(jit_state_t*,jit_word_t,int32_t);
+# define stxr_s(r0,r1,r2) _stxr_s(_jit,r0,r1,r2)
+static void _stxr_s(jit_state_t*,int32_t,int32_t,int32_t);
+# define stxi_s(i0,r0,r1) _stxi_s(_jit,i0,r0,r1)
+static void _stxi_s(jit_state_t*,jit_word_t,int32_t,int32_t);
+# define str_i(r0,r1) ST(r1,0,0,r0)
+# define sti_i(i0,r0) _sti_i(_jit,i0,r0)
+static void _sti_i(jit_state_t*,jit_word_t,int32_t);
+# define stxr_i(r0,r1,r2) _stxr_i(_jit,r0,r1,r2)
+static void _stxr_i(jit_state_t*,int32_t,int32_t,int32_t);
+# define stxi_i(i0,r0,r1) _stxi_i(_jit,i0,r0,r1)
+static void _stxi_i(jit_state_t*,jit_word_t,int32_t,int32_t);
+# if __WORDSIZE == 64
+# define str_l(r0,r1) STG(r1,0,0,r0)
+# define sti_l(i0,r0) _sti_l(_jit,i0,r0)
+static void _sti_l(jit_state_t*,jit_word_t,int32_t);
+# define stxr_l(r0,r1,r2) _stxr_l(_jit,r0,r1,r2)
+static void _stxr_l(jit_state_t*,int32_t,int32_t,int32_t);
+# define stxi_l(i0,r0,r1) _stxi_l(_jit,i0,r0,r1)
+static void _stxi_l(jit_state_t*,jit_word_t,int32_t,int32_t);
+# endif
+/*
+ * Comparison operations, all expressed through crr/cri (or the _u
+ * variants) with a condition code:
+ *   lt -> CC_L,  le -> CC_LE,  eq -> CC_E,
+ *   ge -> CC_HE, gt -> CC_H,   ne -> CC_NE.
+ * eq and ne have no _u forms.
+ */
+# define ltr(r0,r1,r2) crr(CC_L,r0,r1,r2)
+# define lti(r0,r1,i0) cri(CC_L,r0,r1,i0)
+# define ltr_u(r0,r1,r2) crr_u(CC_L,r0,r1,r2)
+# define lti_u(r0,r1,i0) cri_u(CC_L,r0,r1,i0)
+# define ler(r0,r1,r2) crr(CC_LE,r0,r1,r2)
+# define lei(r0,r1,i0) cri(CC_LE,r0,r1,i0)
+# define ler_u(r0,r1,r2) crr_u(CC_LE,r0,r1,r2)
+# define lei_u(r0,r1,i0) cri_u(CC_LE,r0,r1,i0)
+# define eqr(r0,r1,r2) crr(CC_E,r0,r1,r2)
+# define eqi(r0,r1,i0) cri(CC_E,r0,r1,i0)
+# define ger(r0,r1,r2) crr(CC_HE,r0,r1,r2)
+# define gei(r0,r1,i0) cri(CC_HE,r0,r1,i0)
+# define ger_u(r0,r1,r2) crr_u(CC_HE,r0,r1,r2)
+# define gei_u(r0,r1,i0) cri_u(CC_HE,r0,r1,i0)
+# define gtr(r0,r1,r2) crr(CC_H,r0,r1,r2)
+# define gti(r0,r1,i0) cri(CC_H,r0,r1,i0)
+# define gtr_u(r0,r1,r2) crr_u(CC_H,r0,r1,r2)
+# define gti_u(r0,r1,i0) cri_u(CC_H,r0,r1,i0)
+# define ner(r0,r1,r2) crr(CC_NE,r0,r1,r2)
+# define nei(r0,r1,i0) cri(CC_NE,r0,r1,i0)
+/*
+ * Conditional branches.  Every macro here selects a condition code and
+ * delegates to one of the brr/bri, badd*, bsub* or bmx* helpers; each
+ * has a `_p` twin that delegates to the helper's `_p` variant.
+ */
+/* Compare-and-branch: lt/le/eq/ge/gt/ne use CC_L/CC_LE/CC_E/CC_HE/CC_H/
+ * CC_NE; the _u forms use the brr_u/bri_u helpers with the same codes. */
+# define bltr(i0,r0,r1) brr(CC_L,i0,r0,r1)
+# define bltr_p(i0,r0,r1) brr_p(CC_L,i0,r0,r1)
+# define blti(i0,r0,i1) bri(CC_L,i0,r0,i1)
+# define blti_p(i0,r0,i1) bri_p(CC_L,i0,r0,i1)
+# define bltr_u(i0,r0,r1) brr_u(CC_L,i0,r0,r1)
+# define bltr_u_p(i0,r0,r1) brr_u_p(CC_L,i0,r0,r1)
+# define blti_u(i0,r0,i1) bri_u(CC_L,i0,r0,i1)
+# define blti_u_p(i0,r0,i1) bri_u_p(CC_L,i0,r0,i1)
+# define bler(i0,r0,r1) brr(CC_LE,i0,r0,r1)
+# define bler_p(i0,r0,r1) brr_p(CC_LE,i0,r0,r1)
+# define blei(i0,r0,i1) bri(CC_LE,i0,r0,i1)
+# define blei_p(i0,r0,i1) bri_p(CC_LE,i0,r0,i1)
+# define bler_u(i0,r0,r1) brr_u(CC_LE,i0,r0,r1)
+# define bler_u_p(i0,r0,r1) brr_u_p(CC_LE,i0,r0,r1)
+# define blei_u(i0,r0,i1) bri_u(CC_LE,i0,r0,i1)
+# define blei_u_p(i0,r0,i1) bri_u_p(CC_LE,i0,r0,i1)
+# define beqr(i0,r0,r1) brr(CC_E,i0,r0,r1)
+# define beqr_p(i0,r0,r1) brr_p(CC_E,i0,r0,r1)
+# define beqi(i0,r0,i1) bri(CC_E,i0,r0,i1)
+# define beqi_p(i0,r0,i1) bri_p(CC_E,i0,r0,i1)
+# define bger(i0,r0,r1) brr(CC_HE,i0,r0,r1)
+# define bger_p(i0,r0,r1) brr_p(CC_HE,i0,r0,r1)
+# define bgei(i0,r0,i1) bri(CC_HE,i0,r0,i1)
+# define bgei_p(i0,r0,i1) bri_p(CC_HE,i0,r0,i1)
+# define bger_u(i0,r0,r1) brr_u(CC_HE,i0,r0,r1)
+# define bger_u_p(i0,r0,r1) brr_u_p(CC_HE,i0,r0,r1)
+# define bgei_u(i0,r0,i1) bri_u(CC_HE,i0,r0,i1)
+# define bgei_u_p(i0,r0,i1) bri_u_p(CC_HE,i0,r0,i1)
+# define bgtr(i0,r0,r1) brr(CC_H,i0,r0,r1)
+# define bgtr_p(i0,r0,r1) brr_p(CC_H,i0,r0,r1)
+# define bgti(i0,r0,i1) bri(CC_H,i0,r0,i1)
+# define bgti_p(i0,r0,i1) bri_p(CC_H,i0,r0,i1)
+# define bgtr_u(i0,r0,r1) brr_u(CC_H,i0,r0,r1)
+# define bgtr_u_p(i0,r0,r1) brr_u_p(CC_H,i0,r0,r1)
+# define bgti_u(i0,r0,i1) bri_u(CC_H,i0,r0,i1)
+# define bgti_u_p(i0,r0,i1) bri_u_p(CC_H,i0,r0,i1)
+# define bner(i0,r0,r1) brr(CC_NE,i0,r0,r1)
+# define bner_p(i0,r0,r1) brr_p(CC_NE,i0,r0,r1)
+# define bnei(i0,r0,i1) bri(CC_NE,i0,r0,i1)
+# define bnei_p(i0,r0,i1) bri_p(CC_NE,i0,r0,i1)
+/* Add-and-branch: bo* / bx* pairs use complementary codes.  The plain
+ * forms pass 1 as the helper's second argument with CC_O / CC_NO; the
+ * _u forms pass 0 with CC_NLE / CC_LE. */
+# define boaddr(i0,r0,r1) baddr(CC_O,1,i0,r0,r1)
+# define boaddr_p(i0,r0,r1) baddr_p(CC_O,1,i0,r0,r1)
+# define boaddi(i0,r0,i1) baddi(CC_O,1,i0,r0,i1)
+# define boaddi_p(i0,r0,i1) baddi_p(CC_O,1,i0,r0,i1)
+# define boaddr_u(i0,r0,r1) baddr(CC_NLE,0,i0,r0,r1)
+# define boaddr_u_p(i0,r0,r1) baddr_p(CC_NLE,0,i0,r0,r1)
+# define boaddi_u(i0,r0,i1) baddi(CC_NLE,0,i0,r0,i1)
+# define boaddi_u_p(i0,r0,i1) baddi_p(CC_NLE,0,i0,r0,i1)
+# define bxaddr(i0,r0,r1) baddr(CC_NO,1,i0,r0,r1)
+# define bxaddr_p(i0,r0,r1) baddr_p(CC_NO,1,i0,r0,r1)
+# define bxaddi(i0,r0,i1) baddi(CC_NO,1,i0,r0,i1)
+# define bxaddi_p(i0,r0,i1) baddi_p(CC_NO,1,i0,r0,i1)
+# define bxaddr_u(i0,r0,r1) baddr(CC_LE,0,i0,r0,r1)
+# define bxaddr_u_p(i0,r0,r1) baddr_p(CC_LE,0,i0,r0,r1)
+# define bxaddi_u(i0,r0,i1) baddi(CC_LE,0,i0,r0,i1)
+# define bxaddi_u_p(i0,r0,i1) baddi_p(CC_LE,0,i0,r0,i1)
+/* Subtract-and-branch: same scheme, but the _u forms use CC_L / CC_NL. */
+# define bosubr(i0,r0,r1) bsubr(CC_O,1,i0,r0,r1)
+# define bosubr_p(i0,r0,r1) bsubr_p(CC_O,1,i0,r0,r1)
+# define bosubi(i0,r0,i1) bsubi(CC_O,1,i0,r0,i1)
+# define bosubi_p(i0,r0,i1) bsubi_p(CC_O,1,i0,r0,i1)
+# define bosubr_u(i0,r0,r1) bsubr(CC_L,0,i0,r0,r1)
+# define bosubr_u_p(i0,r0,r1) bsubr_p(CC_L,0,i0,r0,r1)
+# define bosubi_u(i0,r0,i1) bsubi(CC_L,0,i0,r0,i1)
+# define bosubi_u_p(i0,r0,i1) bsubi_p(CC_L,0,i0,r0,i1)
+# define bxsubr(i0,r0,r1) bsubr(CC_NO,1,i0,r0,r1)
+# define bxsubr_p(i0,r0,r1) bsubr_p(CC_NO,1,i0,r0,r1)
+# define bxsubi(i0,r0,i1) bsubi(CC_NO,1,i0,r0,i1)
+# define bxsubi_p(i0,r0,i1) bsubi_p(CC_NO,1,i0,r0,i1)
+# define bxsubr_u(i0,r0,r1) bsubr(CC_NL,0,i0,r0,r1)
+# define bxsubr_u_p(i0,r0,r1) bsubr_p(CC_NL,0,i0,r0,r1)
+# define bxsubi_u(i0,r0,i1) bsubi(CC_NL,0,i0,r0,i1)
+# define bxsubi_u_p(i0,r0,i1) bsubi_p(CC_NL,0,i0,r0,i1)
+/* Mask branches: bms* uses CC_NE, bmc* uses CC_E. */
+# define bmsr(i0,r0,r1) bmxr(CC_NE,i0,r0,r1)
+# define bmsr_p(i0,r0,r1) bmxr_p(CC_NE,i0,r0,r1)
+# define bmsi(i0,r0,i1) bmxi(CC_NE,i0,r0,i1)
+# define bmsi_p(i0,r0,i1) bmxi_p(CC_NE,i0,r0,i1)
+# define bmcr(i0,r0,r1) bmxr(CC_E,i0,r0,r1)
+# define bmcr_p(i0,r0,r1) bmxr_p(CC_E,i0,r0,r1)
+# define bmci(i0,r0,i1) bmxi(CC_E,i0,r0,i1)
+# define bmci_p(i0,r0,i1) bmxi_p(CC_E,i0,r0,i1)
+/*
+ * Jumps, calls, function prolog/epilog, varargs and patching.
+ * The register forms are direct instruction aliases: jmpr -> BR, and
+ * callr -> BALR with _R14_REGNO as its first operand.  The immediate
+ * forms are functions; their `_p` variants return a jit_word_t.
+ */
+# define jmpr(r0) BR(r0)
+# define jmpi(i0) _jmpi(_jit,i0)
+static void _jmpi(jit_state_t*,jit_word_t);
+# define jmpi_p(i0) _jmpi_p(_jit,i0)
+static jit_word_t _jmpi_p(jit_state_t*,jit_word_t);
+# define callr(r0) BALR(_R14_REGNO,r0)
+# define calli(i0) _calli(_jit,i0)
+static void _calli(jit_state_t*,jit_word_t);
+# define calli_p(i0) _calli_p(_jit,i0)
+static jit_word_t _calli_p(jit_state_t*,jit_word_t);
+/* prolog/epilog take a jit_node_t pointer rather than a register. */
+# define prolog(i0) _prolog(_jit,i0)
+static void _prolog(jit_state_t*,jit_node_t*);
+# define epilog(i0) _epilog(_jit,i0)
+static void _epilog(jit_state_t*,jit_node_t*);
+# define vastart(r0) _vastart(_jit, r0)
+static void _vastart(jit_state_t*, int32_t);
+# define vaarg(r0, r1) _vaarg(_jit, r0, r1)
+static void _vaarg(jit_state_t*, int32_t, int32_t);
+/* patch_at takes two jit_word_t values, named instr and label. */
+# define patch_at(instr,label) _patch_at(_jit,instr,label)
+static void _patch_at(jit_state_t*,jit_word_t,jit_word_t);
+#endif
+
+#if CODE
+/* Shorthand for the fixed-width unsigned types used by the encoder
+ * functions: _us for 16-bit units and bit-fields, _ui for arguments. */
+# define _us uint16_t
+# define _ui uint32_t
+/*
+ * Encode a one-unit instruction whose single 16-bit unit is entirely
+ * opcode, and hand it to is().  Op must fit in 16 bits; the assert
+ * aborts otherwise.
+ */
+static void
+_E(jit_state_t *_jit, _ui Op)
+{
+    union {
+        struct {
+            _us op;
+        } b;
+        _us s;                  /* the same 16 bits as one value */
+    } i0;
+
+    i0.b.op = Op;
+    assert(i0.b.op == Op);      /* Op survived the narrowing */
+
+    is(i0.s);
+}
+
+/*
+ * Encode a one-unit instruction made of an 8-bit opcode and an 8-bit
+ * immediate, and hand it to is().  Each argument must fit its field;
+ * the asserts abort otherwise.
+ */
+static void
+_I(jit_state_t *_jit, _ui Op, _ui I)
+{
+    union {
+        struct {
+            _us op : 8;
+            _us i  : 8;
+        } b;
+        _us s;                  /* the same 16 bits as one value */
+    } i0;
+
+    i0.b.op = Op;
+    i0.b.i = I;
+
+    /* Reject arguments that were truncated by the bit-field store. */
+    assert(i0.b.op == Op);
+    assert(i0.b.i == I);
+
+    is(i0.s);
+}
+
+/*
+ * Encode a one-unit instruction made of an 8-bit opcode and two 4-bit
+ * register fields, and hand it to is().  Each argument must fit its
+ * field; the asserts abort otherwise.
+ */
+static void
+_RR(jit_state_t *_jit, _ui Op, _ui R1, _ui R2)
+{
+    union {
+        struct {
+            _us op : 8;
+            _us r1 : 4;
+            _us r2 : 4;
+        } b;
+        _us s;                  /* the same 16 bits as one value */
+    } i0;
+
+    i0.b.op = Op;
+    i0.b.r1 = R1;
+    i0.b.r2 = R2;
+
+    /* Reject arguments that were truncated by the bit-field store. */
+    assert(i0.b.op == Op);
+    assert(i0.b.r1 == R1);
+    assert(i0.b.r2 == R2);
+
+    is(i0.s);
+}
+
+/*
+ * Encode a two-unit instruction and hand both units to is():
+ *   i0: 16-bit opcode
+ *   i1: 8 unused bits (zeroed), r1 (4 bits), r2 (4 bits)
+ * Each argument must fit its field; the asserts abort otherwise.
+ */
+static void
+_RRE(jit_state_t *_jit, _ui Op, _ui R1, _ui R2)
+{
+    union {
+        struct {
+            _us op;
+        } b;
+        _us s;
+    } i0;
+    union {
+        struct {
+            _us _  : 8;
+            _us r1 : 4;
+            _us r2 : 4;
+        } b;
+        _us s;
+    } i1;
+
+    /* First unit. */
+    i0.b.op = Op;
+    assert(i0.b.op == Op);
+
+    /* Second unit; the padding field is cleared explicitly. */
+    i1.b._ = 0;
+    i1.b.r1 = R1;
+    i1.b.r2 = R2;
+    assert(i1.b.r1 == R1);
+    assert(i1.b.r2 == R2);
+
+    /* Emit only after every field has been validated. */
+    is(i0.s);
+    is(i1.s);
+}
+
+/*
+ * Encode a two-unit instruction and hand both units to is():
+ *   i0: 16-bit opcode
+ *   i1: r3, m4, r1, r2 (4 bits each, declared in that order)
+ * Each argument must fit its field; the asserts abort otherwise.
+ */
+static void
+_RRF(jit_state_t *_jit, _ui Op, _ui R3, _ui M4, _ui R1, _ui R2)
+{
+    union {
+        struct {
+            _us op;
+        } b;
+        _us s;
+    } i0;
+    union {
+        struct {
+            _us r3 : 4;
+            _us m4 : 4;
+            _us r1 : 4;
+            _us r2 : 4;
+        } b;
+        _us s;
+    } i1;
+
+    /* First unit. */
+    i0.b.op = Op;
+    assert(i0.b.op == Op);
+
+    /* Second unit. */
+    i1.b.r3 = R3;
+    i1.b.m4 = M4;
+    i1.b.r1 = R1;
+    i1.b.r2 = R2;
+    assert(i1.b.r3 == R3);
+    assert(i1.b.m4 == M4);
+    assert(i1.b.r1 == R1);
+    assert(i1.b.r2 == R2);
+
+    /* Emit only after every field has been validated. */
+    is(i0.s);
+    is(i1.s);
+}
+
+/*
+ * Encode a two-unit instruction and hand both units to is():
+ *   i0: op (8 bits), r1 (4), x2 (4)
+ *   i1: b2 (4 bits), d2 (12)
+ * Each argument must fit its field; the asserts abort otherwise.
+ */
+static void
+_RX(jit_state_t *_jit, _ui Op, _ui R1, _ui X2, _ui B2, _ui D2)
+{
+    union {
+        struct {
+            _us op : 8;
+            _us r1 : 4;
+            _us x2 : 4;
+        } b;
+        _us s;
+    } i0;
+    union {
+        struct {
+            _us b2 : 4;
+            _us d2 : 12;
+        } b;
+        _us s;
+    } i1;
+
+    /* First unit. */
+    i0.b.op = Op;
+    i0.b.r1 = R1;
+    i0.b.x2 = X2;
+    assert(i0.b.op == Op);
+    assert(i0.b.r1 == R1);
+    assert(i0.b.x2 == X2);
+
+    /* Second unit. */
+    i1.b.b2 = B2;
+    i1.b.d2 = D2;
+    assert(i1.b.b2 == B2);
+    assert(i1.b.d2 == D2);
+
+    /* Emit only after every field has been validated. */
+    is(i0.s);
+    is(i1.s);
+}
+
+/*
+ * Encode a three-unit instruction and hand the units to is():
+ *   i0: op (8 bits), r1 (4), x2 (4)
+ *   i1: b2 (4 bits), d2 (12)
+ *   i2: 8 unused bits (zeroed), second opcode byte Op2 (8)
+ * Each argument must fit its field; the asserts abort otherwise.
+ */
+static void
+_RXE(jit_state_t *_jit, _ui Op, _ui R1, _ui X2, _ui B2, _ui D2, _ui Op2)
+{
+    union {
+        struct {
+            _us op : 8;
+            _us r1 : 4;
+            _us x2 : 4;
+        } b;
+        _us s;
+    } i0;
+    union {
+        struct {
+            _us b2 : 4;
+            _us d2 : 12;
+        } b;
+        /* This view must be 16 bits wide, like i0.s and i2.s.  It used to
+         * be `_ui`, which widened the union to 32 bits while the
+         * bit-fields initialise only 16 of them, so reading `s` also read
+         * bytes that were never written. */
+        _us s;
+    } i1;
+    union {
+        struct {
+            _us _  : 8;
+            _us op : 8;
+        } b;
+        _us s;
+    } i2;
+
+    /* First unit. */
+    i0.b.op = Op;
+    i0.b.r1 = R1;
+    i0.b.x2 = X2;
+    assert(i0.b.op == Op);
+    assert(i0.b.r1 == R1);
+    assert(i0.b.x2 == X2);
+
+    /* Second unit. */
+    i1.b.b2 = B2;
+    i1.b.d2 = D2;
+    assert(i1.b.b2 == B2);
+    assert(i1.b.d2 == D2);
+
+    /* Third unit; the padding field is cleared explicitly. */
+    i2.b._ = 0;
+    i2.b.op = Op2;
+    assert(i2.b.op == Op2);
+
+    /* Emit only after every field has been validated. */
+    is(i0.s);
+    is(i1.s);
+    is(i2.s);
+}
+
+/*
+ * Encode a three-unit instruction and hand the units to is():
+ *   i0: op (8 bits), r3 (4), x2 (4)
+ *   i1: b2 (4 bits), d2 (12)
+ *   i2: r1 (4 bits), 4 unused bits (zeroed), second opcode byte Op2 (8)
+ * Each argument must fit its field; the asserts abort otherwise.
+ */
+static void
+_RXF(jit_state_t *_jit, _ui Op, _ui R3, _ui X2, _ui B2, _ui D2, _ui R1, _ui Op2)
+{
+    union {
+        struct {
+            _us op : 8;
+            _us r3 : 4;
+            _us x2 : 4;
+        } b;
+        _us s;
+    } i0;
+    union {
+        struct {
+            _us b2 : 4;
+            _us d2 : 12;
+        } b;
+        _us s;
+    } i1;
+    union {
+        struct {
+            _us r1 : 4;
+            _us _  : 4;
+            _us op : 8;
+        } b;
+        _us s;
+    } i2;
+
+    /* First unit. */
+    i0.b.op = Op;
+    i0.b.r3 = R3;
+    i0.b.x2 = X2;
+    assert(i0.b.op == Op);
+    assert(i0.b.r3 == R3);
+    assert(i0.b.x2 == X2);
+
+    /* Second unit. */
+    i1.b.b2 = B2;
+    i1.b.d2 = D2;
+    assert(i1.b.b2 == B2);
+    assert(i1.b.d2 == D2);
+
+    /* Third unit; the padding field is cleared explicitly. */
+    i2.b._ = 0;
+    i2.b.r1 = R1;
+    i2.b.op = Op2;
+    assert(i2.b.r1 == R1);
+    assert(i2.b.op == Op2);
+
+    /* Emit only after every field has been validated. */
+    is(i0.s);
+    is(i1.s);
+    is(i2.s);
+}
+
+/*
+ * Encode a three-unit instruction with a split displacement and hand
+ * the units to is():
+ *   i0: op (8 bits), r1 (4), x2 (4)
+ *   i1: b2 (4 bits), dl (12) = low 12 bits of D2
+ *   i2: dh (8 bits) = D2 >> 12, second opcode byte Op2 (8)
+ * The assert on dh rejects any D2 that needs more than 20 bits; the
+ * other asserts reject arguments that do not fit their fields.
+ */
+static void
+_RXY(jit_state_t *_jit, _ui Op, _ui R1, _ui X2, _ui B2, _ui D2, _ui Op2)
+{
+    union {
+        struct {
+            _us op : 8;
+            _us r1 : 4;
+            _us x2 : 4;
+        } b;
+        _us s;
+    } i0;
+    union {
+        struct {
+            _us b2 : 4;
+            _us dl : 12;
+        } b;
+        _us s;
+    } i1;
+    union {
+        struct {
+            _us dh : 8;
+            _us op : 8;
+        } b;
+        _us s;
+    } i2;
+
+    /* Start from all-zero units. */
+    i0.s = i1.s = i2.s = 0;
+
+    /* First unit. */
+    i0.b.op = Op;
+    i0.b.r1 = R1;
+    i0.b.x2 = X2;
+    assert(i0.b.op == Op);
+    assert(i0.b.r1 == R1);
+    assert(i0.b.x2 == X2);
+
+    /* Second unit: base and the low part of the displacement. */
+    i1.b.b2 = B2;
+    i1.b.dl = D2 & 0xfff;
+    assert(i1.b.b2 == B2);
+
+    /* Third unit: the high part of the displacement and Op2. */
+    i2.b.dh = D2 >> 12;
+    i2.b.op = Op2;
+    assert(i2.b.dh == D2 >> 12);
+    assert(i2.b.op == Op2);
+
+    /* Emit only after every field has been validated. */
+    is(i0.s);
+    is(i1.s);
+    is(i2.s);
+}
+
+/*
+ * Encode a two-unit instruction and hand both units to is():
+ *   i0: op (8 bits), r1 (4), r3 (4)
+ *   i1: b2 (4 bits), d2 (12)
+ * Each argument must fit its field; the asserts abort otherwise.
+ */
+static void
+_RS(jit_state_t *_jit, _ui Op, _ui R1, _ui R3, _ui B2, _ui D2)
+{
+    union {
+        struct {
+            _us op : 8;
+            _us r1 : 4;
+            _us r3 : 4;
+        } b;
+        _us s;
+    } i0;
+    union {
+        struct {
+            _us b2 : 4;
+            _us d2 : 12;
+        } b;
+        _us s;
+    } i1;
+
+    /* Start from all-zero units. */
+    i0.s = i1.s = 0;
+
+    /* First unit. */
+    i0.b.op = Op;
+    i0.b.r1 = R1;
+    i0.b.r3 = R3;
+    assert(i0.b.op == Op);
+    assert(i0.b.r1 == R1);
+    assert(i0.b.r3 == R3);
+
+    /* Second unit. */
+    i1.b.b2 = B2;
+    i1.b.d2 = D2;
+    assert(i1.b.b2 == B2);
+    assert(i1.b.d2 == D2);
+
+    /* Emit only after every field has been validated. */
+    is(i0.s);
+    is(i1.s);
+}
+
+/*
+ * Encode a three-unit instruction and hand the units to is():
+ *   i0: op (8 bits), l1 (4), 4 unused bits (zeroed)
+ *   i1: b1 (4 bits), d1 (12)
+ *   i2: 8 unused bits (zeroed), second opcode byte Op2 (8)
+ * Each argument must fit its field; the asserts abort otherwise.
+ */
+static void
+_RSL(jit_state_t *_jit, _ui Op, _ui L1, _ui B1, _ui D1, _ui Op2)
+{
+    union {
+        struct {
+            _us op : 8;
+            _us l1 : 4;
+            _us _  : 4;
+        } b;
+        _us s;
+    } i0;
+    union {
+        struct {
+            _us b1 : 4;
+            _us d1 : 12;
+        } b;
+        _us s;
+    } i1;
+    union {
+        struct {
+            _us _  : 8;
+            _us op : 8;
+        } b;
+        _us s;
+    } i2;
+
+    /* First unit; the padding field is cleared explicitly. */
+    i0.b._ = 0;
+    i0.b.op = Op;
+    i0.b.l1 = L1;
+    assert(i0.b.op == Op);
+    assert(i0.b.l1 == L1);
+
+    /* Second unit. */
+    i1.b.b1 = B1;
+    i1.b.d1 = D1;
+    assert(i1.b.b1 == B1);
+    assert(i1.b.d1 == D1);
+
+    /* Third unit; the padding field is cleared explicitly. */
+    i2.b._ = 0;
+    i2.b.op = Op2;
+    assert(i2.b.op == Op2);
+
+    /* Emit only after every field has been validated. */
+    is(i0.s);
+    is(i1.s);
+    is(i2.s);
+}
+
+/*
+ * Encode a two-unit instruction and hand both units to is():
+ *   i0: op (8 bits), r1 (4), r3 (4)
+ *   i1: 16-bit immediate i2
+ * Each argument must fit its field; the asserts abort otherwise.
+ */
+static void
+_RSI(jit_state_t *_jit, _ui Op, _ui R1, _ui R3, _ui I2)
+{
+    union {
+        struct {
+            _us op : 8;
+            _us r1 : 4;
+            _us r3 : 4;
+        } b;
+        _us s;
+    } i0;
+    union {
+        struct {
+            _us i2;
+        } b;
+        _us s;
+    } i1;
+
+    /* First unit. */
+    i0.b.op = Op;
+    i0.b.r1 = R1;
+    i0.b.r3 = R3;
+    assert(i0.b.op == Op);
+    assert(i0.b.r1 == R1);
+    assert(i0.b.r3 == R3);
+
+    /* Second unit: I2 must fit in 16 bits. */
+    i1.b.i2 = I2;
+    assert(i1.b.i2 == I2);
+
+    /* Emit only after every field has been validated. */
+    is(i0.s);
+    is(i1.s);
+}
+
+/*
+ * Encode a three-unit instruction and hand the units to is():
+ *   i0: op (8 bits), r1 (4), r3 (4)
+ *   i1: 16-bit immediate i2
+ *   i2: 8 unused bits (zeroed), second opcode byte Op2 (8)
+ * Each argument must fit its field; the asserts abort otherwise.
+ */
+static void
+_RIE(jit_state_t *_jit, _ui Op, _ui R1, _ui R3, _ui I2, _ui Op2)
+{
+    union {
+        struct {
+            _us op : 8;
+            _us r1 : 4;
+            _us r3 : 4;
+        } b;
+        _us s;
+    } i0;
+    union {
+        struct {
+            _us i2;
+        } b;
+        _us s;
+    } i1;
+    union {
+        struct {
+            _us _  : 8;
+            _us op : 8;
+        } b;
+        _us s;
+    } i2;
+
+    /* First unit. */
+    i0.b.op = Op;
+    i0.b.r1 = R1;
+    i0.b.r3 = R3;
+    assert(i0.b.op == Op);
+    assert(i0.b.r1 == R1);
+    assert(i0.b.r3 == R3);
+
+    /* Second unit: I2 must fit in 16 bits. */
+    i1.b.i2 = I2;
+    assert(i1.b.i2 == I2);
+
+    /* Third unit; the padding field is cleared explicitly. */
+    i2.b._ = 0;
+    i2.b.op = Op2;
+    assert(i2.b.op == Op2);
+
+    /* Emit only after every field has been validated. */
+    is(i0.s);
+    is(i1.s);
+    is(i2.s);
+}
+
+/*
+ * Encode an instruction made of one 16-bit unit plus a 32-bit
+ * immediate, and hand three 16-bit units to is():
+ *   i0:  o1 (8 bits), r1 (4), o2 (4)
+ *   i12: the 32-bit immediate I2, emitted as `ih` and then `il`
+ * Op, R1 and Op2 must fit their fields; the asserts abort otherwise.
+ * I2 is stored whole, so it has no check.
+ */
+static void
+_RIL(jit_state_t *_jit, _ui Op, _ui R1, _ui Op2, _ui I2)
+{
+    union {
+        struct {
+            _us o1 : 8;
+            _us r1 : 4;
+            _us o2 : 4;
+        } b;
+        _us s;
+    } i0;
+    union {
+        struct {
+            _ui ih : 16;
+            _ui il : 16;
+        } b;
+        _ui i;                  /* the whole 32-bit immediate */
+    } i12;
+
+    /* First unit. */
+    i0.b.o1 = Op;
+    i0.b.r1 = R1;
+    i0.b.o2 = Op2;
+    assert(i0.b.o1 == Op);
+    assert(i0.b.r1 == R1);
+    assert(i0.b.o2 == Op2);
+
+    /* Immediate: written as one word, read back as two halves. */
+    i12.i = I2;
+
+    is(i0.s);
+    is(i12.b.ih);
+    is(i12.b.il);
+}
+
+/* Pack an SI instruction: word 0 is op|i2, word 1 is b1|d1 (4-bit
+ * base, 12-bit displacement).  Both words are passed to is(). */
+static void
+_SI(jit_state_t *_jit, _ui Op, _ui I2, _ui B1, _ui D1)
+{
+    union {
+        struct {
+            _us op : 8;
+            _us i2 : 8;
+        } b;
+        _us s;
+    } w0;
+    union {
+        struct {
+            _us b1 : 4;
+            _us d1 : 12;
+        } b;
+        _us s;
+    } w1;
+
+    w0.b.op = Op;
+    assert(w0.b.op == Op);
+    w0.b.i2 = I2;
+    assert(w0.b.i2 == I2);
+    w1.b.b1 = B1;
+    assert(w1.b.b1 == B1);
+    w1.b.d1 = D1;
+    assert(w1.b.d1 == D1);
+
+    is(w0.s);
+    is(w1.s);
+}
+
+/* Pack an SIY instruction into three words: op|i2, b1|dl, dh|op2.
+ *
+ * D1 is a 20-bit displacement split into `dl' (its low 12 bits) and
+ * `dh' (its high 8 bits).  Since dl already holds bits 0..11, dh must
+ * be D1 >> 12; the previous D1 >> 8 re-encoded bits 8..11 in both
+ * fields and dropped bits 16..19, producing a wrong displacement for
+ * any D1 >= 0x100.
+ *
+ * The asserts check that every operand fits its field; the dh check
+ * also rejects a D1 wider than 20 bits. */
+static void
+_SIY(jit_state_t *_jit, _ui Op, _ui I2, _ui B1, _ui D1, _ui Op2)
+{
+    union {
+        struct {
+            _us op : 8;
+            _us i2 : 8;
+        } b;
+        _us s;
+    } i0;
+    union {
+        struct {
+            _us b1 : 4;
+            _us dl : 12;
+        } b;
+        _us s;
+    } i1;
+    union {
+        struct {
+            _us dh : 8;
+            _us op : 8;
+        } b;
+        _us s;
+    } i2;
+    i0.b.op = Op;
+    i0.b.i2 = I2;
+    i1.b.b1 = B1;
+    i1.b.dl = D1 & 0xfff;
+    i2.b.dh = D1 >> 12;
+    i2.b.op = Op2;
+    assert(i0.b.op == Op);
+    assert(i0.b.i2 == I2);
+    assert(i1.b.b1 == B1);
+    assert(i2.b.dh == D1 >> 12);
+    assert(i2.b.op == Op2);
+    is(i0.s);
+    is(i1.s);
+    is(i2.s);
+}
+
+/* Pack an S instruction: a full-word opcode followed by b2|d2 (4-bit
+ * base, 12-bit displacement).  Both words are passed to is(). */
+static void
+_S(jit_state_t *_jit, _ui Op, _ui B2, _ui D2)
+{
+    union {
+        struct {
+            _us op;
+        } b;
+        _us s;
+    } w0;
+    union {
+        struct {
+            _us b2 : 4;
+            _us d2 : 12;
+        } b;
+        _us s;
+    } w1;
+
+    w0.b.op = Op;
+    assert(w0.b.op == Op);
+    w1.b.b2 = B2;
+    assert(w1.b.b2 == B2);
+    w1.b.d2 = D2;
+    assert(w1.b.d2 == D2);
+
+    is(w0.s);
+    is(w1.s);
+}
+
+/* Pack an SS instruction into three words: op|ll|lh, b1|d1 and b2|d2.
+ * Each operand is asserted to fit its field before the words are
+ * passed to is() in order. */
+static void
+_SS(jit_state_t *_jit, _ui Op, _ui LL, _ui LH, _ui B1, _ui D1, _ui B2, _ui D2)
+{
+    union {
+        struct {
+            _us op : 8;
+            _us ll : 4;
+            _us lh : 4;
+        } b;
+        _us s;
+    } w0;
+    union {
+        struct {
+            _us b1 : 4;
+            _us d1 : 12;
+        } b;
+        _us s;
+    } w1;
+    union {
+        struct {
+            _us b2 : 4;
+            _us d2 : 12;
+        } b;
+        _us s;
+    } w2;
+
+    w0.b.op = Op;
+    assert(w0.b.op == Op);
+    w0.b.ll = LL;
+    assert(w0.b.ll == LL);
+    w0.b.lh = LH;
+    assert(w0.b.lh == LH);
+    w1.b.b1 = B1;
+    assert(w1.b.b1 == B1);
+    w1.b.d1 = D1;
+    assert(w1.b.d1 == D1);
+    w2.b.b2 = B2;
+    assert(w2.b.b2 == B2);
+    w2.b.d2 = D2;
+    assert(w2.b.d2 == D2);
+
+    is(w0.s);
+    is(w1.s);
+    is(w2.s);
+}
+
+/* Pack an SSE instruction into three words: a full-word opcode, b1|d1
+ * and b2|d2.  The words are passed to is() in order. */
+static void
+_SSE(jit_state_t *_jit, _ui Op, _ui B1, _ui D1, _ui B2, _ui D2)
+{
+    union {
+        struct {
+            _us op;
+        } b;
+        _us s;
+    } w0;
+    union {
+        struct {
+            _us b1 : 4;
+            _us d1 : 12;
+        } b;
+        _us s;
+    } w1;
+    union {
+        struct {
+            _us b2 : 4;
+            _us d2 : 12;
+        } b;
+        _us s;
+    } w2;
+
+    w0.b.op = Op;
+    assert(w0.b.op == Op);
+    w1.b.b1 = B1;
+    assert(w1.b.b1 == B1);
+    w1.b.d1 = D1;
+    assert(w1.b.d1 == D1);
+    w2.b.b2 = B2;
+    assert(w2.b.b2 == B2);
+    w2.b.d2 = D2;
+    assert(w2.b.d2 == D2);
+
+    is(w0.s);
+    is(w1.s);
+    is(w2.s);
+}
+# undef _us
+# undef _ui
+
+/* Issue one NOPR(_R7_REGNO) per two units of `c'.  `c' must be even
+ * and non-negative. */
+static void
+_nop(jit_state_t *_jit, int32_t c)
+{
+    assert(c >= 0 && !(c & 1));
+    for (; c; c -= 2) {
+        NOPR(_R7_REGNO);
+    }
+}
+
+/* Shared helper for signed divide/remainder by a register: places r0
+ * in a freshly obtained register pair, applies DIVREM_ with r1 and
+ * returns the pair handle.  The pair is released before returning, so
+ * callers read rn(pair) / rn(pair) + 1 right away. */
+static int32_t
+_xdivr(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+    int32_t pair = jit_get_reg_pair();
+#if __WORDSIZE == 32
+    movr(rn(pair), r0);
+    SRDA(rn(pair), 32, 0);
+#else
+    movr(rn(pair) + 1, r0);
+#endif
+    DIVREM_(rn(pair), r1);
+    jit_unget_reg_pair(pair);
+    return pair;
+}
+
+/* Unsigned counterpart of the register divide helper: loads r0 into a
+ * register pair, zeroes the first register of the pair, applies
+ * DIVREMU_ with r1 and returns the (already released) pair handle. */
+static int32_t
+_xdivr_u(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+    int32_t pair = jit_get_reg_pair();
+#if __WORDSIZE == 32
+    movr(rn(pair), r0);
+    SRDL(rn(pair), 32, 0);
+#else
+    movr(rn(pair) + 1, r0);
+#endif
+    movi(rn(pair), 0);
+    DIVREMU_(rn(pair), r1);
+    jit_unget_reg_pair(pair);
+    return pair;
+}
+
+/* Signed divide helper with an immediate divisor: i0 is loaded into a
+ * scratch register and used as the DIVREM_ operand.  Returns the
+ * (already released) register pair handle. */
+static int32_t
+_xdivi(jit_state_t *_jit, int32_t r0, jit_word_t i0)
+{
+    int32_t pair = jit_get_reg_pair();
+    int32_t divisor = jit_get_reg(jit_class_gpr);
+#if __WORDSIZE == 32
+    movr(rn(pair), r0);
+    SRDA(rn(pair), 32, 0);
+#else
+    movr(rn(pair) + 1, r0);
+#endif
+    movi(rn(divisor), i0);
+    DIVREM_(rn(pair), rn(divisor));
+    jit_unget_reg(divisor);
+    jit_unget_reg_pair(pair);
+    return pair;
+}
+
+/* Unsigned divide helper with an immediate divisor.  Returns the
+ * (already released) register pair handle. */
+static int32_t
+_xdivi_u(jit_state_t *_jit, int32_t r0, jit_word_t i0)
+{
+    /* cannot overlap because operand is 128-bit */
+    int32_t pair = jit_get_reg_pair();
+    int32_t divisor = jit_get_reg(jit_class_gpr);
+#if __WORDSIZE == 32
+    movr(rn(pair), r0);
+    SRDL(rn(pair), 32, 0);
+#else
+    movr(rn(pair) + 1, r0);
+#endif
+    movi(rn(pair), 0);
+    movi(rn(divisor), i0);
+    DIVREMU_(rn(pair), rn(divisor));
+    jit_unget_reg(divisor);
+    jit_unget_reg_pair(pair);
+    return pair;
+}
+
+/* Set r0 to 1 or 0 from a signed compare of r1 with r2: the result is
+ * preset to 1, then a BRC on `cc' skips the movi that resets it to 0.
+ * When r0 aliases an input, the result is built in a scratch register
+ * and copied to r0 at the end. */
+static void
+_crr(jit_state_t *_jit, int32_t cc,
+     int32_t r0, int32_t r1, int32_t r2)
+{
+    jit_word_t branch;
+    int32_t tmp, dst;
+    int aliased = (r0 == r1 || r0 == r2);
+
+    if (aliased) {
+        tmp = jit_get_reg(jit_class_gpr);
+        dst = rn(tmp);
+    }
+    else
+        dst = r0;
+
+    movi(dst, 1);
+    CMP_(r1, r2);
+    branch = _jit->pc.w;
+    BRC(cc, 0);
+    movi(dst, 0);
+    patch_at(branch, _jit->pc.w);
+
+    if (aliased) {
+        movr(r0, dst);
+        jit_unget_reg(tmp);
+    }
+}
+
+/* Immediate form of crr: i0 is loaded into a scratch register which
+ * then serves as the second compare operand. */
+static void
+_cri(jit_state_t *_jit, int32_t cc,
+     int32_t r0, int32_t r1, jit_word_t i0)
+{
+    int32_t tmp = jit_get_reg(jit_class_gpr);
+    movi(rn(tmp), i0);
+    crr(cc, r0, r1, rn(tmp));
+    jit_unget_reg(tmp);
+}
+
+/* Same scheme as the signed compare-and-set, but using CMPU_: r0 is
+ * preset to 1 and a BRC on `cc' skips the movi that resets it to 0.
+ * A scratch register holds the result when r0 aliases r1 or r2. */
+static void
+_crr_u(jit_state_t *_jit, int32_t cc,
+       int32_t r0, int32_t r1, int32_t r2)
+{
+    jit_word_t branch;
+    int32_t tmp, dst;
+    int aliased = (r0 == r1 || r0 == r2);
+
+    if (aliased) {
+        tmp = jit_get_reg(jit_class_gpr);
+        dst = rn(tmp);
+    }
+    else
+        dst = r0;
+
+    movi(dst, 1);
+    CMPU_(r1, r2);
+    branch = _jit->pc.w;
+    BRC(cc, 0);
+    movi(dst, 0);
+    patch_at(branch, _jit->pc.w);
+
+    if (aliased) {
+        movr(r0, dst);
+        jit_unget_reg(tmp);
+    }
+}
+
+/* Immediate form of crr_u: i0 is loaded into a scratch register which
+ * then serves as the second compare operand. */
+static void
+_cri_u(jit_state_t *_jit, int32_t cc,
+       int32_t r0, int32_t r1, jit_word_t i0)
+{
+    int32_t tmp = jit_get_reg(jit_class_gpr);
+    movi(rn(tmp), i0);
+    crr_u(cc, r0, r1, rn(tmp));
+    jit_unget_reg(tmp);
+}
+
+/* Signed compare of r0 with r1, then a conditional branch to i0.  The
+ * distance is counted in halfwords from the pc after the compare; BRC
+ * is used when it fits 16 bits, otherwise BRCL (which must fit 32). */
+static void
+_brr(jit_state_t *_jit, int32_t cc,
+     jit_word_t i0, int32_t r0, int32_t r1)
+{
+    jit_word_t disp;
+    CMP_(r0, r1);
+    disp = (i0 - _jit->pc.w) >> 1;
+    if (s16_p(disp)) {
+        BRC(cc, x16(disp));
+        return;
+    }
+    assert(s32_p(disp));
+    BRCL(cc, disp);
+}
+
+/* Signed compare of r0 with r1 followed by a BRCL with a zero
+ * displacement.  Returns the pc sampled just before the BRCL
+ * (presumably so the branch can be patched later); i0 is not used. */
+static jit_word_t
+_brr_p(jit_state_t *_jit, int32_t cc,
+       jit_word_t i0, int32_t r0, int32_t r1)
+{
+    jit_word_t at;
+    CMP_(r0, r1);
+    at = _jit->pc.w;
+    BRCL(cc, 0);
+    return at;
+}
+
+/* Immediate form of brr: i1 is loaded into a non-spillable scratch
+ * register and compared against r0. */
+static void
+_bri(jit_state_t *_jit, int32_t cc,
+     jit_word_t i0, int32_t r0, jit_word_t i1)
+{
+    int32_t tmp = jit_get_reg(jit_class_gpr|jit_class_nospill);
+    movi(rn(tmp), i1);
+    brr(cc, i0, r0, rn(tmp));
+    jit_unget_reg(tmp);
+}
+
+/* Immediate form of brr_p: i1 is loaded into a non-spillable scratch
+ * register; the value returned by brr_p is passed through. */
+static jit_word_t
+_bri_p(jit_state_t *_jit, int32_t cc,
+       jit_word_t i0, int32_t r0, jit_word_t i1)
+{
+    jit_word_t at;
+    int32_t tmp = jit_get_reg(jit_class_gpr|jit_class_nospill);
+    movi(rn(tmp), i1);
+    at = brr_p(cc, i0, r0, rn(tmp));
+    jit_unget_reg(tmp);
+    return at;
+}
+
+/* Unsigned compare (CMPU_) of r0 with r1, then a conditional branch to
+ * i0: BRC when the halfword distance fits 16 bits, BRCL otherwise. */
+static void
+_brr_u(jit_state_t *_jit, int32_t cc,
+       jit_word_t i0, int32_t r0, int32_t r1)
+{
+    jit_word_t disp;
+    CMPU_(r0, r1);
+    disp = (i0 - _jit->pc.w) >> 1;
+    if (s16_p(disp)) {
+        BRC(cc, x16(disp));
+        return;
+    }
+    assert(s32_p(disp));
+    BRCL(cc, disp);
+}
+
+/* Unsigned compare of r0 with r1 followed by a BRCL with a zero
+ * displacement.  Returns the pc sampled just before the BRCL
+ * (presumably for later patching); i0 is not used. */
+static jit_word_t
+_brr_u_p(jit_state_t *_jit, int32_t cc,
+         jit_word_t i0, int32_t r0, int32_t r1)
+{
+    jit_word_t at;
+    CMPU_(r0, r1);
+    at = _jit->pc.w;
+    BRCL(cc, 0);
+    return at;
+}
+
+/* Immediate form of brr_u: i1 is loaded into a non-spillable scratch
+ * register and compared (unsigned) against r0. */
+static void
+_bri_u(jit_state_t *_jit, int32_t cc,
+       jit_word_t i0, int32_t r0, jit_word_t i1)
+{
+    int32_t tmp = jit_get_reg(jit_class_gpr|jit_class_nospill);
+    movi(rn(tmp), i1);
+    brr_u(cc, i0, r0, rn(tmp));
+    jit_unget_reg(tmp);
+}
+
+/* Immediate form of brr_u_p: i1 is loaded into a non-spillable scratch
+ * register; the value returned by brr_u_p is passed through. */
+static jit_word_t
+_bri_u_p(jit_state_t *_jit, int32_t cc,
+         jit_word_t i0, int32_t r0, jit_word_t i1)
+{
+    jit_word_t at;
+    int32_t tmp = jit_get_reg(jit_class_gpr|jit_class_nospill);
+    movi(rn(tmp), i1);
+    at = brr_u_p(cc, i0, r0, rn(tmp));
+    jit_unget_reg(tmp);
+    return at;
+}
+
+/* r0 += r1, then branch to i0 on condition `c'.  `s' selects addr()
+ * (true) or addcr() (false) for the addition.  BRC is used when the
+ * halfword distance fits 16 bits, BRCL otherwise. */
+static void
+_baddr(jit_state_t *_jit, int32_t c, jit_bool_t s,
+       jit_word_t i0, int32_t r0, int32_t r1)
+{
+    jit_word_t disp;
+    if (s) {
+        addr(r0, r0, r1);
+    }
+    else {
+        addcr(r0, r0, r1);
+    }
+    disp = (i0 - _jit->pc.w) >> 1;
+    if (s16_p(disp)) {
+        BRC(c, x16(disp));
+        return;
+    }
+    assert(s32_p(disp));
+    BRCL(c, disp);
+}
+
+/* Immediate form of baddr: i1 is loaded into a non-spillable scratch
+ * register and used as the addend. */
+static void
+_baddi(jit_state_t *_jit, int32_t c, jit_bool_t s,
+       jit_word_t i0, int32_t r0, jit_word_t i1)
+{
+    int32_t tmp = jit_get_reg(jit_class_gpr|jit_class_nospill);
+    movi(rn(tmp), i1);
+    baddr(c, s, i0, r0, rn(tmp));
+    jit_unget_reg(tmp);
+}
+
+/* r0 += r1 (addr() when `s', addcr() otherwise) followed by a BRCL to
+ * i0.  Returns the pc sampled just before the BRCL. */
+static jit_word_t
+_baddr_p(jit_state_t *_jit, int32_t c, jit_bool_t s,
+         jit_word_t i0, int32_t r0, int32_t r1)
+{
+    jit_word_t at, disp;
+    if (s) {
+        addr(r0, r0, r1);
+    }
+    else {
+        addcr(r0, r0, r1);
+    }
+    at = _jit->pc.w;
+    disp = (i0 - at) >> 1;
+    BRCL(c, disp);
+    return at;
+}
+
+/* Immediate form of baddr_p: i1 is loaded into a non-spillable scratch
+ * register; the value returned by baddr_p is passed through. */
+static jit_word_t
+_baddi_p(jit_state_t *_jit, int32_t c, jit_bool_t s,
+         jit_word_t i0, int32_t r0, jit_word_t i1)
+{
+    jit_word_t at;
+    int32_t tmp = jit_get_reg(jit_class_gpr|jit_class_nospill);
+    movi(rn(tmp), i1);
+    at = baddr_p(c, s, i0, r0, rn(tmp));
+    jit_unget_reg(tmp);
+    return at;
+}
+
+/* r0 -= r1, then branch to i0 on condition `c'.  `s' selects subr()
+ * (true) or subcr() (false).  BRC is used when the halfword distance
+ * fits 16 bits, BRCL otherwise. */
+static void
+_bsubr(jit_state_t *_jit, int32_t c, jit_bool_t s,
+       jit_word_t i0, int32_t r0, int32_t r1)
+{
+    jit_word_t disp;
+    if (s) {
+        subr(r0, r0, r1);
+    }
+    else {
+        subcr(r0, r0, r1);
+    }
+    disp = (i0 - _jit->pc.w) >> 1;
+    if (s16_p(disp)) {
+        BRC(c, x16(disp));
+        return;
+    }
+    assert(s32_p(disp));
+    BRCL(c, disp);
+}
+
+/* Immediate form of bsubr: i1 is loaded into a non-spillable scratch
+ * register and used as the subtrahend. */
+static void
+_bsubi(jit_state_t *_jit, int32_t c, jit_bool_t s,
+       jit_word_t i0, int32_t r0, jit_word_t i1)
+{
+    int32_t tmp = jit_get_reg(jit_class_gpr|jit_class_nospill);
+    movi(rn(tmp), i1);
+    bsubr(c, s, i0, r0, rn(tmp));
+    jit_unget_reg(tmp);
+}
+
+/* r0 -= r1 (subr() when `s', subcr() otherwise) followed by a BRCL to
+ * i0.  Returns the pc sampled just before the BRCL. */
+static jit_word_t
+_bsubr_p(jit_state_t *_jit, int32_t c, jit_bool_t s,
+         jit_word_t i0, int32_t r0, int32_t r1)
+{
+    jit_word_t at, disp;
+    if (s) {
+        subr(r0, r0, r1);
+    }
+    else {
+        subcr(r0, r0, r1);
+    }
+    at = _jit->pc.w;
+    disp = (i0 - at) >> 1;
+    BRCL(c, disp);
+    return at;
+}
+
+/* Immediate form of bsubr_p: i1 is loaded into a non-spillable scratch
+ * register; the value returned by bsubr_p is passed through. */
+static jit_word_t
+_bsubi_p(jit_state_t *_jit, int32_t c, jit_bool_t s,
+         jit_word_t i0, int32_t r0, jit_word_t i1)
+{
+    jit_word_t at;
+    int32_t tmp = jit_get_reg(jit_class_gpr|jit_class_nospill);
+    movi(rn(tmp), i1);
+    at = bsubr_p(c, s, i0, r0, rn(tmp));
+    jit_unget_reg(tmp);
+    return at;
+}
+
+/* Branch on a bit mask: r0 & r1 is computed in a scratch register and
+ * passed to TEST_, then a branch on `cc' to i0 follows.  The scratch
+ * register is released before the branch. */
+static void
+_bmxr(jit_state_t *_jit, int32_t cc,
+      jit_word_t i0, int32_t r0, int32_t r1)
+{
+    jit_word_t disp;
+    int32_t tmp = jit_get_reg(jit_class_gpr);
+    movr(rn(tmp), r0);
+    andr(rn(tmp), rn(tmp), r1);
+    TEST_(rn(tmp), rn(tmp));
+    jit_unget_reg(tmp);
+    disp = (i0 - _jit->pc.w) >> 1;
+    if (s16_p(disp)) {
+        BRC(cc, x16(disp));
+        return;
+    }
+    assert(s32_p(disp));
+    BRCL(cc, disp);
+}
+
+/* Bit-mask test of r0 & r1 followed by a BRCL with a zero
+ * displacement.  Returns the pc sampled just before the BRCL; i0 is
+ * not used. */
+static jit_word_t
+_bmxr_p(jit_state_t *_jit, int32_t cc,
+        jit_word_t i0, int32_t r0, int32_t r1)
+{
+    jit_word_t at;
+    int32_t tmp = jit_get_reg(jit_class_gpr);
+    movr(rn(tmp), r0);
+    andr(rn(tmp), rn(tmp), r1);
+    TEST_(rn(tmp), rn(tmp));
+    jit_unget_reg(tmp);
+    at = _jit->pc.w;
+    BRCL(cc, 0);
+    return at;
+}
+
+/* Branch on a bit mask with an immediate: i1 is loaded into a scratch
+ * register, ANDed with r0 and passed to TEST_, then a branch on `cc'
+ * to i0 follows. */
+static void
+_bmxi(jit_state_t *_jit, int32_t cc,
+      jit_word_t i0, int32_t r0, jit_word_t i1)
+{
+    jit_word_t disp;
+    int32_t tmp = jit_get_reg(jit_class_gpr);
+    movi(rn(tmp), i1);
+    andr(rn(tmp), rn(tmp), r0);
+    TEST_(rn(tmp), rn(tmp));
+    jit_unget_reg(tmp);
+    disp = (i0 - _jit->pc.w) >> 1;
+    if (s16_p(disp)) {
+        BRC(cc, x16(disp));
+        return;
+    }
+    assert(s32_p(disp));
+    BRCL(cc, disp);
+}
+
+/* Bit-mask test of r0 against the immediate i1 followed by a BRCL
+ * with a zero displacement.  Returns the pc sampled just before the
+ * BRCL; i0 is not used. */
+static jit_word_t
+_bmxi_p(jit_state_t *_jit, int32_t cc,
+        jit_word_t i0, int32_t r0, jit_word_t i1)
+{
+    jit_word_t at;
+    int32_t tmp = jit_get_reg(jit_class_gpr);
+    movi(rn(tmp), i1);
+    andr(rn(tmp), rn(tmp), r0);
+    TEST_(rn(tmp), rn(tmp));
+    jit_unget_reg(tmp);
+    at = _jit->pc.w;
+    BRCL(cc, 0);
+    return at;
+}
+
+/* Register-to-register move; nothing is done when source and
+ * destination are the same register. */
+static void
+_movr(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+    if (r0 == r1)
+        return;
+#if __WORDSIZE == 32
+    LR(r0, r1);
+#else
+    LGR(r0, r1);
+#endif
+}
+
+/* Load the constant i0 into r0 using the first form that applies:
+ *   1. a single LHI/LGHI when i0 fits a signed 16-bit value;
+ *   2. LARL with the pc-relative halfword distance, for even values
+ *      (positive on 32-bit, within signed 32-bit reach on 64-bit);
+ *   3. otherwise the value is assembled from 16-bit pieces. */
+static void
+_movi(jit_state_t *_jit, int32_t r0, jit_word_t i0)
+{
+    /* distance from the current pc in halfwords; only LARL uses it */
+    jit_word_t rel = (i0 - _jit->pc.w) >> 1;
+#if __WORDSIZE == 64
+    int32_t parts;
+#endif
+
+    if (s16_p(i0)) {
+#if __WORDSIZE == 32
+        LHI(r0, x16(i0));
+#else
+        LGHI(r0, x16(i0));
+#endif
+        return;
+    }
+
+    /* easy way of loading a large amount of 32 bit values and
+     * usually address of constants */
+#if __WORDSIZE == 32
+    if (!(i0 & 1) && i0 > 0) {
+#else
+    if (!(i0 & 1) && s32_p(rel)) {
+#endif
+        LARL(r0, rel);
+        return;
+    }
+
+#if __WORDSIZE == 32
+    LHI(r0, x16(i0));
+    IILH(r0, x16((jit_uword_t)i0 >> 16));
+#else
+    /* one bit per non-zero 16-bit piece, lowest piece first */
+    parts = 0;
+    if (i0 & 0xffffL)
+        parts |= 1;
+    if (i0 & 0xffff0000L)
+        parts |= 2;
+    if (i0 & 0xffff00000000L)
+        parts |= 4;
+    if (i0 & 0xffff000000000000L)
+        parts |= 8;
+    /* clear first unless every piece is about to be inserted */
+    if (parts != 15) {
+        LGHI(r0, 0);
+    }
+    if (parts & 1) {
+        IILL(r0, x16(i0));
+    }
+    if (parts & 2) {
+        IILH(r0, x16((jit_uword_t)i0 >> 16));
+    }
+    if (parts & 4) {
+        IIHL(r0, x16((jit_uword_t)i0 >> 32));
+    }
+    if (parts & 8) {
+        IIHH(r0, x16((jit_uword_t)i0 >> 48));
+    }
+#endif
+}
+
+/* Load i0 into r0 with a fixed sequence (every 16-bit piece is always
+ * written) and return the pc at which the sequence starts. */
+static jit_word_t
+_movi_p(jit_state_t *_jit, int32_t r0, jit_word_t i0)
+{
+    jit_word_t start = _jit->pc.w;
+#if __WORDSIZE == 32
+    LHI(r0, x16(i0));
+    IILH(r0, x16((jit_uword_t)i0 >> 16));
+#else
+    IILL(r0, x16(i0));
+    IILH(r0, x16((jit_uword_t)i0 >> 16));
+    IIHL(r0, x16((jit_uword_t)i0 >> 32));
+    IIHH(r0, x16((jit_uword_t)i0 >> 48));
+#endif
+    return start;
+}
+
+/* r0 = r1 + r2 using the two-operand ADD_.  When r0 already is r2 the
+ * operands are swapped instead of copying r1 over it. */
+static void
+_addr(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+{
+    if (r0 != r2) {
+        movr(r0, r1);
+        ADD_(r0, r2);
+        return;
+    }
+    ADD_(r0, r1);
+}
+
+/* r0 = r1 + i0.  Uses ADDI_ in place when r0 == r1 and i0 fits 16
+ * bits, LAY with a 20-bit displacement on 64-bit, and otherwise loads
+ * i0 into a scratch register and defers to addr(). */
+static void
+_addi(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0)
+{
+    int32_t tmp;
+    if (r0 == r1 && s16_p(i0)) {
+        ADDI_(r0, x16(i0));
+        return;
+    }
+#if __WORDSIZE == 64
+    if (s20_p(i0)) {
+        LAY(r0, x20(i0), 0, r1);
+        return;
+    }
+#endif
+    tmp = jit_get_reg(jit_class_gpr);
+    movi(rn(tmp), i0);
+    addr(r0, r1, rn(tmp));
+    jit_unget_reg(tmp);
+}
+
+/* r0 = r1 + r2 using ADDC_.  When r0 already is r2 the operands are
+ * swapped instead of copying r1 over it. */
+static void
+_addcr(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+{
+    if (r0 != r2) {
+        movr(r0, r1);
+        ADDC_(r0, r2);
+        return;
+    }
+    ADDC_(r0, r1);
+}
+
+/* Immediate form of addcr: i0 is loaded into a scratch register. */
+static void
+_addci(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0)
+{
+    int32_t tmp = jit_get_reg(jit_class_gpr);
+    movi(rn(tmp), i0);
+    addcr(r0, r1, rn(tmp));
+    jit_unget_reg(tmp);
+}
+
+/* r0 = r1 + r2 using ADDX_.  When r0 already is r2 the operands are
+ * swapped instead of copying r1 over it. */
+static void
+_addxr(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+{
+    if (r0 != r2) {
+        movr(r0, r1);
+        ADDX_(r0, r2);
+        return;
+    }
+    ADDX_(r0, r1);
+}
+
+/* Immediate form of addxr: i0 is loaded into a scratch register. */
+static void
+_addxi(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0)
+{
+    int32_t tmp = jit_get_reg(jit_class_gpr);
+    movi(rn(tmp), i0);
+    addxr(r0, r1, rn(tmp));
+    jit_unget_reg(tmp);
+}
+
+/* r0 = r1 - r2 using the two-operand SUB_.  When r0 is r2, r2 is
+ * first saved in a scratch register because copying r1 into r0 would
+ * destroy it. */
+static void
+_subr(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+{
+    int32_t saved;
+    if (r0 != r2) {
+        movr(r0, r1);
+        SUB_(r0, r2);
+        return;
+    }
+    saved = jit_get_reg(jit_class_gpr);
+    movr(rn(saved), r2);
+    movr(r0, r1);
+    SUB_(r0, rn(saved));
+    jit_unget_reg(saved);
+}
+
+/* r0 = r1 - i0.  Adds -i0 via ADDI_ (in place, 16-bit) or LAY (64-bit
+ * build, 20-bit) when the negated value fits; otherwise loads i0 into
+ * a scratch register and defers to subr(). */
+static void
+_subi(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0)
+{
+    int32_t tmp;
+    if (r0 == r1 && s16_p(-i0)) {
+        ADDI_(r0, x16(-i0));
+        return;
+    }
+#if __WORDSIZE == 64
+    if (s20_p(-i0)) {
+        LAY(r0, x20(-i0), 0, r1);
+        return;
+    }
+#endif
+    tmp = jit_get_reg(jit_class_gpr);
+    movi(rn(tmp), i0);
+    subr(r0, r1, rn(tmp));
+    jit_unget_reg(tmp);
+}
+
+/* r0 = r1 - r2 using SUBC_.  When r0 is r2, r2 is first saved in a
+ * scratch register because copying r1 into r0 would destroy it. */
+static void
+_subcr(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+{
+    int32_t saved;
+    if (r0 != r2) {
+        movr(r0, r1);
+        SUBC_(r0, r2);
+        return;
+    }
+    saved = jit_get_reg(jit_class_gpr);
+    movr(rn(saved), r2);
+    movr(r0, r1);
+    SUBC_(r0, rn(saved));
+    jit_unget_reg(saved);
+}
+
+/* Immediate form of subcr: i0 is loaded into a scratch register. */
+static void
+_subci(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0)
+{
+    int32_t tmp = jit_get_reg(jit_class_gpr);
+    movi(rn(tmp), i0);
+    subcr(r0, r1, rn(tmp));
+    jit_unget_reg(tmp);
+}
+
+/* r0 = r1 - r2 using SUBX_.  When r0 is r2, r2 is first saved in a
+ * scratch register because copying r1 into r0 would destroy it. */
+static void
+_subxr(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+{
+    int32_t saved;
+    if (r0 != r2) {
+        movr(r0, r1);
+        SUBX_(r0, r2);
+        return;
+    }
+    saved = jit_get_reg(jit_class_gpr);
+    movr(rn(saved), r2);
+    movr(r0, r1);
+    SUBX_(r0, rn(saved));
+    jit_unget_reg(saved);
+}
+
+/* Immediate form of subxr: i0 is loaded into a scratch register. */
+static void
+_subxi(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0)
+{
+    int32_t tmp = jit_get_reg(jit_class_gpr);
+    movi(rn(tmp), i0);
+    subxr(r0, r1, rn(tmp));
+    jit_unget_reg(tmp);
+}
+
+/* Reverse subtract, r0 = i0 - r1, computed as -(r1 - i0). */
+static void
+_rsbi(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0)
+{
+    subi(r0, r1, i0);
+    negr(r0, r0);
+}
+
+/* r0 = r1 * r2 using the two-operand MUL_.  When r0 already is r2 the
+ * operands are swapped instead of copying r1 over it. */
+static void
+_mulr(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+{
+    if (r0 != r2) {
+        movr(r0, r1);
+        MUL_(r0, r2);
+        return;
+    }
+    MUL_(r0, r1);
+}
+
+/* r0 = r1 * i0.  MULI_ is used directly for a signed 16-bit
+ * immediate; otherwise i0 goes through a scratch register and mulr(). */
+static void
+_muli(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0)
+{
+    int32_t tmp;
+    if (s16_p(i0)) {
+        movr(r0, r1);
+        MULI_(r0, x16(i0));
+        return;
+    }
+    tmp = jit_get_reg(jit_class_gpr);
+    movi(rn(tmp), i0);
+    mulr(r0, r1, rn(tmp));
+    jit_unget_reg(tmp);
+}
+
+/* Signed double-width multiply built on qmulr_u(): after the unsigned
+ * product lands in r0/r1, the word in r1 is adjusted by adding
+ * (r2 >> 63) * r3 and (r3 >> 63) * r2.  Inputs that alias r0 or r1 are
+ * copied to scratch registers first, since the unsigned multiply
+ * overwrites them. */
+static void
+_qmulr(jit_state_t *_jit,
+       int32_t r0, int32_t r1, int32_t r2, int32_t r3)
+{
+    int32_t scratch;
+    /* The only invalid condition is r0 == r1 */
+    int32_t lhs, rhs, lhs_tmp, rhs_tmp;
+
+    if (r2 != r0 && r2 != r1)
+        lhs = r2;
+    else {
+        lhs_tmp = jit_get_reg(jit_class_gpr);
+        lhs = rn(lhs_tmp);
+        movr(lhs, r2);
+    }
+    if (r3 != r0 && r3 != r1)
+        rhs = r3;
+    else {
+        rhs_tmp = jit_get_reg(jit_class_gpr);
+        rhs = rn(rhs_tmp);
+        movr(rhs, r3);
+    }
+
+    qmulr_u(r0, r1, r2, r3);
+
+    scratch = jit_get_reg(jit_class_gpr);
+    /* first correction term: (lhs >> 63) * rhs */
+    rshi(rn(scratch), lhs, 63);
+    mulr(rn(scratch), rn(scratch), rhs);
+    addr(r1, r1, rn(scratch));
+    /* second correction term: (rhs >> 63) * lhs */
+    rshi(rn(scratch), rhs, 63);
+    mulr(rn(scratch), rn(scratch), lhs);
+    addr(r1, r1, rn(scratch));
+    jit_unget_reg(scratch);
+
+    if (lhs != r2) {
+        jit_unget_reg(lhs_tmp);
+    }
+    if (rhs != r3) {
+        jit_unget_reg(rhs_tmp);
+    }
+}
+
+/* Immediate form of qmulr: i0 is loaded into a scratch register. */
+static void
+_qmuli(jit_state_t *_jit,
+       int32_t r0, int32_t r1, int32_t r2, jit_word_t i0)
+{
+    int32_t tmp = jit_get_reg(jit_class_gpr);
+    movi(rn(tmp), i0);
+    qmulr(r0, r1, r2, rn(tmp));
+    jit_unget_reg(tmp);
+}
+
+/* Unsigned double-width multiply: r2 is placed in the second register
+ * of a fresh pair, MULU_ is applied with r3, then r0 receives the
+ * second register of the pair and r1 the first. */
+static void
+_qmulr_u(jit_state_t *_jit,
+         int32_t r0, int32_t r1, int32_t r2, int32_t r3)
+{
+    int32_t pair = jit_get_reg_pair();
+    movr(rn(pair) + 1, r2);
+    MULU_(rn(pair), r3);
+    movr(r0, rn(pair) + 1);
+    movr(r1, rn(pair));
+    jit_unget_reg_pair(pair);
+}
+
+/* Immediate form of the unsigned double-width multiply: i0 is loaded
+ * into the first register of the pair, which is then also passed as
+ * the MULU_ operand. */
+static void
+_qmuli_u(jit_state_t *_jit,
+         int32_t r0, int32_t r1, int32_t r2, jit_word_t i0)
+{
+    int32_t pair = jit_get_reg_pair();
+    movr(rn(pair) + 1, r2);
+    movi(rn(pair), i0);
+    MULU_(rn(pair), rn(pair));
+    movr(r0, rn(pair) + 1);
+    movr(r1, rn(pair));
+    jit_unget_reg_pair(pair);
+}
+
+/* r0 = r1 / r2 (signed): taken from the second register of the pair
+ * produced by xdivr(). */
+static void
+_divr(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+{
+    int32_t pair = xdivr(r1, r2);
+    movr(r0, rn(pair) + 1);
+}
+
+/* r0 = r1 / i0 (signed): taken from the second register of the pair
+ * produced by xdivi(). */
+static void
+_divi(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0)
+{
+    int32_t pair = xdivi(r1, i0);
+    movr(r0, rn(pair) + 1);
+}
+
+/* r0 = r1 / r2 (unsigned): taken from the second register of the pair
+ * produced by xdivr_u(). */
+static void
+_divr_u(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+{
+    int32_t pair = xdivr_u(r1, r2);
+    movr(r0, rn(pair) + 1);
+}
+
+/* r0 = r1 / i0 (unsigned): taken from the second register of the pair
+ * produced by xdivi_u(). */
+static void
+_divi_u(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0)
+{
+    int32_t pair = xdivi_u(r1, i0);
+    movr(r0, rn(pair) + 1);
+}
+
+/* r0 = r1 % r2 (signed): taken from the first register of the pair
+ * produced by xdivr(). */
+static void
+_remr(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+{
+    int32_t pair = xdivr(r1, r2);
+    movr(r0, rn(pair));
+}
+
+/* r0 = r1 % i0 (signed): taken from the first register of the pair
+ * produced by xdivi(). */
+static void
+_remi(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0)
+{
+    int32_t pair = xdivi(r1, i0);
+    movr(r0, rn(pair));
+}
+
+/* r0 = r1 % r2 (unsigned): taken from the first register of the pair
+ * produced by xdivr_u(). */
+static void
+_remr_u(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+{
+    int32_t pair = xdivr_u(r1, r2);
+    movr(r0, rn(pair));
+}
+
+/* r0 = r1 % i0 (unsigned): taken from the first register of the pair
+ * produced by xdivi_u(). */
+static void
+_remi_u(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0)
+{
+    int32_t pair = xdivi_u(r1, i0);
+    movr(r0, rn(pair));
+}
+
+/* Signed divide of r2 by r3 delivering both results: r0 gets the
+ * second register of the xdivr() pair, r1 the first. */
+static void
+_qdivr(jit_state_t *_jit,
+       int32_t r0, int32_t r1, int32_t r2, int32_t r3)
+{
+    int32_t pair = xdivr(r2, r3);
+    movr(r0, rn(pair) + 1);
+    movr(r1, rn(pair));
+}
+
+/* Signed divide of r2 by i0 delivering both results: r0 gets the
+ * second register of the xdivi() pair, r1 the first. */
+static void
+_qdivi(jit_state_t *_jit,
+       int32_t r0, int32_t r1, int32_t r2, jit_word_t i0)
+{
+    int32_t pair = xdivi(r2, i0);
+    movr(r0, rn(pair) + 1);
+    movr(r1, rn(pair));
+}
+
+/* Unsigned divide of r2 by r3 delivering both results: r0 gets the
+ * second register of the xdivr_u() pair, r1 the first. */
+static void
+_qdivr_u(jit_state_t *_jit,
+         int32_t r0, int32_t r1, int32_t r2, int32_t r3)
+{
+    int32_t pair = xdivr_u(r2, r3);
+    movr(r0, rn(pair) + 1);
+    movr(r1, rn(pair));
+}
+
+/* Unsigned divide of r2 by i0 delivering both results: r0 gets the
+ * second register of the xdivi_u() pair, r1 the first. */
+static void
+_qdivi_u(jit_state_t *_jit,
+         int32_t r0, int32_t r1, int32_t r2, jit_word_t i0)
+{
+    int32_t pair = xdivi_u(r2, i0);
+    movr(r0, rn(pair) + 1);
+    movr(r1, rn(pair));
+}
+
+# if __WORDSIZE == 32
+/* r0 = r1 << r2 via SLL.  When r0 is r2 the shift count is first
+ * saved in a scratch register obtained with jit_get_reg_but_zero(),
+ * because copying r1 into r0 would overwrite it. */
+static void
+_lshr(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+{
+    int32_t count;
+    if (r0 != r2) {
+        movr(r0, r1);
+        SLL(r0, 0, r2);
+        return;
+    }
+    count = jit_get_reg_but_zero(0);
+    movr(rn(count), r2);
+    movr(r0, r1);
+    SLL(r0, 0, rn(count));
+    jit_unget_reg_but_zero(count);
+}
+#endif
+
+/* r0 = r1 << i0: the count is loaded into a scratch register obtained
+ * with jit_get_reg_but_zero() and lshr() does the shift. */
+static void
+_lshi(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0)
+{
+    int32_t count = jit_get_reg_but_zero(0);
+    movi(rn(count), i0);
+    lshr(r0, r1, rn(count));
+    jit_unget_reg_but_zero(count);
+}
+
+# if __WORDSIZE == 32
+/* r0 = r1 >> r2 via SRA.  When r0 is r2 the shift count is first
+ * saved in a scratch register, because copying r1 into r0 would
+ * overwrite it. */
+static void
+_rshr(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+{
+    int32_t count;
+    if (r0 != r2) {
+        movr(r0, r1);
+        SRA(r0, 0, r2);
+        return;
+    }
+    count = jit_get_reg_but_zero(0);
+    movr(rn(count), r2);
+    movr(r0, r1);
+    SRA(r0, 0, rn(count));
+    jit_unget_reg_but_zero(count);
+}
+#endif
+
+/* r0 = r1 >> i0: the count is loaded into a scratch register obtained
+ * with jit_get_reg_but_zero() and rshr() does the shift. */
+static void
+_rshi(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0)
+{
+    int32_t count = jit_get_reg_but_zero(0);
+    movi(rn(count), i0);
+    rshr(r0, r1, rn(count));
+    jit_unget_reg_but_zero(count);
+}
+
+# if __WORDSIZE == 32
+/* r0 = r1 >> r2 via SRL.  When r0 is r2 the shift count is first
+ * saved in a scratch register, because copying r1 into r0 would
+ * overwrite it. */
+static void
+_rshr_u(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+{
+    int32_t count;
+    if (r0 != r2) {
+        movr(r0, r1);
+        SRL(r0, 0, r2);
+        return;
+    }
+    count = jit_get_reg_but_zero(0);
+    movr(rn(count), r2);
+    movr(r0, r1);
+    SRL(r0, 0, rn(count));
+    jit_unget_reg_but_zero(count);
+}
+#endif
+
+/* r0 = r1 >> i0 (rshr_u): the count is loaded into a scratch register
+ * obtained with jit_get_reg_but_zero(). */
+static void
+_rshi_u(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0)
+{
+    int32_t count = jit_get_reg_but_zero(0);
+    movi(rn(count), i0);
+    rshr_u(r0, r1, rn(count));
+    jit_unget_reg_but_zero(count);
+}
+
+/* r0 = ~r1, computed as r1 XOR -1 with the all-ones constant held in
+ * a scratch register. */
+static void
+_comr(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+    int32_t ones = jit_get_reg(jit_class_gpr);
+    movi(rn(ones), -1);
+    movr(r0, r1);
+    XOR_(r0, rn(ones));
+    jit_unget_reg(ones);
+}
+
+/* r0 = r1 & r2 using the two-operand AND_.  When r0 already is r2 the
+ * operands are swapped instead of copying r1 over it. */
+static void
+_andr(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+{
+    if (r0 != r2) {
+        movr(r0, r1);
+        AND_(r0, r2);
+        return;
+    }
+    AND_(r0, r1);
+}
+
+/* r0 = r1 & i0: after copying r1, the immediate is applied 16 bits at
+ * a time, lowest piece first (NILL, NILH, and on 64-bit also NIHL and
+ * NIHH).  All pieces are always applied. */
+static void
+_andi(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0)
+{
+    jit_uword_t bits = (jit_uword_t)i0;
+    movr(r0, r1);
+    NILL(r0, x16(i0));
+    NILH(r0, x16(bits >> 16));
+#if __WORDSIZE == 64
+    NIHL(r0, x16(bits >> 32));
+    NIHH(r0, x16(bits >> 48));
+#endif
+}
+
+/* r0 = r1 | r2 using the two-operand OR_.  When r0 already is r2 the
+ * operands are swapped instead of copying r1 over it. */
+static void
+_orr(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+{
+    if (r0 != r2) {
+        movr(r0, r1);
+        OR_(r0, r2);
+        return;
+    }
+    OR_(r0, r1);
+}
+
+/* r0 = r1 | i0: after copying r1, the immediate is applied 16 bits at
+ * a time, lowest piece first (OILL, OILH, and on 64-bit also OIHL and
+ * OIHH).  All pieces are always applied. */
+static void
+_ori(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0)
+{
+    jit_uword_t bits = (jit_uword_t)i0;
+    movr(r0, r1);
+    OILL(r0, x16(i0));
+    OILH(r0, x16(bits >> 16));
+#if __WORDSIZE == 64
+    OIHL(r0, x16(bits >> 32));
+    OIHH(r0, x16(bits >> 48));
+#endif
+}
+
+/* r0 = r1 ^ r2 using the two-operand XOR_.  When r0 already is r2 the
+ * operands are swapped instead of copying r1 over it. */
+static void
+_xorr(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+{
+    if (r0 != r2) {
+        movr(r0, r1);
+        XOR_(r0, r2);
+        return;
+    }
+    XOR_(r0, r1);
+}
+
+/* Immediate form of xorr: i0 is loaded into a scratch register. */
+static void
+_xori(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0)
+{
+    int32_t tmp = jit_get_reg(jit_class_gpr);
+    movi(rn(tmp), i0);
+    xorr(r0, r1, rn(tmp));
+    jit_unget_reg(tmp);
+}
+
+/* ldr_c from the absolute address i0; r0 doubles as the address
+ * register, so no scratch register is needed. */
+static void
+_ldi_c(jit_state_t *_jit, int32_t r0, jit_word_t i0)
+{
+    movi(r0, i0);
+    ldr_c(r0, r0);
+}
+
+/* ldr_c from r1 + r2.  The address is formed in r0 itself, taking
+ * care not to overwrite r2 when it is the same register as r0. */
+static void
+_ldxr_c(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+{
+    if (r0 == r2) {
+        addr(r0, r0, r1);
+    }
+    else {
+        movr(r0, r1);
+        addr(r0, r0, r2);
+    }
+    ldr_c(r0, r0);
+}
+
+/* ldr_c-style load from r1 + i0.  A 20-bit offset is encoded directly
+ * (LB on 32-bit, LGB on 64-bit); otherwise the address is built in r0
+ * when it differs from r1, else in a scratch register. */
+static void
+_ldxi_c(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0)
+{
+    int32_t tmp;
+    if (s20_p(i0)) {
+#if __WORDSIZE == 32
+        LB(r0, x20(i0), 0, r1);
+#else
+        LGB(r0, x20(i0), 0, r1);
+#endif
+        return;
+    }
+    if (r0 != r1) {
+        movi(r0, i0);
+        addr(r0, r0, r1);
+        ldr_c(r0, r0);
+        return;
+    }
+    tmp = jit_get_reg_but_zero(0);
+    movi(rn(tmp), i0);
+    addr(rn(tmp), rn(tmp), r1);
+    ldr_c(r0, rn(tmp));
+    jit_unget_reg_but_zero(tmp);
+}
+
+/* ldr_uc from the absolute address i0; r0 doubles as the address
+ * register. */
+static void
+_ldi_uc(jit_state_t *_jit, int32_t r0, jit_word_t i0)
+{
+    movi(r0, i0);
+    ldr_uc(r0, r0);
+}
+
+/* ldr_uc from r1 + r2.  The address is formed in r0 itself, taking
+ * care not to overwrite r2 when it is the same register as r0. */
+static void
+_ldxr_uc(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+{
+    if (r0 == r2) {
+        addr(r0, r0, r1);
+    }
+    else {
+        movr(r0, r1);
+        addr(r0, r0, r2);
+    }
+    ldr_uc(r0, r0);
+}
+
+/* ldr_uc-style load from r1 + i0.  A 20-bit offset is encoded
+ * directly with LLGC; otherwise the address is built in r0 when it
+ * differs from r1, else in a scratch register. */
+static void
+_ldxi_uc(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0)
+{
+    int32_t tmp;
+    if (s20_p(i0)) {
+        LLGC(r0, x20(i0), 0, r1);
+        return;
+    }
+    if (r0 != r1) {
+        movi(r0, i0);
+        addr(r0, r0, r1);
+        ldr_uc(r0, r0);
+        return;
+    }
+    tmp = jit_get_reg_but_zero(0);
+    movi(rn(tmp), i0);
+    addr(rn(tmp), rn(tmp), r1);
+    ldr_uc(r0, rn(tmp));
+    jit_unget_reg_but_zero(tmp);
+}
+
+/* ldr_s from the absolute address i0; r0 doubles as the address
+ * register. */
+static void
+_ldi_s(jit_state_t *_jit, int32_t r0, jit_word_t i0)
+{
+    movi(r0, i0);
+    ldr_s(r0, r0);
+}
+
+/* ldr_s from r1 + r2.  The address is formed in r0 itself, taking
+ * care not to overwrite r2 when it is the same register as r0. */
+static void
+_ldxr_s(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+{
+    if (r0 == r2) {
+        addr(r0, r0, r1);
+    }
+    else {
+        movr(r0, r1);
+        addr(r0, r0, r2);
+    }
+    ldr_s(r0, r0);
+}
+
+/* ldr_s-style load from r1 + i0.  On 32-bit an unsigned 12-bit offset
+ * uses LH; a signed 20-bit offset uses LHY (32-bit) or LGH (64-bit).
+ * Otherwise the address is built in r0 when it differs from r1, else
+ * in a scratch register. */
+static void
+_ldxi_s(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0)
+{
+    int32_t tmp;
+#if __WORDSIZE == 32
+    if (u12_p(i0)) {
+        LH(r0, i0, 0, r1);
+        return;
+    }
+#endif
+    if (s20_p(i0)) {
+#if __WORDSIZE == 32
+        LHY(r0, x20(i0), 0, r1);
+#else
+        LGH(r0, x20(i0), 0, r1);
+#endif
+        return;
+    }
+    if (r0 != r1) {
+        movi(r0, i0);
+        addr(r0, r0, r1);
+        ldr_s(r0, r0);
+        return;
+    }
+    tmp = jit_get_reg_but_zero(0);
+    movi(rn(tmp), i0);
+    addr(rn(tmp), rn(tmp), r1);
+    ldr_s(r0, rn(tmp));
+    jit_unget_reg_but_zero(tmp);
+}
+
+/* ldr_us from the absolute address i0; r0 doubles as the address
+ * register. */
+static void
+_ldi_us(jit_state_t *_jit, int32_t r0, jit_word_t i0)
+{
+    movi(r0, i0);
+    ldr_us(r0, r0);
+}
+
+/* ldr_us from r1 + r2.  The address is formed in r0 itself, taking
+ * care not to overwrite r2 when it is the same register as r0. */
+static void
+_ldxr_us(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+{
+    if (r0 == r2) {
+        addr(r0, r0, r1);
+    }
+    else {
+        movr(r0, r1);
+        addr(r0, r0, r2);
+    }
+    ldr_us(r0, r0);
+}
+
+/* ldr_us-style load from r1 + i0.  A 20-bit offset is encoded
+ * directly with LLGH; otherwise the address is built in r0 when it
+ * differs from r1, else in a scratch register. */
+static void
+_ldxi_us(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0)
+{
+    int32_t tmp;
+    if (s20_p(i0)) {
+        LLGH(r0, x20(i0), 0, r1);
+        return;
+    }
+    if (r0 != r1) {
+        movi(r0, i0);
+        addr(r0, r0, r1);
+        ldr_us(r0, r0);
+        return;
+    }
+    tmp = jit_get_reg_but_zero(0);
+    movi(rn(tmp), i0);
+    addr(rn(tmp), rn(tmp), r1);
+    ldr_us(r0, rn(tmp));
+    jit_unget_reg_but_zero(tmp);
+}
+
+/* ldr_i from the absolute address i0; r0 doubles as the address
+ * register. */
+static void
+_ldi_i(jit_state_t *_jit, int32_t r0, jit_word_t i0)
+{
+    movi(r0, i0);
+    ldr_i(r0, r0);
+}
+
+/* ldr_i from r1 + r2.  The address is formed in r0 itself, taking
+ * care not to overwrite r2 when it is the same register as r0. */
+static void
+_ldxr_i(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+{
+    if (r0 == r2) {
+        addr(r0, r0, r1);
+    }
+    else {
+        movr(r0, r1);
+        addr(r0, r0, r2);
+    }
+    ldr_i(r0, r0);
+}
+
+/* ldr_i-style load from r1 + i0.  A 20-bit offset is encoded directly
+ * with LGF; otherwise the address is built in r0 when it differs from
+ * r1, else in a scratch register. */
+static void
+_ldxi_i(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0)
+{
+    int32_t tmp;
+    if (s20_p(i0)) {
+        LGF(r0, x20(i0), 0, r1);
+        return;
+    }
+    if (r0 != r1) {
+        movi(r0, i0);
+        addr(r0, r0, r1);
+        ldr_i(r0, r0);
+        return;
+    }
+    tmp = jit_get_reg_but_zero(0);
+    movi(rn(tmp), i0);
+    addr(rn(tmp), rn(tmp), r1);
+    ldr_i(r0, rn(tmp));
+    jit_unget_reg_but_zero(tmp);
+}
+
+#if __WORDSIZE == 64
+/* ldr_ui from the absolute address i0; r0 doubles as the address
+ * register. */
+static void
+_ldi_ui(jit_state_t *_jit, int32_t r0, jit_word_t i0)
+{
+    movi(r0, i0);
+    ldr_ui(r0, r0);
+}
+
+/* ldr_ui from r1 + r2.  The address is formed in r0 itself, taking
+ * care not to overwrite r2 when it is the same register as r0. */
+static void
+_ldxr_ui(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+{
+    if (r0 == r2) {
+        addr(r0, r0, r1);
+    }
+    else {
+        movr(r0, r1);
+        addr(r0, r0, r2);
+    }
+    ldr_ui(r0, r0);
+}
+
+/* ldr_ui-style load from r1 + i0.  A 20-bit offset is encoded
+ * directly with LLGF; otherwise the address is built in r0 when it
+ * differs from r1, else in a scratch register. */
+static void
+_ldxi_ui(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0)
+{
+    int32_t tmp;
+    if (s20_p(i0)) {
+        LLGF(r0, x20(i0), 0, r1);
+        return;
+    }
+    if (r0 != r1) {
+        movi(r0, i0);
+        addr(r0, r0, r1);
+        ldr_ui(r0, r0);
+        return;
+    }
+    tmp = jit_get_reg_but_zero(0);
+    movi(rn(tmp), i0);
+    addr(rn(tmp), rn(tmp), r1);
+    ldr_ui(r0, rn(tmp));
+    jit_unget_reg_but_zero(tmp);
+}
+
+/* ldr_l from the absolute address i0; r0 doubles as the address
+ * register. */
+static void
+_ldi_l(jit_state_t *_jit, int32_t r0, jit_word_t i0)
+{
+    movi(r0, i0);
+    ldr_l(r0, r0);
+}
+
+/* ldr_l from r1 + r2.  The address is formed in r0 itself, taking
+ * care not to overwrite r2 when it is the same register as r0. */
+static void
+_ldxr_l(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+{
+    if (r0 == r2) {
+        addr(r0, r0, r1);
+    }
+    else {
+        movr(r0, r1);
+        addr(r0, r0, r2);
+    }
+    ldr_l(r0, r0);
+}
+
+/* ldr_l-style load from r1 + i0.  A 20-bit offset is encoded directly
+ * with LG; otherwise the address is built in r0 when it differs from
+ * r1, else in a scratch register. */
+static void
+_ldxi_l(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0)
+{
+    int32_t tmp;
+    if (s20_p(i0)) {
+        LG(r0, x20(i0), 0, r1);
+        return;
+    }
+    if (r0 != r1) {
+        movi(r0, i0);
+        addr(r0, r0, r1);
+        ldr_l(r0, r0);
+        return;
+    }
+    tmp = jit_get_reg_but_zero(0);
+    movi(rn(tmp), i0);
+    addr(rn(tmp), rn(tmp), r1);
+    ldr_l(r0, rn(tmp));
+    jit_unget_reg_but_zero(tmp);
+}
+#endif
+
+/* str_c of r0 to the absolute address i0, which is loaded into a
+ * scratch register obtained with jit_get_reg_but_zero(). */
+static void
+_sti_c(jit_state_t *_jit, jit_word_t i0, int32_t r0)
+{
+    int32_t ea = jit_get_reg_but_zero(0);
+    movi(rn(ea), i0);
+    str_c(rn(ea), r0);
+    jit_unget_reg_but_zero(ea);
+}
+
+/* str_c of r2 to the address r0 + r1, formed in a scratch register
+ * so neither input is modified. */
+static void
+_stxr_c(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+{
+    int32_t ea = jit_get_reg_but_zero(0);
+    movr(rn(ea), r0);
+    addr(rn(ea), rn(ea), r1);
+    str_c(rn(ea), r2);
+    jit_unget_reg_but_zero(ea);
+}
+
+/* str_c-style store of r1 to r0 + i0.  STC takes an unsigned 12-bit
+ * offset, STCY a signed 20-bit one; anything larger is added into a
+ * scratch register first. */
+static void
+_stxi_c(jit_state_t *_jit, jit_word_t i0, int32_t r0, int32_t r1)
+{
+    int32_t ea;
+    if (u12_p(i0)) {
+        STC(r1, i0, 0, r0);
+        return;
+    }
+    if (s20_p(i0)) {
+        STCY(r1, x20(i0), 0, r0);
+        return;
+    }
+    ea = jit_get_reg_but_zero(0);
+    addi(rn(ea), r0, i0);
+    str_c(rn(ea), r1);
+    jit_unget_reg_but_zero(ea);
+}
+
+/* str_s of r0 to the absolute address i0, which is loaded into a
+ * scratch register obtained with jit_get_reg_but_zero(). */
+static void
+_sti_s(jit_state_t *_jit, jit_word_t i0, int32_t r0)
+{
+    int32_t ea = jit_get_reg_but_zero(0);
+    movi(rn(ea), i0);
+    str_s(rn(ea), r0);
+    jit_unget_reg_but_zero(ea);
+}
+
+/* str_s of r2 to the address r0 + r1, formed in a scratch register
+ * so neither input is modified. */
+static void
+_stxr_s(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+{
+    int32_t ea = jit_get_reg_but_zero(0);
+    movr(rn(ea), r0);
+    addr(rn(ea), rn(ea), r1);
+    str_s(rn(ea), r2);
+    jit_unget_reg_but_zero(ea);
+}
+
+/* str_s-style store of r1 to r0 + i0.  STH takes an unsigned 12-bit
+ * offset, STHY a signed 20-bit one; anything larger is added into a
+ * scratch register first. */
+static void
+_stxi_s(jit_state_t *_jit, jit_word_t i0, int32_t r0, int32_t r1)
+{
+    int32_t ea;
+    if (u12_p(i0)) {
+        STH(r1, i0, 0, r0);
+        return;
+    }
+    if (s20_p(i0)) {
+        STHY(r1, x20(i0), 0, r0);
+        return;
+    }
+    ea = jit_get_reg_but_zero(0);
+    addi(rn(ea), r0, i0);
+    str_s(rn(ea), r1);
+    jit_unget_reg_but_zero(ea);
+}
+
+/* str_i of r0 to the absolute address i0, which is loaded into a
+ * scratch register obtained with jit_get_reg_but_zero(). */
+static void
+_sti_i(jit_state_t *_jit, jit_word_t i0, int32_t r0)
+{
+    int32_t ea = jit_get_reg_but_zero(0);
+    movi(rn(ea), i0);
+    str_i(rn(ea), r0);
+    jit_unget_reg_but_zero(ea);
+}
+
+/* str_i of r2 to the address r0 + r1, formed in a scratch register
+ * so neither input is modified. */
+static void
+_stxr_i(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+{
+    int32_t ea = jit_get_reg_but_zero(0);
+    movr(rn(ea), r0);
+    addr(rn(ea), rn(ea), r1);
+    str_i(rn(ea), r2);
+    jit_unget_reg_but_zero(ea);
+}
+
+/* str_i-style store of r1 to r0 + i0.  ST takes an unsigned 12-bit
+ * offset, STY a signed 20-bit one; anything larger is added into a
+ * scratch register first. */
+static void
+_stxi_i(jit_state_t *_jit, jit_word_t i0, int32_t r0, int32_t r1)
+{
+    int32_t ea;
+    if (u12_p(i0)) {
+        ST(r1, i0, 0, r0);
+        return;
+    }
+    if (s20_p(i0)) {
+        STY(r1, x20(i0), 0, r0);
+        return;
+    }
+    ea = jit_get_reg_but_zero(0);
+    addi(rn(ea), r0, i0);
+    str_i(rn(ea), r1);
+    jit_unget_reg_but_zero(ea);
+}
+
+#if __WORDSIZE == 64
+/* str_l of r0 to the absolute address i0, which is loaded into a
+ * scratch register obtained with jit_get_reg_but_zero(). */
+static void
+_sti_l(jit_state_t *_jit, jit_word_t i0, int32_t r0)
+{
+    int32_t ea = jit_get_reg_but_zero(0);
+    movi(rn(ea), i0);
+    str_l(rn(ea), r0);
+    jit_unget_reg_but_zero(ea);
+}
+
+/* str_l of r2 to the address r0 + r1, formed in a scratch register
+ * so neither input is modified. */
+static void
+_stxr_l(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+{
+    int32_t ea = jit_get_reg_but_zero(0);
+    movr(rn(ea), r0);
+    addr(rn(ea), rn(ea), r1);
+    str_l(rn(ea), r2);
+    jit_unget_reg_but_zero(ea);
+}
+
+/* str_l-style store of r1 to r0 + i0.  STG takes a signed 20-bit
+ * offset; anything larger is added into a scratch register first. */
+static void
+_stxi_l(jit_state_t *_jit, jit_word_t i0, int32_t r0, int32_t r1)
+{
+    int32_t ea;
+    if (s20_p(i0)) {
+        STG(r1, x20(i0), 0, r0);
+        return;
+    }
+    ea = jit_get_reg_but_zero(0);
+    addi(rn(ea), r0, i0);
+    str_l(rn(ea), r1);
+    jit_unget_reg_but_zero(ea);
+}
+#endif
+
+/* Unconditional jump to i0.  Picks J when the halfword distance fits
+ * 16 bits, BRL when it fits 32 bits, and otherwise loads the target
+ * into a non-spillable scratch register and uses jmpr(). */
+static void
+_jmpi(jit_state_t *_jit, jit_word_t i0)
+{
+    int32_t target;
+    jit_word_t disp = (i0 - _jit->pc.w) >> 1;
+    if (s16_p(disp)) {
+        J(x16(disp));
+        return;
+    }
+    if (s32_p(disp)) {
+        BRL(disp);
+        return;
+    }
+    target = jit_get_reg_but_zero(jit_class_nospill);
+    movi(rn(target), i0);
+    jmpr(rn(target));
+    jit_unget_reg_but_zero(target);
+}
+
+/* Jump through a scratch register loaded with movi_p(); the value
+ * returned by movi_p() is passed back to the caller. */
+static jit_word_t
+_jmpi_p(jit_state_t *_jit, jit_word_t i0)
+{
+    jit_word_t at;
+    int32_t target = jit_get_reg_but_zero(jit_class_nospill);
+    at = movi_p(rn(target), i0);
+    jmpr(rn(target));
+    jit_unget_reg_but_zero(target);
+    return at;
+}
+
+/* Call i0.  Uses BRASL with _R14_REGNO when the halfword distance
+ * fits 32 bits; otherwise the target is loaded into a scratch register
+ * and callr() is used. */
+static void
+_calli(jit_state_t *_jit, jit_word_t i0)
+{
+    int32_t target;
+    jit_word_t disp = (i0 - _jit->pc.w) >> 1;
+    if (s32_p(disp)) {
+        BRASL(_R14_REGNO, disp);
+        return;
+    }
+    target = jit_get_reg_but_zero(0);
+    movi(rn(target), i0);
+    callr(rn(target));
+    jit_unget_reg_but_zero(target);
+}
+
+/* Call through a scratch register loaded with movi_p(); the value
+ * returned by movi_p() is passed back to the caller. */
+static jit_word_t
+_calli_p(jit_state_t *_jit, jit_word_t i0)
+{
+    jit_word_t at;
+    int32_t target = jit_get_reg_but_zero(0);
+    at = movi_p(rn(target), i0);
+    callr(rn(target));
+    jit_unget_reg_but_zero(target);
+    return at;
+}
+
+/* General purpose registers _R2.._R13 in ascending order.  _prolog
+ * walks this table from index 4 (_R6) upwards to find the first
+ * register that has to be saved. */
+static int32_t gprs[] = {
+    _R2,  _R3,  _R4,  _R5,
+    _R6,  _R7,  _R8,  _R9,
+    _R10, _R11, _R12, _R13
+};
+
+/*
+ * Emit the function prolog.
+ *
+ * Finalizes self.aoff and the 8-byte aligned stack size, saves the gprs
+ * from the first one that needs saving up to %r15 with one store-multiple
+ * relative to the incoming %r15, spills the fprs that are set in the
+ * function regset, copies %r15 to %r13 and then lowers %r15 by
+ * stack_framesize + stack.  When the function uses allocar, the current
+ * aoff is also stored at aoffoff(%r13).
+ *
+ * Nothing is emitted when the function has assume_frame set.
+ * The i0 node is not used.
+ */
+static void
+_prolog(jit_state_t *_jit, jit_node_t *i0)
+{
+    int32_t regno, offset;
+    if (_jitc->function->define_frame || _jitc->function->assume_frame) {
+        int32_t frame = -_jitc->function->frame;
+        assert(_jitc->function->self.aoff >= frame);
+        if (_jitc->function->assume_frame)
+            return;
+        _jitc->function->self.aoff = frame;
+    }
+    if (_jitc->function->allocar)
+        _jitc->function->self.aoff &= -8;
+    _jitc->function->stack = ((_jitc->function->self.alen -
+                              /* align stack at 8 bytes */
+                              _jitc->function->self.aoff) + 7) & -8;
+    /* *IFF* a non variadic function,
+     * Lightning does not reserve stack space for spilling arguments
+     * in registers.
+     * S390x, as per gcc, has 8 stack slots for spilling arguments,
+     * (%r6 is callee save) and uses an alloca like approach to save
+     * callee save fpr registers.
+     * Since argument registers are not saved in any lightning port,
+     * use the 8 slots to spill any modified fpr register, and still
+     * use the same stack frame logic as gcc.
+     * Save at least %r13 to %r15, as %r13 is used as frame pointer.
+     * *IFF* a variadic function, a "standard" stack frame, with
+     * fpr registers saved in an alloca'ed area, is used.
+     */
+    /* Pick the first gprs[] index to save: the next variadic argument
+     * register when there is one, otherwise the first modified register
+     * starting at gprs[4]; the search stops at the last entry so that at
+     * least gprs[11] up to %r15 are always saved. */
+    if ((_jitc->function->self.call & jit_call_varargs) &&
+        jit_arg_reg_p(_jitc->function->vagp))
+        regno = _jitc->function->vagp;
+    else {
+        for (regno = 4; regno < jit_size(gprs) - 1; regno++) {
+            if (jit_regset_tstbit(&_jitc->function->regset, gprs[regno]))
+                break;
+        }
+    }
+#if __WORDSIZE == 32
+# define FP_OFFSET 64
+    if (_jitc->function->self.call & jit_call_varargs)
+        offset = regno * 4 + 8;
+    else
+        offset = (regno - 4) * 4 + 32;
+    STM(rn(gprs[regno]), _R15_REGNO, x20(offset), _R15_REGNO);
+#else
+# define FP_OFFSET 128
+    if (_jitc->function->self.call & jit_call_varargs)
+        offset = regno * 8 + 16;
+    else
+        offset = (regno - 4) * 8 + 48;
+    STMG(rn(gprs[regno]), _R15_REGNO, x20(offset), _R15_REGNO);
+#endif
+
+    /* Store fpr R at O(%r15), but only when R is set in the regset. */
+#define SPILL(R, O) \
+    do { \
+        if (jit_regset_tstbit(&_jitc->function->regset, R)) \
+            stxi_d(O, _R15_REGNO, rn(R)); \
+    } while (0)
+    if (_jitc->function->self.call & jit_call_varargs) {
+        /* Save the remaining fpr argument registers, then spill the
+         * modified fprs into their jit_va_list_t fields. */
+        for (regno = _jitc->function->vafp; jit_arg_f_reg_p(regno); ++regno)
+            stxi_d(FP_OFFSET + regno * 8, _R15_REGNO, rn(_F0 - regno));
+        SPILL(_F8, _jitc->function->vaoff + offsetof(jit_va_list_t, f8));
+        SPILL(_F9, _jitc->function->vaoff + offsetof(jit_va_list_t, f9));
+        SPILL(_F10, _jitc->function->vaoff + offsetof(jit_va_list_t, f10));
+        SPILL(_F11, _jitc->function->vaoff + offsetof(jit_va_list_t, f11));
+        SPILL(_F12, _jitc->function->vaoff + offsetof(jit_va_list_t, f12));
+        SPILL(_F13, _jitc->function->vaoff + offsetof(jit_va_list_t, f13));
+        SPILL(_F14, _jitc->function->vaoff + offsetof(jit_va_list_t, f14));
+    }
+    else {
+        /* First 4 in low address */
+#if __WORDSIZE == 32
+        SPILL(_F10, 0);
+        SPILL(_F11, 8);
+        SPILL(_F12, 16);
+        SPILL(_F13, 24);
+        /* gpr registers here */
+        SPILL(_F14, 72);
+        SPILL(_F8, 80);
+        SPILL(_F9, 88);
+#else
+        /* The gprs saved above start no lower than offset 48 (regno is
+         * at least 4 here), so the four 8-byte slots at 16..40 are free.
+         * _F13 must use 40 and not 48: offset 48 is where the
+         * store-multiple places gprs[4] when regno == 4, and spilling
+         * there would overwrite that saved register.  The epilog must
+         * reload from the same offsets. */
+        SPILL(_F10, 16);
+        SPILL(_F11, 24);
+        SPILL(_F12, 32);
+        SPILL(_F13, 40);
+        /* Last 3 in high address */
+        SPILL(_F14, 136);
+        SPILL(_F8, 144);
+        SPILL(_F9, 152);
+#endif
+    }
+#undef SPILL
+    /* %r13 keeps the incoming %r15 as frame pointer. */
+    movr(_R13_REGNO, _R15_REGNO);
+    subi(_R15_REGNO, _R15_REGNO, stack_framesize + _jitc->function->stack);
+    if (_jitc->function->allocar) {
+        regno = jit_get_reg(jit_class_gpr);
+        movi(rn(regno), _jitc->function->self.aoff);
+        stxi_i(_jitc->function->aoffoff, _R13_REGNO, rn(regno));
+        jit_unget_reg(regno);
+    }
+}
+
+/*
+ * Emit the function epilog, mirroring _prolog(): restore %r15 from %r13,
+ * reload the fprs that are set in the function regset, reload the gprs
+ * with one load-multiple using the same first register and offset that
+ * the prolog computed, and return through _R14_REGNO.
+ *
+ * Nothing is emitted when the function has assume_frame set.
+ * The i0 node is not used.
+ */
+static void
+_epilog(jit_state_t *_jit, jit_node_t *i0)
+{
+    int32_t regno, offset;
+    if (_jitc->function->assume_frame)
+        return;
+    /* Same first-register selection as in _prolog(). */
+    if ((_jitc->function->self.call & jit_call_varargs) &&
+        jit_arg_reg_p(_jitc->function->vagp))
+        regno = _jitc->function->vagp;
+    else {
+        for (regno = 4; regno < jit_size(gprs) - 1; regno++) {
+            if (jit_regset_tstbit(&_jitc->function->regset, gprs[regno]))
+                break;
+        }
+    }
+#if __WORDSIZE == 32
+    if (_jitc->function->self.call & jit_call_varargs)
+        offset = regno * 4 + 8;
+    else
+        offset = (regno - 4) * 4 + 32;
+#else
+    if (_jitc->function->self.call & jit_call_varargs)
+        offset = regno * 8 + 16;
+    else
+        offset = (regno - 4) * 8 + 48;
+#endif
+    movr(_R15_REGNO, _R13_REGNO);
+
+    /* Reload fpr R from O(%r15), but only when R is set in the regset. */
+#define LOAD(R, O) \
+    do { \
+        if (jit_regset_tstbit(&_jitc->function->regset, R)) \
+            ldxi_d(rn(R), _R15_REGNO, O); \
+    } while (0)
+    if (_jitc->function->self.call & jit_call_varargs) {
+        LOAD(_F8, _jitc->function->vaoff + offsetof(jit_va_list_t, f8));
+        LOAD(_F9, _jitc->function->vaoff + offsetof(jit_va_list_t, f9));
+        LOAD(_F10, _jitc->function->vaoff + offsetof(jit_va_list_t, f10));
+        LOAD(_F11, _jitc->function->vaoff + offsetof(jit_va_list_t, f11));
+        LOAD(_F12, _jitc->function->vaoff + offsetof(jit_va_list_t, f12));
+        LOAD(_F13, _jitc->function->vaoff + offsetof(jit_va_list_t, f13));
+        LOAD(_F14, _jitc->function->vaoff + offsetof(jit_va_list_t, f14));
+    }
+    else {
+#if __WORDSIZE == 32
+        LOAD(_F10, 0);
+        LOAD(_F11, 8);
+        LOAD(_F12, 16);
+        LOAD(_F13, 24);
+        LOAD(_F14, 72);
+        LOAD(_F8, 80);
+        LOAD(_F9, 88);
+#else
+        /* Offsets must match the SPILL() offsets in _prolog(); _F13
+         * lives at 40 so it does not share offset 48 with gprs[4]. */
+        LOAD(_F10, 16);
+        LOAD(_F11, 24);
+        LOAD(_F12, 32);
+        LOAD(_F13, 40);
+        LOAD(_F14, 136);
+        LOAD(_F8, 144);
+        LOAD(_F9, 152);
+#endif
+    }
+#undef LOAD
+#if __WORDSIZE == 32
+    LM(rn(gprs[regno]), _R15_REGNO, x20(offset), _R15_REGNO);
+#else
+    LMG(rn(gprs[regno]), _R15_REGNO, x20(offset), _R15_REGNO);
+#endif
+    BR(_R14_REGNO);
+}
+
+/*
+ * Emit code that initializes the jit_va_list_t of the current function
+ * and leaves its address in r0.
+ * Must only be used inside a function flagged jit_call_varargs.
+ */
+static void
+_vastart(jit_state_t *_jit, int32_t r0)
+{
+    int32_t tmp;
+
+    assert(_jitc->function->self.call & jit_call_varargs);
+
+    /* r0 = %r13 + vaoff: address of the jit_va_list_t being returned. */
+    addi(r0, _R13_REGNO, _jitc->function->vaoff);
+    tmp = jit_get_reg(jit_class_gpr);
+
+    /* gpoff = vagp */
+    movi(rn(tmp), _jitc->function->vagp);
+    stxi(offsetof(jit_va_list_t, gpoff), r0, rn(tmp));
+
+    /* fpoff = vafp */
+    movi(rn(tmp), _jitc->function->vafp);
+    stxi(offsetof(jit_va_list_t, fpoff), r0, rn(tmp));
+
+    /* over = %r13 + self.size: first argument passed on the stack. */
+    addi(rn(tmp), _R13_REGNO, _jitc->function->self.size);
+    stxi(offsetof(jit_va_list_t, over), r0, rn(tmp));
+
+    /* save = %r13: base of the register save area. */
+    stxi(offsetof(jit_va_list_t, save), r0, _R13_REGNO);
+
+    jit_unget_reg(tmp);
+}
+
+/*
+ * Emit code that fetches the next word-sized variadic argument into r0,
+ * given the address of a jit_va_list_t in r1.
+ *
+ * While gpoff is below 5 the argument is read from the register save
+ * area at save + gpoff * sizeof(word) + 2 * sizeof(word) and gpoff is
+ * incremented; otherwise it is read through the overflow pointer, which
+ * is then advanced by one word.
+ * Must only be used inside a function flagged jit_call_varargs.
+ */
+static void
+_vaarg(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+    /* log2(sizeof(jit_word_t)).  Selected here rather than with #if
+     * directives inside the lshi() argument list: preprocessing
+     * directives inside the arguments of a function-like macro are
+     * undefined behavior (C11 6.10.3p11). */
+#if __WORDSIZE == 32
+    const int32_t word_shift = 2;
+#else
+    const int32_t word_shift = 3;
+#endif
+    int32_t rg0;
+    int32_t rg1;
+    int32_t rg2;
+    jit_word_t ge_code;
+    jit_word_t lt_code;
+
+    assert(_jitc->function->self.call & jit_call_varargs);
+
+    rg0 = jit_get_reg_but_zero(0);
+    rg1 = jit_get_reg_but_zero(0);
+
+    /* Load the gp offset in save area in the first temporary. */
+    ldxi(rn(rg0), r1, offsetof(jit_va_list_t, gpoff));
+
+    /* Jump over if there are no remaining arguments in the save area. */
+    ge_code = bgei_p(_jit->pc.w, rn(rg0), 5);
+
+    /* Load the save area pointer in the second temporary. */
+    ldxi(rn(rg1), r1, offsetof(jit_va_list_t, save));
+
+    /* Scale offset */
+    rg2 = jit_get_reg_but_zero(0);
+    lshi(rn(rg2), rn(rg0), word_shift);
+    /* Add offset to saved area. */
+    addi(rn(rg2), rn(rg2), 2 * sizeof(jit_word_t));
+
+    /* Load the vararg argument in the first argument. */
+    ldxr(r0, rn(rg1), rn(rg2));
+    jit_unget_reg_but_zero(rg2);
+
+    /* Update the gp offset. */
+    addi(rn(rg0), rn(rg0), 1);
+    stxi(offsetof(jit_va_list_t, gpoff), r1, rn(rg0));
+
+    /* Will only need one temporary register below. */
+    jit_unget_reg_but_zero(rg1);
+
+    /* Jump over overflow code. */
+    lt_code = jmpi_p(_jit->pc.w);
+
+    /* Where to land if argument is in overflow area. */
+    patch_at(ge_code, _jit->pc.w);
+
+    /* Load overflow pointer. */
+    ldxi(rn(rg0), r1, offsetof(jit_va_list_t, over));
+
+    /* Load argument. */
+    ldr(r0, rn(rg0));
+
+    /* Update overflow pointer. */
+    addi(rn(rg0), rn(rg0), sizeof(jit_word_t));
+    stxi(offsetof(jit_va_list_t, over), r1, rn(rg0));
+
+    /* Where to land if argument is in save area. */
+    patch_at(lt_code, _jit->pc.w);
+
+    jit_unget_reg_but_zero(rg0);
+}
+
+static void
+_patch_at(jit_state_t *_jit, jit_word_t instr, jit_word_t label)
+{ /* Rewrite the code found at address instr so that it refers to label.
+ * The first 16-bit unit at instr selects one of three cases: a movi_p
+ * immediate-load sequence (label is written as 16-bit pieces), a 0xA7
+ * branch (16-bit relative distance) or a 0xC0 branch (32-bit relative
+ * distance).  Any other opcode byte ends in abort().
+ * NOTE(review): decoding relies on how the compiler lays out the
+ * bit-fields below -- presumably the s390 big-endian layout; confirm. */
+ jit_word_t d;
+ union {
+ uint16_t *s;
+ jit_word_t w;
+ } u; /* views the address instr as an array of 16-bit units */
+ u.w = instr;
+ union {
+ struct {
+ uint16_t op : 8;
+ uint16_t r1 : 4;
+ uint16_t r3 : 4;
+ } b;
+ uint16_t s;
+ } i0; /* one 16-bit unit split into an 8-bit op and two 4-bit fields */
+ union {
+ struct {
+ uint16_t i2;
+ } b;
+ uint16_t s;
+ } i1; /* a 16-bit immediate */
+ union {
+ struct {
+ uint32_t ih : 16;
+ uint32_t il : 16;
+ } b;
+ uint32_t i;
+ } i12; /* a 32-bit immediate split in two 16-bit halves */
+ i0.s = u.s[0];
+ /* movi_p: recognized by the opcode byte (plus r3 == 8 on 32-bit) of the
+ * first unit; each following instruction is asserted to be 0xA5 with the
+ * expected r3 value before its immediate unit is overwritten. */
+ if (i0.b.op ==
+#if __WORDSIZE == 32
+ 0xA7 && i0.b.r3 == 8
+#else
+ 0xA5
+#endif
+ ) {
+#if __WORDSIZE == 64
+ assert(i0.b.r3 == 3);
+#endif
+ i1.b.i2 = (jit_uword_t)label; /* truncates to bits 0..15 of label */
+ u.s[1] = i1.s;
+ i0.s = u.s[2];
+ assert(i0.b.op == 0xA5 && i0.b.r3 == 2);
+ i1.b.i2 = (jit_uword_t)label >> 16; /* bits 16..31 */
+ u.s[3] = i1.s;
+#if __WORDSIZE == 64
+ i0.s = u.s[4];
+ assert(i0.b.op == 0xA5 && i0.b.r3 == 1);
+ i1.b.i2 = (jit_uword_t)label >> 32; /* bits 32..47 */
+ u.s[5] = i1.s;
+ i0.s = u.s[6];
+ assert(i0.b.op == 0xA5 && i0.b.r3 == 0);
+ i1.b.i2 = (jit_uword_t)label >> 48; /* bits 48..63 */
+ u.s[7] = i1.s;
+#endif
+ }
+ /* BRC: the distance (label - instr) >> 1 must pass s16_p() and replaces
+ * the second 16-bit unit. */
+ else if (i0.b.op == 0xA7) {
+ assert(i0.b.r3 == 0x4);
+ d = (label - instr) >> 1;
+ assert(s16_p(d));
+ i1.b.i2 = d;
+ u.s[1] = i1.s;
+ }
+ /* BRCL: the distance (label - instr) >> 1 must pass s32_p() and is
+ * written as ih then il into the second and third 16-bit units. */
+ else if (i0.b.op == 0xC0) {
+ assert(i0.b.r3 == 0x4);
+ d = (label - instr) >> 1;
+ assert(s32_p(d));
+ i12.i = d;
+ u.s[1] = i12.b.ih;
+ u.s[2] = i12.b.il;
+ }
+ else
+ abort(); /* not an instruction this function knows how to patch */
+}
+#endif
diff --git a/deps/lightening/lightening/s390-fpu.c b/deps/lightening/lightening/s390-fpu.c
new file mode 100644
index 0000000..3866643
--- /dev/null
+++ b/deps/lightening/lightening/s390-fpu.c
@@ -0,0 +1,1316 @@
+/*
+ * Copyright (C) 2013-2017 Free Software Foundation, Inc.
+ *
+ * This file is part of GNU lightning.
+ *
+ * GNU lightning is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ *
+ * GNU lightning is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+ * License for more details.
+ *
+ * Authors:
+ * Paulo Cesar Pereira de Andrade
+ */
+
+#if PROTO
+# define RND_CUR 0 /* presumably "use the current rounding mode" -- TODO confirm against the z/Architecture M3 modifier table */
+# define RND_BIAS_NEAR 1 /* presumably biased round-to-nearest -- TODO confirm */
+# define RND_NEAR 4 /* presumably round to nearest -- TODO confirm */
+# define RND_ZERO 5 /* passed as the M3 operand of CFEBR/CFDBR/CGEBR/CGDBR by the truncr_* macros */
+# define RND_POS_INF 6 /* presumably round toward +infinity -- TODO confirm */
+# define RND_NEG_INF 7 /* presumably round toward -infinity -- TODO confirm */
+/****************************************************************
+ * Floating Point Instructions *
+ ****************************************************************/
+/* CONVERT BFP TO HFP */
+# define THDER(R1,R2) RRE_(0xB358,R1,R2)
+# define THDR(R1,R2) RRE_(0xB359,R1,R2)
+/* CONVERT HFP TO BFP */
+# define TBEDR(R1,R2) RRE_(0xB350,R1,R2)
+# define TBDR(R1,R2) RRE_(0xB351,R1,R2)
+/* LOAD */
+# define LER(R1,R2) RR_(0x38,R1,R2)
+# define LDR(R1,R2) RR_(0x28,R1,R2)
+# define LXR(R1,R2) RRE_(0xB365,R1,R2)
+# define LE(R1,D2,X2,B2) RX_(0x78,R1,X2,B2,D2)
+# define LD(R1,D2,X2,B2) RX_(0x68,R1,X2,B2,D2)
+# define LEY(R1,D2,X2,B2) RXY_(0xED,R1,X2,B2,D2,0x64)
+# define LDY(R1,D2,X2,B2) RXY_(0xED,R1,X2,B2,D2,0x65)
+/* LOAD ZERO */
+# define LZER(R1) RRE_(0xB374,R1,0)
+# define LZDR(R1) RRE_(0xB375,R1,0)
+# define LZXR(R1) RRE_(0xB376,R1,0)
+/* STORE */
+# define STE(R1,D2,X2,B2) RX_(0x70,R1,X2,B2,D2)
+# define STD(R1,D2,X2,B2) RX_(0x60,R1,X2,B2,D2)
+# define STEY(R1,D2,X2,B2) RXY_(0xED,R1,X2,B2,D2,0x66)
+# define STDY(R1,D2,X2,B2) RXY_(0xED,R1,X2,B2,D2,0x67)
+/****************************************************************
+ * Hexadecimal Floating Point Instructions *
+ ****************************************************************/
+/* ADD NORMALIZED */
+# define AER(R1,R2) RR_(0x3A,R1,R2)
+# define ADR(R1,R2) RR_(0x2A,R1,R2)
+# define AXR(R1,R2) RR_(0x36,R1,R2)
+# define AE(R1,D2,X2,B2) RX_(0x7A,R1,X2,B2,D2)
+# define AD(R1,D2,X2,B2) RX_(0x6A,R1,X2,B2,D2)
+/* ADD UNNORMALIZED */
+# define AUR(R1,R2) RR_(0x3E,R1,R2)
+# define AWR(R1,R2) RR_(0x2E,R1,R2)
+# define AU(R1,D2,X2,B2) RX_(0x7E,R1,X2,B2,D2)
+# define AW(R1,D2,X2,B2) RX_(0x6E,R1,X2,B2,D2)
+/* COMPARE */
+# define CER(R1,R2) RR_(0x39,R1,R2)
+# define CDR(R1,R2) RR_(0x29,R1,R2)
+# define CXR(R1,R2) RRE_(0xB369,R1,R2)
+# define CE(R1,D2,X2,B2) RX_(0x79,R1,X2,B2,D2)
+# define CD(R1,D2,X2,B2) RX_(0x69,R1,X2,B2,D2)
+/* CONVERT FROM FIXED */
+# define CEFR(R1,R2) RRE_(0xB3B4,R1,R2)
+# define CDFR(R1,R2) RRE_(0xB3B5,R1,R2)
+# define CXFR(R1,R2) RRE_(0xB3B6,R1,R2)
+# define CEGR(R1,R2) RRE_(0xB3C4,R1,R2)
+# define CDGR(R1,R2) RRE_(0xB3C5,R1,R2)
+# define CXGR(R1,R2) RRE_(0xB3C6,R1,R2)
+/* CONVERT TO FIXED */
+# define CFER(R1,R2) RRE_(0xB3B8,R1,R2)
+# define CFDR(R1,R2) RRE_(0xB3B9,R1,R2)
+# define CFXR(R1,R2) RRE_(0xB3BA,R1,R2)
+# define CGER(R1,R2) RRE_(0xB3C8,R1,R2)
+# define CGDR(R1,R2) RRE_(0xB3C9,R1,R2)
+# define CGXR(R1,R2) RRE_(0xB3CA,R1,R2)
+/* DIVIDE */
+# define DER(R1,R2) RR_(0x3D,R1,R2)
+# define DDR(R1,R2) RR_(0x2D,R1,R2)
+# define DXR(R1,R2) RRE_(0xB22D,R1,R2)
+# define DE(R1,D2,X2,B2) RX_(0x7D,R1,X2,B2,D2)
+# define DD(R1,D2,X2,B2) RX_(0x6D,R1,X2,B2,D2)
+/* HALVE */
+# define HER(R1,R2) RR_(0x34,R1,R2)
+# define HDR(R1,R2) RR_(0x24,R1,R2)
+/* LOAD AND TEST */
+# define LTER(R1,R2) RR_(0x32,R1,R2)
+# define LTDR(R1,R2) RR_(0x22,R1,R2)
+# define LTXR(R1,R2) RRE_(0xB362,R1,R2)
+/* LOAD COMPLEMENT */
+# define LCER(R1,R2) RR_(0x33,R1,R2)
+# define LCDR(R1,R2) RR_(0x23,R1,R2)
+# define LCXR(R1,R2) RRE_(0xB363,R1,R2)
+/* LOAD FP INTEGER */
+# define FIER(R1,R2) RRE_(0xB377,R1,R2)
+# define FIDR(R1,R2) RRE_(0xB37F,R1,R2)
+# define FIXR(R1,R2) RRE_(0xB367,R1,R2)
+/* LOAD LENGTHENED */
+# define LDER(R1,R2) RRE_(0xB324,R1,R2)
+# define LXDR(R1,R2) RRE_(0xB325,R1,R2)
+# define LXER(R1,R2) RRE_(0xB326,R1,R2)
+# define LDE(R1,D2,X2,B2) RXE_(0xED,R1,X2,B2,D2,0x24)
+# define LXD(R1,D2,X2,B2) RXE_(0xED,R1,X2,B2,D2,0x25)
+# define LXE(R1,D2,X2,B2) RXE_(0xED,R1,X2,B2,D2,0x26)
+/* LOAD NEGATIVE */
+# define LNER(R1,R2) RR_(0x31,R1,R2)
+# define LNDR(R1,R2) RR_(0x21,R1,R2)
+# define LNXR(R1,R2) RRE_(0xB361,R1,R2)
+/* LOAD POSITIVE */
+# define LPER(R1,R2) RR_(0x30,R1,R2)
+# define LPDR(R1,R2) RR_(0x20,R1,R2)
+# define LPXR(R1,R2) RRE_(0xB360,R1,R2)
+/* LOAD ROUNDED */
+# define LEDR(R1,R2) RR_(0x35,R1,R2)
+# define LDXR(R1,R2) RR_(0x25,R1,R2)
+# define LRER(R1,R2) LEDR(R1,R2)
+# define LRDR(R1,R2) LDXR(R1,R2)
+# define LRXR(R1,R2) RRE_(0xB366,R1,R2)
+/* MULTIPLY */
+# define MEER(R1,R2) RRE_(0xB337,R1,R2)
+# define MDR(R1,R2) RR_(0x2C,R1,R2)
+# define MXR(R1,R2) RR_(0x26,R1,R2)
+# define MDER(R1,R2) RR_(0x3C,R1,R2)
+# define MXDR(R1,R2) RR_(0x27,R1,R2)
+# define MER(R1,R2) MDER(R1,R2)
+# define MEE(R1,D2,X2,B2) RXE_(0xED,R1,X2,B2,D2,0x37)
+# define MD(R1,D2,X2,B2) RX_(0x6C,R1,X2,B2,D2)
+# define MDE(R1,D2,X2,B2) RX_(0x7C,R1,X2,B2,D2)
+# define MXD(R1,D2,X2,B2) RX_(0x67,R1,X2,B2,D2)
+# define ME(R1,D2,X2,B2) MDE(R1,D2,X2,B2)
+/* MULTIPLY AND ADD */
+# define MAER(R1,R3,R2) RRF_(0xB32E,R1,0,R3,R2)
+# define MADR(R1,R3,R2) RRF_(0xB33E,R1,0,R3,R2)
+# define MAE(R1,R3,D2,X2,B2) RXF_(0xED,R3,X2,B2,D2,R1,0x2E)
+# define MAD(R1,R3,D2,X2,B2) RXF_(0xED,R3,X2,B2,D2,R1,0x3E)
+/* MULTIPLY AND SUBTRACT */
+# define MSER(R1,R3,R2) RRF_(0xB32F,R1,0,R3,R2)
+# define MSDR(R1,R3,R2) RRF_(0xB33F,R1,0,R3,R2)
+# define MSE(R1,R3,D2,X2,B2) RXF_(0xED,R3,X2,B2,D2,R1,0x2F)
+# define MSD(R1,R3,D2,X2,B2) RXF_(0xED,R3,X2,B2,D2,R1,0x3F)
+/* SQUARE ROOT */
+# define SQER(R1,R2) RRE_(0xB245,R1,R2)
+# define SQDR(R1,R2) RRE_(0xB244,R1,R2)
+# define SQXR(R1,R2) RRE_(0xB336,R1,R2)
+# define SQE(R1,D2,X2,B2) RXE_(0xED,R1,X2,B2,D2,0x34)
+# define SQD(R1,D2,X2,B2) RXE_(0xED,R1,X2,B2,D2,0x35)
+/* SUBTRACT NORMALIZED */
+# define SER(R1,R2) RR_(0x3B,R1,R2)
+# define SDR(R1,R2) RR_(0x2B,R1,R2)
+# define SXR(R1,R2) RR_(0x37,R1,R2)
+# define SE(R1,D2,X2,B2) RX_(0x7B,R1,X2,B2,D2)
+# define SD(R1,D2,X2,B2) RX_(0x6B,R1,X2,B2,D2)
+/* SUBTRACT UNNORMALIZED */
+# define SUR(R1,R2) RR_(0x3F,R1,R2)
+# define SWR(R1,R2) RR_(0x2F,R1,R2)
+# define SU(R1,D2,X2,B2) RX_(0x7F,R1,X2,B2,D2)
+# define SW(R1,D2,X2,B2) RX_(0x6F,R1,X2,B2,D2)
+/****************************************************************
+ * Binary Floating Point Instructions *
+ ****************************************************************/
+/* ADD */
+# define AEBR(R1,R2) RRE_(0xB30A,R1,R2)
+# define ADBR(R1,R2) RRE_(0xB31A,R1,R2)
+# define AXBR(R1,R2) RRE_(0xB34A,R1,R2)
+# define AEB(R1,D2,X2,B2) RXE_(0xED,R1,X2,B2,D2,0x0A)
+# define ADB(R1,D2,X2,B2) RXE_(0xED,R1,X2,B2,D2,0x1A)
+/* COMPARE */
+# define CEBR(R1,R2) RRE_(0xB309,R1,R2)
+# define CDBR(R1,R2) RRE_(0xB319,R1,R2)
+# define CXBR(R1,R2) RRE_(0xB349,R1,R2)
+# define CEB(R1,D2,X2,B2) RXE_(0xED,R1,X2,B2,D2,0x09)
+# define CDB(R1,D2,X2,B2) RXE_(0xED,R1,X2,B2,D2,0x19)
+/* COMPARE AND SIGNAL */
+# define KEBR(R1,R2) RRE_(0xB308,R1,R2)
+# define KDBR(R1,R2) RRE_(0xB318,R1,R2)
+# define KXBR(R1,R2) RRE_(0xB348,R1,R2)
+# define KEB(R1,D2,X2,B2) RXE_(0xED,R1,X2,B2,D2,0x08)
+# define KDB(R1,D2,X2,B2) RXE_(0xED,R1,X2,B2,D2,0x18)
+/* CONVERT FROM FIXED */
+# define CEFBR(R1,R2) RRE_(0xB394,R1,R2)
+# define CDFBR(R1,R2) RRE_(0xB395,R1,R2)
+# define CXFBR(R1,R2) RRE_(0xB396,R1,R2)
+# define CEGBR(R1,R2) RRE_(0xB3A4,R1,R2)
+# define CDGBR(R1,R2) RRE_(0xB3A5,R1,R2)
+# define CXGBR(R1,R2) RRE_(0xB3A6,R1,R2)
+/* CONVERT TO FIXED */
+# define CFEBR(R1,M3,R2) RRF_(0xB398,M3,0,R1,R2)
+# define CFDBR(R1,M3,R2) RRF_(0xB399,M3,0,R1,R2)
+# define CFXBR(R1,M3,R2) RRF_(0xB39A,M3,0,R1,R2)
+# define CGEBR(R1,M3,R2) RRF_(0xB3A8,M3,0,R1,R2)
+# define CGDBR(R1,M3,R2) RRF_(0xB3A9,M3,0,R1,R2)
+# define CGXBR(R1,M3,R2) RRF_(0xB3AA,M3,0,R1,R2)
+/* DIVIDE */
+# define DEBR(R1,R2) RRE_(0xB30D,R1,R2)
+# define DDBR(R1,R2) RRE_(0xB31D,R1,R2)
+# define DXBR(R1,R2) RRE_(0xB34D,R1,R2)
+# define DEB(R1,D2,X2,B2) RXE_(0xED,R1,X2,B2,D2,0x0D)
+# define DDB(R1,D2,X2,B2) RXE_(0xED,R1,X2,B2,D2,0x1D)
+/* DIVIDE TO INTEGER */
+# define DIEBR(R1,R3,R2,M4) RRF_(0xB353,R3,M4,R1,R2)
+# define DIDBR(R1,R3,R2,M4) RRF_(0xB35B,R3,M4,R1,R2)
+/* EXTRACT FPC */
+# define EFPC(R1) RRE_(0xB38C,R1,0)
+/* LOAD AND TEST */
+# define LTEBR(R1,R2) RRE_(0xB302,R1,R2)
+# define LTDBR(R1,R2) RRE_(0xB312,R1,R2)
+# define LTXBR(R1,R2) RRE_(0xB342,R1,R2)
+/* LOAD COMPLEMENT */
+# define LCEBR(R1,R2) RRE_(0xB303,R1,R2)
+# define LCDBR(R1,R2) RRE_(0xB313,R1,R2)
+# define LCXBR(R1,R2) RRE_(0xB343,R1,R2)
+/* LOAD FP INTEGER */
+# define FIEBR(R1,M3,R2) RRF_(0xB357,M3,0,R1,R2)
+# define FIDBR(R1,M3,R2) RRF_(0xB35F,M3,0,R1,R2)
+# define FIXBR(R1,M3,R2) RRF_(0xB347,M3,0,R1,R2)
+/* LOAD FPC */
+# define LFPC(D2,B2) S_(0xB29D,B2,D2)
+/* LOAD LENGTHENED */
+# define LDEBR(R1,R2) RRE_(0xB304,R1,R2)
+# define LXDBR(R1,R2) RRE_(0xB305,R1,R2)
+# define LXEBR(R1,R2) RRE_(0xB306,R1,R2)
+# define LDEB(R1,D2,X2,B2) RXE_(0xED,R1,X2,B2,D2,0x04)
+# define LXDB(R1,D2,X2,B2) RXE_(0xED,R1,X2,B2,D2,0x05)
+# define LXEB(R1,D2,X2,B2) RXE_(0xED,R1,X2,B2,D2,0x06)
+/* LOAD NEGATIVE */
+# define LNEBR(R1,R2) RRE_(0xB301,R1,R2)
+# define LNDBR(R1,R2) RRE_(0xB311,R1,R2)
+# define LNXBR(R1,R2) RRE_(0xB341,R1,R2)
+/* LOAD POSITIVE */
+# define LPEBR(R1,R2) RRE_(0xB300,R1,R2)
+# define LPDBR(R1,R2) RRE_(0xB310,R1,R2)
+# define LPXBR(R1,R2) RRE_(0xB340,R1,R2)
+/* LOAD ROUNDED */
+# define LEDBR(R1,R2) RRE_(0xB344,R1,R2)
+# define LDXBR(R1,R2) RRE_(0xB345,R1,R2)
+# define LEXBR(R1,R2) RRE_(0xB346,R1,R2)
+/* MULTIPLY */
+# define MEEBR(R1,R2) RRE_(0xB317,R1,R2)
+# define MDBR(R1,R2) RRE_(0xB31C,R1,R2)
+# define MXBR(R1,R2) RRE_(0xB34C,R1,R2)
+# define MDEBR(R1,R2) RRE_(0xB30C,R1,R2)
+# define MXDBR(R1,R2) RRE_(0xB307,R1,R2)
+# define MEEB(R1,D2,X2,B2) RXE_(0xED,R1,X2,B2,D2,0x17)
+# define MDB(R1,D2,X2,B2) RXE_(0xED,R1,X2,B2,D2,0x1C)
+# define MDEB(R1,D2,X2,B2) RXE_(0xED,R1,X2,B2,D2,0x0C)
+# define MXDB(R1,D2,X2,B2) RXE_(0xED,R1,X2,B2,D2,0x07)
+/* MULTIPLY AND ADD */
+# define MAEBR(R1,R3,R2) RRF_(0xB30E,R1,0,R3,R2)
+# define MADBR(R1,R3,R2) RRF_(0xB31E,R1,0,R3,R2)
+# define MAEB(R1,R3,D2,X2,B2) RXF_(0xED,R3,X2,B2,D2,R1,0x0E)
+# define MADB(R1,R3,D2,X2,B2) RXF_(0xED,R3,X2,B2,D2,R1,0x1E)
+/* MULTIPLY AND SUBTRACT */
+# define MSEBR(R1,R3,R2) RRF_(0xB30F,R1,0,R3,R2)
+# define MSDBR(R1,R3,R2) RRF_(0xB31F,R1,0,R3,R2)
+# define MSEB(R1,R3,D2,X2,B2) RXF_(0xED,R3,X2,B2,D2,R1,0x0F)
+# define MSDB(R1,R3,D2,X2,B2) RXF_(0xED,R3,X2,B2,D2,R1,0x1F)
+/* SET FPC */
+# define SFPC(R1) RRE_(0xB384,R1,0)
+/* SET ROUNDING MODE */
+# define SRNM(D2,B2) S_(0xB299,B2,D2)
+/* SQUARE ROOT */
+# define SQEBR(R1,R2) RRE_(0xB314,R1,R2)
+# define SQDBR(R1,R2) RRE_(0xB315,R1,R2)
+# define SQXBR(R1,R2) RRE_(0xB316,R1,R2)
+/* STORE FPC */
+# define STFPC(D2,B2) S_(0xB29C,B2,D2)
+/* SUBTRACT */
+# define SEBR(R1,R2) RRE_(0xB30B,R1,R2)
+# define SDBR(R1,R2) RRE_(0xB31B,R1,R2)
+# define SXBR(R1,R2) RRE_(0xB34B,R1,R2)
+# define SEB(R1,D2,X2,B2) RXE_(0xED,R1,X2,B2,D2,0x0B)
+# define SDB(R1,D2,X2,B2) RXE_(0xED,R1,X2,B2,D2,0x1B)
+/* TEST DATA CLASS */
+# define TCEB(R1,D2,X2,B2) RXE_(0xED,R1,X2,B2,D2,0x10)
+# define TCDB(R1,D2,X2,B2) RXE_(0xED,R1,X2,B2,D2,0x11)
+# define TCXB(R1,D2,X2,B2) RXE_(0xED,R1,X2,B2,D2,0x12)
+# define fp(code,r0,r1,i0) _fp(_jit,jit_code_##code##i_f,r0,r1,i0)
+static void _fp(jit_state_t*,jit_code_t,
+ int32_t,int32_t,jit_float32_t*);
+# define dp(code,r0,r1,i0) _dp(_jit,jit_code_##code##i_d,r0,r1,i0)
+static void _dp(jit_state_t*,jit_code_t,
+ int32_t,int32_t,jit_float64_t*);
+# define fr(cc,r0,r1,r2) _fr(_jit,cc,r0,r1,r2)
+static void _fr(jit_state_t*,int32_t,
+ int32_t,int32_t,int32_t);
+# define dr(cc,r0,r1,r2) _dr(_jit,cc,r0,r1,r2)
+static void _dr(jit_state_t*,int32_t,
+ int32_t,int32_t,int32_t);
+# define fi(cc,r0,r1,i0) _fi(_jit,cc,r0,r1,i0)
+static void _fi(jit_state_t*,int32_t,
+ int32_t,int32_t,jit_float32_t*);
+# define di(cc,r0,r1,i0) _di(_jit,cc,r0,r1,i0)
+static void _di(jit_state_t*,int32_t,
+ int32_t,int32_t,jit_float64_t*);
+# define bfr(cc,i0,r0,r1) _bfr(_jit,cc,i0,r0,r1)
+static void _bfr(jit_state_t*,int32_t,
+ jit_word_t,int32_t,int32_t);
+# define bdr(cc,i0,r0,r1) _bdr(_jit,cc,i0,r0,r1)
+static void _bdr(jit_state_t*,int32_t,
+ jit_word_t,int32_t,int32_t);
+# define bfr_p(cc,i0,r0,r1) _bfr_p(_jit,cc,i0,r0,r1)
+static jit_word_t _bfr_p(jit_state_t*,int32_t,
+ jit_word_t,int32_t,int32_t);
+# define bdr_p(cc,i0,r0,r1) _bdr_p(_jit,cc,i0,r0,r1)
+static jit_word_t _bdr_p(jit_state_t*,int32_t,
+ jit_word_t,int32_t,int32_t);
+# define bfi(cc,i0,r0,i1) _bfi(_jit,cc,i0,r0,i1)
+static void _bfi(jit_state_t*,int32_t,
+ jit_word_t,int32_t,jit_float32_t*);
+# define bdi(cc,i0,r0,i1) _bdi(_jit,cc,i0,r0,i1)
+static void _bdi(jit_state_t*,int32_t,
+ jit_word_t,int32_t,jit_float64_t*);
+# define bfi_p(cc,i0,r0,i1) _bfi_p(_jit,cc,i0,r0,i1)
+static jit_word_t _bfi_p(jit_state_t*,int32_t,
+ jit_word_t,int32_t,jit_float32_t*);
+# define bdi_p(cc,i0,r0,i1) _bdi_p(_jit,cc,i0,r0,i1)
+static jit_word_t _bdi_p(jit_state_t*,int32_t,
+ jit_word_t,int32_t,jit_float64_t*);
+# define buneqr(db,i0,r0,r1) _buneqr(_jit,db,i0,r0,r1)
+static jit_word_t _buneqr(jit_state_t*,int32_t,
+ jit_word_t,int32_t,int32_t);
+# define buneqi(db,i0,r0,i1) _buneqi(_jit,db,i0,r0,(jit_word_t)i1)
+static jit_word_t _buneqi(jit_state_t*,int32_t,
+ jit_word_t,int32_t,jit_word_t);
+# define bltgtr(db,i0,r0,r1) _bltgtr(_jit,db,i0,r0,r1)
+static jit_word_t _bltgtr(jit_state_t*,int32_t,
+ jit_word_t,int32_t,int32_t);
+# define bltgti(db,i0,r0,i1) _bltgti(_jit,db,i0,r0,(jit_word_t)i1)
+static jit_word_t _bltgti(jit_state_t*,int32_t,
+ jit_word_t,int32_t,jit_word_t);
+# define movr_f(r0,r1) _movr_f(_jit,r0,r1)
+static void _movr_f(jit_state_t*,int32_t,int32_t);
+# define movi_f(r0,i0) _movi_f(_jit,r0,i0)
+static void _movi_f(jit_state_t*,int32_t,jit_float32_t*);
+# define movr_d(r0,r1) _movr_d(_jit,r0,r1)
+static void _movr_d(jit_state_t*,int32_t,int32_t);
+# define movi_d(r0,i0) _movi_d(_jit,r0,i0)
+static void _movi_d(jit_state_t*,int32_t,jit_float64_t*);
+# define absr_f(r0,r1) LPEBR(r0,r1) /* abs: BFP LOAD POSITIVE, float */
+# define absr_d(r0,r1) LPDBR(r0,r1) /* abs: BFP LOAD POSITIVE, double */
+# define negr_f(r0,r1) LCEBR(r0,r1) /* neg: BFP LOAD COMPLEMENT, float */
+# define negr_d(r0,r1) LCDBR(r0,r1) /* neg: BFP LOAD COMPLEMENT, double */
+# define sqrtr_f(r0,r1) SQEBR(r0,r1) /* sqrt: BFP SQUARE ROOT, float */
+# define sqrtr_d(r0,r1) SQDBR(r0,r1) /* sqrt: BFP SQUARE ROOT, double */
+# define truncr_f_i(r0,r1) CFEBR(r0,RND_ZERO,r1) /* BFP CONVERT TO FIXED with M3 = RND_ZERO */
+# define truncr_d_i(r0,r1) CFDBR(r0,RND_ZERO,r1) /* BFP CONVERT TO FIXED with M3 = RND_ZERO */
+# if __WORDSIZE == 64
+# define truncr_f_l(r0,r1) CGEBR(r0,RND_ZERO,r1) /* CG* form of CONVERT TO FIXED, only defined on 64-bit */
+# define truncr_d_l(r0,r1) CGDBR(r0,RND_ZERO,r1) /* CG* form of CONVERT TO FIXED, only defined on 64-bit */
+# endif
+# if __WORDSIZE == 32
+# define extr_f(r0,r1) CEFBR(r0,r1) /* BFP CONVERT FROM FIXED; 32-bit builds use the C?FBR forms */
+# define extr_d(r0,r1) CDFBR(r0,r1)
+# else
+# define extr_f(r0,r1) CEGBR(r0,r1) /* BFP CONVERT FROM FIXED; 64-bit builds use the C?GBR forms */
+# define extr_d(r0,r1) CDGBR(r0,r1)
+# endif
+# define extr_d_f(r0,r1) LEDBR(r0,r1) /* double -> float: BFP LOAD ROUNDED */
+# define extr_f_d(r0,r1) LDEBR(r0,r1) /* float -> double: BFP LOAD LENGTHENED */
+# define addr_f(r0,r1,r2) _addr_f(_jit,r0,r1,r2)
+static void _addr_f(jit_state_t*,int32_t,int32_t,int32_t);
+# define addi_f(r0,r1,i0) fp(add,r0,r1,i0)
+# define addr_d(r0,r1,r2) _addr_d(_jit,r0,r1,r2)
+static void _addr_d(jit_state_t*,int32_t,int32_t,int32_t);
+# define addi_d(r0,r1,i0) dp(add,r0,r1,i0)
+# define subr_f(r0,r1,r2) _subr_f(_jit,r0,r1,r2)
+static void _subr_f(jit_state_t*,int32_t,int32_t,int32_t);
+# define subi_f(r0,r1,i0) fp(sub,r0,r1,i0)
+# define subr_d(r0,r1,r2) _subr_d(_jit,r0,r1,r2)
+static void _subr_d(jit_state_t*,int32_t,int32_t,int32_t);
+# define subi_d(r0,r1,i0) dp(sub,r0,r1,i0)
+# define rsbr_f(r0,r1,r2) subr_f(r0,r2,r1)
+# define rsbi_f(r0,r1,i0) fp(rsb,r0,r1,i0)
+# define rsbr_d(r0,r1,r2) subr_d(r0,r2,r1)
+# define rsbi_d(r0,r1,i0) dp(rsb,r0,r1,i0)
+# define mulr_f(r0,r1,r2) _mulr_f(_jit,r0,r1,r2)
+static void _mulr_f(jit_state_t*,int32_t,int32_t,int32_t);
+# define muli_f(r0,r1,i0) fp(mul,r0,r1,i0)
+# define mulr_d(r0,r1,r2) _mulr_d(_jit,r0,r1,r2)
+static void _mulr_d(jit_state_t*,int32_t,int32_t,int32_t);
+# define muli_d(r0,r1,i0) dp(mul,r0,r1,i0)
+# define divr_f(r0,r1,r2) _divr_f(_jit,r0,r1,r2)
+static void _divr_f(jit_state_t*,int32_t,int32_t,int32_t);
+# define divi_f(r0,r1,i0) fp(div,r0,r1,i0)
+# define divr_d(r0,r1,r2) _divr_d(_jit,r0,r1,r2)
+static void _divr_d(jit_state_t*,int32_t,int32_t,int32_t);
+# define divi_d(r0,r1,i0) dp(div,r0,r1,i0)
+# define ldr_f(r0,r1) LE(r0,0,0,r1)
+# define ldr_d(r0,r1) LD(r0,0,0,r1)
+# define ldi_f(r0,i0) _ldi_f(_jit,r0,i0)
+static void _ldi_f(jit_state_t*,int32_t,jit_word_t);
+# define ldi_d(r0,i0) _ldi_d(_jit,r0,i0)
+static void _ldi_d(jit_state_t*,int32_t,jit_word_t);
+# define ldxr_f(r0,r1,r2) _ldxr_f(_jit,r0,r1,r2)
+static void _ldxr_f(jit_state_t*,int32_t,int32_t,int32_t);
+# define ldxr_d(r0,r1,r2) _ldxr_d(_jit,r0,r1,r2)
+static void _ldxr_d(jit_state_t*,int32_t,int32_t,int32_t);
+# define ldxi_f(r0,r1,i0) _ldxi_f(_jit,r0,r1,i0)
+static void _ldxi_f(jit_state_t*,int32_t,int32_t,jit_word_t);
+# define ldxi_d(r0,r1,i0) _ldxi_d(_jit,r0,r1,i0)
+static void _ldxi_d(jit_state_t*,int32_t,int32_t,jit_word_t);
+# define str_f(r0,r1) STE(r1,0,0,r0)
+# define str_d(r0,r1) STD(r1,0,0,r0)
+# define sti_f(i0,r0) _sti_f(_jit,i0,r0)
+static void _sti_f(jit_state_t*,jit_word_t,int32_t);
+# define sti_d(i0,r0) _sti_d(_jit,i0,r0)
+static void _sti_d(jit_state_t*,jit_word_t,int32_t);
+# define stxr_f(r0,r1,r2) _stxr_f(_jit,r0,r1,r2)
+static void _stxr_f(jit_state_t*,int32_t,int32_t,int32_t);
+# define stxr_d(r0,r1,r2) _stxr_d(_jit,r0,r1,r2)
+static void _stxr_d(jit_state_t*,int32_t,int32_t,int32_t);
+# define stxi_f(i0,r0,r1) _stxi_f(_jit,i0,r0,r1)
+static void _stxi_f(jit_state_t*,jit_word_t,int32_t,int32_t);
+# define stxi_d(i0,r0,r1) _stxi_d(_jit,i0,r0,r1)
+static void _stxi_d(jit_state_t*,jit_word_t,int32_t,int32_t);
+# define ltr_f(r0,r1,r2) fr(CC_L,r0,r1,r2)
+# define ltr_d(r0,r1,r2) dr(CC_L,r0,r1,r2)
+# define lti_f(r0,r1,i0) fi(CC_L,r0,r1,i0)
+# define lti_d(r0,r1,i0) di(CC_L,r0,r1,i0)
+# define ler_f(r0,r1,r2) fr(CC_LE,r0,r1,r2)
+# define ler_d(r0,r1,r2) dr(CC_LE,r0,r1,r2)
+# define lei_f(r0,r1,i0) fi(CC_LE,r0,r1,i0)
+# define lei_d(r0,r1,i0) di(CC_LE,r0,r1,i0)
+# define eqr_f(r0,r1,r2) fr(CC_E,r0,r1,r2)
+# define eqr_d(r0,r1,r2) dr(CC_E,r0,r1,r2)
+# define eqi_f(r0,r1,i0) fi(CC_E,r0,r1,i0)
+# define eqi_d(r0,r1,i0) di(CC_E,r0,r1,i0)
+# define ger_f(r0,r1,r2) fr(CC_HE,r0,r1,r2)
+# define ger_d(r0,r1,r2) dr(CC_HE,r0,r1,r2)
+# define gei_f(r0,r1,i0) fi(CC_HE,r0,r1,i0)
+# define gei_d(r0,r1,i0) di(CC_HE,r0,r1,i0)
+# define gtr_f(r0,r1,r2) fr(CC_H,r0,r1,r2)
+# define gtr_d(r0,r1,r2) dr(CC_H,r0,r1,r2)
+# define gti_f(r0,r1,i0) fi(CC_H,r0,r1,i0)
+# define gti_d(r0,r1,i0) di(CC_H,r0,r1,i0)
+# define ner_f(r0,r1,r2) fr(CC_NE,r0,r1,r2)
+# define ner_d(r0,r1,r2) dr(CC_NE,r0,r1,r2)
+# define nei_f(r0,r1,i0) fi(CC_NE,r0,r1,i0)
+# define nei_d(r0,r1,i0) di(CC_NE,r0,r1,i0)
+# define unltr_f(r0,r1,r2) fr(CC_NHE,r0,r1,r2)
+# define unltr_d(r0,r1,r2) dr(CC_NHE,r0,r1,r2)
+# define unlti_f(r0,r1,i0) fi(CC_NHE,r0,r1,i0)
+# define unlti_d(r0,r1,i0) di(CC_NHE,r0,r1,i0)
+# define unler_f(r0,r1,r2) fr(CC_NH,r0,r1,r2)
+# define unler_d(r0,r1,r2) dr(CC_NH,r0,r1,r2)
+# define unlei_f(r0,r1,i0) fi(CC_NH,r0,r1,i0)
+# define unlei_d(r0,r1,i0) di(CC_NH,r0,r1,i0)
+# define uneqr_f(r0,r1,r2) _uneqr_f(_jit,r0,r1,r2)
+static void _uneqr_f(jit_state_t*,int32_t,int32_t,int32_t);
+# define uneqr_d(r0,r1,r2) _uneqr_d(_jit,r0,r1,r2)
+static void _uneqr_d(jit_state_t*,int32_t,int32_t,int32_t);
+# define uneqi_f(r0,r1,i0) fp(uneq,r0,r1,i0)
+# define uneqi_d(r0,r1,i0) dp(uneq,r0,r1,i0)
+# define unger_f(r0,r1,r2) fr(CC_NL,r0,r1,r2)
+# define unger_d(r0,r1,r2) dr(CC_NL,r0,r1,r2)
+# define ungei_f(r0,r1,i0) fi(CC_NL,r0,r1,i0)
+# define ungei_d(r0,r1,i0) di(CC_NL,r0,r1,i0)
+# define ungtr_f(r0,r1,r2) fr(CC_NLE,r0,r1,r2)
+# define ungtr_d(r0,r1,r2) dr(CC_NLE,r0,r1,r2)
+# define ungti_f(r0,r1,i0) fi(CC_NLE,r0,r1,i0)
+# define ungti_d(r0,r1,i0) di(CC_NLE,r0,r1,i0)
+# define ltgtr_f(r0,r1,r2) _ltgtr_f(_jit,r0,r1,r2)
+static void _ltgtr_f(jit_state_t*,int32_t,int32_t,int32_t);
+# define ltgtr_d(r0,r1,r2) _ltgtr_d(_jit,r0,r1,r2)
+static void _ltgtr_d(jit_state_t*,int32_t,int32_t,int32_t);
+# define ltgti_f(r0,r1,i0) fp(ltgt,r0,r1,i0)
+# define ltgti_d(r0,r1,i0) dp(ltgt,r0,r1,i0)
+# define ordr_f(r0,r1,r2) fr(CC_NO,r0,r1,r2)
+# define ordr_d(r0,r1,r2) dr(CC_NO,r0,r1,r2)
+# define ordi_f(r0,r1,i0) fi(CC_NO,r0,r1,i0)
+# define ordi_d(r0,r1,i0) di(CC_NO,r0,r1,i0)
+# define unordr_f(r0,r1,r2) fr(CC_O,r0,r1,r2)
+# define unordr_d(r0,r1,r2) dr(CC_O,r0,r1,r2)
+# define unordi_f(r0,r1,i0) fi(CC_O,r0,r1,i0)
+# define unordi_d(r0,r1,i0) di(CC_O,r0,r1,i0)
+# define bltr_f(i0,r0,r1) bfr(CC_L,i0,r0,r1)
+# define bltr_d(i0,r0,r1) bdr(CC_L,i0,r0,r1)
+# define blti_f(i0,r0,i1) bfi(CC_L,i0,r0,i1)
+# define blti_d(i0,r0,i1) bdi(CC_L,i0,r0,i1)
+# define bltr_f_p(i0,r0,r1) bfr_p(CC_L,i0,r0,r1)
+# define bltr_d_p(i0,r0,r1) bdr_p(CC_L,i0,r0,r1)
+# define blti_f_p(i0,r0,i1) bfi_p(CC_L,i0,r0,i1)
+# define blti_d_p(i0,r0,i1) bdi_p(CC_L,i0,r0,i1)
+# define bler_f(i0,r0,r1) bfr(CC_LE,i0,r0,r1)
+# define bler_d(i0,r0,r1) bdr(CC_LE,i0,r0,r1)
+# define blei_f(i0,r0,i1) bfi(CC_LE,i0,r0,i1)
+# define blei_d(i0,r0,i1) bdi(CC_LE,i0,r0,i1)
+# define bler_f_p(i0,r0,r1) bfr_p(CC_LE,i0,r0,r1)
+# define bler_d_p(i0,r0,r1) bdr_p(CC_LE,i0,r0,r1)
+# define blei_f_p(i0,r0,i1) bfi_p(CC_LE,i0,r0,i1)
+# define blei_d_p(i0,r0,i1) bdi_p(CC_LE,i0,r0,i1)
+# define beqr_f(i0,r0,r1) bfr(CC_E,i0,r0,r1)
+# define beqr_d(i0,r0,r1) bdr(CC_E,i0,r0,r1)
+# define beqi_f(i0,r0,i1) bfi(CC_E,i0,r0,i1)
+# define beqi_d(i0,r0,i1) bdi(CC_E,i0,r0,i1)
+# define beqr_f_p(i0,r0,r1) bfr_p(CC_E,i0,r0,r1)
+# define beqr_d_p(i0,r0,r1) bdr_p(CC_E,i0,r0,r1)
+# define beqi_f_p(i0,r0,i1) bfi_p(CC_E,i0,r0,i1)
+# define beqi_d_p(i0,r0,i1) bdi_p(CC_E,i0,r0,i1)
+# define bger_f(i0,r0,r1) bfr(CC_HE,i0,r0,r1)
+# define bger_d(i0,r0,r1) bdr(CC_HE,i0,r0,r1)
+# define bgei_f(i0,r0,i1) bfi(CC_HE,i0,r0,i1)
+# define bgei_d(i0,r0,i1) bdi(CC_HE,i0,r0,i1)
+# define bger_f_p(i0,r0,r1) bfr_p(CC_HE,i0,r0,r1)
+# define bger_d_p(i0,r0,r1) bdr_p(CC_HE,i0,r0,r1)
+# define bgei_f_p(i0,r0,i1) bfi_p(CC_HE,i0,r0,i1)
+# define bgei_d_p(i0,r0,i1) bdi_p(CC_HE,i0,r0,i1)
+# define bgtr_f(i0,r0,r1) bfr(CC_H,i0,r0,r1)
+# define bgtr_d(i0,r0,r1) bdr(CC_H,i0,r0,r1)
+# define bgti_f(i0,r0,i1) bfi(CC_H,i0,r0,i1)
+# define bgti_d(i0,r0,i1) bdi(CC_H,i0,r0,i1)
+# define bgtr_f_p(i0,r0,r1) bfr_p(CC_H,i0,r0,r1)
+# define bgtr_d_p(i0,r0,r1) bdr_p(CC_H,i0,r0,r1)
+# define bgti_f_p(i0,r0,i1) bfi_p(CC_H,i0,r0,i1)
+# define bgti_d_p(i0,r0,i1) bdi_p(CC_H,i0,r0,i1)
+# define bner_f(i0,r0,r1) bfr(CC_NE,i0,r0,r1)
+# define bner_d(i0,r0,r1) bdr(CC_NE,i0,r0,r1)
+# define bnei_f(i0,r0,i1) bfi(CC_NE,i0,r0,i1)
+# define bnei_d(i0,r0,i1) bdi(CC_NE,i0,r0,i1)
+# define bner_f_p(i0,r0,r1) bfr_p(CC_NE,i0,r0,r1)
+# define bner_d_p(i0,r0,r1) bdr_p(CC_NE,i0,r0,r1)
+# define bnei_f_p(i0,r0,i1) bfi_p(CC_NE,i0,r0,i1)
+# define bnei_d_p(i0,r0,i1) bdi_p(CC_NE,i0,r0,i1)
+# define bunltr_f(i0,r0,r1) bfr(CC_NHE,i0,r0,r1)
+# define bunltr_d(i0,r0,r1) bdr(CC_NHE,i0,r0,r1)
+# define bunlti_f(i0,r0,i1) bfi(CC_NHE,i0,r0,i1)
+# define bunlti_d(i0,r0,i1) bdi(CC_NHE,i0,r0,i1)
+# define bunltr_f_p(i0,r0,r1) bfr_p(CC_NHE,i0,r0,r1)
+# define bunltr_d_p(i0,r0,r1) bdr_p(CC_NHE,i0,r0,r1)
+# define bunlti_f_p(i0,r0,i1) bfi_p(CC_NHE,i0,r0,i1)
+# define bunlti_d_p(i0,r0,i1) bdi_p(CC_NHE,i0,r0,i1)
+# define bunler_f(i0,r0,r1) bfr(CC_NH,i0,r0,r1)
+# define bunler_d(i0,r0,r1) bdr(CC_NH,i0,r0,r1)
+# define bunlei_f(i0,r0,i1) bfi(CC_NH,i0,r0,i1)
+# define bunlei_d(i0,r0,i1) bdi(CC_NH,i0,r0,i1)
+# define bunler_f_p(i0,r0,r1) bfr_p(CC_NH,i0,r0,r1)
+# define bunler_d_p(i0,r0,r1) bdr_p(CC_NH,i0,r0,r1)
+# define bunlei_f_p(i0,r0,i1) bfi_p(CC_NH,i0,r0,i1)
+# define bunlei_d_p(i0,r0,i1) bdi_p(CC_NH,i0,r0,i1)
+/* Unordered-or-equal branches.  The plain and patchable (_p) names
+ * expand to the same helper; its first argument selects single (0)
+ * or double (1) precision. */
+# define buneqr_f(i0,r0,r1) buneqr(0,i0,r0,r1)
+# define buneqr_d(i0,r0,r1) buneqr(1,i0,r0,r1)
+# define buneqi_f(i0,r0,i1) buneqi(0,i0,r0,i1)
+# define buneqi_d(i0,r0,i1) buneqi(1,i0,r0,i1)
+# define buneqr_f_p(i0,r0,r1) buneqr(0,i0,r0,r1)
+# define buneqr_d_p(i0,r0,r1) buneqr(1,i0,r0,r1)
+# define buneqi_f_p(i0,r0,i1) buneqi(0,i0,r0,i1)
+# define buneqi_d_p(i0,r0,i1) buneqi(1,i0,r0,i1)
+# define bunger_f(i0,r0,r1) bfr(CC_NL,i0,r0,r1)
+# define bunger_d(i0,r0,r1) bdr(CC_NL,i0,r0,r1)
+# define bungei_f(i0,r0,i1) bfi(CC_NL,i0,r0,i1)
+# define bungei_d(i0,r0,i1) bdi(CC_NL,i0,r0,i1)
+# define bunger_f_p(i0,r0,r1) bfr_p(CC_NL,i0,r0,r1)
+# define bunger_d_p(i0,r0,r1) bdr_p(CC_NL,i0,r0,r1)
+# define bungei_f_p(i0,r0,i1) bfi_p(CC_NL,i0,r0,i1)
+# define bungei_d_p(i0,r0,i1) bdi_p(CC_NL,i0,r0,i1)
+# define bungtr_f(i0,r0,r1) bfr(CC_NLE,i0,r0,r1)
+# define bungtr_d(i0,r0,r1) bdr(CC_NLE,i0,r0,r1)
+# define bungti_f(i0,r0,i1) bfi(CC_NLE,i0,r0,i1)
+# define bungti_d(i0,r0,i1) bdi(CC_NLE,i0,r0,i1)
+# define bungtr_f_p(i0,r0,r1) bfr_p(CC_NLE,i0,r0,r1)
+# define bungtr_d_p(i0,r0,r1) bdr_p(CC_NLE,i0,r0,r1)
+# define bungti_f_p(i0,r0,i1) bfi_p(CC_NLE,i0,r0,i1)
+# define bungti_d_p(i0,r0,i1) bdi_p(CC_NLE,i0,r0,i1)
+/* Less-than-or-greater-than branches.  The plain and patchable (_p)
+ * names expand to the same helper; its first argument selects single
+ * (0) or double (1) precision. */
+# define bltgtr_f(i0,r0,r1) bltgtr(0,i0,r0,r1)
+# define bltgtr_d(i0,r0,r1) bltgtr(1,i0,r0,r1)
+# define bltgti_f(i0,r0,i1) bltgti(0,i0,r0,i1)
+# define bltgti_d(i0,r0,i1) bltgti(1,i0,r0,i1)
+# define bltgtr_f_p(i0,r0,r1) bltgtr(0,i0,r0,r1)
+# define bltgtr_d_p(i0,r0,r1) bltgtr(1,i0,r0,r1)
+# define bltgti_f_p(i0,r0,i1) bltgti(0,i0,r0,i1)
+# define bltgti_d_p(i0,r0,i1) bltgti(1,i0,r0,i1)
+# define bordr_f(i0,r0,r1) bfr(CC_NO,i0,r0,r1)
+# define bordr_d(i0,r0,r1) bdr(CC_NO,i0,r0,r1)
+# define bordi_f(i0,r0,i1) bfi(CC_NO,i0,r0,i1)
+# define bordi_d(i0,r0,i1) bdi(CC_NO,i0,r0,i1)
+# define bordr_f_p(i0,r0,r1) bfr_p(CC_NO,i0,r0,r1)
+# define bordr_d_p(i0,r0,r1) bdr_p(CC_NO,i0,r0,r1)
+# define bordi_f_p(i0,r0,i1) bfi_p(CC_NO,i0,r0,i1)
+# define bordi_d_p(i0,r0,i1) bdi_p(CC_NO,i0,r0,i1)
+# define bunordr_f(i0,r0,r1) bfr(CC_O,i0,r0,r1)
+# define bunordr_d(i0,r0,r1) bdr(CC_O,i0,r0,r1)
+# define bunordi_f(i0,r0,i1) bfi(CC_O,i0,r0,i1)
+# define bunordi_d(i0,r0,i1) bdi(CC_O,i0,r0,i1)
+# define bunordr_f_p(i0,r0,r1) bfr_p(CC_O,i0,r0,r1)
+# define bunordr_d_p(i0,r0,r1) bdr_p(CC_O,i0,r0,r1)
+# define bunordi_f_p(i0,r0,i1) bfi_p(CC_O,i0,r0,i1)
+# define bunordi_d_p(i0,r0,i1) bdi_p(CC_O,i0,r0,i1)
+# define vaarg_d(r0, r1) _vaarg_d(_jit, r0, r1)
+static void _vaarg_d(jit_state_t*, int32_t, int32_t);
+#endif
+
+#if CODE
+/* Emit a single-precision operation whose second operand is the
+ * constant at i0: the constant is loaded into a scratch FPR and the
+ * register-register form selected by code is emitted.  Any other
+ * code aborts. */
+static void
+_fp(jit_state_t *_jit, jit_code_t code,
+    int32_t r0, int32_t r1, jit_float32_t *i0)
+{
+    int32_t tmp = jit_get_reg(jit_class_fpr);
+
+    movi_f(rn(tmp), i0);
+    if (code == jit_code_addi_f)
+        addr_f(r0, r1, rn(tmp));
+    else if (code == jit_code_subi_f)
+        subr_f(r0, r1, rn(tmp));
+    else if (code == jit_code_rsbi_f)
+        rsbr_f(r0, r1, rn(tmp));
+    else if (code == jit_code_muli_f)
+        mulr_f(r0, r1, rn(tmp));
+    else if (code == jit_code_divi_f)
+        divr_f(r0, r1, rn(tmp));
+    else if (code == jit_code_uneqi_f)
+        uneqr_f(r0, r1, rn(tmp));
+    else if (code == jit_code_ltgti_f)
+        ltgtr_f(r0, r1, rn(tmp));
+    else
+        abort();
+    jit_unget_reg(tmp);
+}
+
+/* Emit a double-precision operation whose second operand is the
+ * constant at i0: the constant is loaded into a scratch FPR and the
+ * register-register form selected by code is emitted.  Any other
+ * code aborts. */
+static void
+_dp(jit_state_t *_jit, jit_code_t code,
+    int32_t r0, int32_t r1, jit_float64_t *i0)
+{
+    int32_t tmp = jit_get_reg(jit_class_fpr);
+
+    movi_d(rn(tmp), i0);
+    if (code == jit_code_addi_d)
+        addr_d(r0, r1, rn(tmp));
+    else if (code == jit_code_subi_d)
+        subr_d(r0, r1, rn(tmp));
+    else if (code == jit_code_rsbi_d)
+        rsbr_d(r0, r1, rn(tmp));
+    else if (code == jit_code_muli_d)
+        mulr_d(r0, r1, rn(tmp));
+    else if (code == jit_code_divi_d)
+        divr_d(r0, r1, rn(tmp));
+    else if (code == jit_code_uneqi_d)
+        uneqr_d(r0, r1, rn(tmp));
+    else if (code == jit_code_ltgti_d)
+        ltgtr_d(r0, r1, rn(tmp));
+    else
+        abort();
+    jit_unget_reg(tmp);
+}
+
+/* Set r0 to the 0/1 result of a single-precision comparison of r1 and
+ * r2: r0 is preloaded with 1 and a branch on condition mask cc skips
+ * the instruction that resets it to 0. */
+static void
+_fr(jit_state_t *_jit, int32_t cc,
+ int32_t r0, int32_t r1, int32_t r2)
+{
+ jit_word_t w;
+ LGHI(r0, 1);
+ CEBR(r1, r2);
+ w = _jit->pc.w; /* address of the branch, patched below */
+ BRC(cc, 0);
+ LGHI(r0, 0);
+ patch_at(w, _jit->pc.w); /* branch lands just past the clearing LGHI */
+}
+
+/* Set r0 to the 0/1 result of a double-precision comparison of r1 and
+ * r2: r0 is preloaded with 1 and a branch on condition mask cc skips
+ * the instruction that resets it to 0. */
+static void
+_dr(jit_state_t *_jit, int32_t cc,
+ int32_t r0, int32_t r1, int32_t r2)
+{
+ jit_word_t w;
+ LGHI(r0, 1);
+ CDBR(r1, r2);
+ w = _jit->pc.w; /* address of the branch, patched below */
+ BRC(cc, 0);
+ LGHI(r0, 0);
+ patch_at(w, _jit->pc.w); /* branch lands just past the clearing LGHI */
+}
+
+/* Compare FPR r1 with the single-precision constant at i0 and leave
+ * the 0/1 outcome for condition mask cc in r0.  The constant travels
+ * through a scratch FPR requested with jit_class_nospill. */
+static void
+_fi(jit_state_t *_jit, int32_t cc,
+    int32_t r0, int32_t r1, jit_float32_t *i0)
+{
+    int32_t scratch = jit_get_reg(jit_class_fpr|jit_class_nospill);
+
+    movi_f(rn(scratch), i0);
+    fr(cc, r0, r1, rn(scratch));
+    jit_unget_reg(scratch);
+}
+
+/* Compare FPR r1 with the double-precision constant at i0 and leave
+ * the 0/1 outcome for condition mask cc in r0.  The constant travels
+ * through a scratch FPR requested with jit_class_nospill. */
+static void
+_di(jit_state_t *_jit, int32_t cc,
+    int32_t r0, int32_t r1, jit_float64_t *i0)
+{
+    int32_t scratch = jit_get_reg(jit_class_fpr|jit_class_nospill);
+
+    movi_d(rn(scratch), i0);
+    dr(cc, r0, r1, rn(scratch));
+    jit_unget_reg(scratch);
+}
+
+
+/* Compare single-precision r0 with r1 and branch to address i0 when
+ * condition mask cc holds.  The displacement is counted in halfwords;
+ * the short BRC form is used when it fits in 16 bits, BRCL otherwise. */
+static void
+_bfr(jit_state_t *_jit, int32_t cc,
+     jit_word_t i0, int32_t r0, int32_t r1)
+{
+    jit_word_t disp;
+
+    CEBR(r0, r1);
+    disp = (i0 - _jit->pc.w) >> 1;
+    if (s16_p(disp)) {
+        BRC(cc, x16(disp));
+        return;
+    }
+    assert(s32_p(disp));
+    BRCL(cc, disp);
+}
+
+/* Compare double-precision r0 with r1 and branch to address i0 when
+ * condition mask cc holds.  The displacement is counted in halfwords;
+ * the short BRC form is used when it fits in 16 bits, BRCL otherwise. */
+static void
+_bdr(jit_state_t *_jit, int32_t cc,
+     jit_word_t i0, int32_t r0, int32_t r1)
+{
+    jit_word_t disp;
+
+    CDBR(r0, r1);
+    disp = (i0 - _jit->pc.w) >> 1;
+    if (s16_p(disp)) {
+        BRC(cc, x16(disp));
+        return;
+    }
+    assert(s32_p(disp));
+    BRCL(cc, disp);
+}
+
+/* Patchable single-precision compare-and-branch: emits CEBR followed
+ * by a BRCL with a zero displacement and returns the address of that
+ * BRCL.  i0 is not used here. */
+static jit_word_t
+_bfr_p(jit_state_t *_jit, int32_t cc,
+ jit_word_t i0, int32_t r0, int32_t r1)
+{
+ jit_word_t w;
+ CEBR(r0, r1);
+ w = _jit->pc.w; /* address of the branch instruction */
+ BRCL(cc, 0);
+ return (w);
+}
+
+/* Patchable double-precision compare-and-branch: emits CDBR followed
+ * by a BRCL with a zero displacement and returns the address of that
+ * BRCL.  i0 is not used here. */
+static jit_word_t
+_bdr_p(jit_state_t *_jit, int32_t cc,
+ jit_word_t i0, int32_t r0, int32_t r1)
+{
+ jit_word_t w;
+ CDBR(r0, r1);
+ w = _jit->pc.w; /* address of the branch instruction */
+ BRCL(cc, 0);
+ return (w);
+}
+
+/* Branch to i0 when the single-precision comparison of FPR r0 with
+ * the constant at i1 satisfies condition mask cc.
+ *
+ * The constant is loaded with movi_f() and compared by bfr(), so the
+ * scratch register must come from the FPR class.  A register taken
+ * from the GPR class would have its number reused as an FPR number
+ * and overwrite a floating-point register the allocator never handed
+ * out.  This matches _fi(), _buneqi() and _bltgti(). */
+static void
+_bfi(jit_state_t *_jit, int32_t cc,
+     jit_word_t i0, int32_t r0, jit_float32_t *i1)
+{
+    int32_t reg;
+    reg = jit_get_reg(jit_class_fpr|jit_class_nospill);
+    movi_f(rn(reg), i1);
+    bfr(cc, i0, r0, rn(reg));
+    jit_unget_reg(reg);
+}
+
+/* Branch to i0 when the double-precision comparison of FPR r0 with
+ * the constant at i1 satisfies condition mask cc.
+ *
+ * The constant is loaded with movi_d() and compared by bdr(), so the
+ * scratch register must come from the FPR class.  A register taken
+ * from the GPR class would have its number reused as an FPR number
+ * and overwrite a floating-point register the allocator never handed
+ * out.  This matches _di(), _buneqi() and _bltgti(). */
+static void
+_bdi(jit_state_t *_jit, int32_t cc,
+     jit_word_t i0, int32_t r0, jit_float64_t *i1)
+{
+    int32_t reg;
+    reg = jit_get_reg(jit_class_fpr|jit_class_nospill);
+    movi_d(rn(reg), i1);
+    bdr(cc, i0, r0, rn(reg));
+    jit_unget_reg(reg);
+}
+
+/* Patchable branch on a single-precision comparison of FPR r0 with
+ * the constant at i1; returns the address handed back by bfr_p() so
+ * the branch can be patched later.
+ *
+ * The scratch register receives the constant through movi_f() and is
+ * then used as a compare operand, so it must be allocated from the
+ * FPR class.  A GPR-class register number reused as an FPR number
+ * would overwrite a floating-point register the allocator never
+ * handed out.  This matches _fi(), _buneqi() and _bltgti(). */
+static jit_word_t
+_bfi_p(jit_state_t *_jit, int32_t cc,
+       jit_word_t i0, int32_t r0, jit_float32_t *i1)
+{
+    jit_word_t w;
+    int32_t reg;
+    reg = jit_get_reg(jit_class_fpr|jit_class_nospill);
+    movi_f(rn(reg), i1);
+    w = bfr_p(cc, i0, r0, rn(reg));
+    jit_unget_reg(reg);
+    return (w);
+}
+
+/* Patchable branch on a double-precision comparison of FPR r0 with
+ * the constant at i1; returns the address handed back by bdr_p() so
+ * the branch can be patched later.
+ *
+ * The scratch register receives the constant through movi_d() and is
+ * then used as a compare operand, so it must be allocated from the
+ * FPR class.  A GPR-class register number reused as an FPR number
+ * would overwrite a floating-point register the allocator never
+ * handed out.  This matches _di(), _buneqi() and _bltgti(). */
+static jit_word_t
+_bdi_p(jit_state_t *_jit, int32_t cc,
+       jit_word_t i0, int32_t r0, jit_float64_t *i1)
+{
+    jit_word_t w;
+    int32_t reg;
+    reg = jit_get_reg(jit_class_fpr|jit_class_nospill);
+    movi_d(rn(reg), i1);
+    w = bdr_p(cc, i0, r0, rn(reg));
+    jit_unget_reg(reg);
+    return (w);
+}
+
+/* Branch to i0 when r0 and r1 are unordered or equal.  db selects the
+ * double (non-zero) or single (zero) precision compare.  Two short
+ * branches steer around an unconditional BRCL: the unordered case
+ * jumps onto it, the not-equal case jumps past it, and the equal case
+ * falls through into it.  Returns the address of the BRCL. */
+static jit_word_t
+_buneqr(jit_state_t *_jit, int32_t db,
+ jit_word_t i0, int32_t r0, int32_t r1)
+{
+ jit_word_t unord, ne, w;
+ if (db) CDBR(r0, r1);
+ else CEBR(r0, r1);
+ unord = _jit->pc.w;
+ BRC(CC_O, 0); /* unord satisfies condition */
+ ne = _jit->pc.w;
+ BRC(CC_NE, 0); /* ne does not satisfy condition */
+ patch_at(unord, _jit->pc.w); /* unordered lands on the BRCL */
+ w = _jit->pc.w;
+ BRCL(CC_AL, (i0 - _jit->pc.w) >> 1);
+ patch_at(ne, _jit->pc.w); /* not-equal lands after the BRCL */
+ return (w);
+}
+
+/* Immediate form of the unordered-or-equal branch: i1 is the address
+ * of a double (db non-zero) or float (db zero) constant, which is
+ * loaded into a scratch FPR before delegating to buneqr().  Returns
+ * the address reported by buneqr(). */
+static jit_word_t
+_buneqi(jit_state_t *_jit, int32_t db,
+        jit_word_t i0, int32_t r0, jit_word_t i1)
+{
+    jit_word_t patch_addr;
+    int32_t scratch = jit_get_reg(jit_class_fpr|jit_class_nospill);
+
+    if (!db)
+        movi_f(rn(scratch), (jit_float32_t *)i1);
+    else
+        movi_d(rn(scratch), (jit_float64_t *)i1);
+    patch_addr = buneqr(db, i0, r0, rn(scratch));
+    jit_unget_reg(scratch);
+    return patch_addr;
+}
+
+/* Branch to i0 when r0 is less than or greater than r1, i.e. neither
+ * unordered nor equal.  db selects the double (non-zero) or single
+ * (zero) precision compare.  Both short branches skip the
+ * unconditional BRCL; only the remaining cases fall through into it.
+ * Returns the address of the BRCL. */
+static jit_word_t
+_bltgtr(jit_state_t *_jit, int32_t db,
+ jit_word_t i0, int32_t r0, int32_t r1)
+{
+ jit_word_t unord, eq, w;
+ if (db) CDBR(r0, r1);
+ else CEBR(r0, r1);
+ unord = _jit->pc.w;
+ BRC(CC_O, 0); /* unord does not satisfy condition */
+ eq = _jit->pc.w;
+ BRC(CC_E, 0); /* eq does not satisfy condition */
+ w = _jit->pc.w;
+ BRCL(CC_AL, (i0 - _jit->pc.w) >> 1);
+ patch_at(unord, _jit->pc.w); /* both skips land after the BRCL */
+ patch_at(eq, _jit->pc.w);
+ return (w);
+}
+
+/* Immediate form of the less-than-or-greater-than branch: i1 is the
+ * address of a double (db non-zero) or float (db zero) constant,
+ * which is loaded into a scratch FPR before delegating to bltgtr().
+ * Returns the address reported by bltgtr(). */
+static jit_word_t
+_bltgti(jit_state_t *_jit, int32_t db,
+        jit_word_t i0, int32_t r0, jit_word_t i1)
+{
+    jit_word_t patch_addr;
+    int32_t scratch = jit_get_reg(jit_class_fpr|jit_class_nospill);
+
+    if (!db)
+        movi_f(rn(scratch), (jit_float32_t *)i1);
+    else
+        movi_d(rn(scratch), (jit_float64_t *)i1);
+    patch_addr = bltgtr(db, i0, r0, rn(scratch));
+    jit_unget_reg(scratch);
+    return patch_addr;
+}
+
+/* Copy single-precision r1 into r0; emits nothing when both name the
+ * same register. */
+static void
+_movr_f(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+    if (r0 == r1)
+        return;
+    LER(r0, r1);
+}
+
+/* Load the single-precision constant at i0 into FPR r0.
+ *
+ * - An all-zero bit pattern is produced with LZER.
+ * - When no constant data buffer is in use (_jitc->no_data), the bits
+ *   are built in a GPR, stored at offset -4 from the frame pointer
+ *   and reloaded as a float.
+ * - Otherwise the value is loaded straight from the address i0.
+ *
+ * The bit pattern is inspected through the union instead of through
+ * an int32_t* cast of i0, which would violate the C aliasing rules. */
+static void
+_movi_f(jit_state_t *_jit, int32_t r0, jit_float32_t *i0)
+{
+    union {
+        int32_t i;
+        jit_float32_t f;
+    } data;
+    int32_t reg;
+
+    data.f = *i0;
+    if (data.i == 0)
+        LZER(r0);
+    else if (_jitc->no_data) {
+        reg = jit_get_reg_but_zero(0);
+        movi(rn(reg), data.i & 0xffffffff);
+        stxi_i(-4, _FP_REGNO, rn(reg));
+        jit_unget_reg_but_zero(reg);
+        ldxi_f(r0, _FP_REGNO, -4);
+    }
+    else
+        ldi_f(r0, (jit_word_t)i0);
+}
+
+/* Copy double-precision r1 into r0; emits nothing when both name the
+ * same register. */
+static void
+_movr_d(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+    if (r0 == r1)
+        return;
+    LDR(r0, r1);
+}
+
+/* Load the double-precision constant at i0 into FPR r0.
+ *
+ * - An all-zero bit pattern is produced with LZDR.
+ * - When no constant data buffer is in use (_jitc->no_data), the bits
+ *   are built in a GPR (two 32-bit halves on 32-bit targets), stored
+ *   at offset -8 from the frame pointer and reloaded as a double.
+ * - Otherwise the value is loaded straight from the address i0.
+ *
+ * The bit pattern is inspected through the union instead of through
+ * an int64_t* cast of i0, which would violate the C aliasing rules. */
+static void
+_movi_d(jit_state_t *_jit, int32_t r0, jit_float64_t *i0)
+{
+    union {
+#if __WORDSIZE == 32
+        int32_t i[2];
+#else
+        int64_t l;
+#endif
+        jit_float64_t d;
+    } data;
+    int32_t reg;
+    int is_zero;
+
+    data.d = *i0;
+#if __WORDSIZE == 32
+    is_zero = (data.i[0] | data.i[1]) == 0;
+#else
+    is_zero = data.l == 0;
+#endif
+    if (is_zero)
+        LZDR(r0);
+    else if (_jitc->no_data) {
+        reg = jit_get_reg_but_zero(0);
+#if __WORDSIZE == 32
+        movi(rn(reg), data.i[0]);
+        stxi_i(-8, _FP_REGNO, rn(reg));
+        movi(rn(reg), data.i[1]);
+        stxi_i(-4, _FP_REGNO, rn(reg));
+#else
+        movi(rn(reg), data.l);
+        stxi_l(-8, _FP_REGNO, rn(reg));
+#endif
+        jit_unget_reg_but_zero(reg);
+        ldxi_d(r0, _FP_REGNO, -8);
+    }
+    else
+        ldi_d(r0, (jit_word_t)i0);
+}
+
+/* r0 = r1 + r2 (single precision).  AEBR adds into its first operand,
+ * so when r0 already holds r2 the other addend is added directly;
+ * otherwise r1 is first copied into r0. */
+static void
+_addr_f(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+{
+    int32_t addend = r2;
+
+    if (r0 == r2)
+        addend = r1;
+    else
+        movr_f(r0, r1);
+    AEBR(r0, addend);
+}
+
+/* r0 = r1 + r2 (double precision).  ADBR adds into its first operand,
+ * so when r0 already holds r2 the other addend is added directly;
+ * otherwise r1 is first copied into r0. */
+static void
+_addr_d(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+{
+    int32_t addend = r2;
+
+    if (r0 == r2)
+        addend = r1;
+    else
+        movr_d(r0, r1);
+    ADBR(r0, addend);
+}
+
+/* r0 = r1 - r2 (single precision).  When r0 aliases r2 the subtrahend
+ * is saved in a scratch FPR before r0 is overwritten with r1. */
+static void
+_subr_f(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+{
+    int32_t saved;
+
+    if (r0 != r2) {
+        movr_f(r0, r1);
+        SEBR(r0, r2);
+        return;
+    }
+    saved = jit_get_reg(jit_class_fpr);
+    movr_f(rn(saved), r2);
+    movr_f(r0, r1);
+    SEBR(r0, rn(saved));
+    jit_unget_reg(saved);
+}
+
+/* r0 = r1 - r2 (double precision).  When r0 aliases r2 the subtrahend
+ * is saved in a scratch FPR before r0 is overwritten with r1. */
+static void
+_subr_d(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+{
+    int32_t saved;
+
+    if (r0 != r2) {
+        movr_d(r0, r1);
+        SDBR(r0, r2);
+        return;
+    }
+    saved = jit_get_reg(jit_class_fpr);
+    movr_d(rn(saved), r2);
+    movr_d(r0, r1);
+    SDBR(r0, rn(saved));
+    jit_unget_reg(saved);
+}
+
+/* r0 = r1 * r2 (single precision).  MEEBR multiplies into its first
+ * operand, so when r0 already holds r2 the other factor is applied
+ * directly; otherwise r1 is first copied into r0. */
+static void
+_mulr_f(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+{
+    int32_t factor = r2;
+
+    if (r0 == r2)
+        factor = r1;
+    else
+        movr_f(r0, r1);
+    MEEBR(r0, factor);
+}
+
+/* r0 = r1 * r2 (double precision).  MDBR multiplies into its first
+ * operand, so when r0 already holds r2 the other factor is applied
+ * directly; otherwise r1 is first copied into r0. */
+static void
+_mulr_d(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+{
+    int32_t factor = r2;
+
+    if (r0 == r2)
+        factor = r1;
+    else
+        movr_d(r0, r1);
+    MDBR(r0, factor);
+}
+
+/* r0 = r1 / r2 (single precision).  When r0 aliases r2 the divisor is
+ * saved in a scratch FPR before r0 is overwritten with r1. */
+static void
+_divr_f(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+{
+    int32_t saved;
+
+    if (r0 != r2) {
+        movr_f(r0, r1);
+        DEBR(r0, r2);
+        return;
+    }
+    saved = jit_get_reg(jit_class_fpr);
+    movr_f(rn(saved), r2);
+    movr_f(r0, r1);
+    DEBR(r0, rn(saved));
+    jit_unget_reg(saved);
+}
+
+/* r0 = r1 / r2 (double precision).  When r0 aliases r2 the divisor is
+ * saved in a scratch FPR before r0 is overwritten with r1. */
+static void
+_divr_d(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+{
+    int32_t saved;
+
+    if (r0 != r2) {
+        movr_d(r0, r1);
+        DDBR(r0, r2);
+        return;
+    }
+    saved = jit_get_reg(jit_class_fpr);
+    movr_d(rn(saved), r2);
+    movr_d(r0, r1);
+    DDBR(r0, rn(saved));
+    jit_unget_reg(saved);
+}
+
+/* Load a float from absolute address i0 into FPR r0, going through a
+ * temporary GPR that holds the address. */
+static void
+_ldi_f(jit_state_t *_jit, int32_t r0, jit_word_t i0)
+{
+    int32_t base = jit_get_reg_but_zero(0);
+
+    movi(rn(base), i0);
+    ldr_f(r0, rn(base));
+    jit_unget_reg_but_zero(base);
+}
+
+/* Load a double from absolute address i0 into FPR r0, going through a
+ * temporary GPR that holds the address. */
+static void
+_ldi_d(jit_state_t *_jit, int32_t r0, jit_word_t i0)
+{
+    int32_t base = jit_get_reg_but_zero(0);
+
+    movi(rn(base), i0);
+    ldr_d(r0, rn(base));
+    jit_unget_reg_but_zero(base);
+}
+
+/* Load a float from address r1 + r2 into FPR r0; the sum is formed in
+ * a temporary GPR. */
+static void
+_ldxr_f(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+{
+    int32_t base = jit_get_reg_but_zero(0);
+
+    movr(rn(base), r1);
+    addr(rn(base), rn(base), r2);
+    ldr_f(r0, rn(base));
+    jit_unget_reg_but_zero(base);
+}
+
+/* Load a double from address r1 + r2 into FPR r0; the sum is formed
+ * in a temporary GPR. */
+static void
+_ldxr_d(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+{
+    int32_t base = jit_get_reg_but_zero(0);
+
+    movr(rn(base), r1);
+    addr(rn(base), rn(base), r2);
+    ldr_d(r0, rn(base));
+    jit_unget_reg_but_zero(base);
+}
+
+/* Load a float from address r1 + i0 into FPR r0.  LE is used when i0
+ * passes u12_p(), LEY when it passes s20_p(); otherwise the address
+ * is computed in a temporary GPR. */
+static void
+_ldxi_f(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0)
+{
+    int32_t base;
+
+    if (u12_p(i0)) {
+        LE(r0, i0, 0, r1);
+        return;
+    }
+    if (s20_p(i0)) {
+        LEY(r0, x20(i0), 0, r1);
+        return;
+    }
+    base = jit_get_reg_but_zero(0);
+    movi(rn(base), i0);
+    addr(rn(base), rn(base), r1);
+    ldr_f(r0, rn(base));
+    jit_unget_reg_but_zero(base);
+}
+
+/* Load a double from address r1 + i0 into FPR r0.  LD is used when i0
+ * passes u12_p(), LDY when it passes s20_p(); otherwise the address
+ * is computed in a temporary GPR. */
+static void
+_ldxi_d(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0)
+{
+    int32_t base;
+
+    if (u12_p(i0)) {
+        LD(r0, i0, 0, r1);
+        return;
+    }
+    if (s20_p(i0)) {
+        LDY(r0, x20(i0), 0, r1);
+        return;
+    }
+    base = jit_get_reg_but_zero(0);
+    movi(rn(base), i0);
+    addr(rn(base), rn(base), r1);
+    ldr_d(r0, rn(base));
+    jit_unget_reg_but_zero(base);
+}
+
+/* Store float FPR r0 at absolute address i0, going through a
+ * temporary GPR that holds the address. */
+static void
+_sti_f(jit_state_t *_jit, jit_word_t i0, int32_t r0)
+{
+    int32_t base = jit_get_reg_but_zero(0);
+
+    movi(rn(base), i0);
+    str_f(rn(base), r0);
+    jit_unget_reg_but_zero(base);
+}
+
+/* Store double FPR r0 at absolute address i0, going through a
+ * temporary GPR that holds the address. */
+static void
+_sti_d(jit_state_t *_jit, jit_word_t i0, int32_t r0)
+{
+    int32_t base = jit_get_reg_but_zero(0);
+
+    movi(rn(base), i0);
+    str_d(rn(base), r0);
+    jit_unget_reg_but_zero(base);
+}
+
+/* Store float FPR r2 at address r0 + r1; the sum is formed in a
+ * temporary GPR. */
+static void
+_stxr_f(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+{
+    int32_t base = jit_get_reg_but_zero(0);
+
+    movr(rn(base), r0);
+    addr(rn(base), rn(base), r1);
+    str_f(rn(base), r2);
+    jit_unget_reg_but_zero(base);
+}
+
+/* Store double FPR r2 at address r0 + r1; the sum is formed in a
+ * temporary GPR. */
+static void
+_stxr_d(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+{
+    int32_t base = jit_get_reg_but_zero(0);
+
+    movr(rn(base), r0);
+    addr(rn(base), rn(base), r1);
+    str_d(rn(base), r2);
+    jit_unget_reg_but_zero(base);
+}
+
+/* Store float FPR r1 at address r0 + i0.  STE is used when i0 passes
+ * u12_p(), STEY when it passes s20_p(); otherwise the address is
+ * computed in a temporary GPR. */
+static void
+_stxi_f(jit_state_t *_jit, jit_word_t i0, int32_t r0, int32_t r1)
+{
+    int32_t base;
+
+    if (u12_p(i0)) {
+        STE(r1, i0, 0, r0);
+        return;
+    }
+    if (s20_p(i0)) {
+        STEY(r1, x20(i0), 0, r0);
+        return;
+    }
+    base = jit_get_reg_but_zero(0);
+    movi(rn(base), i0);
+    addr(rn(base), rn(base), r0);
+    str_f(rn(base), r1);
+    jit_unget_reg_but_zero(base);
+}
+
+/* Store double FPR r1 at address r0 + i0.  STD is used when i0 passes
+ * u12_p(), STDY when it passes s20_p(); otherwise the address is
+ * computed in a temporary GPR. */
+static void
+_stxi_d(jit_state_t *_jit, jit_word_t i0, int32_t r0, int32_t r1)
+{
+    int32_t base;
+
+    if (u12_p(i0)) {
+        STD(r1, i0, 0, r0);
+        return;
+    }
+    if (s20_p(i0)) {
+        STDY(r1, x20(i0), 0, r0);
+        return;
+    }
+    base = jit_get_reg_but_zero(0);
+    movi(rn(base), i0);
+    addr(rn(base), rn(base), r0);
+    str_d(rn(base), r1);
+    jit_unget_reg_but_zero(base);
+}
+
+/* r0 = 1 when single-precision r1 and r2 are unordered or equal,
+ * otherwise 0.  r0 is preset to 1 and both branches skip the
+ * instruction that clears it. */
+static void
+_uneqr_f(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+{
+ jit_word_t unord, eq;
+ movi(r0, 1); /* set to one */
+ CEBR(r1, r2);
+ unord = _jit->pc.w; /* keep set to one if unord */
+ BRC(CC_O, 0);
+ eq = _jit->pc.w;
+ BRC(CC_E, 0); /* keep set to one if eq */
+ movi(r0, 0); /* set to zero */
+ patch_at(unord, _jit->pc.w);
+ patch_at(eq, _jit->pc.w);
+}
+
+/* r0 = 1 when double-precision r1 and r2 are unordered or equal,
+ * otherwise 0.  r0 is preset to 1 and both branches skip the
+ * instruction that clears it. */
+static void
+_uneqr_d(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+{
+ jit_word_t unord, eq;
+ movi(r0, 1); /* set to one */
+ CDBR(r1, r2);
+ unord = _jit->pc.w; /* keep set to one if unord */
+ BRC(CC_O, 0);
+ eq = _jit->pc.w;
+ BRC(CC_E, 0); /* keep set to one if eq */
+ movi(r0, 0); /* set to zero */
+ patch_at(unord, _jit->pc.w);
+ patch_at(eq, _jit->pc.w);
+}
+
+/* r0 = 1 when single-precision r1 is less than or greater than r2
+ * (neither unordered nor equal), otherwise 0.  r0 is preset to 0 and
+ * both branches skip the instruction that sets it. */
+static void
+_ltgtr_f(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+{
+ jit_word_t unord, eq;
+ movi(r0, 0); /* set to zero */
+ CEBR(r1, r2);
+ unord = _jit->pc.w; /* keep set to zero if unord */
+ BRC(CC_O, 0);
+ eq = _jit->pc.w;
+ BRC(CC_E, 0); /* keep set to zero if eq */
+ movi(r0, 1); /* set to one */
+ patch_at(unord, _jit->pc.w);
+ patch_at(eq, _jit->pc.w);
+}
+
+/* r0 = 1 when double-precision r1 is less than or greater than r2
+ * (neither unordered nor equal), otherwise 0.  r0 is preset to 0 and
+ * both branches skip the instruction that sets it. */
+static void
+_ltgtr_d(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+{
+ jit_word_t unord, eq;
+ movi(r0, 0); /* set to zero */
+ CDBR(r1, r2);
+ unord = _jit->pc.w; /* keep set to zero if unord */
+ BRC(CC_O, 0);
+ eq = _jit->pc.w;
+ BRC(CC_E, 0); /* keep set to zero if eq */
+ movi(r0, 1); /* set to one */
+ patch_at(unord, _jit->pc.w);
+ patch_at(eq, _jit->pc.w);
+}
+
+/* Fetch the next double variadic argument into FPR r0.  r1 holds the
+ * address of the jit_va_list_t.  While fpoff is below
+ * NUM_FLOAT_REG_ARGS the value is read from the register save area
+ * and fpoff is incremented; otherwise it is read from the overflow
+ * area and the overflow pointer is advanced by one double.  Only
+ * valid inside a function declared variadic (asserted). */
+static void
+_vaarg_d(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+ int32_t rg0;
+ int32_t rg1;
+ int32_t rg2;
+ jit_word_t ge_code;
+ jit_word_t lt_code;
+
+ assert(_jitc->function->self.call & jit_call_varargs);
+
+ rg0 = jit_get_reg_but_zero(jit_class_gpr);
+ rg1 = jit_get_reg_but_zero(jit_class_gpr);
+
+ /* Load the fp offset in save area in the first temporary. */
+ ldxi(rn(rg0), r1, offsetof(jit_va_list_t, fpoff));
+
+ /* Jump over if there are no remaining arguments in the save area. */
+ ge_code = bgei_p(_jit->pc.w, rn(rg0), NUM_FLOAT_REG_ARGS);
+
+ /* Load the save area pointer in the second temporary. */
+ ldxi(rn(rg1), r1, offsetof(jit_va_list_t, save));
+
+ /* Scale offset. */
+ rg2 = jit_get_reg_but_zero(0);
+ lshi(rn(rg2), rn(rg0), 3);
+ /* Add offset to saved area */
+ addi(rn(rg2), rn(rg2), 16 * sizeof(jit_word_t));
+
+ /* Load the vararg argument in the first argument. */
+ ldxr_d(r0, rn(rg1), rn(rg2));
+ jit_unget_reg_but_zero(rg2);
+
+ /* Update the fp offset. */
+ addi(rn(rg0), rn(rg0), 1);
+ stxi(offsetof(jit_va_list_t, fpoff), r1, rn(rg0));
+
+ /* Will only need one temporary register below. */
+ jit_unget_reg_but_zero(rg1);
+
+ /* Jump over overflow code. */
+ lt_code = jmpi_p(_jit->pc.w);
+
+ /* Where to land if argument is in overflow area. */
+ patch_at(ge_code, _jit->pc.w);
+
+ /* Load overflow pointer. */
+ ldxi(rn(rg0), r1, offsetof(jit_va_list_t, over));
+
+ /* Load argument. */
+ ldr_d(r0, rn(rg0));
+
+ /* Update overflow pointer. */
+ addi(rn(rg0), rn(rg0), sizeof(jit_float64_t));
+ stxi(offsetof(jit_va_list_t, over), r1, rn(rg0));
+
+ /* Where to land if argument is in save area. */
+ patch_at(lt_code, _jit->pc.w);
+
+ jit_unget_reg_but_zero(rg0);
+}
+#endif
diff --git a/deps/lightening/lightening/s390.c b/deps/lightening/lightening/s390.c
new file mode 100644
index 0000000..41e0de4
--- /dev/null
+++ b/deps/lightening/lightening/s390.c
@@ -0,0 +1,1691 @@
+/*
+ * Copyright (C) 2013-2019 Free Software Foundation, Inc.
+ *
+ * This file is part of GNU lightning.
+ *
+ * GNU lightning is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ *
+ * GNU lightning is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+ * License for more details.
+ *
+ * Authors:
+ * Paulo Cesar Pereira de Andrade
+ */
+
+/* Stack pointer, integer return and floating-point return registers. */
+# define JIT_SP _R15
+# define JIT_RET _R2
+# define JIT_FRET _F0
+
+/* Number of floating-point arguments passed in registers. */
+#if __WORDSIZE == 32
+# define NUM_FLOAT_REG_ARGS 2
+#else
+# define NUM_FLOAT_REG_ARGS 4
+#endif
+/* True when argument index i is passed in a register: indices 0..4
+ * for integers, 0..NUM_FLOAT_REG_ARGS-1 for floating point. */
+#define jit_arg_reg_p(i) ((i) >= 0 && (i) < 5)
+#define jit_arg_f_reg_p(i) ((i) >= 0 && (i) < NUM_FLOAT_REG_ARGS)
+
+/*
+ * Types
+ */
+/* va_list object kept on the stack of jitted variadic functions.  The
+ * first four members follow the layout of the platform's
+ * struct __va_list_tag (see the gdb output below); they are followed
+ * by slots for the callee-save FPRs f8..f15. */
+typedef struct jit_va_list {
+ /* The offsets are "1" based, as addresses are fixed in the
+ * standard stack frame format. */
+ jit_word_t gpoff;
+ jit_word_t fpoff;
+
+ /* Easier when there is an explicitly defined type...
+(gdb) ptype ap
+type = struct __va_list_tag {
+ long __gpr;
+ long __fpr;
+ void *__overflow_arg_area;
+ void *__reg_save_area;
+
+ Note that gpoff (__gpr) and fpoff (__fpr) are jit_word_t equivalent
+ and, again, "1" (unit) based, so must be adjusted at va_arg time.
+ */
+ jit_pointer_t over;
+ jit_pointer_t save;
+
+ /* For variadic functions, always allocate space to save callee
+ * save fpr registers.
+ * Note that s390 has a standard stack frame format that lightning
+ * does not fully comply with, but for variadic functions it must,
+ * for those (variadic) do not use the "empty" spaces for any
+ * callee save fpr register, but save them after the va_list
+ * space; and use the standard stack frame format, as required
+ * by variadic functions (and have a compatible va_list pointer). */
+ jit_float64_t f8;
+ jit_float64_t f9;
+ jit_float64_t f10;
+ jit_float64_t f11;
+ jit_float64_t f12;
+ jit_float64_t f13;
+ jit_float64_t f14;
+ jit_float64_t f15;
+} jit_va_list_t;
+
+/*
+ * Prototypes
+ */
+/* Backend-local helpers; each macro supplies the implicit _jit state. */
+#define jit_get_reg_pair() _jit_get_reg_pair(_jit)
+static int32_t _jit_get_reg_pair(jit_state_t*);
+#define jit_unget_reg_pair(regno) _jit_unget_reg_pair(_jit,regno)
+static void _jit_unget_reg_pair(jit_state_t*,int32_t);
+/* NOTE(review): presumably returns a register other than %r0 -- confirm
+ * against the definition. */
+#define jit_get_reg_but_zero(flags) _jit_get_reg_but_zero(_jit,flags)
+static int32_t _jit_get_reg_but_zero(jit_state_t*,int32_t);
+/* Releasing such a register is a plain jit_unget_reg(). */
+#define jit_unget_reg_but_zero(reg) jit_unget_reg(reg)
+#define patch(instr, node) _patch(_jit, instr, node)
+static void _patch(jit_state_t*,jit_word_t,jit_node_t*);
+
+/* libgcc */
+extern void __clear_cache(void *, void *);
+
+#define PROTO 1
+# include "s390-cpu.c"
+# include "s390-fpu.c"
+#undef PROTO
+
+/*
+ * Initialization
+ */
+/* Register description table.  Each entry combines register-class
+ * flags (rc(...)) with the hardware register number in the low bits,
+ * followed by the printable name.  The final _NOREG entry is a
+ * sentinel and is not counted in _jitc->reglen (see _jit_init). */
+static const jit_register_t _rvs[] = {
+ { rc(gpr) | 0x0, "%r0" },
+ { rc(gpr) | 0x1, "%r1" },
+ { rc(gpr) | rc(sav) | 0xc, "%r12" },
+ { rc(gpr) | rc(sav) | 0xb, "%r11" },
+ { rc(gpr) | rc(sav) | 0xa, "%r10" },
+ { rc(gpr) | rc(sav) | 0x9, "%r9" },
+ { rc(gpr) | rc(sav) | 0x8, "%r8" },
+ { rc(gpr) | rc(sav) | 0x7, "%r7" },
+ { rc(gpr) | rc(arg) | rc(sav) | 0x6,"%r6" },
+ { rc(gpr) | rc(arg) | 0x5, "%r5" },
+ { rc(gpr) | rc(arg) | 0x4, "%r4" },
+ { rc(gpr) | rc(arg) | 0x3, "%r3" },
+ { rc(gpr) | rc(arg) | 0x2, "%r2" },
+ { rc(sav) | 0xd, "%r13" }, /* used as JIT_FP */
+ { 0xe, "%r14" },
+ { rc(sav) | 0xf, "%r15" },
+ { rc(fpr) | 0x1, "%f1" },
+ { rc(fpr) | 0x3, "%f3" },
+ { rc(fpr) | 0x5, "%f5" },
+ { rc(fpr) | 0x7, "%f7" },
+ { rc(fpr) | rc(sav) | 0xe, "%f14" },
+ /* Do not use as temporary to simplify stack layout */
+ { 0xf, "%f15" },
+ { rc(fpr) | rc(sav) | 0x8, "%f8" },
+ { rc(fpr) | rc(sav) | 0x9, "%f9" },
+ { rc(fpr) | rc(sav) | 0xa, "%f10" },
+ { rc(fpr) | rc(sav) | 0xb, "%f11" },
+ { rc(fpr) | rc(sav) | 0xc, "%f12" },
+ { rc(fpr) | rc(sav) | 0xd, "%f13" },
+ { rc(fpr) | rc(arg) | 0x6, "%f6" },
+ { rc(fpr) | rc(arg) | 0x4, "%f4" },
+ { rc(fpr) | rc(arg) | 0x2, "%f2" },
+ { rc(fpr) | rc(arg) | 0x0, "%f0" },
+ { _NOREG, "<none>" },
+};
+
+/*
+ * Implementation
+ */
+/* CPU feature detection hook; this backend does nothing here. */
+void
+jit_get_cpu(void)
+{
+}
+
+/* Record the number of usable entries in _rvs, excluding the trailing
+ * sentinel entry. */
+void
+_jit_init(jit_state_t *_jit)
+{
+ _jitc->reglen = jit_size(_rvs) - 1;
+}
+
+/* Start a new function.  Closes any function still open, grows the
+ * functions vector in steps of 16 when it is full, resets the
+ * per-function argument/stack bookkeeping, and creates the prolog and
+ * epilog nodes; only the prolog node is linked here. */
+void
+_jit_prolog(jit_state_t *_jit)
+{
+ int32_t offset;
+
+ if (_jitc->function)
+ jit_epilog();
+ assert(jit_regset_cmp_ui(&_jitc->regarg, 0) == 0);
+ jit_regset_set_ui(&_jitc->regsav, 0);
+ offset = _jitc->functions.offset;
+ if (offset >= _jitc->functions.length) {
+ jit_realloc((jit_pointer_t *)&_jitc->functions.ptr,
+ _jitc->functions.length * sizeof(jit_function_t),
+ (_jitc->functions.length + 16) * sizeof(jit_function_t));
+ _jitc->functions.length += 16;
+ }
+ _jitc->function = _jitc->functions.ptr + _jitc->functions.offset++;
+ _jitc->function->self.size = stack_framesize;
+ _jitc->function->self.argi = _jitc->function->self.argf =
+ _jitc->function->self.aoff = _jitc->function->self.alen = 0;
+ /* preallocate 8 bytes if not using a constant data buffer */
+ if (_jitc->no_data)
+ _jitc->function->self.aoff = -8;
+ _jitc->function->self.call = jit_call_default;
+ jit_alloc((jit_pointer_t *)&_jitc->function->regoff,
+ _jitc->reglen * sizeof(int32_t));
+
+ /* _no_link here does not mean the jit_link() call can be removed
+ * by rewriting as:
+ * _jitc->function->prolog = jit_new_node(jit_code_prolog);
+ */
+ _jitc->function->prolog = jit_new_node_no_link(jit_code_prolog);
+ jit_link(_jitc->function->prolog);
+ _jitc->function->prolog->w.w = offset;
+ _jitc->function->epilog = jit_new_node_no_link(jit_code_epilog);
+ /* u: label value
+ * v: offset in blocks vector
+ * w: offset in functions vector
+ */
+ _jitc->function->epilog->w.w = offset;
+
+ jit_regset_new(&_jitc->function->regset);
+}
+
+/* Reserve length bytes of frame-local storage and return its (negative)
+ * offset.  The running offset is first aligned down to 2, 4 or 8
+ * bytes according to length (no alignment for lengths 0 and 1).  The
+ * allocai synth node is recorded only while _jitc->realize is unset. */
+int32_t
+_jit_allocai(jit_state_t *_jit, int32_t length)
+{
+    assert(_jitc->function);
+    if (length == 2)
+        _jitc->function->self.aoff &= -2;
+    else if (length == 3 || length == 4)
+        _jitc->function->self.aoff &= -4;
+    else if (length != 0 && length != 1)
+        _jitc->function->self.aoff &= -8;
+    _jitc->function->self.aoff -= length;
+    if (!_jitc->realize) {
+        jit_inc_synth_ww(allocai, _jitc->function->self.aoff, length);
+        jit_dec_synth();
+    }
+    return _jitc->function->self.aoff;
+}
+
+/* Allocate a run-time sized stack area.  v is the register holding the
+ * requested size; u receives the updated offset.  On first use a
+ * frame slot (aoffoff) is reserved to hold the running offset. */
+void
+_jit_allocar(jit_state_t *_jit, int32_t u, int32_t v)
+{
+ int32_t reg;
+ assert(_jitc->function);
+ jit_inc_synth_ww(allocar, u, v);
+ if (!_jitc->function->allocar) {
+ _jitc->function->aoffoff = jit_allocai(sizeof(int32_t));
+ _jitc->function->allocar = 1;
+ }
+ reg = jit_get_reg(jit_class_gpr);
+ jit_negr(reg, v);
+ jit_andi(reg, reg, -8); /* negated size, aligned to 8 bytes */
+ jit_ldxi_i(u, JIT_FP, _jitc->function->aoffoff);
+ jit_addr(u, u, reg);
+ jit_addr(JIT_SP, JIT_SP, reg); /* move the stack pointer by the same amount */
+ jit_stxi_i(_jitc->function->aoffoff, JIT_FP, u);
+ jit_unget_reg(reg);
+ jit_dec_synth();
+}
+
+/* Return from the current function: emit a jump that is patched to the
+ * function's epilog node. */
+void
+_jit_ret(jit_state_t *_jit)
+{
+ jit_node_t *instr;
+ assert(_jitc->function);
+ jit_inc_synth(ret);
+ /* jump to epilog */
+ instr = jit_jmpi();
+ jit_patch_at(instr, _jitc->function->epilog);
+ jit_dec_synth();
+}
+
+/* Return the integer held in register u: move it to JIT_RET, then
+ * jit_ret(). */
+void
+_jit_retr(jit_state_t *_jit, int32_t u)
+{
+ jit_inc_synth_w(retr, u);
+ jit_movr(JIT_RET, u);
+ jit_ret();
+ jit_dec_synth();
+}
+
+/* Return the integer constant u: load it into JIT_RET, then
+ * jit_ret(). */
+void
+_jit_reti(jit_state_t *_jit, jit_word_t u)
+{
+ jit_inc_synth_w(reti, u);
+ jit_movi(JIT_RET, u);
+ jit_ret();
+ jit_dec_synth();
+}
+
+/* Return the float held in register u: move it to JIT_FRET, then
+ * jit_ret(). */
+void
+_jit_retr_f(jit_state_t *_jit, int32_t u)
+{
+ jit_inc_synth_w(retr_f, u);
+ jit_movr_f(JIT_FRET, u);
+ jit_ret();
+ jit_dec_synth();
+}
+
+/* Return the float constant u: load it into JIT_FRET, then
+ * jit_ret(). */
+void
+_jit_reti_f(jit_state_t *_jit, jit_float32_t u)
+{
+ jit_inc_synth_f(reti_f, u);
+ jit_movi_f(JIT_FRET, u);
+ jit_ret();
+ jit_dec_synth();
+}
+
+/* Return the double held in register u: move it to JIT_FRET, then
+ * jit_ret(). */
+void
+_jit_retr_d(jit_state_t *_jit, int32_t u)
+{
+ jit_inc_synth_w(retr_d, u);
+ jit_movr_d(JIT_FRET, u);
+ jit_ret();
+ jit_dec_synth();
+}
+
+/* Return the double constant u: load it into JIT_FRET, then
+ * jit_ret(). */
+void
+_jit_reti_d(jit_state_t *_jit, jit_float64_t u)
+{
+ jit_inc_synth_d(reti_d, u);
+ jit_movi_d(JIT_FRET, u);
+ jit_ret();
+ jit_dec_synth();
+}
+
+/* Finish the current function: link its epilog node (which must not
+ * have been linked yet) and clear the current-function pointer. */
+void
+_jit_epilog(jit_state_t *_jit)
+{
+ assert(_jitc->function);
+ assert(_jitc->function->epilog->next == NULL);
+ jit_link(_jitc->function->epilog);
+ _jitc->function = NULL;
+}
+
+/* Tell whether argument node u is passed in a register.  Integer
+ * arguments are checked against the integer register range; float
+ * and double arguments against the floating-point range. */
+jit_bool_t
+_jit_arg_register_p(jit_state_t *_jit, jit_node_t *u)
+{
+    if (u->code != jit_code_arg) {
+        assert(u->code == jit_code_arg_f || u->code == jit_code_arg_d);
+        return (jit_arg_f_reg_p(u->u.w));
+    }
+    return (jit_arg_reg_p(u->u.w));
+}
+
+/* Mark the function being called (inside a prepare) or the function
+ * being defined as variadic.  For a definition this also reserves the
+ * stack va_list object and records how many integer and
+ * floating-point argument registers are already consumed by named
+ * arguments. */
+void
+_jit_ellipsis(jit_state_t *_jit)
+{
+ jit_inc_synth(ellipsis);
+ if (_jitc->prepare) {
+ jit_link_prepare();
+ assert(!(_jitc->function->call.call & jit_call_varargs));
+ _jitc->function->call.call |= jit_call_varargs;
+ }
+ else {
+ jit_link_prolog();
+ assert(!(_jitc->function->self.call & jit_call_varargs));
+ _jitc->function->self.call |= jit_call_varargs;
+
+ /* Allocate va_list like object in the stack. */
+ _jitc->function->vaoff = jit_allocai(sizeof(jit_va_list_t));
+
+ /* Initialize gp offset in save area. */
+ if (jit_arg_reg_p(_jitc->function->self.argi))
+ _jitc->function->vagp = _jitc->function->self.argi;
+ else
+ _jitc->function->vagp = 5;
+
+ /* Initialize fp offset in save area. */
+ if (jit_arg_f_reg_p(_jitc->function->self.argf))
+ _jitc->function->vafp = _jitc->function->self.argf;
+ else
+ _jitc->function->vafp = NUM_FLOAT_REG_ARGS;
+ }
+ jit_dec_synth();
+}
+
+/* Pass a va_list held in register u as a call argument; it is pushed
+ * like any other register argument. */
+void
+_jit_va_push(jit_state_t *_jit, int32_t u)
+{
+ jit_inc_synth_w(va_push, u);
+ jit_pushargr(u);
+ jit_dec_synth();
+}
+
+/* Declare the next integer argument of the current function.  While
+ * integer argument registers remain, the node records the register
+ * index; afterwards it records a stack offset and the frame size
+ * grows by one word. */
+jit_node_t *
+_jit_arg(jit_state_t *_jit)
+{
+    jit_node_t *arg_node;
+    int32_t where;
+
+    assert(_jitc->function);
+    if (!jit_arg_reg_p(_jitc->function->self.argi)) {
+        where = _jitc->function->self.size;
+        _jitc->function->self.size += sizeof(jit_word_t);
+    }
+    else
+        where = _jitc->function->self.argi++;
+    arg_node = jit_new_node_ww(jit_code_arg, where,
+                               ++_jitc->function->self.argn);
+    jit_link_prolog();
+    return arg_node;
+}
+
+/* Declare the next float argument of the current function.  While
+ * floating-point argument registers remain, the node records the
+ * register index; afterwards it records a stack offset and the frame
+ * size grows by one word. */
+jit_node_t *
+_jit_arg_f(jit_state_t *_jit)
+{
+    jit_node_t *arg_node;
+    int32_t where;
+
+    assert(_jitc->function);
+    if (!jit_arg_f_reg_p(_jitc->function->self.argf)) {
+        where = _jitc->function->self.size;
+        _jitc->function->self.size += sizeof(jit_word_t);
+    }
+    else
+        where = _jitc->function->self.argf++;
+    arg_node = jit_new_node_ww(jit_code_arg_f, where,
+                               ++_jitc->function->self.argn);
+    jit_link_prolog();
+    return arg_node;
+}
+
+/* Declare the next double argument of the current function.  While
+ * floating-point argument registers remain, the node records the
+ * register index; afterwards it records a stack offset and the frame
+ * size grows by sizeof(jit_float64_t). */
+jit_node_t *
+_jit_arg_d(jit_state_t *_jit)
+{
+    jit_node_t *arg_node;
+    int32_t where;
+
+    assert(_jitc->function);
+    if (!jit_arg_f_reg_p(_jitc->function->self.argf)) {
+        where = _jitc->function->self.size;
+        _jitc->function->self.size += sizeof(jit_float64_t);
+    }
+    else
+        where = _jitc->function->self.argf++;
+    arg_node = jit_new_node_ww(jit_code_arg_d, where,
+                               ++_jitc->function->self.argn);
+    jit_link_prolog();
+    return arg_node;
+}
+
+/* Fetch integer argument v into u as a signed char.  Register
+ * argument n lives in _R2 - n; a stack argument is read from the last
+ * sizeof(int8_t) bytes of its word-sized slot. */
+void
+_jit_getarg_c(jit_state_t *_jit, int32_t u, jit_node_t *v)
+{
+    assert(v->code == jit_code_arg);
+    jit_inc_synth_wp(getarg_c, u, v);
+    if (!jit_arg_reg_p(v->u.w))
+        jit_ldxi_c(u, JIT_FP,
+                   v->u.w + (__WORDSIZE >> 3) - sizeof(int8_t));
+    else
+        jit_extr_c(u, _R2 - v->u.w);
+    jit_dec_synth();
+}
+
+/* Fetch integer argument v into u as an unsigned char.  Register
+ * argument n lives in _R2 - n; a stack argument is read from the last
+ * sizeof(uint8_t) bytes of its word-sized slot. */
+void
+_jit_getarg_uc(jit_state_t *_jit, int32_t u, jit_node_t *v)
+{
+    assert(v->code == jit_code_arg);
+    jit_inc_synth_wp(getarg_uc, u, v);
+    if (!jit_arg_reg_p(v->u.w))
+        jit_ldxi_uc(u, JIT_FP,
+                    v->u.w + (__WORDSIZE >> 3) - sizeof(uint8_t));
+    else
+        jit_extr_uc(u, _R2 - v->u.w);
+    jit_dec_synth();
+}
+
+/* Fetch integer argument v into u as a signed short.  Register
+ * argument n lives in _R2 - n; a stack argument is read from the last
+ * sizeof(int16_t) bytes of its word-sized slot. */
+void
+_jit_getarg_s(jit_state_t *_jit, int32_t u, jit_node_t *v)
+{
+    assert(v->code == jit_code_arg);
+    jit_inc_synth_wp(getarg_s, u, v);
+    if (!jit_arg_reg_p(v->u.w))
+        jit_ldxi_s(u, JIT_FP,
+                   v->u.w + (__WORDSIZE >> 3) - sizeof(int16_t));
+    else
+        jit_extr_s(u, _R2 - v->u.w);
+    jit_dec_synth();
+}
+
+/* Fetch integer argument v into u as an unsigned short.  Register
+ * argument n lives in _R2 - n; a stack argument is read from the last
+ * sizeof(uint16_t) bytes of its word-sized slot. */
+void
+_jit_getarg_us(jit_state_t *_jit, int32_t u, jit_node_t *v)
+{
+    assert(v->code == jit_code_arg);
+    jit_inc_synth_wp(getarg_us, u, v);
+    if (!jit_arg_reg_p(v->u.w))
+        jit_ldxi_us(u, JIT_FP,
+                    v->u.w + (__WORDSIZE >> 3) - sizeof(uint16_t));
+    else
+        jit_extr_us(u, _R2 - v->u.w);
+    jit_dec_synth();
+}
+
+/* Fetch integer argument v into u as a signed int.  A register
+ * argument (in _R2 - n) is copied on 32-bit targets and sign-extended
+ * on 64-bit ones; a stack argument is read from the last
+ * sizeof(int32_t) bytes of its word-sized slot. */
+void
+_jit_getarg_i(jit_state_t *_jit, int32_t u, jit_node_t *v)
+{
+    assert(v->code == jit_code_arg);
+    jit_inc_synth_wp(getarg_i, u, v);
+    if (!jit_arg_reg_p(v->u.w))
+        jit_ldxi_i(u, JIT_FP,
+                   v->u.w + (__WORDSIZE >> 3) - sizeof(int32_t));
+    else {
+#if __WORDSIZE == 32
+        jit_movr(u, _R2 - v->u.w);
+#else
+        jit_extr_i(u, _R2 - v->u.w);
+#endif
+    }
+    jit_dec_synth();
+}
+
+#if __WORDSIZE == 64
+/* Fetch integer argument v into u as an unsigned int (64-bit targets
+ * only).  Register argument n lives in _R2 - n; a stack argument is
+ * read from the last sizeof(uint32_t) bytes of its word-sized slot. */
+void
+_jit_getarg_ui(jit_state_t *_jit, int32_t u, jit_node_t *v)
+{
+    assert(v->code == jit_code_arg);
+    jit_inc_synth_wp(getarg_ui, u, v);
+    if (!jit_arg_reg_p(v->u.w))
+        jit_ldxi_ui(u, JIT_FP,
+                    v->u.w + (__WORDSIZE >> 3) - sizeof(uint32_t));
+    else
+        jit_extr_ui(u, _R2 - v->u.w);
+    jit_dec_synth();
+}
+
+/* Fetch integer argument v into u as a full 64-bit word (64-bit
+ * targets only): copied from _R2 - n for a register argument, loaded
+ * from its frame slot otherwise. */
+void
+_jit_getarg_l(jit_state_t *_jit, int32_t u, jit_node_t *v)
+{
+    assert(v->code == jit_code_arg);
+    jit_inc_synth_wp(getarg_l, u, v);
+    if (!jit_arg_reg_p(v->u.w))
+        jit_ldxi_l(u, JIT_FP, v->u.w);
+    else
+        jit_movr(u, _R2 - v->u.w);
+    jit_dec_synth();
+}
+#endif
+
+/*
+ * putargr: overwrite the integer argument described by node v with the
+ * contents of register u.
+ *
+ * v must be a jit_code_arg node.  The value goes either to the argument
+ * register _R2 - v->u.w or to the word at JIT_FP + v->u.w.
+ */
+void
+_jit_putargr(jit_state_t *_jit, int32_t u, jit_node_t *v)
+{
+    jit_word_t where = v->u.w;
+
+    assert(v->code == jit_code_arg);
+    jit_inc_synth_wp(putargr, u, v);
+    if (jit_arg_reg_p(where)) {
+        jit_movr(_R2 - where, u);
+    }
+    else {
+        jit_stxi(where, JIT_FP, u);
+    }
+    jit_dec_synth();
+}
+
+/*
+ * putargi: overwrite the integer argument described by node v with the
+ * immediate u.
+ *
+ * For a stack argument the immediate is first materialized in a
+ * temporary general purpose register, which is released afterwards.
+ */
+void
+_jit_putargi(jit_state_t *_jit, jit_word_t u, jit_node_t *v)
+{
+    int32_t scratch;
+
+    assert(v->code == jit_code_arg);
+    jit_inc_synth_wp(putargi, u, v);
+    if (jit_arg_reg_p(v->u.w)) {
+        jit_movi(_R2 - v->u.w, u);
+    }
+    else {
+        scratch = jit_get_reg(jit_class_gpr);
+        jit_movi(scratch, u);
+        jit_stxi(v->u.w, JIT_FP, scratch);
+        jit_unget_reg(scratch);
+    }
+    jit_dec_synth();
+}
+
+/*
+ * getarg_f: read the float argument described by node v into register u.
+ *
+ * v must be a jit_code_arg_f node.  A register argument lives in
+ * _F0 - v->u.w.  A stack argument is loaded relative to JIT_FP; when
+ * __WORDSIZE is 64 the float sits in the last four bytes of its slot.
+ */
+void
+_jit_getarg_f(jit_state_t *_jit, int32_t u, jit_node_t *v)
+{
+    assert(v->code == jit_code_arg_f);
+    jit_inc_synth_wp(getarg_f, u, v);
+    if (jit_arg_f_reg_p(v->u.w)) {
+        jit_movr_f(u, _F0 - v->u.w);
+    }
+    else {
+        jit_word_t offset = v->u.w;
+#if __WORDSIZE == 64
+        offset += (__WORDSIZE >> 3) - sizeof(jit_float32_t);
+#endif
+        jit_ldxi_f(u, JIT_FP, offset);
+    }
+    jit_dec_synth();
+}
+
+/*
+ * putargr_f: overwrite the float argument described by node v with the
+ * contents of float register u.
+ *
+ * The stack offset gets the same adjustment as in the getarg_f path:
+ * when __WORDSIZE is 64 the float sits in the last four bytes of its
+ * slot.
+ */
+void
+_jit_putargr_f(jit_state_t *_jit, int32_t u, jit_node_t *v)
+{
+    assert(v->code == jit_code_arg_f);
+    jit_inc_synth_wp(putargr_f, u, v);
+    if (jit_arg_f_reg_p(v->u.w)) {
+        jit_movr_f(_F0 - v->u.w, u);
+    }
+    else {
+        jit_word_t offset = v->u.w;
+#if __WORDSIZE == 64
+        offset += (__WORDSIZE >> 3) - sizeof(jit_float32_t);
+#endif
+        jit_stxi_f(offset, JIT_FP, u);
+    }
+    jit_dec_synth();
+}
+
+/*
+ * putargi_f: overwrite the float argument described by node v with the
+ * immediate u.
+ *
+ * For a stack argument the immediate is first loaded into a temporary
+ * float register, stored, and the register is then released.
+ */
+void
+_jit_putargi_f(jit_state_t *_jit, jit_float32_t u, jit_node_t *v)
+{
+    int32_t scratch;
+
+    assert(v->code == jit_code_arg_f);
+    jit_inc_synth_fp(putargi_f, u, v);
+    if (jit_arg_f_reg_p(v->u.w)) {
+        jit_movi_f(_F0 - v->u.w, u);
+    }
+    else {
+        jit_word_t offset = v->u.w;
+#if __WORDSIZE == 64
+        offset += (__WORDSIZE >> 3) - sizeof(jit_float32_t);
+#endif
+        scratch = jit_get_reg(jit_class_fpr);
+        jit_movi_f(scratch, u);
+        jit_stxi_f(offset, JIT_FP, scratch);
+        jit_unget_reg(scratch);
+    }
+    jit_dec_synth();
+}
+
+/*
+ * getarg_d: read the double argument described by node v into float
+ * register u, from register _F0 - v->u.w or from JIT_FP + v->u.w.
+ */
+void
+_jit_getarg_d(jit_state_t *_jit, int32_t u, jit_node_t *v)
+{
+    jit_word_t where = v->u.w;
+
+    assert(v->code == jit_code_arg_d);
+    jit_inc_synth_wp(getarg_d, u, v);
+    if (jit_arg_f_reg_p(where)) {
+        jit_movr_d(u, _F0 - where);
+    }
+    else {
+        jit_ldxi_d(u, JIT_FP, where);
+    }
+    jit_dec_synth();
+}
+
+/*
+ * putargr_d: overwrite the double argument described by node v with the
+ * contents of float register u.
+ */
+void
+_jit_putargr_d(jit_state_t *_jit, int32_t u, jit_node_t *v)
+{
+    jit_word_t where = v->u.w;
+
+    assert(v->code == jit_code_arg_d);
+    jit_inc_synth_wp(putargr_d, u, v);
+    if (jit_arg_f_reg_p(where)) {
+        jit_movr_d(_F0 - where, u);
+    }
+    else {
+        jit_stxi_d(where, JIT_FP, u);
+    }
+    jit_dec_synth();
+}
+
+/*
+ * putargi_d: overwrite the double argument described by node v with
+ * the immediate u.
+ *
+ * For a stack argument the immediate is first loaded into a temporary
+ * float register, stored, and the register is then released.
+ */
+void
+_jit_putargi_d(jit_state_t *_jit, jit_float64_t u, jit_node_t *v)
+{
+    int32_t scratch;
+
+    assert(v->code == jit_code_arg_d);
+    jit_inc_synth_dp(putargi_d, u, v);
+    if (jit_arg_f_reg_p(v->u.w)) {
+        jit_movi_d(_F0 - v->u.w, u);
+    }
+    else {
+        scratch = jit_get_reg(jit_class_fpr);
+        jit_movi_d(scratch, u);
+        jit_stxi_d(v->u.w, JIT_FP, scratch);
+        jit_unget_reg(scratch);
+    }
+    jit_dec_synth();
+}
+
+/*
+ * pushargr: pass register u as the next integer argument of the call
+ * being prepared.
+ *
+ * While call.argi still names an argument register the value is moved
+ * there and call.argi advances.  Afterwards the value is stored at
+ * JIT_SP + stack_framesize + call.size and call.size grows by one word.
+ */
+void
+_jit_pushargr(jit_state_t *_jit, int32_t u)
+{
+    assert(_jitc->function);
+    jit_inc_synth_w(pushargr, u);
+    jit_link_prepare();
+    if (jit_arg_reg_p(_jitc->function->call.argi)) {
+        jit_movr(_R2 - _jitc->function->call.argi, u);
+        _jitc->function->call.argi++;
+    }
+    else {
+        jit_stxi(_jitc->function->call.size + stack_framesize, JIT_SP, u);
+        _jitc->function->call.size += sizeof(jit_word_t);
+    }
+    jit_dec_synth();
+}
+
+/*
+ * pushargi: pass the immediate u as the next integer argument of the
+ * call being prepared.
+ *
+ * Same placement rules as the register variant; for a stack argument
+ * the immediate goes through a temporary general purpose register.
+ */
+void
+_jit_pushargi(jit_state_t *_jit, jit_word_t u)
+{
+    int32_t scratch;
+
+    assert(_jitc->function);
+    jit_inc_synth_w(pushargi, u);
+    jit_link_prepare();
+    if (jit_arg_reg_p(_jitc->function->call.argi)) {
+        jit_movi(_R2 - _jitc->function->call.argi, u);
+        _jitc->function->call.argi++;
+    }
+    else {
+        scratch = jit_get_reg(jit_class_gpr);
+        jit_movi(scratch, u);
+        jit_stxi(_jitc->function->call.size + stack_framesize,
+                 JIT_SP, scratch);
+        jit_unget_reg(scratch);
+        _jitc->function->call.size += sizeof(jit_word_t);
+    }
+    jit_dec_synth();
+}
+
+/*
+ * pushargr_f: pass float register u as the next float argument of the
+ * call being prepared.
+ *
+ * While call.argf still names a float argument register the value is
+ * moved there.  Afterwards it is stored in the outgoing area at
+ * JIT_SP + stack_framesize + call.size (in the last four bytes of the
+ * slot when __WORDSIZE is 64) and call.size grows by one word.
+ */
+void
+_jit_pushargr_f(jit_state_t *_jit, int32_t u)
+{
+    assert(_jitc->function);
+    jit_inc_synth_w(pushargr_f, u);
+    jit_link_prepare();
+    if (jit_arg_f_reg_p(_jitc->function->call.argf)) {
+        jit_movr_f(_F0 - _jitc->function->call.argf, u);
+        _jitc->function->call.argf++;
+    }
+    else {
+        jit_word_t offset = _jitc->function->call.size + stack_framesize;
+#if __WORDSIZE == 64
+        offset += (__WORDSIZE >> 3) - sizeof(jit_float32_t);
+#endif
+        jit_stxi_f(offset, JIT_SP, u);
+        _jitc->function->call.size += sizeof(jit_word_t);
+    }
+    jit_dec_synth();
+}
+
+/*
+ * pushargi_f: pass the float immediate u as the next float argument of
+ * the call being prepared.
+ *
+ * Same placement rules as the register variant; for a stack argument
+ * the immediate goes through a temporary float register.
+ */
+void
+_jit_pushargi_f(jit_state_t *_jit, jit_float32_t u)
+{
+    int32_t scratch;
+
+    assert(_jitc->function);
+    jit_inc_synth_f(pushargi_f, u);
+    jit_link_prepare();
+    if (jit_arg_f_reg_p(_jitc->function->call.argf)) {
+        jit_movi_f(_F0 - _jitc->function->call.argf, u);
+        _jitc->function->call.argf++;
+    }
+    else {
+        jit_word_t offset;
+
+        scratch = jit_get_reg(jit_class_fpr);
+        jit_movi_f(scratch, u);
+        offset = _jitc->function->call.size + stack_framesize;
+#if __WORDSIZE == 64
+        offset += (__WORDSIZE >> 3) - sizeof(jit_float32_t);
+#endif
+        jit_stxi_f(offset, JIT_SP, scratch);
+        jit_unget_reg(scratch);
+        _jitc->function->call.size += sizeof(jit_word_t);
+    }
+    jit_dec_synth();
+}
+
+/*
+ * pushargr_d: pass float register u as the next double argument of the
+ * call being prepared.
+ *
+ * Uses the float argument register _F0 - call.argf while one is
+ * available, otherwise stores to JIT_SP + stack_framesize + call.size
+ * and grows call.size by sizeof(jit_float64_t).
+ */
+void
+_jit_pushargr_d(jit_state_t *_jit, int32_t u)
+{
+    assert(_jitc->function);
+    jit_inc_synth_w(pushargr_d, u);
+    jit_link_prepare();
+    if (jit_arg_f_reg_p(_jitc->function->call.argf)) {
+        jit_movr_d(_F0 - _jitc->function->call.argf, u);
+        _jitc->function->call.argf++;
+    }
+    else {
+        jit_stxi_d(_jitc->function->call.size + stack_framesize, JIT_SP, u);
+        _jitc->function->call.size += sizeof(jit_float64_t);
+    }
+    jit_dec_synth();
+}
+
+/*
+ * pushargi_d: pass the double immediate u as the next double argument
+ * of the call being prepared.
+ *
+ * Same placement rules as the register variant; for a stack argument
+ * the immediate goes through a temporary float register.
+ */
+void
+_jit_pushargi_d(jit_state_t *_jit, jit_float64_t u)
+{
+    int32_t scratch;
+
+    assert(_jitc->function);
+    jit_inc_synth_d(pushargi_d, u);
+    jit_link_prepare();
+    if (jit_arg_f_reg_p(_jitc->function->call.argf)) {
+        jit_movi_d(_F0 - _jitc->function->call.argf, u);
+        _jitc->function->call.argf++;
+    }
+    else {
+        scratch = jit_get_reg(jit_class_fpr);
+        jit_movi_d(scratch, u);
+        jit_stxi_d(_jitc->function->call.size + stack_framesize,
+                   JIT_SP, scratch);
+        jit_unget_reg(scratch);
+        _jitc->function->call.size += sizeof(jit_float64_t);
+    }
+    jit_dec_synth();
+}
+
+/*
+ * Tell whether register regno carries an argument of the call node.
+ *
+ * node->v.w and node->w.w hold the number of integer and float
+ * argument registers consumed by the call (they are filled in by the
+ * finishr and finishi helpers from call.argi and call.argf).
+ *
+ * Returns 1 when regno belongs to the jit_class_arg class and its
+ * position counted from _R2 (integer) or from _F0 (float) is below the
+ * matching count, 0 otherwise.
+ *
+ * The position is computed in a separate variable.  The previous code
+ * overwrote regno with _R2 - regno and then derived the float position
+ * from that already transformed value, so a float argument register
+ * could never be reported.
+ */
+jit_bool_t
+_jit_regarg_p(jit_state_t *_jit, jit_node_t *node, int32_t regno)
+{
+    int32_t spec;
+    int32_t position;
+
+    spec = jit_class(_rvs[regno].spec);
+    if (spec & jit_class_arg) {
+        /* Integer argument registers are counted downwards from _R2. */
+        position = _R2 - regno;
+        if (position >= 0 && position < node->v.w)
+            return (1);
+        if (spec & jit_class_fpr) {
+            /* Float argument registers are counted downwards from _F0. */
+            position = _F0 - regno;
+            if (position >= 0 && position < node->w.w)
+                return (1);
+        }
+    }
+    return (0);
+}
+
+/*
+ * finishr: complete the call being prepared with an indirect call
+ * through register r0.
+ *
+ * Raises self.alen to the outgoing argument size if needed, records
+ * the number of integer and float argument registers used on the call
+ * node, then resets the per call counters and the prepare flag.
+ */
+void
+_jit_finishr(jit_state_t *_jit, int32_t r0)
+{
+    jit_node_t *call;
+
+    assert(_jitc->function);
+    jit_inc_synth_w(finishr, r0);
+    if (_jitc->function->self.alen < _jitc->function->call.size) {
+        _jitc->function->self.alen = _jitc->function->call.size;
+    }
+    call = jit_callr(r0);
+    call->v.w = _jitc->function->call.argi;
+    call->w.w = _jitc->function->call.argf;
+    _jitc->function->call.size = 0;
+    _jitc->function->call.argf = 0;
+    _jitc->function->call.argi = 0;
+    _jitc->prepare = 0;
+    jit_dec_synth();
+}
+
+/*
+ * finishi: complete the call being prepared with a call to the
+ * address i0 and return the call node.
+ *
+ * Raises self.alen to the outgoing argument size if needed, records
+ * the number of integer and float argument registers used on the call
+ * node, then resets the per call counters and the prepare flag.
+ */
+jit_node_t *
+_jit_finishi(jit_state_t *_jit, jit_pointer_t i0)
+{
+    jit_node_t *call;
+
+    assert(_jitc->function);
+    jit_inc_synth_w(finishi, (jit_word_t)i0);
+    if (_jitc->function->self.alen < _jitc->function->call.size) {
+        _jitc->function->self.alen = _jitc->function->call.size;
+    }
+    call = jit_calli(i0);
+    call->v.w = _jitc->function->call.argi;
+    call->w.w = _jitc->function->call.argf;
+    _jitc->function->call.size = 0;
+    _jitc->function->call.argf = 0;
+    _jitc->function->call.argi = 0;
+    _jitc->prepare = 0;
+    jit_dec_synth();
+    return (call);
+}
+
+/*
+ * retval_c: copy the call result from JIT_RET into r0 through
+ * jit_extr_c, bracketed by the synth begin/end markers.
+ */
+void
+_jit_retval_c(jit_state_t *_jit, int32_t r0)
+{
+    jit_inc_synth_w(retval_c, r0);
+    jit_extr_c(r0, JIT_RET);
+    jit_dec_synth();
+}
+
+/*
+ * retval_uc: copy the call result from JIT_RET into r0 through
+ * jit_extr_uc, bracketed by the synth begin/end markers.
+ */
+void
+_jit_retval_uc(jit_state_t *_jit, int32_t r0)
+{
+    jit_inc_synth_w(retval_uc, r0);
+    jit_extr_uc(r0, JIT_RET);
+    jit_dec_synth();
+}
+
+/*
+ * retval_s: copy the call result from JIT_RET into r0 through
+ * jit_extr_s, bracketed by the synth begin/end markers.
+ */
+void
+_jit_retval_s(jit_state_t *_jit, int32_t r0)
+{
+    jit_inc_synth_w(retval_s, r0);
+    jit_extr_s(r0, JIT_RET);
+    jit_dec_synth();
+}
+
+/*
+ * retval_us: copy the call result from JIT_RET into r0 through
+ * jit_extr_us, bracketed by the synth begin/end markers.
+ */
+void
+_jit_retval_us(jit_state_t *_jit, int32_t r0)
+{
+    jit_inc_synth_w(retval_us, r0);
+    jit_extr_us(r0, JIT_RET);
+    jit_dec_synth();
+}
+
+/*
+ * retval_i: copy the call result from JIT_RET into r0 as an int.
+ * When __WORDSIZE is 64 this goes through jit_extr_i, otherwise a
+ * plain register move is used.
+ */
+void
+_jit_retval_i(jit_state_t *_jit, int32_t r0)
+{
+    jit_inc_synth_w(retval_i, r0);
+#if __WORDSIZE == 64
+    jit_extr_i(r0, JIT_RET);
+#else
+    jit_movr(r0, JIT_RET);
+#endif
+    jit_dec_synth();
+}
+
+#if __WORDSIZE == 64
+/*
+ * retval_ui (only built when __WORDSIZE is 64): copy the call result
+ * from JIT_RET into r0 through jit_extr_ui.
+ */
+void
+_jit_retval_ui(jit_state_t *_jit, int32_t r0)
+{
+    jit_inc_synth_w(retval_ui, r0);
+    jit_extr_ui(r0, JIT_RET);
+    jit_dec_synth();
+}
+
+/*
+ * retval_l (only built when __WORDSIZE is 64): copy the full word call
+ * result from JIT_RET into r0 with a plain register move.
+ */
+void
+_jit_retval_l(jit_state_t *_jit, int32_t r0)
+{
+    jit_inc_synth_w(retval_l, r0);
+    jit_movr(r0, JIT_RET);
+    jit_dec_synth();
+}
+#endif
+
+/*
+ * retval_f: copy the float call result from JIT_FRET into float
+ * register r0.
+ */
+void
+_jit_retval_f(jit_state_t *_jit, int32_t r0)
+{
+    jit_inc_synth_w(retval_f, r0);
+    jit_movr_f(r0, JIT_FRET);
+    jit_dec_synth();
+}
+
+/*
+ * retval_d: copy the double call result from JIT_FRET into float
+ * register r0.
+ */
+void
+_jit_retval_d(jit_state_t *_jit, int32_t r0)
+{
+    jit_inc_synth_w(retval_d, r0);
+    jit_movr_d(r0, JIT_FRET);
+    jit_dec_synth();
+}
+
+jit_pointer_t
+_emit_code(jit_state_t *_jit)
+{ /*
+ * Walk the node list starting at _jitc->head and emit one instruction
+ * sequence per node by dispatching on node->code through the case_*
+ * helper macros defined below.
+ *
+ * - Returns NULL as soon as the program counter reaches code.end.
+ * - A label (or epilog) records the current pc in node->u.w and is
+ *   marked with jit_flag_patch once defined.
+ * - A branch, jump, call or movi whose target is not yet defined is
+ *   emitted through the *_p variant and registered with patch(); the
+ *   loop at the end of the function resolves these with patch_at().
+ * - On an epilog with _jitc->again set, the state saved at the prolog
+ *   is restored and the whole function is emitted again.
+ * - On success the emitted range is passed to jit_flush() and
+ *   _jit->code.ptr is returned.
+ */
+ jit_node_t *node;
+ jit_node_t *temp;
+ jit_word_t word;
+ int32_t value;
+ int32_t offset;
+ struct {
+ jit_node_t *node;
+ jit_word_t word;
+#if DEVEL_DISASSEMBLER
+ jit_word_t prevw;
+#endif
+ int32_t patch_offset;
+ } undo; /* snapshot taken at each prolog, used to rewind and re-emit */
+#if DEVEL_DISASSEMBLER
+ jit_word_t prevw;
+#endif
+
+ _jitc->function = NULL;
+
+ jit_reglive_setup();
+
+ undo.word = 0;
+ undo.node = NULL;
+ undo.patch_offset = 0;
+
+#define assert_data(node) /**/
+#define case_rr(name, type) \
+ case jit_code_##name##r##type: \
+ name##r##type(rn(node->u.w), rn(node->v.w)); \
+ break
+#define case_rw(name, type) \
+ case jit_code_##name##i##type: \
+ name##i##type(rn(node->u.w), node->v.w); \
+ break
+#define case_wr(name, type) \
+ case jit_code_##name##i##type: \
+ name##i##type(node->u.w, rn(node->v.w)); \
+ break
+#define case_rrr(name, type) \
+ case jit_code_##name##r##type: \
+ name##r##type(rn(node->u.w), \
+ rn(node->v.w), rn(node->w.w)); \
+ break
+#define case_rrrr(name, type) \
+ case jit_code_##name##r##type: \
+ name##r##type(rn(node->u.q.l), rn(node->u.q.h), \
+ rn(node->v.w), rn(node->w.w)); \
+ break
+#define case_rrw(name, type) \
+ case jit_code_##name##i##type: \
+ name##i##type(rn(node->u.w), rn(node->v.w), node->w.w); \
+ break
+#define case_rrrw(name, type) \
+ case jit_code_##name##i##type: \
+ name##i##type(rn(node->u.q.l), rn(node->u.q.h), \
+ rn(node->v.w), node->w.w); \
+ break
+#define case_rrf(name) \
+ case jit_code_##name##i_f: \
+ assert_data(node); \
+ name##i_f(rn(node->u.w), rn(node->v.w), \
+ (jit_float32_t *)node->w.n->u.w); \
+ break
+#define case_rrd(name) \
+ case jit_code_##name##i_d: \
+ assert_data(node); \
+ name##i_d(rn(node->u.w), rn(node->v.w), \
+ (jit_float64_t *)node->w.n->u.w); \
+ break
+#define case_wrr(name, type) \
+ case jit_code_##name##i##type: \
+ name##i##type(node->u.w, rn(node->v.w), rn(node->w.w)); \
+ break
+#define case_brr(name, type) \
+ case jit_code_##name##r##type: \
+ temp = node->u.n; \
+ assert(temp->code == jit_code_label || \
+ temp->code == jit_code_epilog); \
+ if (temp->flag & jit_flag_patch) \
+ name##r##type(temp->u.w, rn(node->v.w), \
+ rn(node->w.w)); \
+ else { \
+ word = name##r##type##_p(_jit->pc.w, \
+ rn(node->v.w), \
+ rn(node->w.w)); \
+ patch(word, node); \
+ } \
+ break
+#define case_brw(name, type) \
+ case jit_code_##name##i##type: \
+ temp = node->u.n; \
+ assert(temp->code == jit_code_label || \
+ temp->code == jit_code_epilog); \
+ if (temp->flag & jit_flag_patch) \
+ name##i##type(temp->u.w, \
+ rn(node->v.w), node->w.w); \
+ else { \
+ word = name##i##type##_p(_jit->pc.w, \
+ rn(node->v.w), node->w.w); \
+ patch(word, node); \
+ } \
+ break; /* NOTE(review): unlike the sibling macros this one ends with a semicolon */
+#define case_brf(name) \
+ case jit_code_##name##i_f: \
+ temp = node->u.n; \
+ assert(temp->code == jit_code_label || \
+ temp->code == jit_code_epilog); \
+ if (temp->flag & jit_flag_patch) \
+ name##i_f(temp->u.w, rn(node->v.w), \
+ (jit_float32_t *)node->w.n->u.w); \
+ else { \
+ word = name##i_f_p(_jit->pc.w, rn(node->v.w), \
+ (jit_float32_t *)node->w.n->u.w);\
+ patch(word, node); \
+ } \
+ break
+#define case_brd(name) \
+ case jit_code_##name##i_d: \
+ temp = node->u.n; \
+ assert(temp->code == jit_code_label || \
+ temp->code == jit_code_epilog); \
+ if (temp->flag & jit_flag_patch) \
+ name##i_d(temp->u.w, rn(node->v.w), \
+ (jit_float64_t *)node->w.n->u.w); \
+ else { \
+ word = name##i_d_p(_jit->pc.w, rn(node->v.w), \
+ (jit_float64_t *)node->w.n->u.w);\
+ patch(word, node); \
+ } \
+ break
+#if DEVEL_DISASSEMBLER
+ prevw = _jit->pc.w;
+#endif
+ for (node = _jitc->head; node; node = node->next) {
+ if (_jit->pc.uc >= _jitc->code.end) /* pc reached code.end: give up */
+ return (NULL);
+
+#if DEVEL_DISASSEMBLER
+ node->offset = (jit_uword_t)_jit->pc.w - (jit_uword_t)prevw;
+ prevw = _jit->pc.w;
+#endif
+ value = jit_classify(node->code);
+ jit_regarg_set(node, value);
+ switch (node->code) {
+ case jit_code_align:
+ assert(!(node->u.w & (node->u.w - 1)) &&
+ node->u.w <= sizeof(jit_word_t));
+ if (node->u.w == sizeof(jit_word_t) &&
+ (word = _jit->pc.w & (sizeof(jit_word_t) - 1)))
+ nop(sizeof(jit_word_t) - word);
+ break;
+ case jit_code_note: case jit_code_name:
+ node->u.w = _jit->pc.w;
+ break;
+ case jit_code_label:
+ if ((node->link || (node->flag & jit_flag_use)) &&
+ (word = _jit->pc.w & 3))
+ nop(4 - word); /* pad with nops up to a multiple of 4 */
+ /* remember label is defined */
+ node->flag |= jit_flag_patch;
+ node->u.w = _jit->pc.w;
+ break;
+ case_rrr(add,);
+ case_rrw(add,);
+ case_rrr(addc,);
+ case_rrw(addc,);
+ case_rrr(addx,);
+ case_rrw(addx,);
+ case_rrr(sub,);
+ case_rrw(sub,);
+ case_rrr(subc,);
+ case_rrw(subc,);
+ case_rrr(subx,);
+ case_rrw(subx,);
+ case_rrw(rsb,);
+ case_rrr(mul,);
+ case_rrw(mul,);
+ case_rrrr(qmul,);
+ case_rrrw(qmul,);
+ case_rrrr(qmul, _u);
+ case_rrrw(qmul, _u);
+ case_rrr(div,);
+ case_rrw(div,);
+ case_rrr(div, _u);
+ case_rrw(div, _u);
+ case_rrr(rem,);
+ case_rrw(rem,);
+ case_rrr(rem, _u);
+ case_rrw(rem, _u);
+ case_rrrr(qdiv,);
+ case_rrrw(qdiv,);
+ case_rrrr(qdiv, _u);
+ case_rrrw(qdiv, _u);
+ case_rrr(lsh,);
+ case_rrw(lsh,);
+ case_rrr(rsh,);
+ case_rrw(rsh,);
+ case_rrr(rsh, _u);
+ case_rrw(rsh, _u);
+ case_rr(neg,);
+ case_rr(com,);
+ case_rrr(and,);
+ case_rrw(and,);
+ case_rrr(or,);
+ case_rrw(or,);
+ case_rrr(xor,);
+ case_rrw(xor,);
+ case_rr(trunc, _f_i);
+ case_rr(trunc, _d_i);
+#if __WORDSIZE == 64
+ case_rr(trunc, _f_l);
+ case_rr(trunc, _d_l);
+#endif
+ case_rr(ld, _c);
+ case_rw(ld, _c);
+ case_rr(ld, _uc);
+ case_rw(ld, _uc);
+ case_rr(ld, _s);
+ case_rw(ld, _s);
+ case_rr(ld, _us);
+ case_rw(ld, _us);
+ case_rr(ld, _i);
+ case_rw(ld, _i);
+#if __WORDSIZE == 64
+ case_rr(ld, _ui);
+ case_rw(ld, _ui);
+ case_rr(ld, _l);
+ case_rw(ld, _l);
+#endif
+ case_rrr(ldx, _c);
+ case_rrw(ldx, _c);
+ case_rrr(ldx, _uc);
+ case_rrw(ldx, _uc);
+ case_rrr(ldx, _s);
+ case_rrw(ldx, _s);
+ case_rrr(ldx, _us);
+ case_rrw(ldx, _us);
+ case_rrr(ldx, _i);
+ case_rrw(ldx, _i);
+#if __WORDSIZE == 64
+ case_rrr(ldx, _ui);
+ case_rrw(ldx, _ui);
+ case_rrr(ldx, _l);
+ case_rrw(ldx, _l);
+#endif
+ case_rr(st, _c);
+ case_wr(st, _c);
+ case_rr(st, _s);
+ case_wr(st, _s);
+ case_rr(st, _i);
+ case_wr(st, _i);
+#if __WORDSIZE == 64
+ case_rr(st, _l);
+ case_wr(st, _l);
+#endif
+ case_rrr(stx, _c);
+ case_wrr(stx, _c);
+ case_rrr(stx, _s);
+ case_wrr(stx, _s);
+ case_rrr(stx, _i);
+ case_wrr(stx, _i);
+#if __WORDSIZE == 64
+ case_rrr(stx, _l);
+ case_wrr(stx, _l);
+#endif
+ case_rr(hton, _us);
+ case_rr(hton, _ui);
+#if __WORDSIZE == 64
+ case_rr(hton, _ul);
+#endif
+ case_rr(ext, _c);
+ case_rr(ext, _uc);
+ case_rr(ext, _s);
+ case_rr(ext, _us);
+#if __WORDSIZE == 64
+ case_rr(ext, _i);
+ case_rr(ext, _ui);
+#endif
+ case_rr(mov,);
+ case jit_code_movi:
+ if (node->flag & jit_flag_node) {
+ temp = node->v.n;
+ if (temp->code == jit_code_data ||
+ (temp->code == jit_code_label &&
+ (temp->flag & jit_flag_patch)))
+ movi(rn(node->u.w), temp->u.w);
+ else {
+ assert(temp->code == jit_code_label ||
+ temp->code == jit_code_epilog);
+ word = movi_p(rn(node->u.w), temp->u.w);
+ patch(word, node);
+ }
+ }
+ else
+ movi(rn(node->u.w), node->v.w);
+ break;
+ case_rrr(lt,);
+ case_rrw(lt,);
+ case_rrr(lt, _u);
+ case_rrw(lt, _u);
+ case_rrr(le,);
+ case_rrw(le,);
+ case_rrr(le, _u);
+ case_rrw(le, _u);
+ case_rrr(eq,);
+ case_rrw(eq,);
+ case_rrr(ge,);
+ case_rrw(ge,);
+ case_rrr(ge, _u);
+ case_rrw(ge, _u);
+ case_rrr(gt,);
+ case_rrw(gt,);
+ case_rrr(gt, _u);
+ case_rrw(gt, _u);
+ case_rrr(ne,);
+ case_rrw(ne,);
+ case_brr(blt,);
+ case_brw(blt,);
+ case_brr(blt, _u);
+ case_brw(blt, _u);
+ case_brr(ble,);
+ case_brw(ble,);
+ case_brr(ble, _u);
+ case_brw(ble, _u);
+ case_brr(beq,);
+ case_brw(beq,);
+ case_brr(bge,);
+ case_brw(bge,);
+ case_brr(bge, _u);
+ case_brw(bge, _u);
+ case_brr(bgt,);
+ case_brw(bgt,);
+ case_brr(bgt, _u);
+ case_brw(bgt, _u);
+ case_brr(bne,);
+ case_brw(bne,);
+ case_brr(boadd,);
+ case_brw(boadd,);
+ case_brr(boadd, _u);
+ case_brw(boadd, _u);
+ case_brr(bxadd,);
+ case_brw(bxadd,);
+ case_brr(bxadd, _u);
+ case_brw(bxadd, _u);
+ case_brr(bosub,);
+ case_brw(bosub,);
+ case_brr(bosub, _u);
+ case_brw(bosub, _u);
+ case_brr(bxsub,);
+ case_brw(bxsub,);
+ case_brr(bxsub, _u);
+ case_brw(bxsub, _u);
+ case_brr(bms,);
+ case_brw(bms,);
+ case_brr(bmc,);
+ case_brw(bmc,);
+ case_rrr(add, _f);
+ case_rrf(add);
+ case_rrr(sub, _f);
+ case_rrf(sub);
+ case_rrf(rsb);
+ case_rrr(mul, _f);
+ case_rrf(mul);
+ case_rrr(div, _f);
+ case_rrf(div);
+ case_rr(abs, _f);
+ case_rr(neg, _f);
+ case_rr(sqrt, _f);
+ case_rr(ext, _f);
+ case_rr(ld, _f);
+ case_rw(ld, _f);
+ case_rrr(ldx, _f);
+ case_rrw(ldx, _f);
+ case_rr(st, _f);
+ case_wr(st, _f);
+ case_rrr(stx, _f);
+ case_wrr(stx, _f);
+ case_rr(mov, _f);
+ case jit_code_movi_f:
+ assert_data(node);
+ movi_f(rn(node->u.w), (jit_float32_t *)node->v.n->u.w);
+ break;
+ case_rr(ext, _d_f);
+ case_rrr(lt, _f);
+ case_rrf(lt);
+ case_rrr(le, _f);
+ case_rrf(le);
+ case_rrr(eq, _f);
+ case_rrf(eq);
+ case_rrr(ge, _f);
+ case_rrf(ge);
+ case_rrr(gt, _f);
+ case_rrf(gt);
+ case_rrr(ne, _f);
+ case_rrf(ne);
+ case_rrr(unlt, _f);
+ case_rrf(unlt);
+ case_rrr(unle, _f);
+ case_rrf(unle);
+ case_rrr(uneq, _f);
+ case_rrf(uneq);
+ case_rrr(unge, _f);
+ case_rrf(unge);
+ case_rrr(ungt, _f);
+ case_rrf(ungt);
+ case_rrr(ltgt, _f);
+ case_rrf(ltgt);
+ case_rrr(ord, _f);
+ case_rrf(ord);
+ case_rrr(unord, _f);
+ case_rrf(unord);
+ case_brr(blt, _f);
+ case_brf(blt);
+ case_brr(ble, _f);
+ case_brf(ble);
+ case_brr(beq, _f);
+ case_brf(beq);
+ case_brr(bge, _f);
+ case_brf(bge);
+ case_brr(bgt, _f);
+ case_brf(bgt);
+ case_brr(bne, _f);
+ case_brf(bne);
+ case_brr(bunlt, _f);
+ case_brf(bunlt);
+ case_brr(bunle, _f);
+ case_brf(bunle);
+ case_brr(buneq, _f);
+ case_brf(buneq);
+ case_brr(bunge, _f);
+ case_brf(bunge);
+ case_brr(bungt, _f);
+ case_brf(bungt);
+ case_brr(bltgt, _f);
+ case_brf(bltgt);
+ case_brr(bord, _f);
+ case_brf(bord);
+ case_brr(bunord, _f);
+ case_brf(bunord);
+ case_rrr(add, _d);
+ case_rrd(add);
+ case_rrr(sub, _d);
+ case_rrd(sub);
+ case_rrd(rsb);
+ case_rrr(mul, _d);
+ case_rrd(mul);
+ case_rrr(div, _d);
+ case_rrd(div);
+ case_rr(abs, _d);
+ case_rr(neg, _d);
+ case_rr(sqrt, _d);
+ case_rr(ext, _d);
+ case_rr(ld, _d);
+ case_rw(ld, _d);
+ case_rrr(ldx, _d);
+ case_rrw(ldx, _d);
+ case_rr(st, _d);
+ case_wr(st, _d);
+ case_rrr(stx, _d);
+ case_wrr(stx, _d);
+ case_rr(mov, _d);
+ case jit_code_movi_d:
+ assert_data(node);
+ movi_d(rn(node->u.w), (jit_float64_t *)node->v.n->u.w);
+ break;
+ case_rr(ext, _f_d);
+ case_rrr(lt, _d);
+ case_rrd(lt);
+ case_rrr(le, _d);
+ case_rrd(le);
+ case_rrr(eq, _d);
+ case_rrd(eq);
+ case_rrr(ge, _d);
+ case_rrd(ge);
+ case_rrr(gt, _d);
+ case_rrd(gt);
+ case_rrr(ne, _d);
+ case_rrd(ne);
+ case_rrr(unlt, _d);
+ case_rrd(unlt);
+ case_rrr(unle, _d);
+ case_rrd(unle);
+ case_rrr(uneq, _d);
+ case_rrd(uneq);
+ case_rrr(unge, _d);
+ case_rrd(unge);
+ case_rrr(ungt, _d);
+ case_rrd(ungt);
+ case_rrr(ltgt, _d);
+ case_rrd(ltgt);
+ case_rrr(ord, _d);
+ case_rrd(ord);
+ case_rrr(unord, _d);
+ case_rrd(unord);
+ case_brr(blt, _d);
+ case_brd(blt);
+ case_brr(ble, _d);
+ case_brd(ble);
+ case_brr(beq, _d);
+ case_brd(beq);
+ case_brr(bge, _d);
+ case_brd(bge);
+ case_brr(bgt, _d);
+ case_brd(bgt);
+ case_brr(bne, _d);
+ case_brd(bne);
+ case_brr(bunlt, _d);
+ case_brd(bunlt);
+ case_brr(bunle, _d);
+ case_brd(bunle);
+ case_brr(buneq, _d);
+ case_brd(buneq);
+ case_brr(bunge, _d);
+ case_brd(bunge);
+ case_brr(bungt, _d);
+ case_brd(bungt);
+ case_brr(bltgt, _d);
+ case_brd(bltgt);
+ case_brr(bord, _d);
+ case_brd(bord);
+ case_brr(bunord, _d);
+ case_brd(bunord);
+ case jit_code_jmpr:
+ jmpr(rn(node->u.w));
+ break;
+ case jit_code_jmpi:
+ if (node->flag & jit_flag_node) {
+ temp = node->u.n;
+ assert(temp->code == jit_code_label ||
+ temp->code == jit_code_epilog);
+ if (temp->flag & jit_flag_patch)
+ jmpi(temp->u.w);
+ else {
+ word = jmpi_p(_jit->pc.w);
+ patch(word, node);
+ }
+ }
+ else
+ jmpi(node->u.w);
+ break;
+ case jit_code_callr:
+ callr(rn(node->u.w));
+ break;
+ case jit_code_calli:
+ if (node->flag & jit_flag_node) {
+ temp = node->u.n;
+ assert(temp->code == jit_code_label ||
+ temp->code == jit_code_epilog);
+ if (temp->flag & jit_flag_patch)
+ calli(temp->u.w);
+ else {
+ word = calli_p(_jit->pc.w);
+ patch(word, node);
+ }
+ }
+ else
+ calli(node->u.w);
+ break;
+ case jit_code_prolog:
+ _jitc->function = _jitc->functions.ptr + node->w.w;
+ undo.node = node;
+ undo.word = _jit->pc.w;
+#if DEVEL_DISASSEMBLER
+ undo.prevw = prevw;
+#endif
+ undo.patch_offset = _jitc->patches.offset;
+ restart_function: /* re-entered from the epilog case when _jitc->again is set */
+ _jitc->again = 0;
+ prolog(node);
+ break;
+ case jit_code_epilog:
+ assert(_jitc->function == _jitc->functions.ptr + node->w.w);
+ if (_jitc->again) { /* rewind to the saved prolog state and emit the function again */
+ for (temp = undo.node->next;
+ temp != node; temp = temp->next) {
+ if (temp->code == jit_code_label ||
+ temp->code == jit_code_epilog)
+ temp->flag &= ~jit_flag_patch;
+ }
+ temp->flag &= ~jit_flag_patch; /* temp == node here: the epilog itself */
+ node = undo.node;
+ _jit->pc.w = undo.word;
+#if DEVEL_DISASSEMBLER
+ prevw = undo.prevw;
+#endif
+ _jitc->patches.offset = undo.patch_offset;
+ goto restart_function;
+ }
+ if (node->link && (word = _jit->pc.w & 3))
+ nop(4 - word);
+ /* remember label is defined */
+ node->flag |= jit_flag_patch;
+ node->u.w = _jit->pc.w;
+ epilog(node);
+ _jitc->function = NULL;
+ break;
+ case jit_code_va_start:
+ vastart(rn(node->u.w));
+ break;
+ case jit_code_va_arg:
+ vaarg(rn(node->u.w), rn(node->v.w));
+ break;
+ case jit_code_va_arg_d:
+ vaarg_d(rn(node->u.w), rn(node->v.w));
+ break;
+ case jit_code_live: case jit_code_ellipsis:
+ case jit_code_va_push:
+ case jit_code_allocai: case jit_code_allocar:
+ case jit_code_arg:
+ case jit_code_arg_f: case jit_code_arg_d:
+ case jit_code_va_end:
+ case jit_code_ret:
+ case jit_code_retr: case jit_code_reti:
+ case jit_code_retr_f: case jit_code_reti_f:
+ case jit_code_retr_d: case jit_code_reti_d:
+ case jit_code_getarg_c: case jit_code_getarg_uc:
+ case jit_code_getarg_s: case jit_code_getarg_us:
+ case jit_code_getarg_i:
+#if __WORDSIZE == 64
+ case jit_code_getarg_ui: case jit_code_getarg_l:
+#endif
+ case jit_code_getarg_f: case jit_code_getarg_d:
+ case jit_code_putargr: case jit_code_putargi:
+ case jit_code_putargr_f: case jit_code_putargi_f:
+ case jit_code_putargr_d: case jit_code_putargi_d:
+ case jit_code_pushargr: case jit_code_pushargi:
+ case jit_code_pushargr_f: case jit_code_pushargi_f:
+ case jit_code_pushargr_d: case jit_code_pushargi_d:
+ case jit_code_retval_c: case jit_code_retval_uc:
+ case jit_code_retval_s: case jit_code_retval_us:
+ case jit_code_retval_i:
+#if __WORDSIZE == 64
+ case jit_code_retval_ui: case jit_code_retval_l:
+#endif
+ case jit_code_retval_f: case jit_code_retval_d:
+ case jit_code_prepare:
+ case jit_code_finishr: case jit_code_finishi:
+ break; /* these codes emit nothing in this switch */
+ default:
+ abort();
+ }
+ jit_regarg_clr(node, value);
+ assert(_jitc->regarg == 0 && _jitc->synth == 0);
+ /* update register live state */
+ jit_reglive(node);
+ }
+#undef case_brw
+#undef case_brr
+#undef case_wrr
+#undef case_rrw
+#undef case_rrr
+#undef case_wr
+#undef case_rw
+#undef case_rr /* NOTE(review): case_rrrr, case_rrrw, case_rrf, case_rrd, case_brf, case_brd and assert_data are not undefined here */
+
+ for (offset = 0; offset < _jitc->patches.offset; offset++) { /* resolve the references recorded in _jitc->patches */
+ node = _jitc->patches.ptr[offset].node;
+ word = node->code == jit_code_movi ? node->v.n->u.w : node->u.n->u.w;
+ patch_at(_jitc->patches.ptr[offset].inst, word);
+ }
+
+ jit_flush(_jit->code.ptr, _jit->pc.uc);
+
+ return (_jit->code.ptr);
+}
+
+#define CODE 1
+# include "s390-cpu.c"
+# include "s390-fpu.c"
+#undef CODE
+
+/*
+ * Make freshly written code in [fptr, tptr) visible for execution.
+ *
+ * With a GNU compatible compiler the range is widened to whole pages
+ * (start rounded down, end rounded up to the size reported by sysconf)
+ * and handed to __clear_cache.  With other compilers this is a no-op.
+ */
+void
+jit_flush(void *fptr, void *tptr)
+{
+#if defined(__GNUC__)
+    jit_word_t page, first, limit;
+
+    page = sysconf(_SC_PAGE_SIZE);
+    first = (jit_word_t)fptr & -page;
+    limit = (((jit_word_t)tptr) + page - 1) & -page;
+    __clear_cache((void *)first, (void *)limit);
+#endif
+}
+
+/*
+ * Emit a word load of r0 from the address r1 + i0, translating both
+ * register identifiers with rn() first.
+ */
+void
+_emit_ldxi(jit_state_t *_jit, jit_gpr_t r0, jit_gpr_t r1, jit_word_t i0)
+{
+    ldxi(rn(r0), rn(r1), i0);
+}
+
+/*
+ * Emit a word store of r1 to the address r0 + i0, translating both
+ * register identifiers with rn() first.
+ */
+void
+_emit_stxi(jit_state_t *_jit, jit_word_t i0, jit_gpr_t r0, jit_gpr_t r1)
+{
+    stxi(i0, rn(r0), rn(r1));
+}
+
+/*
+ * Emit a double load of float register r0 from the address r1 + i0,
+ * translating both register identifiers with rn() first.
+ */
+void
+_emit_ldxi_d(jit_state_t *_jit, jit_fpr_t r0, jit_gpr_t r1, jit_word_t i0)
+{
+    ldxi_d(rn(r0), rn(r1), i0);
+}
+
+/*
+ * Emit a double store of float register r1 to the address r0 + i0,
+ * translating both register identifiers with rn() first.
+ */
+void
+_emit_stxi_d(jit_state_t *_jit, jit_word_t i0, jit_gpr_t r0, jit_fpr_t r1)
+{
+    stxi_d(i0, rn(r0), rn(r1));
+}
+
+/*
+ * Reserve an even/odd pair of general purpose registers for operations
+ * that need one, and return the first register of the pair.
+ *
+ * Candidates are tried in a fixed order of preference, first looking
+ * for a pair where both registers are free, then for a pair that
+ * becomes usable after a spill.  Aborts when no pair qualifies, so
+ * callers must not hold every register while asking for a pair.
+ */
+static int32_t
+_jit_get_reg_pair(jit_state_t *_jit)
+{
+    /* Non callee save pairs first, then the callee save ones. */
+    static const int32_t candidates[][2] = {
+        { _R0,  _R1  },
+        { _R2,  _R3  },
+        { _R4,  _R5  },
+        { _R10, _R11 },
+        { _R8,  _R9  },
+        { _R6,  _R7  },
+    };
+    const int32_t count = sizeof(candidates) / sizeof(candidates[0]);
+    int32_t r1 = _R0, r2 = _R1;
+    int32_t found = 0;
+    int32_t idx;
+
+    /* First choice: a pair that is entirely free. */
+    for (idx = 0; idx < count && !found; idx++) {
+        r1 = candidates[idx][0];
+        r2 = candidates[idx][1];
+        if (jit_reg_free_p(r1) && jit_reg_free_p(r2))
+            found = 1;
+    }
+
+    /* We *must* find a register pair: accept one that needs a spill. */
+    for (idx = 0; idx < count && !found; idx++) {
+        r1 = candidates[idx][0];
+        r2 = candidates[idx][1];
+        if (jit_reg_free_if_spill_p(r1) && jit_reg_free_if_spill_p(r2))
+            found = 1;
+    }
+
+    /* Do not jit_get_reg() all registers to avoid it */
+    if (!found)
+        abort();
+
+    (void)jit_get_reg(jit_class_gpr|jit_class_named|r1);
+    (void)jit_get_reg(jit_class_gpr|jit_class_named|r2);
+
+    return (r1);
+}
+
+/*
+ * Release a register pair obtained from the pair allocator.
+ *
+ * reg must be the first register of one of the known pairs; any other
+ * value aborts.  Both registers of the pair are released.
+ */
+static void
+_jit_unget_reg_pair(jit_state_t *_jit, int32_t reg)
+{
+    int32_t partner;
+
+    if (reg == _R0)
+        partner = _R1;
+    else if (reg == _R2)
+        partner = _R3;
+    else if (reg == _R4)
+        partner = _R5;
+    else if (reg == _R6)
+        partner = _R7;
+    else if (reg == _R8)
+        partner = _R9;
+    else if (reg == _R10)
+        partner = _R11;
+    else
+        abort();
+    jit_unget_reg(reg);
+    jit_unget_reg(partner);
+}
+
+/*
+ * Reserve a general purpose register that is not _R0.
+ *
+ * A register is requested with jit_class_gpr only.  If that yields
+ * _R0, a second request is made with jit_class_gpr|flags while _R0 is
+ * still held, and _R0 is released afterwards.  Note that flags only
+ * takes part in the second request.
+ */
+static int32_t
+_jit_get_reg_but_zero(jit_state_t *_jit, int32_t flags)
+{
+    int32_t reg = jit_get_reg(jit_class_gpr);
+
+    if (reg != _R0)
+        return (reg);
+
+    /* Keep _R0 reserved until the replacement has been obtained. */
+    reg = jit_get_reg(jit_class_gpr|flags);
+    jit_unget_reg(_R0);
+    return (reg);
+}
+
+/*
+ * Record that the instruction at address instr refers to a target that
+ * is not defined yet, so that it can be fixed up later.
+ *
+ * node must carry jit_flag_node.  Its target is node->v.n for a movi
+ * and node->u.n otherwise, and must not be marked jit_flag_patch yet.
+ * The patch table grows by 1024 entries whenever it is full.
+ */
+static void
+_patch(jit_state_t *_jit, jit_word_t instr, jit_node_t *node)
+{
+    jit_node_t *target;
+
+    assert(node->flag & jit_flag_node);
+    target = (node->code == jit_code_movi) ? node->v.n : node->u.n;
+    assert(!(target->flag & jit_flag_patch));
+
+    if (_jitc->patches.offset >= _jitc->patches.length) {
+        jit_realloc((jit_pointer_t *)&_jitc->patches.ptr,
+                    _jitc->patches.length * sizeof(jit_patch_t),
+                    (_jitc->patches.length + 1024) * sizeof(jit_patch_t));
+        _jitc->patches.length += 1024;
+    }
+
+    /* Append the new entry and advance the fill level. */
+    _jitc->patches.ptr[_jitc->patches.offset].inst = instr;
+    _jitc->patches.ptr[_jitc->patches.offset].node = node;
+    _jitc->patches.offset++;
+}
diff --git a/deps/lightening/lightening/s390.h b/deps/lightening/lightening/s390.h
new file mode 100644
index 0000000..0e74b2e
--- /dev/null
+++ b/deps/lightening/lightening/s390.h
@@ -0,0 +1,68 @@
+/*
+ * Copyright (C) 2013-2017 Free Software Foundation, Inc.
+ *
+ * This file is part of GNU lightning.
+ *
+ * GNU lightning is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ *
+ * GNU lightning is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+ * License for more details.
+ *
+ * Authors:
+ * Paulo Cesar Pereira de Andrade
+ */
+
+#ifndef _jit_s390_h
+#define _jit_s390_h
+
+#define JIT_HASH_CONSTS 1
+#define JIT_NUM_OPERANDS 2
+
+/*
+ * Types
+ *
+ * jit_reg_t lists the registers in the order given below, which is not
+ * the numeric register order.  The accessor macros depend on that
+ * order and must be kept in sync with it:
+ *   jit_r(i) steps by two from _R12 and yields _R12, _R10, _R8
+ *   jit_v(i) steps by two from _R11 and yields _R11, _R9, _R7
+ *   jit_f(i) steps by one from _F8  and yields _F8 .. _F13
+ * The macro definitions are interleaved with the enumerators; being
+ * preprocessor directives they do not affect the enumerator values.
+ */
+#define JIT_FP _R13
+typedef enum {
+#define jit_r(i) (_R12 + ((i) << 1))
+#define jit_r_num() 3
+#define jit_v(i) (_R11 + ((i) << 1))
+#define jit_v_num() 3
+#define jit_f(i) (_F8 + (i))
+#define jit_f_num() 6
+#define JIT_R0 _R12
+#define JIT_R1 _R10
+#define JIT_R2 _R8
+#define JIT_V0 _R11
+#define JIT_V1 _R9
+#define JIT_V2 _R7
+ _R0, _R1, /* Volatile */
+ _R12, /* Saved, GOT */
+ _R11, _R10, _R9, _R8, /* Saved */
+ _R7, /* Saved */
+ _R6, /* Saved, parameter */
+ _R5, _R4, _R3, /* Parameter passing */
+ _R2, /* Volatile, parameter and return value */
+ _R13, /* Saved, literal pool pointer */
+ _R14, /* Volatile, return address */
+ _R15, /* Saved, stack pointer */
+#define JIT_F0 _F8
+#define JIT_F1 _F9
+#define JIT_F2 _F10
+#define JIT_F3 _F11
+#define JIT_F4 _F12
+#define JIT_F5 _F13
+ _F1, _F3, _F5, _F7, /* Volatile */
+ _F14, _F15, _F8, _F9, /* Saved */
+ _F10, _F11, _F12, _F13, /* Saved */
+ _F6, _F4, _F2, /* Volatile, parameter */
+ _F0, /* Volatile, parameter and return value */
+ _NOREG, /* last enumerator, also exposed as JIT_NOREG */
+#define JIT_NOREG _NOREG
+} jit_reg_t;
+
+#endif /* _jit_s390_h */
diff --git a/deps/lightening/lightening/x86-cpu.c b/deps/lightening/lightening/x86-cpu.c
new file mode 100644
index 0000000..e9e5299
--- /dev/null
+++ b/deps/lightening/lightening/x86-cpu.c
@@ -0,0 +1,2789 @@
+/*
+ * Copyright (C) 2012-2019 Free Software Foundation, Inc.
+ *
+ * This file is part of GNU lightning.
+ *
+ * GNU lightning is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ *
+ * GNU lightning is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+ * License for more details.
+ *
+ * Authors:
+ * Paulo Cesar Pereira de Andrade
+ */
+
+/* avoid using it due to partial stalls */
+#define USE_INC_DEC 0
+
+/* WIDE is the value passed as the `w` argument of rex() by most emitters
+   in this file; IF_WIDE(wide, narrow) selects one of two expressions at
+   preprocessing time depending on __X32. */
+#if __X32
+# define WIDE 0
+# define IF_WIDE(wide, narrow) narrow
+#else
+# define WIDE 1
+# define IF_WIDE(wide, narrow) wide
+#endif
+
+/* Register numbers as used in instruction encodings. */
+#define _RAX_REGNO 0
+#define _RCX_REGNO 1
+#define _RDX_REGNO 2
+#define _RBX_REGNO 3
+#define _RSP_REGNO 4
+#define _RBP_REGNO 5
+#define _RSI_REGNO 6
+#define _RDI_REGNO 7
+#define _R8_REGNO 8
+#define _R9_REGNO 9
+#define _R10_REGNO 10
+#define _R11_REGNO 11
+#define _R12_REGNO 12
+#define _R13_REGNO 13
+#define _R14_REGNO 14
+#define _R15_REGNO 15
+/* r7() keeps the low three bits of a register number (the part that fits
+   in a ModRM/SIB field); r8() keeps the low four bits. */
+#define r7(reg) ((reg) & 7)
+#define r8(reg) ((reg) & 15)
+/* reg8_p(rn): on __X32 only register numbers RAX..RBX (0..3) qualify;
+   otherwise every register does. */
+#if __X32
+# define reg8_p(rn) ((rn) >= _RAX_REGNO && (rn) <= _RBX_REGNO)
+#else
+# define reg8_p(rn) 1
+#endif
+
+/* Immediate range predicates.  All three are constant 1 on __X32.
+   Note that can_sign_extend_int_p() uses a strict `>` on the negative
+   side, so -0x80000000 itself is rejected. */
+#define can_sign_extend_int_p(im) \
+ IF_WIDE((((im) >= 0 && (long long)(im) <= 0x7fffffffLL) || \
+ ((im) < 0 && (long long)(im) > -0x80000000LL)), \
+ 1)
+#define can_zero_extend_int_p(im) \
+ IF_WIDE(((im) >= 0 && (im) < 0x80000000LL), \
+ 1)
+#define fits_uint32_p(im) \
+ IF_WIDE((((im) & 0xffffffff00000000LL) == 0), \
+ 1)
+
+/* Scale values passed as the `sc` / `ms` argument of sib() and rx(). */
+#define _SCL1 0x00
+#define _SCL2 0x01
+#define _SCL4 0x02
+#define _SCL8 0x03
+
+/* ALU operation selectors, pre-shifted by three bits.  alur() uses them as
+   `code | 0x01` and alui() as `0xc0 | code | r7(r0)` or `code | 0x05`.
+   The expansions are parenthesised so they stay correct next to any
+   operator at the point of use. */
+#define X86_ADD (0 << 3)
+#define X86_OR (1 << 3)
+#define X86_ADC (2 << 3)
+#define X86_SBB (3 << 3)
+#define X86_AND (4 << 3)
+#define X86_SUB (5 << 3)
+#define X86_XOR (6 << 3)
+#define X86_CMP (7 << 3)
+/* Rotate/shift selectors, passed unshifted as the middle field of mrm()
+   by irotshr() and irotshi(). */
+#define X86_ROL 0
+#define X86_ROR 1
+#define X86_RCL 2
+#define X86_RCR 3
+#define X86_SHL 4
+#define X86_SHR 5
+#define X86_SAR 7
+/* Unary-group selectors, passed unshifted as the middle field of mrm()
+   by unr(). */
+#define X86_NOT 2
+#define X86_NEG 3
+#define X86_MUL 4
+#define X86_IMUL 5
+#define X86_DIV 6
+#define X86_IDIV 7
+
+/* X-macro table of condition codes.  Each entry is
+   M(lowercase name, uppercase name, 4-bit code); several names share one
+   code and are therefore aliases of each other.  The `nl_` entry carries
+   a trailing underscore in its lowercase name only (presumably to avoid a
+   clash with another identifier -- TODO confirm). */
+#define FOR_EACH_CC(M) \
+ M(o, O, 0x0) \
+ M(no, NO, 0x1) \
+ M(nae, NAE, 0x2) \
+ M(b, B, 0x2) \
+ M(c, C, 0x2) \
+ M(ae, AE, 0x3) \
+ M(nb, NB, 0x3) \
+ M(nc, NC, 0x3) \
+ M(e, E, 0x4) \
+ M(z, Z, 0x4) \
+ M(ne, NE, 0x5) \
+ M(nz, NZ, 0x5) \
+ M(be, BE, 0x6) \
+ M(na, NA, 0x6) \
+ M(a, A, 0x7) \
+ M(nbe, NBE, 0x7) \
+ M(s, S, 0x8) \
+ M(ns, NS, 0x9) \
+ M(p, P, 0xa) \
+ M(pe, PE, 0xa) \
+ M(np, NP, 0xb) \
+ M(po, PO, 0xb) \
+ M(l, L, 0xc) \
+ M(nge, NGE, 0xc) \
+ M(ge, GE, 0xd) \
+ M(nl_, NL, 0xd) \
+ M(le, LE, 0xe) \
+ M(ng, NG, 0xe) \
+ M(g, G, 0xf) \
+ M(nle, NLE, 0xf) \
+ /* EOL */
+
+/* One enumerator X86_CC_<NAME> per table entry, valued with its code.
+   Only the uppercase name and the code columns are used here. */
+enum x86_cc
+{
+#define DEFINE_ENUM(cc, CC, code) X86_CC_##CC = code,
+ FOR_EACH_CC(DEFINE_ENUM)
+#undef DEFINE_ENUM
+};
+
+/* Emit a ModRM byte: MD in bits 7..6, R in bits 5..3, M in bits 2..0.
+   Callers are expected to pass already-masked field values. */
+static inline void
+mrm(jit_state_t *_jit, uint8_t md, uint8_t r, uint8_t m)
+{
+  const int modrm = (md << 6) | (r << 3) | m;
+
+  emit_u8(_jit, modrm);
+}
+
+/* Emit a SIB byte: SC (scale) in bits 7..6, I (index) in bits 5..3,
+   B (base) in bits 2..0. */
+static inline void
+sib(jit_state_t *_jit, uint8_t sc, uint8_t i, uint8_t b)
+{
+  const int sib_byte = (sc << 6) | (i << 3) | b;
+
+  emit_u8(_jit, sib_byte);
+}
+
+/* Forward one 8-bit value to emit_u8(). */
+static inline void
+ic(jit_state_t *_jit, uint8_t c)
+{
+ emit_u8(_jit, c);
+}
+
+/* Forward one 16-bit value to emit_u16(). */
+static inline void
+is(jit_state_t *_jit, uint16_t s)
+{
+ emit_u16(_jit, s);
+}
+
+/* Forward one 32-bit value to emit_u32().  Wider arguments are truncated
+   by the uint32_t parameter conversion. */
+static inline void
+ii(jit_state_t *_jit, uint32_t i)
+{
+ emit_u32(_jit, i);
+}
+
+#if __X64
+/* Forward one 64-bit value to emit_u64(); only built when __X64. */
+static inline void
+il(jit_state_t *_jit, uint64_t l)
+{
+ emit_u64(_jit, l);
+}
+#endif
+
+/* Emit a REX prefix when one is needed (__X64 builds only; otherwise this
+   emits nothing).
+
+   The prefix is 0x40 with W taken from `w` (bit 3) and bit 3 of the
+   register numbers `r`, `x` and `b` placed in bits 2, 1 and 0.  A register
+   argument equal to _NOREG contributes nothing.  The byte is written when
+   it differs from the bare 0x40, or unconditionally when `l` is non-zero. */
+static void
+rex(jit_state_t *_jit, int32_t l, int32_t w,
+    int32_t r, int32_t x, int32_t b)
+{
+#if __X64
+  int32_t prefix = 0x40 | (w << 3);
+
+  prefix |= (r != _NOREG) ? (r & 8) >> 1 : 0;
+  prefix |= (x != _NOREG) ? (x & 8) >> 2 : 0;
+  prefix |= (b != _NOREG) ? (b & 8) >> 3 : 0;
+
+  if (!l && prefix == 0x40)
+    return;
+  ic(_jit, prefix);
+#endif
+}
+
+/* Emit the ModRM byte, optional SIB byte and displacement for the memory
+   operand  md(rb, ri, ms)  with `rd` in the ModRM reg field.
+
+     rd  register (or opcode extension) for the reg field
+     md  displacement
+     rb  base register, or _NOREG
+     ri  index register, or _NOREG
+     ms  scale, one of _SCL1 .. _SCL8
+
+   The shortest displacement form is chosen: none when md is 0 (unless the
+   base encodes like RBP, which has no displacement-free form), 8 bits when
+   md fits in int8_t, 32 bits otherwise.  A base that encodes like RSP
+   always goes through a SIB byte.  Aborts if `ri` is RSP, which cannot be
+   used as an index. */
+static void
+rx(jit_state_t *_jit, int32_t rd, int32_t md,
+   int32_t rb, int32_t ri, int32_t ms)
+{
+  if (ri == _NOREG) {
+    if (rb == _NOREG) {
+      /* No base and no index: 32-bit displacement only. */
+#if __X32
+      mrm(_jit, 0x00, r7(rd), 0x05);
+#else
+      mrm(_jit, 0x00, r7(rd), 0x04);
+      sib(_jit, _SCL1, 0x04, 0x05);
+#endif
+      ii(_jit, md);
+    } else if (r7(rb) == _RSP_REGNO) {
+      /* r/m value 4 means "SIB follows", so this base needs a SIB byte. */
+      if (md == 0) {
+        mrm(_jit, 0x00, r7(rd), 0x04);
+        sib(_jit, ms, 0x04, 0x04);
+      }
+      else if ((int8_t)md == md) {
+        mrm(_jit, 0x01, r7(rd), 0x04);
+        sib(_jit, ms, 0x04, 0x04);
+        ic(_jit, md);
+      } else {
+        mrm(_jit, 0x02, r7(rd), 0x04);
+        sib(_jit, ms, 0x04, 0x04);
+        ii(_jit, md);
+      }
+    } else {
+      if (md == 0 && r7(rb) != _RBP_REGNO)
+        mrm(_jit, 0x00, r7(rd), r7(rb));
+      else if ((int8_t)md == md) {
+        mrm(_jit, 0x01, r7(rd), r7(rb));
+        ic(_jit, md);
+      } else {
+        mrm(_jit, 0x02, r7(rd), r7(rb));
+        ii(_jit, md);
+      }
+    }
+  }
+  else if (rb == _NOREG) {
+    /* Index without base: always a 32-bit displacement. */
+    mrm(_jit, 0x00, r7(rd), 0x04);
+    sib(_jit, ms, r7(ri), 0x05);
+    ii(_jit, md);
+  }
+  else if (r8(ri) != _RSP_REGNO) {
+    if (md == 0 && r7(rb) != _RBP_REGNO) {
+      mrm(_jit, 0x00, r7(rd), 0x04);
+      sib(_jit, ms, r7(ri), r7(rb));
+    } else if ((int8_t)md == md) {
+      mrm(_jit, 0x01, r7(rd), 0x04);
+      sib(_jit, ms, r7(ri), r7(rb));
+      ic(_jit, md);
+    } else {
+      /* mod == 0x02 announces a 32-bit displacement, so four bytes must
+         follow; emitting a single byte here would corrupt the stream. */
+      mrm(_jit, 0x02, r7(rd), 0x04);
+      sib(_jit, ms, r7(ri), r7(rb));
+      ii(_jit, md);
+    }
+  } else {
+    fprintf(stderr, "illegal index register");
+    abort();
+  }
+}
+
+/* Emit a push of register r0 and grow the tracked frame size by one
+   machine word (__WORDSIZE / 8 bytes). */
+static void
+pushr(jit_state_t *_jit, int32_t r0)
+{
+  const int word_bytes = __WORDSIZE / 8;
+
+  _jit->frame_size += word_bytes;
+  rex(_jit, 0, WIDE, 0, 0, r0);
+  ic(_jit, 0x50 | r7(r0));
+}
+
+/* Emit a pop into register r0 and shrink the tracked frame size by one
+   machine word (__WORDSIZE / 8 bytes). */
+static void
+popr(jit_state_t *_jit, int32_t r0)
+{
+  const int word_bytes = __WORDSIZE / 8;
+
+  _jit->frame_size -= word_bytes;
+  rex(_jit, 0, WIDE, 0, 0, r0);
+  ic(_jit, 0x58 | r7(r0));
+}
+
+/* Emit exactly `count` bytes of padding as a single no-op instruction.
+   Supported counts are 0 through 9; any other value aborts.  In cases 7
+   to 9 the ii() call supplies the four zero displacement bytes. */
+static void
+nop(jit_state_t *_jit, int32_t count)
+{
+ switch (count) {
+ case 0:
+ break;
+ case 1: /* NOP */
+ ic(_jit, 0x90);
+ break;
+ case 2: /* 66 NOP */
+ ic(_jit, 0x66); ic(_jit, 0x90);
+ break;
+ case 3: /* NOP DWORD ptr [EAX] */
+ ic(_jit, 0x0f); ic(_jit, 0x1f); ic(_jit, 0x00);
+ break;
+ case 4: /* NOP DWORD ptr [EAX + 00H] */
+ ic(_jit, 0x0f); ic(_jit, 0x1f); ic(_jit, 0x40); ic(_jit, 0x00);
+ break;
+ case 5: /* NOP DWORD ptr [EAX + EAX*1 + 00H] */
+ ic(_jit, 0x0f); ic(_jit, 0x1f); ic(_jit, 0x44); ic(_jit, 0x00);
+ ic(_jit, 0x00);
+ break;
+ case 6: /* 66 NOP DWORD ptr [EAX + EAX*1 + 00H] */
+ ic(_jit, 0x66); ic(_jit, 0x0f); ic(_jit, 0x1f); ic(_jit, 0x44);
+ ic(_jit, 0x00); ic(_jit, 0x00);
+ break;
+ case 7: /* NOP DWORD ptr [EAX + 00000000H] */
+ ic(_jit, 0x0f); ic(_jit, 0x1f); ic(_jit, 0x80); ii(_jit, 0x0000);
+ break;
+ case 8: /* NOP DWORD ptr [EAX + EAX*1 + 00000000H] */
+ ic(_jit, 0x0f); ic(_jit, 0x1f); ic(_jit, 0x84); ic(_jit, 0x00);
+ ii(_jit, 0x0000);
+ break;
+ case 9: /* 66 NOP DWORD ptr [EAX + EAX*1 + 00000000H] */
+ ic(_jit, 0x66); ic(_jit, 0x0f); ic(_jit, 0x1f); ic(_jit, 0x84);
+ ic(_jit, 0x00); ii(_jit, 0x0000);
+ break;
+ default:
+ abort();
+ }
+}
+
+/* Copy register r1 into register r0 (opcode 0x89, register-direct form).
+   Emits nothing when both are the same register.
+
+   The ModRM byte is built with mrm() and r7() so that only the low three
+   bits of each register number reach the byte; bit 3 is carried by the
+   REX prefix. */
+static void
+movr(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  if (r0 == r1)
+    return;
+
+  rex(_jit, 0, 1, r1, _NOREG, r0);
+  ic(_jit, 0x89);
+  mrm(_jit, 0x03, r7(r1), r7(r0));
+}
+
+/* Shared encoder for the register-to-register forms below: REX, the 0x0f
+   escape byte, the second opcode byte `op`, then a register-direct ModRM
+   with r0 in the reg field and r1 in the r/m field. */
+static void
+mov_ext_0f_rr(jit_state_t *_jit, uint8_t op, int32_t r0, int32_t r1)
+{
+  rex(_jit, 0, WIDE, r0, _NOREG, r1);
+  ic(_jit, 0x0f);
+  ic(_jit, op);
+  mrm(_jit, 0x03, r7(r0), r7(r1));
+}
+
+/* r0 = r1 via opcode 0x0f 0xbe (byte source, sign-extending). */
+static void
+movcr(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  mov_ext_0f_rr(_jit, 0xbe, r0, r1);
+}
+
+/* r0 = r1 via opcode 0x0f 0xb6 (byte source, zero-extending). */
+static void
+movcr_u(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  mov_ext_0f_rr(_jit, 0xb6, r0, r1);
+}
+
+/* r0 = r1 via opcode 0x0f 0xbf (16-bit source, sign-extending). */
+static void
+movsr(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  mov_ext_0f_rr(_jit, 0xbf, r0, r1);
+}
+
+/* r0 = r1 via opcode 0x0f 0xb7 (16-bit source, zero-extending). */
+static void
+movsr_u(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  mov_ext_0f_rr(_jit, 0xb7, r0, r1);
+}
+
+#if __X64
+/* r0 = r1 via opcode 0x63 with REX.W set (32-bit source, sign-extending
+   to 64 bits). */
+static void
+movir(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  rex(_jit, 0, 1, r0, _NOREG, r1);
+  ic(_jit, 0x63);
+  mrm(_jit, 0x03, r7(r0), r7(r1));
+}
+
+/* r0 = r1 via a 32-bit register move (opcode 0x89 without REX.W).
+
+   The ModRM byte is built with mrm() and r7() so that only the low three
+   bits of each register number reach the byte; bit 3 is carried by the
+   REX prefix. */
+static void
+movir_u(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  rex(_jit, 0, 0, r1, _NOREG, r0);
+  ic(_jit, 0x89);
+  mrm(_jit, 0x03, r7(r1), r7(r0));
+}
+#endif
+
+/* Emit the opcode of a load-immediate into r0 and hand the immediate field
+   over to emit_abs_reloc().  The value passed to emit_abs_reloc() is the
+   number of bytes emitted here before the immediate; its result is
+   returned to the caller. */
+static jit_reloc_t
+mov_addr(jit_state_t *_jit, int32_t r0)
+{
+  uint8_t *const begin = _jit->pc.uc;
+
+  rex(_jit, 0, WIDE, _NOREG, _NOREG, r0);
+  ic(_jit, 0xb8 | r7(r0));
+
+  ptrdiff_t opcode_bytes = _jit->pc.uc - begin;
+  return emit_abs_reloc(_jit, opcode_bytes);
+}
+
+/* Load the immediate i0 into r0 with opcode 0xb8+reg.
+
+   On __X64 the 32-bit form (no REX.W, 4-byte immediate) is used when
+   fits_uint32_p(i0) holds, and the REX.W form with an 8-byte immediate
+   otherwise.  Other builds always emit the 4-byte form. */
+static void
+imovi(jit_state_t *_jit, int32_t r0, jit_word_t i0)
+{
+#if __X64
+  const int need_64 = !fits_uint32_p(i0);
+
+  rex(_jit, 0, need_64, _NOREG, _NOREG, r0);
+  ic(_jit, 0xb8 | r7(r0));
+  if (need_64)
+    il(_jit, i0);
+  else
+    ii(_jit, i0);
+#else
+  ic(_jit, 0xb8 | r7(r0));
+  ii(_jit, i0);
+#endif
+}
+
+/* Emit the two-operand ALU instruction selected by `code` (one of the
+   X86_ADD .. X86_CMP selectors) as  r0 = r0 <op> r1, with r1 in the ModRM
+   reg field and r0 in the r/m field. */
+static void
+alur(jit_state_t *_jit, int32_t code, int32_t r0, int32_t r1)
+{
+  rex(_jit, 0, WIDE, r1, _NOREG, r0);
+  ic(_jit, code | 0x01);
+  mrm(_jit, 0x03, r7(r1), r7(r0));
+}
+
+/* Thin wrappers binding alur() to one operation each.  They are plain
+   calls: a void function must not `return` an expression in ISO C. */
+
+/* Compare r0 with r1. */
+static inline void
+icmpr(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  alur(_jit, X86_CMP, r0, r1);
+}
+/* r0 += r1. */
+static inline void
+iaddr(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  alur(_jit, X86_ADD, r0, r1);
+}
+/* r0 += r1 + carry. */
+static inline void
+iaddxr(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  alur(_jit, X86_ADC, r0, r1);
+}
+/* r0 -= r1. */
+static inline void
+isubr(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  alur(_jit, X86_SUB, r0, r1);
+}
+/* r0 -= r1 + borrow. */
+static inline void
+isubxr(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  alur(_jit, X86_SBB, r0, r1);
+}
+/* r0 &= r1. */
+static inline void
+iandr(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  alur(_jit, X86_AND, r0, r1);
+}
+/* r0 |= r1. */
+static inline void
+iorr(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  alur(_jit, X86_OR, r0, r1);
+}
+/* r0 ^= r1. */
+static inline void
+ixorr(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  alur(_jit, X86_XOR, r0, r1);
+}
+
+/* Load the immediate i0 into r0.  Zero is materialised by XOR-ing the
+   register with itself (which, unlike imovi(), goes through the ALU
+   encoder); every other value goes through imovi(). */
+static void
+movi(jit_state_t *_jit, int32_t r0, jit_word_t i0)
+{
+  if (i0 == 0) {
+    ixorr(_jit, r0, r0);
+    return;
+  }
+  imovi(_jit, r0, i0);
+}
+
+/* Emit the ALU instruction selected by `code` as  r0 = r0 <op> i0.
+
+   Encodings, in order of preference:
+     - 0x83 with an 8-bit immediate when i0 fits in int8_t;
+     - the short `code | 0x05` form when r0 is register number 0 (RAX);
+     - 0x81 with a 32-bit immediate.
+   When i0 fails can_sign_extend_int_p() it is first loaded into a
+   temporary register and alur() is used instead. */
+static void
+alui(jit_state_t *_jit, int32_t code, int32_t r0, jit_word_t i0)
+{
+  if (!can_sign_extend_int_p(i0)) {
+    jit_gpr_t tmp = get_temp_gpr(_jit);
+    movi(_jit, jit_gpr_regno(tmp), i0);
+    alur(_jit, code, r0, jit_gpr_regno(tmp));
+    unget_temp_gpr(_jit);
+    return;
+  }
+
+  rex(_jit, 0, WIDE, _NOREG, _NOREG, r0);
+
+  if ((int8_t)i0 == i0) {
+    ic(_jit, 0x83);
+    ic(_jit, 0xc0 | code | r7(r0));
+    ic(_jit, i0);
+    return;
+  }
+
+  if (r0 != _RAX_REGNO) {
+    ic(_jit, 0x81);
+    ic(_jit, 0xc0 | code | r7(r0));
+  } else {
+    ic(_jit, code | 0x05);
+  }
+  ii(_jit, i0);
+}
+
+/* Thin wrappers binding alui() to one operation each.  They are plain
+   calls: a void function must not `return` an expression in ISO C. */
+
+/* Compare r0 with i0. */
+static inline void
+icmpi(jit_state_t *_jit, int32_t r0, jit_word_t i0)
+{
+  alui(_jit, X86_CMP, r0, i0);
+}
+/* r0 += i0. */
+static inline void
+iaddi(jit_state_t *_jit, int32_t r0, jit_word_t i0)
+{
+  alui(_jit, X86_ADD, r0, i0);
+}
+/* r0 += i0 + carry. */
+static inline void
+iaddxi(jit_state_t *_jit, int32_t r0, jit_word_t i0)
+{
+  alui(_jit, X86_ADC, r0, i0);
+}
+/* r0 -= i0. */
+static inline void
+isubi(jit_state_t *_jit, int32_t r0, jit_word_t i0)
+{
+  alui(_jit, X86_SUB, r0, i0);
+}
+/* r0 -= i0 + borrow. */
+static inline void
+isubxi(jit_state_t *_jit, int32_t r0, jit_word_t i0)
+{
+  alui(_jit, X86_SBB, r0, i0);
+}
+/* r0 &= i0. */
+static inline void
+iandi(jit_state_t *_jit, int32_t r0, jit_word_t i0)
+{
+  alui(_jit, X86_AND, r0, i0);
+}
+/* r0 |= i0. */
+static inline void
+iori(jit_state_t *_jit, int32_t r0, jit_word_t i0)
+{
+  alui(_jit, X86_OR, r0, i0);
+}
+/* r0 ^= i0. */
+static inline void
+ixori(jit_state_t *_jit, int32_t r0, jit_word_t i0)
+{
+  alui(_jit, X86_XOR, r0, i0);
+}
+
+/* Emit the one-operand instruction of opcode 0xf7 whose ModRM reg field
+   is `code` (one of X86_NOT .. X86_IDIV), applied to register r0. */
+static void
+unr(jit_state_t *_jit, int32_t code, int32_t r0)
+{
+  rex(_jit, 0, WIDE, _NOREG, _NOREG, r0);
+  ic(_jit, 0xf7);
+  mrm(_jit, 0x03, code, r7(r0));
+}
+
+/* Thin wrappers binding unr() to one operation each.  They are plain
+   calls: a void function must not `return` an expression in ISO C. */
+
+/* Signed widening multiply by r0 (X86_IMUL). */
+static inline void
+umulr(jit_state_t *_jit, int32_t r0)
+{
+  unr(_jit, X86_IMUL, r0);
+}
+/* Unsigned widening multiply by r0 (X86_MUL). */
+static inline void
+umulr_u(jit_state_t *_jit, int32_t r0)
+{
+  unr(_jit, X86_MUL, r0);
+}
+/* Signed divide by r0 (X86_IDIV). */
+static inline void
+idivr(jit_state_t *_jit, int32_t r0)
+{
+  unr(_jit, X86_IDIV, r0);
+}
+/* Unsigned divide by r0 (X86_DIV). */
+static inline void
+idivr_u(jit_state_t *_jit, int32_t r0)
+{
+  unr(_jit, X86_DIV, r0);
+}
+/* r0 = -r0 (X86_NEG). */
+static inline void
+inegr(jit_state_t *_jit, int32_t r0)
+{
+  unr(_jit, X86_NEG, r0);
+}
+/* r0 = ~r0 (X86_NOT). */
+static inline void
+icomr(jit_state_t *_jit, int32_t r0)
+{
+  unr(_jit, X86_NOT, r0);
+}
+
+#if USE_INC_DEC
+/* r0 = r1 + 1.  Only compiled when USE_INC_DEC is non-zero.  On __X64 the
+   0xff opcode with reg field 0 is used; otherwise the one-byte 0x40+reg
+   form. */
+static void
+incr(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  movr(_jit, r0, r1);
+# if __X64
+  rex(_jit, 0, WIDE, _NOREG, _NOREG, r0);
+  ic(_jit, 0xff);
+  mrm(_jit, 0x03, 0x00, r7(r0));
+# else
+  ic(_jit, 0x40 | r7(r0));
+# endif
+}
+
+/* r0 = r1 - 1.  Only compiled when USE_INC_DEC is non-zero.  On __X64 the
+   0xff opcode with reg field 1 is used; otherwise the one-byte 0x48+reg
+   form. */
+static void
+decr(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  movr(_jit, r0, r1);
+# if __X64
+  rex(_jit, 0, WIDE, _NOREG, _NOREG, r0);
+  ic(_jit, 0xff);
+  mrm(_jit, 0x03, 0x01, r7(r0));
+# else
+  ic(_jit, 0x48 | r7(r0));
+# endif
+}
+#endif
+
+/* Emit opcode 0x8d (load effective address): rd = address of the memory
+   operand md(rb, ri, ms).  Note the argument order: the destination
+   register comes last.  rb and/or ri may be _NOREG; the operand bytes are
+   produced by rx(). */
+static void
+lea(jit_state_t *_jit, int32_t md, int32_t rb,
+ int32_t ri, int32_t ms, int32_t rd)
+{
+ rex(_jit, 0, WIDE, rd, ri, rb);
+ ic(_jit, 0x8d);
+ rx(_jit, rd, md, rb, ri, ms);
+}
+
+/* Emit opcode 0x87 in register-direct form: exchange r0 and r1. */
+static void
+xchgr(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+ rex(_jit, 0, WIDE, r1, _NOREG, r0);
+ ic(_jit, 0x87);
+ mrm(_jit, 0x03, r7(r1), r7(r0));
+}
+
+/* Emit opcode 0x87 with a memory operand: exchange register val_and_dst
+   with the word addressed by register loc (no displacement, no index). */
+static void
+xchgrm(jit_state_t *_jit, int32_t val_and_dst, int32_t loc)
+{
+ rex(_jit, 0, WIDE, val_and_dst, _NOREG, loc);
+ ic(_jit, 0x87);
+ rx(_jit, val_and_dst, 0, loc, _NOREG, _SCL1);
+}
+
+/* Emit the single prefix byte 0xf0 (LOCK). */
+static void
+lock(jit_state_t *_jit)
+{
+ ic(_jit, 0xf0);
+}
+
+/* Emit a LOCK-prefixed 0x0f 0xb1 (compare-and-exchange) whose memory
+   operand is the word addressed by register loc and whose register
+   operand is `desired`.  The comparand register is implicit in the
+   instruction and is not named here. */
+static void
+cmpxchgmr(jit_state_t *_jit, int32_t loc, int32_t desired)
+{
+ lock(_jit);
+ rex(_jit, 0, WIDE, desired, _NOREG, loc);
+ ic(_jit, 0x0f);
+ ic(_jit, 0xb1);
+ rx(_jit, desired, 0, loc, _NOREG, _SCL1);
+}
+
+/* Emit opcode 0x85 in register-direct form: test r0 against r1. */
+static void
+testr(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  const int32_t opcode = 0x85;
+
+  rex(_jit, 0, WIDE, r1, _NOREG, r0);
+  ic(_jit, opcode);
+  mrm(_jit, 0x03, r7(r1), r7(r0));
+}
+
+/* Test r0 against the immediate i0.  Register number 0 (RAX) uses the
+   short 0xa9 form; every other register uses 0xf7 with reg field 0.
+   Exactly four immediate bytes are emitted, so only the low 32 bits of
+   i0 are encoded. */
+static void
+testi(jit_state_t *_jit, int32_t r0, jit_word_t i0)
+{
+  rex(_jit, 0, WIDE, _NOREG, _NOREG, r0);
+  if (r0 != _RAX_REGNO) {
+    ic(_jit, 0xf7);
+    mrm(_jit, 0x03, 0x00, r7(r0));
+  } else {
+    ic(_jit, 0xa9);
+  }
+  ii(_jit, i0);
+}
+
+/* r0 = -r1.  Negates in place when both are the same register; otherwise
+   clears r0 and subtracts r1 from it. */
+static void
+negr(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  if (r0 != r1) {
+    ixorr(_jit, r0, r0);
+    isubr(_jit, r0, r1);
+    return;
+  }
+  inegr(_jit, r0);
+}
+
+/* r0 = r1 + r2.  Uses a two-operand add when r0 aliases either source,
+   and a three-operand lea (base r1, index r2, scale 1) otherwise. */
+static void
+addr(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+{
+  if (r0 == r1) {
+    iaddr(_jit, r0, r2);
+    return;
+  }
+  if (r0 == r2) {
+    iaddr(_jit, r0, r1);
+    return;
+  }
+  lea(_jit, 0, r1, r2, _SCL1, r0);
+}
+
+/* r0 = r1 + i0.
+
+   Zero degenerates to a register move.  An immediate accepted by
+   can_sign_extend_int_p() is added in place (r0 == r1) or folded into a
+   lea displacement.  Larger immediates are loaded into r0 itself when it
+   is distinct from r1, or into a temporary register otherwise. */
+static void
+addi(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0)
+{
+  if (i0 == 0) {
+    movr(_jit, r0, r1);
+    return;
+  }
+#if USE_INC_DEC
+  if (i0 == 1) {
+    incr(_jit, r0, r1);
+    return;
+  }
+  if (i0 == -1) {
+    decr(_jit, r0, r1);
+    return;
+  }
+#endif
+  if (can_sign_extend_int_p(i0)) {
+    if (r0 != r1)
+      lea(_jit, i0, r1, _NOREG, _SCL1, r0);
+    else
+      iaddi(_jit, r0, i0);
+    return;
+  }
+  if (r0 == r1) {
+    jit_gpr_t tmp = get_temp_gpr(_jit);
+    movi(_jit, jit_gpr_regno(tmp), i0);
+    iaddr(_jit, r0, jit_gpr_regno(tmp));
+    unget_temp_gpr(_jit);
+    return;
+  }
+  movi(_jit, r0, i0);
+  iaddr(_jit, r0, r1);
+}
+
+/* r0 = r1 + r2, always through a real add instruction (never lea). */
+static void
+addcr(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+{
+  if (r0 == r2) {
+    iaddr(_jit, r0, r1);
+    return;
+  }
+  movr(_jit, r0, r1);
+  iaddr(_jit, r0, r2);
+}
+
+/* r0 = r1 + i0, always through a real add instruction (never lea).
+   Immediates rejected by can_sign_extend_int_p() are loaded into r0
+   itself when it is distinct from r1, or into a temporary otherwise. */
+static void
+addci(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0)
+{
+  if (can_sign_extend_int_p(i0)) {
+    movr(_jit, r0, r1);
+    iaddi(_jit, r0, i0);
+    return;
+  }
+  if (r0 != r1) {
+    movi(_jit, r0, i0);
+    iaddr(_jit, r0, r1);
+    return;
+  }
+  jit_gpr_t tmp = get_temp_gpr(_jit);
+  movi(_jit, jit_gpr_regno(tmp), i0);
+  iaddr(_jit, r0, jit_gpr_regno(tmp));
+  unget_temp_gpr(_jit);
+}
+
+/* r0 = r1 + r2 + carry (add-with-carry). */
+static void
+addxr(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+{
+  if (r0 == r2) {
+    iaddxr(_jit, r0, r1);
+    return;
+  }
+  movr(_jit, r0, r1);
+  iaddxr(_jit, r0, r2);
+}
+
+/* r0 = r1 + i0 + carry (add-with-carry).  Immediates rejected by
+   can_sign_extend_int_p() are loaded into r0 itself when it is distinct
+   from r1, or into a temporary otherwise. */
+static void
+addxi(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0)
+{
+  if (can_sign_extend_int_p(i0)) {
+    movr(_jit, r0, r1);
+    iaddxi(_jit, r0, i0);
+    return;
+  }
+  if (r0 != r1) {
+    movi(_jit, r0, i0);
+    iaddxr(_jit, r0, r1);
+    return;
+  }
+  jit_gpr_t tmp = get_temp_gpr(_jit);
+  movi(_jit, jit_gpr_regno(tmp), i0);
+  iaddxr(_jit, r0, jit_gpr_regno(tmp));
+  unget_temp_gpr(_jit);
+}
+
+/* r0 = r1 - r2.  Identical sources yield zero directly.  When r0 aliases
+   the subtrahend r2, the code computes r2 - r1 in place and negates it. */
+static void
+subr(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+{
+  if (r1 == r2) {
+    ixorr(_jit, r0, r0);
+    return;
+  }
+  if (r0 == r2) {
+    isubr(_jit, r0, r1);
+    inegr(_jit, r0);
+    return;
+  }
+  movr(_jit, r0, r1);
+  isubr(_jit, r0, r2);
+}
+
+/* r0 = r1 - i0.
+
+   Zero degenerates to a register move.  An immediate accepted by
+   can_sign_extend_int_p() is subtracted in place (r0 == r1) or folded,
+   negated, into a lea displacement.  Larger immediates are either loaded
+   negated into r0 and added to r1 (r0 != r1), or loaded into a temporary
+   and subtracted (r0 == r1). */
+static void
+subi(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0)
+{
+  if (i0 == 0) {
+    movr(_jit, r0, r1);
+    return;
+  }
+#if USE_INC_DEC
+  if (i0 == 1) {
+    decr(_jit, r0, r1);
+    return;
+  }
+  if (i0 == -1) {
+    incr(_jit, r0, r1);
+    return;
+  }
+#endif
+  if (can_sign_extend_int_p(i0)) {
+    if (r0 != r1)
+      lea(_jit, -i0, r1, _NOREG, _SCL1, r0);
+    else
+      isubi(_jit, r0, i0);
+    return;
+  }
+  if (r0 == r1) {
+    jit_gpr_t tmp = get_temp_gpr(_jit);
+    movi(_jit, jit_gpr_regno(tmp), i0);
+    isubr(_jit, r0, jit_gpr_regno(tmp));
+    unget_temp_gpr(_jit);
+    return;
+  }
+  movi(_jit, r0, -i0);
+  iaddr(_jit, r0, r1);
+}
+
+/* r0 = r1 - r2, always through a real subtract instruction.  When r0
+   aliases only the subtrahend r2, r2 is first saved in a temporary so it
+   survives the move of r1 into r0. */
+static void
+subcr(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+{
+  if (r0 != r2 || r0 == r1) {
+    movr(_jit, r0, r1);
+    isubr(_jit, r0, r2);
+    return;
+  }
+  jit_gpr_t saved = get_temp_gpr(_jit);
+  movr(_jit, jit_gpr_regno(saved), r0);
+  movr(_jit, r0, r1);
+  isubr(_jit, r0, jit_gpr_regno(saved));
+  unget_temp_gpr(_jit);
+}
+
+/* r0 = r1 - i0, always through a real subtract instruction.  Immediates
+   rejected by can_sign_extend_int_p() go through a temporary register. */
+static void
+subci(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0)
+{
+  movr(_jit, r0, r1);
+  if (!can_sign_extend_int_p(i0)) {
+    jit_gpr_t tmp = get_temp_gpr(_jit);
+    movi(_jit, jit_gpr_regno(tmp), i0);
+    isubr(_jit, r0, jit_gpr_regno(tmp));
+    unget_temp_gpr(_jit);
+    return;
+  }
+  isubi(_jit, r0, i0);
+}
+
+/* r0 = r1 - r2 - borrow (subtract-with-borrow).  When r0 aliases only the
+   subtrahend r2, r2 is first saved in a temporary so it survives the move
+   of r1 into r0. */
+static void
+subxr(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+{
+  if (r0 != r2 || r0 == r1) {
+    movr(_jit, r0, r1);
+    isubxr(_jit, r0, r2);
+    return;
+  }
+  jit_gpr_t saved = get_temp_gpr(_jit);
+  movr(_jit, jit_gpr_regno(saved), r0);
+  movr(_jit, r0, r1);
+  isubxr(_jit, r0, jit_gpr_regno(saved));
+  unget_temp_gpr(_jit);
+}
+
+/* r0 = r1 - i0 - borrow (subtract-with-borrow).  Immediates rejected by
+   can_sign_extend_int_p() are loaded into a temporary with imovi(), not
+   movi(), so the load never goes through the XOR shortcut. */
+static void
+subxi(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0)
+{
+  movr(_jit, r0, r1);
+  if (!can_sign_extend_int_p(i0)) {
+    jit_gpr_t tmp = get_temp_gpr(_jit);
+    imovi(_jit, jit_gpr_regno(tmp), i0);
+    isubxr(_jit, r0, jit_gpr_regno(tmp));
+    unget_temp_gpr(_jit);
+    return;
+  }
+  isubxi(_jit, r0, i0);
+}
+
+/* Emit opcode 0xd3 with reg field `code` (one of X86_ROL .. X86_SAR):
+   rotate or shift r0 in place.  No count operand is encoded; the count
+   register is implicit (the rex() call names _RCX_REGNO). */
+static void
+irotshr(jit_state_t *_jit, int32_t code, int32_t r0)
+{
+ rex(_jit, 0, WIDE, _RCX_REGNO, _NOREG, r0);
+ ic(_jit, 0xd3);
+ mrm(_jit, 0x03, code, r7(r0));
+}
+
+/* r0 = r1 rotated/shifted by r2, using the operation selected by `code`.
+
+   irotshr() takes its count implicitly in RCX, so the count r2 has to be
+   routed there without losing any live value.  Three situations:
+     1. r0 is RCX: the work is done in a temporary register and the result
+        is copied into RCX at the end.
+     2. r2 is not RCX (and neither is r0): RCX is saved on the stack,
+        loaded with the count, and restored afterwards.
+     3. r2 already is RCX: a plain move and shift suffice. */
+static void
+rotshr(jit_state_t *_jit, int32_t code,
+ int32_t r0, int32_t r1, int32_t r2)
+{
+ if (r0 == _RCX_REGNO) {
+ jit_gpr_t reg = get_temp_gpr(_jit);
+ /* Copy the value first: r1 may itself be RCX, which is overwritten
+ with the count next. */
+ movr(_jit, jit_gpr_regno(reg), r1);
+ if (r2 != _RCX_REGNO)
+ movr(_jit, _RCX_REGNO, r2);
+ irotshr(_jit, code, jit_gpr_regno(reg));
+ movr(_jit, _RCX_REGNO, jit_gpr_regno(reg));
+ unget_temp_gpr(_jit);
+ } else if (r2 != _RCX_REGNO) {
+ /* Already know that R0 isn't RCX. */
+ pushr(_jit, _RCX_REGNO);
+ if (r1 == _RCX_REGNO) {
+ /* Value is in RCX and count is in r0: a swap puts both in place. */
+ if (r0 == r2)
+ xchgr(_jit, r0, _RCX_REGNO);
+ else {
+ movr(_jit, r0, r1);
+ movr(_jit, _RCX_REGNO, r2);
+ }
+ } else {
+ /* Load the count before r0, since r0 may be the same register as r2. */
+ movr(_jit, _RCX_REGNO, r2);
+ movr(_jit, r0, r1);
+ }
+ irotshr(_jit, code, r0);
+ popr(_jit, _RCX_REGNO);
+ } else {
+ movr(_jit, r0, r1);
+ irotshr(_jit, code, r0);
+ }
+}
+
+/* Rotate or shift r0 in place by the constant i0, using the operation
+   selected by `code`.  A count of exactly 1 uses opcode 0xd1, which has
+   no immediate byte; every other count uses 0xc1 followed by one
+   immediate byte. */
+static void
+irotshi(jit_state_t *_jit, int32_t code, int32_t r0, jit_word_t i0)
+{
+  const int by_one = (i0 == 1);
+
+  rex(_jit, 0, WIDE, _NOREG, _NOREG, r0);
+  ic(_jit, by_one ? 0xd1 : 0xc1);
+  mrm(_jit, 0x03, code, r7(r0));
+  if (!by_one)
+    ic(_jit, i0);
+}
+
+/* r0 = r1 rotated/shifted by the constant i0.  A zero count reduces to
+   the register move alone. */
+static void
+rotshi(jit_state_t *_jit, int32_t code,
+       int32_t r0, int32_t r1, jit_word_t i0)
+{
+  movr(_jit, r0, r1);
+  if (i0 == 0)
+    return;
+  irotshi(_jit, code, r0, i0);
+}
+
+/* r0 = r1 << i0.  A zero count is a plain move; counts above 3 use a real
+   shift; everything else is done with a scaled-index lea (scale 2 for a
+   count of 1, scale 4 for 2, scale 8 otherwise). */
+static void
+lshi(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0)
+{
+  if (i0 == 0) {
+    movr(_jit, r0, r1);
+    return;
+  }
+  if (i0 > 3) {
+    rotshi(_jit, X86_SHL, r0, r1, i0);
+    return;
+  }
+
+  int32_t scale = _SCL8;
+  if (i0 == 1)
+    scale = _SCL2;
+  else if (i0 == 2)
+    scale = _SCL4;
+  lea(_jit, 0, _NOREG, r1, scale, r0);
+}
+
+/* Shift entry points.  Each binds rotshr() or rotshi() to one operation;
+   they are plain calls because a void function must not `return` an
+   expression in ISO C. */
+
+/* r0 = r1 << r2. */
+static void
+lshr(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+{
+  rotshr(_jit, X86_SHL, r0, r1, r2);
+}
+
+/* r0 = r1 >> r2, arithmetic (X86_SAR). */
+static void
+rshr(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+{
+  rotshr(_jit, X86_SAR, r0, r1, r2);
+}
+
+/* r0 = r1 >> i0, arithmetic (X86_SAR). */
+static void
+rshi(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t i0)
+{
+  rotshi(_jit, X86_SAR, r0, r1, i0);
+}
+
+/* r0 = r1 >> r2, logical (X86_SHR). */
+static void
+rshr_u(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+{
+  rotshr(_jit, X86_SHR, r0, r1, r2);
+}
+
+/* r0 = r1 >> i0, logical (X86_SHR). */
+static void
+rshi_u(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t i0)
+{
+  rotshi(_jit, X86_SHR, r0, r1, i0);
+}
+
+/* Emit opcode 0x0f 0xaf (two-operand multiply): r0 = r0 * r1, with r0 in
+   the ModRM reg field and r1 in the r/m field. */
+static void
+imulr(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+ rex(_jit, 0, WIDE, r0, _NOREG, r1);
+ ic(_jit, 0x0f);
+ ic(_jit, 0xaf);
+ mrm(_jit, 0x03, r7(r0), r7(r1));
+}
+
+/* r0 = r1 * i0.
+
+   Immediates accepted by can_sign_extend_int_p() use the three-operand
+   multiply: opcode 0x6b with an 8-bit immediate when i0 fits in int8_t,
+   opcode 0x69 with a 32-bit immediate otherwise.
+
+   Larger immediates are loaded into a temporary register and multiplied
+   with the two-operand form.  That form computes r0 = r0 * tmp, so r1 is
+   first copied into r0; movr() emits nothing when r0 and r1 are the same
+   register, so callers that already pass r0 == r1 get unchanged output. */
+static void
+imuli(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0)
+{
+  if (can_sign_extend_int_p(i0)) {
+    rex(_jit, 0, WIDE, r0, _NOREG, r1);
+    if ((int8_t)i0 == i0) {
+      ic(_jit, 0x6b);
+      mrm(_jit, 0x03, r7(r0), r7(r1));
+      ic(_jit, i0);
+    } else {
+      ic(_jit, 0x69);
+      mrm(_jit, 0x03, r7(r0), r7(r1));
+      ii(_jit, i0);
+    }
+  } else {
+    jit_gpr_t reg = get_temp_gpr(_jit);
+    movr(_jit, r0, r1);
+    movi(_jit, jit_gpr_regno(reg), i0);
+    imulr(_jit, r0, jit_gpr_regno(reg));
+    unget_temp_gpr(_jit);
+  }
+}
+
+/* r0 = r1 * r2.  When r0 aliases only r2 the operands are swapped so no
+   move is needed; in every other case r1 is moved into r0 first (a no-op
+   when they are already the same register). */
+static void
+mulr(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+{
+  if (r0 == r2 && r0 != r1) {
+    imulr(_jit, r0, r1);
+    return;
+  }
+  movr(_jit, r0, r1);
+  imulr(_jit, r0, r2);
+}
+
+/* Word-sized ffs(): return the 1-based position of the least significant
+   set bit of i, or 0 when i is zero.  When jit_word_t is wider than int,
+   the low half is examined first and then the bits above bit 31. */
+static int
+ffsw(jit_word_t i)
+{
+  if (sizeof(i) == sizeof(int))
+    return ffs(i);
+
+  int low = ffs((int)i);
+  if (low != 0)
+    return low;
+
+  int high = ffs((int)((uint64_t)i >> 32));
+  return high ? high + 32 : 0;
+}
+
+/* r0 = r1 * i0, with strength reduction:
+     0          -> clear r0
+     1          -> move
+     -1         -> negate
+     2, 4, 8    -> scaled-index lea
+     other positive powers of two -> left shift
+   Everything else is a real multiply: imuli() when the immediate passes
+   can_sign_extend_int_p(); otherwise the immediate is loaded into r0 when
+   r0 is distinct from r1, or handed to imuli() when they coincide. */
+static void
+muli(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0)
+{
+  if (i0 == 0) {
+    ixorr(_jit, r0, r0);
+    return;
+  }
+  if (i0 == 1) {
+    movr(_jit, r0, r1);
+    return;
+  }
+  if (i0 == -1) {
+    negr(_jit, r0, r1);
+    return;
+  }
+  if (i0 == 2 || i0 == 4 || i0 == 8) {
+    const int32_t scale = i0 == 2 ? _SCL2 : i0 == 4 ? _SCL4 : _SCL8;
+    lea(_jit, 0, _NOREG, r1, scale, r0);
+    return;
+  }
+
+  if (i0 > 0 && !(i0 & (i0 - 1))) {
+    lshi(_jit, r0, r1, ffsw(i0) - 1);
+  } else if (can_sign_extend_int_p(i0)) {
+    imuli(_jit, r0, r1, i0);
+  } else if (r0 != r1) {
+    movi(_jit, r0, i0);
+    imulr(_jit, r0, r1);
+  } else {
+    imuli(_jit, r0, r0, i0);
+  }
+}
+
+/* Widening multiply of r2 by r3.  After the multiply, r0 receives the
+   value left in RAX and r1 the value left in RDX.  `sign` selects the
+   signed (umulr) or unsigned (umulr_u) instruction.
+
+   RAX and RDX are saved on the stack and restored afterwards unless they
+   are themselves one of the two destinations. */
+static void
+iqmulr(jit_state_t *_jit, int32_t r0, int32_t r1,
+ int32_t r2, int32_t r3, jit_bool_t sign)
+{
+ if (r0 != _RAX_REGNO && r1 != _RAX_REGNO)
+ pushr(_jit, _RAX_REGNO);
+ if (r0 != _RDX_REGNO && r1 != _RDX_REGNO)
+ pushr(_jit, _RDX_REGNO);
+
+ /* One factor must be in RAX; the other is the explicit operand.  If r3
+ already is RAX, multiply by r2 instead of moving anything. */
+ int32_t mul;
+ if (r3 == _RAX_REGNO) {
+ mul = r2;
+ } else {
+ mul = r3;
+ movr(_jit, _RAX_REGNO, r2);
+ }
+ if (sign)
+ umulr(_jit, mul);
+ else
+ umulr_u(_jit, mul);
+
+ /* Distribute RAX -> r0 and RDX -> r1 without clobbering a value that is
+ still needed: a swap when the destinations are exactly crossed,
+ otherwise order the two moves so RDX is read before it is written. */
+ if (r0 == _RDX_REGNO && r1 == _RAX_REGNO) {
+ xchgr(_jit, _RAX_REGNO, _RDX_REGNO);
+ } else {
+ if (r0 != _RDX_REGNO)
+ movr(_jit, r0, _RAX_REGNO);
+ movr(_jit, r1, _RDX_REGNO);
+ if (r0 == _RDX_REGNO)
+ movr(_jit, r0, _RAX_REGNO);
+ }
+
+ /* Restore in reverse push order. */
+ if (r0 != _RDX_REGNO && r1 != _RDX_REGNO)
+ popr(_jit, _RDX_REGNO);
+ if (r0 != _RAX_REGNO && r1 != _RAX_REGNO)
+ popr(_jit, _RAX_REGNO);
+}
+
+/* Signed widening multiply: iqmulr() with sign = 1.  A plain call, since
+   a void function must not `return` an expression in ISO C. */
+static void
+qmulr(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2, int32_t r3)
+{
+  iqmulr(_jit, r0, r1, r2, r3, 1);
+}
+
+/* Unsigned widening multiply: iqmulr() with sign = 0. */
+static void
+qmulr_u(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2, int32_t r3)
+{
+  iqmulr(_jit, r0, r1, r2, r3, 0);
+}
+
+/* Widening multiply of r2 by the constant i0, results in r0 and r1 as for
+   iqmulr().  A zero constant simply clears both destinations; any other
+   constant is loaded into a temporary register first. */
+static void
+iqmuli(jit_state_t *_jit, int32_t r0, int32_t r1,
+       int32_t r2, jit_word_t i0, jit_bool_t sign)
+{
+  if (i0 == 0) {
+    ixorr(_jit, r0, r0);
+    ixorr(_jit, r1, r1);
+    return;
+  }
+
+  jit_gpr_t factor = get_temp_gpr(_jit);
+  movi(_jit, jit_gpr_regno(factor), i0);
+  iqmulr(_jit, r0, r1, r2, jit_gpr_regno(factor), sign ? 1 : 0);
+  unget_temp_gpr(_jit);
+}
+
+/* Signed widening multiply by a constant: iqmuli() with sign = 1.  A
+   plain call, since a void function must not `return` an expression in
+   ISO C. */
+static void
+qmuli(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2, jit_word_t i0)
+{
+  iqmuli(_jit, r0, r1, r2, i0, 1);
+}
+
+/* Unsigned widening multiply by a constant: iqmuli() with sign = 0. */
+static void
+qmuli_u(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2, jit_word_t i0)
+{
+  iqmuli(_jit, r0, r1, r2, i0, 0);
+}
+
+/* Emit opcode 0x99 (with the WIDE rex prefix): sign-extend RAX into RDX,
+   as the signed divide paths below require before idivr(). */
+static void
+sign_extend_rdx_rax(jit_state_t *_jit)
+{
+ rex(_jit, 0, WIDE, 0, 0, 0);
+ ic(_jit, 0x99);
+}
+
+/* r0 = r1 / r2 (divide != 0) or r0 = r1 % r2 (divide == 0); `sign`
+   selects signed or unsigned division.
+
+   The dividend is placed in RAX with RDX as its upper half; afterwards
+   the quotient is taken from RAX and the remainder from RDX.  RAX and RDX
+   are saved on the stack and restored unless they are the destination. */
+static void
+divremr(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2,
+ jit_bool_t sign, jit_bool_t divide)
+{
+ if (r0 != _RAX_REGNO)
+ pushr(_jit, _RAX_REGNO);
+ if (r0 != _RDX_REGNO)
+ pushr(_jit, _RDX_REGNO);
+
+ /* RAX and RDX are both overwritten before the divide, so a divisor
+ living in either of them is moved to a temporary first. */
+ int tmp_divisor = 0;
+ if (r2 == _RAX_REGNO || r2 == _RDX_REGNO) {
+ jit_gpr_t tmp = get_temp_gpr(_jit);
+ movr(_jit, jit_gpr_regno(tmp), r2);
+ r2 = jit_gpr_regno(tmp);
+ tmp_divisor = 1;
+ }
+
+ movr(_jit, _RAX_REGNO, r1);
+
+ /* Set up the upper half: sign-extension for signed, zero for unsigned. */
+ if (sign) {
+ sign_extend_rdx_rax(_jit);
+ idivr(_jit, r2);
+ } else {
+ ixorr(_jit, _RDX_REGNO, _RDX_REGNO);
+ idivr_u(_jit, r2);
+ }
+
+ if (divide)
+ movr(_jit, r0, _RAX_REGNO);
+ else
+ movr(_jit, r0, _RDX_REGNO);
+
+ if (tmp_divisor)
+ unget_temp_gpr(_jit);
+
+ /* Restore in reverse push order. */
+ if (r0 != _RDX_REGNO)
+ popr(_jit, _RDX_REGNO);
+ if (r0 != _RAX_REGNO)
+ popr(_jit, _RAX_REGNO);
+}
+
+/* Constant-divisor variant of divremr(): the divisor i0 is loaded into a
+   temporary register, which is then used as the register divisor. */
+static void
+divremi(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0,
+        jit_bool_t sign, jit_bool_t divide)
+{
+  jit_gpr_t divisor = get_temp_gpr(_jit);
+
+  movi(_jit, jit_gpr_regno(divisor), i0);
+  divremr(_jit, r0, r1, jit_gpr_regno(divisor), sign, divide);
+
+  unget_temp_gpr(_jit);
+}
+
+/* Division and remainder entry points.  Each binds divremr() or divremi()
+   to one (sign, divide) combination; they are plain calls because a void
+   function must not `return` an expression in ISO C. */
+
+/* r0 = r1 / r2, signed. */
+static void
+divr(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+{
+  divremr(_jit, r0, r1, r2, 1, 1);
+}
+
+/* r0 = r1 / i0, signed. */
+static void
+divi(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0)
+{
+  divremi(_jit, r0, r1, i0, 1, 1);
+}
+
+/* r0 = r1 / r2, unsigned. */
+static void
+divr_u(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+{
+  divremr(_jit, r0, r1, r2, 0, 1);
+}
+
+/* r0 = r1 / i0, unsigned. */
+static void
+divi_u(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0)
+{
+  divremi(_jit, r0, r1, i0, 0, 1);
+}
+
+
+/* r0 = r1 % r2, signed. */
+static void
+remr(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+{
+  divremr(_jit, r0, r1, r2, 1, 0);
+}
+
+/* r0 = r1 % i0, signed. */
+static void
+remi(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0)
+{
+  divremi(_jit, r0, r1, i0, 1, 0);
+}
+
+/* r0 = r1 % r2, unsigned. */
+static void
+remr_u(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+{
+  divremr(_jit, r0, r1, r2, 0, 0);
+}
+
+/* r0 = r1 % i0, unsigned. */
+static void
+remi_u(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0)
+{
+  divremi(_jit, r0, r1, i0, 0, 0);
+}
+
+/* Combined divide: r2 is divided by r3; r0 receives the value left in RAX
+   (quotient) and r1 the value left in RDX (remainder).  `sign` selects
+   signed or unsigned division.
+
+   RAX and RDX are saved on the stack and restored afterwards unless they
+   are themselves one of the two destinations. */
+static void
+iqdivr(jit_state_t *_jit, int32_t r0, int32_t r1,
+ int32_t r2, int32_t r3, jit_bool_t sign)
+{
+ if (r0 != _RAX_REGNO && r1 != _RAX_REGNO)
+ pushr(_jit, _RAX_REGNO);
+ if (r0 != _RDX_REGNO && r1 != _RDX_REGNO)
+ pushr(_jit, _RDX_REGNO);
+
+ /* RAX and RDX are both overwritten before the divide, so a divisor
+ living in either of them is moved to a temporary first. */
+ int tmp_divisor = 0;
+ if (r3 == _RAX_REGNO || r3 == _RDX_REGNO) {
+ jit_gpr_t tmp = get_temp_gpr(_jit);
+ movr(_jit, jit_gpr_regno(tmp), r3);
+ r3 = jit_gpr_regno(tmp);
+ tmp_divisor = 1;
+ }
+
+ movr(_jit, _RAX_REGNO, r2);
+
+ /* Set up the upper half: sign-extension for signed, zero for unsigned. */
+ if (sign) {
+ sign_extend_rdx_rax(_jit);
+ idivr(_jit, r3);
+ } else {
+ ixorr(_jit, _RDX_REGNO, _RDX_REGNO);
+ idivr_u(_jit, r3);
+ }
+
+ /* Distribute RAX -> r0 and RDX -> r1 without clobbering a value that is
+ still needed: a swap when the destinations are exactly crossed,
+ otherwise order the two moves so RDX is read before it is written. */
+ if (r0 == _RDX_REGNO && r1 == _RAX_REGNO) {
+ xchgr(_jit, _RAX_REGNO, _RDX_REGNO);
+ } else {
+ if (r0 != _RDX_REGNO)
+ movr(_jit, r0, _RAX_REGNO);
+ movr(_jit, r1, _RDX_REGNO);
+ if (r0 == _RDX_REGNO)
+ movr(_jit, r0, _RAX_REGNO);
+ }
+
+ if (tmp_divisor)
+ unget_temp_gpr(_jit);
+
+ /* Restore in reverse push order. */
+ if (r0 != _RDX_REGNO && r1 != _RDX_REGNO)
+ popr(_jit, _RDX_REGNO);
+ if (r0 != _RAX_REGNO && r1 != _RAX_REGNO)
+ popr(_jit, _RAX_REGNO);
+}
+
+/* Signed combined divide: iqdivr() with sign = 1.  A plain call, since a
+   void function must not `return` an expression in ISO C. */
+static void
+qdivr(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2, int32_t r3)
+{
+  iqdivr(_jit, r0, r1, r2, r3, 1);
+}
+
+/* Unsigned combined divide: iqdivr() with sign = 0. */
+static void
+qdivr_u(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2, int32_t r3)
+{
+  iqdivr(_jit, r0, r1, r2, r3, 0);
+}
+
+/* Constant-divisor variant of iqdivr(): the divisor i0 is loaded into a
+   temporary register, which is then used as the register divisor. */
+static void
+iqdivi(jit_state_t *_jit, int32_t r0, int32_t r1,
+       int32_t r2, jit_word_t i0, jit_bool_t sign)
+{
+  jit_gpr_t divisor = get_temp_gpr(_jit);
+
+  movi(_jit, jit_gpr_regno(divisor), i0);
+  iqdivr(_jit, r0, r1, r2, jit_gpr_regno(divisor), sign ? 1 : 0);
+
+  unget_temp_gpr(_jit);
+}
+
+/* Signed combined divide by a constant: iqdivi() with sign = 1.  A plain
+   call, since a void function must not `return` an expression in ISO C. */
+static void
+qdivi(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2, jit_word_t i0)
+{
+  iqdivi(_jit, r0, r1, r2, i0, 1);
+}
+
+/* Unsigned combined divide by a constant: iqdivi() with sign = 0. */
+static void
+qdivi_u(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2, jit_word_t i0)
+{
+  iqdivi(_jit, r0, r1, r2, i0, 0);
+}
+
+/* r0 = ~r1: copy r1 into r0 (a no-op when they coincide), then complement
+   r0 in place. */
+static void
+comr(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+ movr(_jit, r0, r1);
+ icomr(_jit, r0);
+}
+
+/* r0 = r1 & r2.  Identical sources reduce to a move.  When r0 aliases
+   only r2 the operands are swapped; otherwise r1 is moved into r0 first
+   (a no-op when they already coincide). */
+static void
+andr(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+{
+  if (r1 == r2) {
+    movr(_jit, r0, r1);
+    return;
+  }
+  if (r0 == r2 && r0 != r1) {
+    iandr(_jit, r0, r1);
+    return;
+  }
+  movr(_jit, r0, r1);
+  iandr(_jit, r0, r2);
+}
+
+/* r0 = r1 & i0.  A zero mask clears r0 and an all-ones mask is a move.
+   With distinct registers the mask is loaded into r0 and AND-ed with r1;
+   in place, the immediate form is used when can_sign_extend_int_p()
+   allows it and a temporary register otherwise. */
+static void
+andi(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0)
+{
+  if (i0 == 0) {
+    ixorr(_jit, r0, r0);
+    return;
+  }
+  if (i0 == -1) {
+    movr(_jit, r0, r1);
+    return;
+  }
+  if (r0 != r1) {
+    movi(_jit, r0, i0);
+    iandr(_jit, r0, r1);
+    return;
+  }
+  if (can_sign_extend_int_p(i0)) {
+    iandi(_jit, r0, i0);
+    return;
+  }
+  jit_gpr_t mask = get_temp_gpr(_jit);
+  movi(_jit, jit_gpr_regno(mask), i0);
+  iandr(_jit, r0, jit_gpr_regno(mask));
+  unget_temp_gpr(_jit);
+}
+
+/* r0 = r1 | r2.  Identical sources reduce to a move.  When r0 aliases
+   only r2 the operands are swapped; otherwise r1 is moved into r0 first
+   (a no-op when they already coincide). */
+static void
+orr(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+{
+  if (r1 == r2) {
+    movr(_jit, r0, r1);
+    return;
+  }
+  if (r0 == r2 && r0 != r1) {
+    iorr(_jit, r0, r1);
+    return;
+  }
+  movr(_jit, r0, r1);
+  iorr(_jit, r0, r2);
+}
+
+/* r0 = r1 | i0.  Zero is a move and all-ones loads -1 directly.
+   Immediates accepted by can_sign_extend_int_p() use the immediate form;
+   larger ones are loaded into r0 when it is distinct from r1, or into a
+   temporary register otherwise. */
+static void
+ori(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0)
+{
+  if (i0 == 0) {
+    movr(_jit, r0, r1);
+    return;
+  }
+  if (i0 == -1) {
+    movi(_jit, r0, -1);
+    return;
+  }
+  if (can_sign_extend_int_p(i0)) {
+    movr(_jit, r0, r1);
+    iori(_jit, r0, i0);
+    return;
+  }
+  if (r0 != r1) {
+    movi(_jit, r0, i0);
+    iorr(_jit, r0, r1);
+    return;
+  }
+  jit_gpr_t bits = get_temp_gpr(_jit);
+  movi(_jit, jit_gpr_regno(bits), i0);
+  iorr(_jit, r0, jit_gpr_regno(bits));
+  unget_temp_gpr(_jit);
+}
+
+/* r0 = r1 ^ r2.  Identical sources yield zero directly.  When r0 aliases
+   only r2 the operands are swapped; otherwise r1 is moved into r0 first
+   (a no-op when they already coincide). */
+static void
+xorr(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+{
+  if (r1 == r2) {
+    ixorr(_jit, r0, r0);
+    return;
+  }
+  if (r0 == r2 && r0 != r1) {
+    ixorr(_jit, r0, r1);
+    return;
+  }
+  movr(_jit, r0, r1);
+  ixorr(_jit, r0, r2);
+}
+
+/* r0 = r1 ^ i0.  Zero is a move and all-ones is a complement.
+   Immediates accepted by can_sign_extend_int_p() use the immediate form;
+   larger ones are loaded into r0 when it is distinct from r1, or into a
+   temporary register otherwise. */
+static void
+xori(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0)
+{
+  if (i0 == 0) {
+    movr(_jit, r0, r1);
+    return;
+  }
+  if (i0 == -1) {
+    comr(_jit, r0, r1);
+    return;
+  }
+  if (can_sign_extend_int_p(i0)) {
+    movr(_jit, r0, r1);
+    ixori(_jit, r0, i0);
+    return;
+  }
+  if (r0 != r1) {
+    movi(_jit, r0, i0);
+    ixorr(_jit, r0, r1);
+    return;
+  }
+  jit_gpr_t bits = get_temp_gpr(_jit);
+  movi(_jit, jit_gpr_regno(bits), i0);
+  ixorr(_jit, r0, jit_gpr_regno(bits));
+  unget_temp_gpr(_jit);
+}
+
+/* r0 = sign-extended low byte of r1.  If r1 fails reg8_p() it is first
+   copied into a temporary register. */
+static void
+extr_c(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  if (!reg8_p(r1)) {
+    jit_gpr_t byte_src = get_temp_gpr(_jit);
+    movr(_jit, jit_gpr_regno(byte_src), r1);
+    movcr(_jit, r0, jit_gpr_regno(byte_src));
+    unget_temp_gpr(_jit);
+    return;
+  }
+  movcr(_jit, r0, r1);
+}
+
+/* r0 = zero-extended low byte of r1.  If r1 fails reg8_p() it is first
+   copied into a temporary register. */
+static void
+extr_uc(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  if (!reg8_p(r1)) {
+    jit_gpr_t byte_src = get_temp_gpr(_jit);
+    movr(_jit, jit_gpr_regno(byte_src), r1);
+    movcr_u(_jit, r0, jit_gpr_regno(byte_src));
+    unget_temp_gpr(_jit);
+    return;
+  }
+  movcr_u(_jit, r0, r1);
+}
+
+/* r0 = sign-extended low 16 bits of r1.  A plain call to movsr(), since a
+   void function must not `return` an expression in ISO C. */
+static void
+extr_s(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  movsr(_jit, r0, r1);
+}
+
+/* r0 = zero-extended low 16 bits of r1, via movsr_u(). */
+static void
+extr_us(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  movsr_u(_jit, r0, r1);
+}
+
+#if __X64
+/* r0 = sign-extended low 32 bits of r1.  A plain call to movir(), since a
+   void function must not `return` an expression in ISO C. */
+static void
+extr_i(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  movir(_jit, r0, r1);
+}
+/* r0 = zero-extended low 32 bits of r1, via movir_u(). */
+static void
+extr_ui(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  movir_u(_jit, r0, r1);
+}
+#endif
+
+/* r0 = byte-swapped low 16 bits of r1: zero-extend the low 16 bits into
+   r0, then rotate the 16-bit register right by 8 (operand-size prefix
+   0x66, opcode 0xc1 with reg field X86_ROR, immediate 8). */
+static void
+bswapr_us(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+ extr_us(_jit, r0, r1);
+ ic(_jit, 0x66);
+ rex(_jit, 0, 0, _NOREG, _NOREG, r0);
+ ic(_jit, 0xc1);
+ mrm(_jit, 0x03, X86_ROR, r7(r0));
+ ic(_jit, 8);
+}
+
+/* r0 = byte-swapped low 32 bits of r1: copy r1 into r0, then emit
+   0x0f 0xc8+reg without REX.W (32-bit byte swap). */
+static void
+bswapr_ui(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+ movr(_jit, r0, r1);
+ rex(_jit, 0, 0, _NOREG, _NOREG, r0);
+ ic(_jit, 0x0f);
+ ic(_jit, 0xc8 | r7(r0));
+}
+
+#if __X64
+/* r0 = byte-swapped r1 (64 bits): same as bswapr_ui() but with REX.W
+   set.  Only built when __X64. */
+static void
+bswapr_ul(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+ movr(_jit, r0, r1);
+ rex(_jit, 0, 1, _NOREG, _NOREG, r0);
+ ic(_jit, 0x0f);
+ ic(_jit, 0xc8 | r7(r0));
+}
+#endif
+
+/* Load a signed byte from the address in r1 into r0
+ * (opcode 0x0F 0xBE, MOVSX r, r/m8). */
+static void
+ldr_c(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+ rex(_jit, 0, WIDE, r0, _NOREG, r1);
+ ic(_jit, 0x0f);
+ ic(_jit, 0xbe);
+ rx(_jit, r0, 0, r1, _NOREG, _SCL1);
+}
+
+/* Load a signed byte from absolute address i0 into r0 (0x0F 0xBE).
+ * If i0 fails can_sign_extend_int_p() the address is first placed in a
+ * scratch register and ldr_c is used. */
+static void
+ldi_c(jit_state_t *_jit, int32_t r0, jit_word_t i0)
+{
+  if (!can_sign_extend_int_p(i0)) {
+    int32_t addr = jit_gpr_regno(get_temp_gpr(_jit));
+    movi(_jit, addr, i0);
+    ldr_c(_jit, r0, addr);
+    unget_temp_gpr(_jit);
+    return;
+  }
+  rex(_jit, 0, WIDE, r0, _NOREG, _NOREG);
+  ic(_jit, 0x0f);
+  ic(_jit, 0xbe);
+  rx(_jit, r0, i0, _NOREG, _NOREG, _SCL1);
+}
+
+/* Load an unsigned byte from the address in r1 into r0
+ * (opcode 0x0F 0xB6, MOVZX r, r/m8). */
+static void
+ldr_uc(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+ rex(_jit, 0, WIDE, r0, _NOREG, r1);
+ ic(_jit, 0x0f);
+ ic(_jit, 0xb6);
+ rx(_jit, r0, 0, r1, _NOREG, _SCL1);
+}
+
+/* Load an unsigned byte from absolute address i0 into r0 (0x0F 0xB6).
+ * If i0 fails can_sign_extend_int_p() the address is first placed in a
+ * scratch register and ldr_uc is used. */
+static void
+ldi_uc(jit_state_t *_jit, int32_t r0, jit_word_t i0)
+{
+  if (!can_sign_extend_int_p(i0)) {
+    int32_t addr = jit_gpr_regno(get_temp_gpr(_jit));
+    movi(_jit, addr, i0);
+    ldr_uc(_jit, r0, addr);
+    unget_temp_gpr(_jit);
+    return;
+  }
+  rex(_jit, 0, WIDE, r0, _NOREG, _NOREG);
+  ic(_jit, 0x0f);
+  ic(_jit, 0xb6);
+  rx(_jit, r0, i0, _NOREG, _NOREG, _SCL1);
+}
+
+/* Load a signed 16-bit value from the address in r1 into r0
+ * (opcode 0x0F 0xBF, MOVSX r, r/m16). */
+static void
+ldr_s(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+ rex(_jit, 0, WIDE, r0, _NOREG, r1);
+ ic(_jit, 0x0f);
+ ic(_jit, 0xbf);
+ rx(_jit, r0, 0, r1, _NOREG, _SCL1);
+}
+
+/* Load a signed 16-bit value from absolute address i0 into r0
+ * (0x0F 0xBF).  If i0 fails can_sign_extend_int_p() the address is first
+ * placed in a scratch register and ldr_s is used. */
+static void
+ldi_s(jit_state_t *_jit, int32_t r0, jit_word_t i0)
+{
+  if (!can_sign_extend_int_p(i0)) {
+    int32_t addr = jit_gpr_regno(get_temp_gpr(_jit));
+    movi(_jit, addr, i0);
+    ldr_s(_jit, r0, addr);
+    unget_temp_gpr(_jit);
+    return;
+  }
+  rex(_jit, 0, WIDE, r0, _NOREG, _NOREG);
+  ic(_jit, 0x0f);
+  ic(_jit, 0xbf);
+  rx(_jit, r0, i0, _NOREG, _NOREG, _SCL1);
+}
+
+/* Load an unsigned 16-bit value from the address in r1 into r0
+ * (opcode 0x0F 0xB7, MOVZX r, r/m16). */
+static void
+ldr_us(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+ rex(_jit, 0, WIDE, r0, _NOREG, r1);
+ ic(_jit, 0x0f);
+ ic(_jit, 0xb7);
+ rx(_jit, r0, 0, r1, _NOREG, _SCL1);
+}
+
+/* Load an unsigned 16-bit value from absolute address i0 into r0
+ * (0x0F 0xB7).  If i0 fails can_sign_extend_int_p() the address is first
+ * placed in a scratch register and ldr_us is used. */
+static void
+ldi_us(jit_state_t *_jit, int32_t r0, jit_word_t i0)
+{
+  if (!can_sign_extend_int_p(i0)) {
+    int32_t addr = jit_gpr_regno(get_temp_gpr(_jit));
+    movi(_jit, addr, i0);
+    ldr_us(_jit, r0, addr);
+    unget_temp_gpr(_jit);
+    return;
+  }
+  rex(_jit, 0, WIDE, r0, _NOREG, _NOREG);
+  ic(_jit, 0x0f);
+  ic(_jit, 0xb7);
+  rx(_jit, r0, i0, _NOREG, _NOREG, _SCL1);
+}
+
+/* Load a signed 32-bit value from the address in r1 into r0.
+ * With __X64: REX (WIDE) + opcode 0x63 (MOVSXD).  Otherwise a plain
+ * 0x8B MOV r, r/m. */
+static void
+ldr_i(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+#if __X64
+ rex(_jit, 0, WIDE, r0, _NOREG, r1);
+ ic(_jit, 0x63);
+#else
+ ic(_jit, 0x8b);
+#endif
+ rx(_jit, r0, 0, r1, _NOREG, _SCL1);
+}
+
+/* Load a signed 32-bit value from absolute address i0 into r0
+ * (MOVSXD 0x63 with __X64, MOV 0x8B otherwise).  If i0 fails
+ * can_sign_extend_int_p() the address goes through a scratch register
+ * and ldr_i. */
+static void
+ldi_i(jit_state_t *_jit, int32_t r0, jit_word_t i0)
+{
+  if (!can_sign_extend_int_p(i0)) {
+    int32_t addr = jit_gpr_regno(get_temp_gpr(_jit));
+    movi(_jit, addr, i0);
+    ldr_i(_jit, r0, addr);
+    unget_temp_gpr(_jit);
+    return;
+  }
+#if __X64
+  rex(_jit, 0, WIDE, r0, _NOREG, _NOREG);
+  ic(_jit, 0x63);
+#else
+  ic(_jit, 0x8b);
+#endif
+  rx(_jit, r0, i0, _NOREG, _NOREG, _SCL1);
+}
+
+#if __X64
+/* Load an unsigned 32-bit value from the address in r1 into r0
+ * (x86-64 only).  Uses a 32-bit MOV r, r/m (0x8B, no REX.W), which
+ * zero-extends into the full register.  This matches ldxr_ui/ldxi_ui;
+ * the previous 0x63 (MOVSXD) without REX.W is a discouraged encoding
+ * of the same operation. */
+static void
+ldr_ui(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  rex(_jit, 0, 0, r0, _NOREG, r1);
+  ic(_jit, 0x8b);
+  rx(_jit, r0, 0, r1, _NOREG, _SCL1);
+}
+
+/* Load an unsigned 32-bit value from absolute address i0 into r0
+ * (x86-64 only).  Uses a 32-bit MOV r, r/m (0x8B, no REX.W), matching
+ * ldxr_ui/ldxi_ui, instead of the discouraged 0x63 (MOVSXD) without
+ * REX.W.  If i0 fails can_sign_extend_int_p() the address is loaded
+ * into a scratch register and ldr_ui is used. */
+static void
+ldi_ui(jit_state_t *_jit, int32_t r0, jit_word_t i0)
+{
+  if (can_sign_extend_int_p(i0)) {
+    rex(_jit, 0, 0, r0, _NOREG, _NOREG);
+    ic(_jit, 0x8b);
+    rx(_jit, r0, i0, _NOREG, _NOREG, _SCL1);
+  } else {
+    jit_gpr_t reg = get_temp_gpr(_jit);
+    movi(_jit, jit_gpr_regno(reg), i0);
+    ldr_ui(_jit, r0, jit_gpr_regno(reg));
+    unget_temp_gpr(_jit);
+  }
+}
+
+/* Load a 64-bit value from the address in r1 into r0: wide REX + 0x8B
+ * (MOV r, r/m).  x86-64 only. */
+static void
+ldr_l(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+ rex(_jit, 0, 1, r0, _NOREG, r1);
+ ic(_jit, 0x8b);
+ rx(_jit, r0, 0, r1, _NOREG, _SCL1);
+}
+
+/* Load a 64-bit value from absolute address i0 into r0 (wide REX +
+ * 0x8B).  If i0 fails can_sign_extend_int_p() the address goes through
+ * a scratch register and ldr_l.  x86-64 only. */
+static void
+ldi_l(jit_state_t *_jit, int32_t r0, jit_word_t i0)
+{
+  if (!can_sign_extend_int_p(i0)) {
+    int32_t addr = jit_gpr_regno(get_temp_gpr(_jit));
+    movi(_jit, addr, i0);
+    ldr_l(_jit, r0, addr);
+    unget_temp_gpr(_jit);
+    return;
+  }
+  rex(_jit, 0, 1, r0, _NOREG, _NOREG);
+  ic(_jit, 0x8b);
+  rx(_jit, r0, i0, _NOREG, _NOREG, _SCL1);
+}
+#endif
+
+/* Indexed signed-byte load: r0 = byte at (r1 + r2), opcode 0x0F 0xBE.
+ * r2 is passed to rx() as the base and r1 as the index, scale _SCL1. */
+static void
+ldxr_c(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+{
+ rex(_jit, 0, WIDE, r0, r1, r2);
+ ic(_jit, 0x0f);
+ ic(_jit, 0xbe);
+ rx(_jit, r0, 0, r2, r1, _SCL1);
+}
+
+/* Signed-byte load with displacement: r0 = byte at (r1 + i0), opcode
+ * 0x0F 0xBE.  If i0 fails can_sign_extend_int_p() the offset is loaded
+ * into a scratch register and ldxr_c is used. */
+static void
+ldxi_c(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0)
+{
+  if (!can_sign_extend_int_p(i0)) {
+    int32_t off = jit_gpr_regno(get_temp_gpr(_jit));
+    movi(_jit, off, i0);
+    ldxr_c(_jit, r0, r1, off);
+    unget_temp_gpr(_jit);
+    return;
+  }
+  rex(_jit, 0, WIDE, r0, _NOREG, r1);
+  ic(_jit, 0x0f);
+  ic(_jit, 0xbe);
+  rx(_jit, r0, i0, r1, _NOREG, _SCL1);
+}
+
+/* Indexed unsigned-byte load: r0 = byte at (r1 + r2), opcode 0x0F 0xB6.
+ * r2 is passed to rx() as the base and r1 as the index, scale _SCL1. */
+static void
+ldxr_uc(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+{
+ rex(_jit, 0, WIDE, r0, r1, r2);
+ ic(_jit, 0x0f);
+ ic(_jit, 0xb6);
+ rx(_jit, r0, 0, r2, r1, _SCL1);
+}
+
+/* Unsigned-byte load with displacement: r0 = byte at (r1 + i0), opcode
+ * 0x0F 0xB6.  If i0 fails can_sign_extend_int_p() the offset is loaded
+ * into a scratch register and ldxr_uc is used. */
+static void
+ldxi_uc(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0)
+{
+  if (!can_sign_extend_int_p(i0)) {
+    int32_t off = jit_gpr_regno(get_temp_gpr(_jit));
+    movi(_jit, off, i0);
+    ldxr_uc(_jit, r0, r1, off);
+    unget_temp_gpr(_jit);
+    return;
+  }
+  rex(_jit, 0, WIDE, r0, _NOREG, r1);
+  ic(_jit, 0x0f);
+  ic(_jit, 0xb6);
+  rx(_jit, r0, i0, r1, _NOREG, _SCL1);
+}
+
+/* Indexed signed 16-bit load: r0 = halfword at (r1 + r2), opcode
+ * 0x0F 0xBF.  r2 is the rx() base, r1 the index, scale _SCL1. */
+static void
+ldxr_s(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+{
+ rex(_jit, 0, WIDE, r0, r1, r2);
+ ic(_jit, 0x0f);
+ ic(_jit, 0xbf);
+ rx(_jit, r0, 0, r2, r1, _SCL1);
+}
+
+/* Signed 16-bit load with displacement: r0 = halfword at (r1 + i0),
+ * opcode 0x0F 0xBF.  If i0 fails can_sign_extend_int_p() the offset is
+ * loaded into a scratch register and ldxr_s is used. */
+static void
+ldxi_s(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0)
+{
+  if (!can_sign_extend_int_p(i0)) {
+    int32_t off = jit_gpr_regno(get_temp_gpr(_jit));
+    movi(_jit, off, i0);
+    ldxr_s(_jit, r0, r1, off);
+    unget_temp_gpr(_jit);
+    return;
+  }
+  rex(_jit, 0, WIDE, r0, _NOREG, r1);
+  ic(_jit, 0x0f);
+  ic(_jit, 0xbf);
+  rx(_jit, r0, i0, r1, _NOREG, _SCL1);
+}
+
+/* Indexed unsigned 16-bit load: r0 = halfword at (r1 + r2), opcode
+ * 0x0F 0xB7.  r2 is the rx() base, r1 the index, scale _SCL1. */
+static void
+ldxr_us(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+{
+ rex(_jit, 0, WIDE, r0, r1, r2);
+ ic(_jit, 0x0f);
+ ic(_jit, 0xb7);
+ rx(_jit, r0, 0, r2, r1, _SCL1);
+}
+
+/* Unsigned 16-bit load with displacement: r0 = halfword at (r1 + i0),
+ * opcode 0x0F 0xB7.  If i0 fails can_sign_extend_int_p() the offset is
+ * loaded into a scratch register and ldxr_us is used. */
+static void
+ldxi_us(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0)
+{
+  if (!can_sign_extend_int_p(i0)) {
+    int32_t off = jit_gpr_regno(get_temp_gpr(_jit));
+    movi(_jit, off, i0);
+    ldxr_us(_jit, r0, r1, off);
+    unget_temp_gpr(_jit);
+    return;
+  }
+  rex(_jit, 0, WIDE, r0, _NOREG, r1);
+  ic(_jit, 0x0f);
+  ic(_jit, 0xb7);
+  rx(_jit, r0, i0, r1, _NOREG, _SCL1);
+}
+
+/* Indexed signed 32-bit load: r0 = word at (r1 + r2).  With __X64 this
+ * is REX (WIDE) + 0x63 (MOVSXD); otherwise a plain 0x8B MOV. */
+static void
+ldxr_i(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+{
+#if __X64
+ rex(_jit, 0, WIDE, r0, r1, r2);
+ ic(_jit, 0x63);
+#else
+ ic(_jit, 0x8b);
+#endif
+ rx(_jit, r0, 0, r2, r1, _SCL1);
+}
+
+/* Signed 32-bit load with displacement: r0 = word at (r1 + i0)
+ * (MOVSXD 0x63 with __X64, MOV 0x8B otherwise).  If i0 fails
+ * can_sign_extend_int_p() the offset goes through a scratch register
+ * and ldxr_i. */
+static void
+ldxi_i(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0)
+{
+  if (!can_sign_extend_int_p(i0)) {
+    int32_t off = jit_gpr_regno(get_temp_gpr(_jit));
+    movi(_jit, off, i0);
+    ldxr_i(_jit, r0, r1, off);
+    unget_temp_gpr(_jit);
+    return;
+  }
+#if __X64
+  rex(_jit, 0, WIDE, r0, _NOREG, r1);
+  ic(_jit, 0x63);
+#else
+  ic(_jit, 0x8b);
+#endif
+  rx(_jit, r0, i0, r1, _NOREG, _SCL1);
+}
+
+#if __X64
+/* Indexed unsigned 32-bit load: r0 = word at (r1 + r2), using a
+ * non-wide 0x8B MOV r, r/m.  x86-64 only. */
+static void
+ldxr_ui(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+{
+ rex(_jit, 0, 0, r0, r1, r2);
+ ic(_jit, 0x8b);
+ rx(_jit, r0, 0, r2, r1, _SCL1);
+}
+
+/* Unsigned 32-bit load with displacement: r0 = word at (r1 + i0), using
+ * a non-wide 0x8B MOV.  If i0 fails can_sign_extend_int_p() the offset
+ * goes through a scratch register and ldxr_ui.  x86-64 only. */
+static void
+ldxi_ui(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0)
+{
+  if (!can_sign_extend_int_p(i0)) {
+    int32_t off = jit_gpr_regno(get_temp_gpr(_jit));
+    movi(_jit, off, i0);
+    ldxr_ui(_jit, r0, r1, off);
+    unget_temp_gpr(_jit);
+    return;
+  }
+  rex(_jit, 0, 0, r0, _NOREG, r1);
+  ic(_jit, 0x8b);
+  rx(_jit, r0, i0, r1, _NOREG, _SCL1);
+}
+
+/* Indexed 64-bit load: r0 = quadword at (r1 + r2), wide REX + 0x8B.
+ * x86-64 only. */
+static void
+ldxr_l(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+{
+ rex(_jit, 0, 1, r0, r1, r2);
+ ic(_jit, 0x8b);
+ rx(_jit, r0, 0, r2, r1, _SCL1);
+}
+
+/* 64-bit load with displacement: r0 = quadword at (r1 + i0), wide REX +
+ * 0x8B.  If i0 fails can_sign_extend_int_p() the offset goes through a
+ * scratch register and ldxr_l.  x86-64 only. */
+static void
+ldxi_l(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0)
+{
+  if (!can_sign_extend_int_p(i0)) {
+    int32_t off = jit_gpr_regno(get_temp_gpr(_jit));
+    movi(_jit, off, i0);
+    ldxr_l(_jit, r0, r1, off);
+    unget_temp_gpr(_jit);
+    return;
+  }
+  rex(_jit, 0, 1, r0, _NOREG, r1);
+  ic(_jit, 0x8b);
+  rx(_jit, r0, i0, r1, _NOREG, _SCL1);
+}
+#endif
+
+static void stxi_c(jit_state_t *_jit, jit_word_t i0, int32_t r0, int32_t r1);
+
+/* Store the low byte of r1 at the address in r0 (0x88, MOV r/m8, r8).
+ * When r1 fails reg8_p() the work is delegated to stxi_c with a zero
+ * displacement.  stxi_c is called as a plain statement: a void function
+ * must not `return` an expression (ISO C 6.8.6.4). */
+static void
+str_c(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  if (reg8_p(r1)) {
+    rex(_jit, 0, 0, r1, _NOREG, r0);
+    ic(_jit, 0x88);
+    rx(_jit, r1, 0, r0, _NOREG, _SCL1);
+  } else {
+    // See comment in stxi_c.
+    stxi_c(_jit, 0, r0, r1);
+  }
+}
+
+/* Store the low byte of r0 at absolute address i0 (0x88).
+ *  - i0 fails can_sign_extend_int_p(): address goes into a scratch
+ *    register and str_c does the store.
+ *  - r0 fails reg8_p(): the value is copied into a scratch register,
+ *    which is then used as the byte source. */
+static void
+sti_c(jit_state_t *_jit, jit_word_t i0, int32_t r0)
+{
+  if (!can_sign_extend_int_p(i0)) {
+    int32_t addr = jit_gpr_regno(get_temp_gpr(_jit));
+    movi(_jit, addr, i0);
+    str_c(_jit, addr, r0);
+    unget_temp_gpr(_jit);
+    return;
+  }
+  if (reg8_p(r0)) {
+    rex(_jit, 0, 0, r0, _NOREG, _NOREG);
+    ic(_jit, 0x88);
+    rx(_jit, r0, i0, _NOREG, _NOREG, _SCL1);
+    return;
+  }
+  int32_t byte_src = jit_gpr_regno(get_temp_gpr(_jit));
+  movr(_jit, byte_src, r0);
+  rex(_jit, 0, 0, byte_src, _NOREG, _NOREG);
+  ic(_jit, 0x88);
+  rx(_jit, byte_src, i0, _NOREG, _NOREG, _SCL1);
+  unget_temp_gpr(_jit);
+}
+
+/* Store the low 16 bits of r1 at the address in r0: operand-size prefix
+ * 0x66 followed by 0x89 (MOV r/m, r). */
+static void
+str_s(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+ ic(_jit, 0x66);
+ rex(_jit, 0, 0, r1, _NOREG, r0);
+ ic(_jit, 0x89);
+ rx(_jit, r1, 0, r0, _NOREG, _SCL1);
+}
+
+/* Store the low 16 bits of r0 at absolute address i0 (0x66 + 0x89).
+ * If i0 fails can_sign_extend_int_p() the address goes through a
+ * scratch register and str_s. */
+static void
+sti_s(jit_state_t *_jit, jit_word_t i0, int32_t r0)
+{
+  if (!can_sign_extend_int_p(i0)) {
+    int32_t addr = jit_gpr_regno(get_temp_gpr(_jit));
+    movi(_jit, addr, i0);
+    str_s(_jit, addr, r0);
+    unget_temp_gpr(_jit);
+    return;
+  }
+  ic(_jit, 0x66);
+  rex(_jit, 0, 0, r0, _NOREG, _NOREG);
+  ic(_jit, 0x89);
+  rx(_jit, r0, i0, _NOREG, _NOREG, _SCL1);
+}
+
+/* Store the low 32 bits of r1 at the address in r0 (non-wide 0x89,
+ * MOV r/m, r). */
+static void
+str_i(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+ rex(_jit, 0, 0, r1, _NOREG, r0);
+ ic(_jit, 0x89);
+ rx(_jit, r1, 0, r0, _NOREG, _SCL1);
+}
+
+/* Store the low 32 bits of r0 at absolute address i0 (non-wide 0x89).
+ * If i0 fails can_sign_extend_int_p() the address goes through a
+ * scratch register and str_i. */
+static void
+sti_i(jit_state_t *_jit, jit_word_t i0, int32_t r0)
+{
+  if (!can_sign_extend_int_p(i0)) {
+    int32_t addr = jit_gpr_regno(get_temp_gpr(_jit));
+    movi(_jit, addr, i0);
+    str_i(_jit, addr, r0);
+    unget_temp_gpr(_jit);
+    return;
+  }
+  rex(_jit, 0, 0, r0, _NOREG, _NOREG);
+  ic(_jit, 0x89);
+  rx(_jit, r0, i0, _NOREG, _NOREG, _SCL1);
+}
+
+#if __X64
+/* Store the 64-bit value in r1 at the address in r0 (wide REX + 0x89).
+ * x86-64 only. */
+static void
+str_l(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+ rex(_jit, 0, 1, r1, _NOREG, r0);
+ ic(_jit, 0x89);
+ rx(_jit, r1, 0, r0, _NOREG, _SCL1);
+}
+
+/* Store the 64-bit value in r0 at absolute address i0 (wide REX + 0x89).
+ * If i0 fails can_sign_extend_int_p() the address goes through a
+ * scratch register and str_l.  x86-64 only. */
+static void
+sti_l(jit_state_t *_jit, jit_word_t i0, int32_t r0)
+{
+  if (!can_sign_extend_int_p(i0)) {
+    int32_t addr = jit_gpr_regno(get_temp_gpr(_jit));
+    movi(_jit, addr, i0);
+    str_l(_jit, addr, r0);
+    unget_temp_gpr(_jit);
+    return;
+  }
+  rex(_jit, 0, 1, r0, _NOREG, _NOREG);
+  ic(_jit, 0x89);
+  rx(_jit, r0, i0, _NOREG, _NOREG, _SCL1);
+}
+#endif
+
+/* Indexed byte store: low byte of r2 goes to address (r0 + r1), opcode
+ * 0x88.  When r2 fails reg8_p() it is copied to a scratch register that
+ * serves as the byte source. */
+static void
+stxr_c(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+{
+  if (!reg8_p(r2)) {
+    int32_t byte_src = jit_gpr_regno(get_temp_gpr(_jit));
+    movr(_jit, byte_src, r2);
+    rex(_jit, 0, 0, byte_src, r1, r0);
+    ic(_jit, 0x88);
+    rx(_jit, byte_src, 0, r0, r1, _SCL1);
+    unget_temp_gpr(_jit);
+    return;
+  }
+  rex(_jit, 0, 0, r2, r1, r0);
+  ic(_jit, 0x88);
+  rx(_jit, r2, 0, r0, r1, _SCL1);
+}
+
+/* Byte store with displacement: low byte of r1 goes to address
+ * (r0 + i0), opcode 0x88.  Three paths:
+ *   1. i0 encodable and r1 passes reg8_p(): direct store.
+ *   2. i0 encodable but r1 fails reg8_p(): borrow RAX (or RCX when r0 is
+ *      RAX) around a push/pop pair instead of taking a temp register.
+ *   3. i0 not encodable: offset goes into a temp register and stxr_c
+ *      performs the store. */
+static void
+stxi_c(jit_state_t *_jit, jit_word_t i0, int32_t r0, int32_t r1)
+{
+ if (can_sign_extend_int_p(i0)) {
+ if (reg8_p(r1)) {
+ /* NOTE(review): first rex() argument is 1 here while every sibling
+ * store passes 0 -- confirm against rex()'s definition whether this
+ * is intentional. */
+ rex(_jit, 1, 0, r1, _NOREG, r0);
+ ic(_jit, 0x88);
+ rx(_jit, r1, i0, r0, _NOREG, _SCL1);
+ } else {
+ // Here we have a hack. Normally tmp registers are just for the
+ // backend's use, but there are cases in which jit_move_operands
+ // can use a temp register too. In a move of an operand to memory
+ // this would result in two simultaneous uses of a temp register.
+ // Oddly this situation only applies on 32-bit x86 with byte
+ // stores -- this is the only platform on which reg8_p can be
+ // false -- so we just make a special case here.
+ ASSERT(r0 != r1);
+ int32_t tmp = r0 == _RAX_REGNO ? _RCX_REGNO : _RAX_REGNO;
+ ASSERT(reg8_p(tmp));
+ pushr(_jit, tmp);
+ movr(_jit, tmp, r1);
+ /* The push above moved the stack pointer by one word, so an
+ * RSP-relative displacement is adjusted by that amount. */
+ if (r0 == _RSP_REGNO)
+ i0 += __WORDSIZE / 8;
+ rex(_jit, 0, 0, tmp, _NOREG, r0);
+ ic(_jit, 0x88);
+ rx(_jit, tmp, i0, r0, _NOREG, _SCL1);
+ popr(_jit, tmp);
+ }
+ } else {
+ jit_gpr_t reg = get_temp_gpr(_jit);
+ movi(_jit, jit_gpr_regno(reg), i0);
+ stxr_c(_jit, jit_gpr_regno(reg), r0, r1);
+ unget_temp_gpr(_jit);
+ }
+}
+
+/* Indexed 16-bit store: low 16 bits of r2 go to address (r0 + r1);
+ * 0x66 operand-size prefix + 0x89. */
+static void
+stxr_s(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+{
+ ic(_jit, 0x66);
+ rex(_jit, 0, 0, r2, r1, r0);
+ ic(_jit, 0x89);
+ rx(_jit, r2, 0, r0, r1, _SCL1);
+}
+
+/* 16-bit store with displacement: low 16 bits of r1 go to (r0 + i0),
+ * 0x66 + 0x89.  If i0 fails can_sign_extend_int_p() the offset goes
+ * through a scratch register and stxr_s. */
+static void
+stxi_s(jit_state_t *_jit, jit_word_t i0, int32_t r0, int32_t r1)
+{
+  if (!can_sign_extend_int_p(i0)) {
+    int32_t off = jit_gpr_regno(get_temp_gpr(_jit));
+    movi(_jit, off, i0);
+    stxr_s(_jit, off, r0, r1);
+    unget_temp_gpr(_jit);
+    return;
+  }
+  ic(_jit, 0x66);
+  rex(_jit, 0, 0, r1, _NOREG, r0);
+  ic(_jit, 0x89);
+  rx(_jit, r1, i0, r0, _NOREG, _SCL1);
+}
+
+/* Indexed 32-bit store: low 32 bits of r2 go to address (r0 + r1),
+ * non-wide 0x89. */
+static void
+stxr_i(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+{
+ rex(_jit, 0, 0, r2, r1, r0);
+ ic(_jit, 0x89);
+ rx(_jit, r2, 0, r0, r1, _SCL1);
+}
+
+/* 32-bit store with displacement: low 32 bits of r1 go to (r0 + i0),
+ * non-wide 0x89.  If i0 fails can_sign_extend_int_p() the offset goes
+ * through a scratch register and stxr_i. */
+static void
+stxi_i(jit_state_t *_jit, jit_word_t i0, int32_t r0, int32_t r1)
+{
+  if (!can_sign_extend_int_p(i0)) {
+    int32_t off = jit_gpr_regno(get_temp_gpr(_jit));
+    movi(_jit, off, i0);
+    stxr_i(_jit, off, r0, r1);
+    unget_temp_gpr(_jit);
+    return;
+  }
+  rex(_jit, 0, 0, r1, _NOREG, r0);
+  ic(_jit, 0x89);
+  rx(_jit, r1, i0, r0, _NOREG, _SCL1);
+}
+
+#if __X64
+/* Indexed 64-bit store: r2 goes to address (r0 + r1), wide REX + 0x89.
+ * x86-64 only. */
+static void
+stxr_l(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+{
+ rex(_jit, 0, 1, r2, r1, r0);
+ ic(_jit, 0x89);
+ rx(_jit, r2, 0, r0, r1, _SCL1);
+}
+
+/* 64-bit store with displacement: r1 goes to (r0 + i0), wide REX +
+ * 0x89.  If i0 fails can_sign_extend_int_p() the offset goes through a
+ * scratch register and stxr_l.  x86-64 only. */
+static void
+stxi_l(jit_state_t *_jit, jit_word_t i0, int32_t r0, int32_t r1)
+{
+  if (!can_sign_extend_int_p(i0)) {
+    int32_t off = jit_gpr_regno(get_temp_gpr(_jit));
+    movi(_jit, off, i0);
+    stxr_l(_jit, off, r0, r1);
+    unget_temp_gpr(_jit);
+    return;
+  }
+  rex(_jit, 0, 1, r1, _NOREG, r0);
+  ic(_jit, 0x89);
+  rx(_jit, r1, i0, r0, _NOREG, _SCL1);
+}
+#endif
+
+/* Emit a short conditional jump (opcode 0x70 | code) whose 8-bit
+ * displacement is left to be patched; returns the relocation produced
+ * by emit_rel8_reloc. */
+static jit_reloc_t
+jccs(jit_state_t *_jit, int32_t code)
+{
+ ic(_jit, 0x70 | code);
+ return emit_rel8_reloc(_jit, 1);
+}
+
+/* Emit a near conditional jump (opcode 0x0F, 0x80 | code) whose 32-bit
+ * displacement is left to be patched; returns the relocation produced
+ * by emit_rel32_reloc. */
+static jit_reloc_t
+jcc(jit_state_t *_jit, int32_t code)
+{
+ ic(_jit, 0x0f);
+ ic(_jit, 0x80 | code);
+ return emit_rel32_reloc(_jit, 2);
+}
+
+/* Emit a conditional jump to the known absolute target i0.  The 2-byte
+ * short form (0x70 | code, rel8) is used when the target is in range;
+ * otherwise the 6-byte near form (0x0F, 0x80 | code, rel32), which
+ * asserts that the displacement fits in 32 bits.  Displacements are
+ * measured from the end of the respective instruction. */
+static void
+jcci(jit_state_t *_jit, int32_t code, jit_word_t i0)
+{
+  ptrdiff_t rel8 = i0 - (_jit->pc.w + 1 + 1);
+  ptrdiff_t rel32 = i0 - (_jit->pc.w + 2 + 4);
+
+  if (rel8 < INT8_MIN || rel8 > INT8_MAX) {
+    ASSERT(INT32_MIN <= rel32 && rel32 <= INT32_MAX);
+    ic(_jit, 0x0f);
+    ic(_jit, 0x80 | code);
+    ii(_jit, rel32);
+    return;
+  }
+  ic(_jit, 0x70 | code);
+  ic(_jit, rel8);
+}
+
+/* For every condition code enumerated by FOR_EACH_CC, define two
+ * helpers: j<cc>() emits the near form through jcc(), and j<cc>s()
+ * emits the short form through jccs().  The `code` macro parameter is
+ * not used by the expansion. */
+#define DEFINE_JUMPS(cc, CC, code) \
+ static inline jit_reloc_t j##cc(jit_state_t *_jit) \
+ { \
+ return jcc(_jit, X86_CC_##CC); \
+ } \
+ static inline jit_reloc_t j##cc##s(jit_state_t *_jit) \
+ { \
+ return jccs(_jit, X86_CC_##CC); \
+ }
+FOR_EACH_CC(DEFINE_JUMPS)
+#undef DEFINE_JUMPS
+
+/* Compare r0 with r1 (X86_CMP) and emit a near conditional jump on
+ * `code`; returns the jump's relocation. */
+static jit_reloc_t
+jcr(jit_state_t *_jit, int32_t code, int32_t r0, int32_t r1)
+{
+ alur(_jit, X86_CMP, r0, r1);
+ return jcc(_jit, code);
+}
+
+/* Compare r0 with immediate i0 (X86_CMP) and emit a near conditional
+ * jump on `code`; returns the jump's relocation. */
+static jit_reloc_t
+jci(jit_state_t *_jit, int32_t code, int32_t r0, jit_word_t i0)
+{
+ alui(_jit, X86_CMP, r0, i0);
+ return jcc(_jit, code);
+}
+
+/* Compare r0 against zero by testing it with itself, then emit a near
+ * conditional jump on `code`; returns the jump's relocation. */
+static jit_reloc_t
+jci0(jit_state_t *_jit, int32_t code, int32_t r0)
+{
+ testr(_jit, r0, r0);
+ return jcc(_jit, code);
+}
+
+/* Branch if r0 < r1 (signed, X86_CC_L). */
+static jit_reloc_t
+bltr(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+ return jcr(_jit, X86_CC_L, r0, r1);
+}
+
+/* Branch if r0 < i1 (signed).  A zero immediate is handled by testing
+ * r0 against itself and branching on the sign flag (X86_CC_S). */
+static jit_reloc_t
+blti(jit_state_t *_jit, int32_t r0, jit_word_t i1)
+{
+  return i1 ? jci(_jit, X86_CC_L, r0, i1)
+            : jci0(_jit, X86_CC_S, r0);
+}
+
+/* Branch if r0 < r1 (unsigned, X86_CC_B). */
+static jit_reloc_t
+bltr_u(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+ return jcr(_jit, X86_CC_B, r0, r1);
+}
+
+/* Branch if r0 < i1 (unsigned, X86_CC_B).  With a zero immediate the
+ * compare is replaced by a self-test of r0, still using X86_CC_B. */
+static jit_reloc_t
+blti_u(jit_state_t *_jit, int32_t r0, jit_word_t i1)
+{
+  return i1 ? jci(_jit, X86_CC_B, r0, i1)
+            : jci0(_jit, X86_CC_B, r0);
+}
+
+/* Branch if r0 <= r1 (signed, X86_CC_LE). */
+static jit_reloc_t
+bler(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+ return jcr (_jit, X86_CC_LE, r0, r1);
+}
+
+/* Branch if r0 <= i1 (signed, X86_CC_LE).  A zero immediate uses a
+ * self-test of r0 instead of a compare. */
+static jit_reloc_t
+blei(jit_state_t *_jit, int32_t r0, jit_word_t i1)
+{
+  return i1 ? jci(_jit, X86_CC_LE, r0, i1)
+            : jci0(_jit, X86_CC_LE, r0);
+}
+
+/* Branch if r0 <= r1 (unsigned, X86_CC_BE). */
+static jit_reloc_t
+bler_u(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+ return jcr (_jit, X86_CC_BE, r0, r1);
+}
+
+/* Branch if r0 <= i1 (unsigned, X86_CC_BE).  A zero immediate uses a
+ * self-test of r0 instead of a compare. */
+static jit_reloc_t
+blei_u(jit_state_t *_jit, int32_t r0, jit_word_t i1)
+{
+  return i1 ? jci(_jit, X86_CC_BE, r0, i1)
+            : jci0(_jit, X86_CC_BE, r0);
+}
+
+/* Branch if r0 == r1 (X86_CC_E). */
+static jit_reloc_t
+beqr(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+ return jcr (_jit, X86_CC_E, r0, r1);
+}
+
+/* Branch if r0 == i1 (X86_CC_E).  A zero immediate uses a self-test of
+ * r0 instead of a compare. */
+static jit_reloc_t
+beqi(jit_state_t *_jit, int32_t r0, jit_word_t i1)
+{
+  return i1 ? jci(_jit, X86_CC_E, r0, i1)
+            : jci0(_jit, X86_CC_E, r0);
+}
+
+/* Branch if r0 >= r1 (signed, X86_CC_GE). */
+static jit_reloc_t
+bger(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+ return jcr (_jit, X86_CC_GE, r0, r1);
+}
+
+/* Branch if r0 >= i1 (signed).  A zero immediate is handled by testing
+ * r0 against itself and branching when the sign flag is clear
+ * (X86_CC_NS). */
+static jit_reloc_t
+bgei(jit_state_t *_jit, int32_t r0, jit_word_t i1)
+{
+  return i1 ? jci(_jit, X86_CC_GE, r0, i1)
+            : jci0(_jit, X86_CC_NS, r0);
+}
+
+/* Branch if r0 >= r1 (unsigned, X86_CC_AE). */
+static jit_reloc_t
+bger_u(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+ return jcr (_jit, X86_CC_AE, r0, r1);
+}
+
+/* Branch if r0 >= i1 (unsigned, X86_CC_AE).  Unlike most immediate
+ * variants there is no special case for a zero immediate. */
+static jit_reloc_t
+bgei_u(jit_state_t *_jit, int32_t r0, jit_word_t i1)
+{
+ return jci (_jit, X86_CC_AE, r0, i1);
+}
+
+/* Branch if r0 > r1 (signed, X86_CC_G). */
+static jit_reloc_t
+bgtr(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+ return jcr(_jit, X86_CC_G, r0, r1);
+}
+
+/* Branch if r0 > i1 (signed, X86_CC_G).  No special case for a zero
+ * immediate. */
+static jit_reloc_t
+bgti(jit_state_t *_jit, int32_t r0, jit_word_t i1)
+{
+ return jci(_jit, X86_CC_G, r0, i1);
+}
+
+/* Branch if r0 > r1 (unsigned, X86_CC_A). */
+static jit_reloc_t
+bgtr_u(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+ return jcr(_jit, X86_CC_A, r0, r1);
+}
+
+/* Branch if r0 > i1 (unsigned, X86_CC_A).  For a zero immediate this is
+ * the same as r0 != 0, so a self-test with X86_CC_NE is emitted. */
+static jit_reloc_t
+bgti_u(jit_state_t *_jit, int32_t r0, jit_word_t i1)
+{
+  return i1 ? jci(_jit, X86_CC_A, r0, i1)
+            : jci0(_jit, X86_CC_NE, r0);
+}
+
+/* Branch if r0 != r1 (X86_CC_NE). */
+static jit_reloc_t
+bner(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+ return jcr(_jit, X86_CC_NE, r0, r1);
+}
+
+/* Branch if r0 != i1 (X86_CC_NE).  A zero immediate uses a self-test of
+ * r0 instead of a compare. */
+static jit_reloc_t
+bnei(jit_state_t *_jit, int32_t r0, jit_word_t i1)
+{
+  return i1 ? jci(_jit, X86_CC_NE, r0, i1)
+            : jci0(_jit, X86_CC_NE, r0);
+}
+
+/* Branch if (r0 & r1) is non-zero: test followed by jnz. */
+static jit_reloc_t
+bmsr(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+ testr(_jit, r0, r1);
+ return jnz(_jit);
+}
+
+/* Branch if (r0 & i1) is non-zero.  An immediate accepted by
+ * can_zero_extend_int_p() is tested directly; otherwise the mask is
+ * loaded into a scratch register first. */
+static jit_reloc_t
+bmsi(jit_state_t *_jit, int32_t r0, jit_word_t i1)
+{
+  if (can_zero_extend_int_p(i1)) {
+    testi(_jit, r0, i1);
+    return jnz(_jit);
+  }
+  int32_t mask = jit_gpr_regno(get_temp_gpr(_jit));
+  movi(_jit, mask, i1);
+  testr(_jit, r0, mask);
+  unget_temp_gpr(_jit);
+  return jnz(_jit);
+}
+
+/* Branch if (r0 & r1) is zero: test followed by jz. */
+static jit_reloc_t
+bmcr(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+ testr(_jit, r0, r1);
+ return jz(_jit);
+}
+
+/* Branch if (r0 & i1) is zero.  An immediate accepted by
+ * can_zero_extend_int_p() is tested directly; otherwise the mask is
+ * loaded into a scratch register first. */
+static jit_reloc_t
+bmci(jit_state_t *_jit, int32_t r0, jit_word_t i1)
+{
+  if (can_zero_extend_int_p(i1)) {
+    testi(_jit, r0, i1);
+    return jz(_jit);
+  }
+  int32_t mask = jit_gpr_regno(get_temp_gpr(_jit));
+  movi(_jit, mask, i1);
+  testr(_jit, r0, mask);
+  unget_temp_gpr(_jit);
+  return jz(_jit);
+}
+
+/* r0 += r1, then branch if the add overflowed (signed, jo). */
+static jit_reloc_t
+boaddr(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+ iaddr(_jit, r0, r1);
+ return jo(_jit);
+}
+
+/* r0 += i1, then branch on signed overflow (jo).  An immediate that
+ * fails can_sign_extend_int_p() is loaded into a scratch register and
+ * the register form boaddr is used. */
+static jit_reloc_t
+boaddi(jit_state_t *_jit, int32_t r0, jit_word_t i1)
+{
+  if (!can_sign_extend_int_p(i1)) {
+    int32_t addend = jit_gpr_regno(get_temp_gpr(_jit));
+    movi(_jit, addend, i1);
+    /* The scratch register is released before boaddr() emits the add. */
+    unget_temp_gpr(_jit);
+    return boaddr(_jit, r0, addend);
+  }
+  iaddi(_jit, r0, i1);
+  return jo(_jit);
+}
+
+/* r0 += r1, then branch if the add produced a carry (unsigned
+ * overflow, jc). */
+static jit_reloc_t
+boaddr_u(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+ iaddr(_jit, r0, r1);
+ return jc(_jit);
+}
+
+/* r0 += i1, then branch on carry (jc).  An immediate that fails
+ * can_sign_extend_int_p() is loaded into a scratch register and the
+ * register form boaddr_u is used. */
+static jit_reloc_t
+boaddi_u(jit_state_t *_jit, int32_t r0, jit_word_t i1)
+{
+  if (!can_sign_extend_int_p(i1)) {
+    int32_t addend = jit_gpr_regno(get_temp_gpr(_jit));
+    movi(_jit, addend, i1);
+    /* The scratch register is released before boaddr_u() emits the add. */
+    unget_temp_gpr(_jit);
+    return boaddr_u(_jit, r0, addend);
+  }
+  iaddi(_jit, r0, i1);
+  return jc(_jit);
+}
+
+/* r0 += r1, then branch if the add did NOT overflow (signed, jno). */
+static jit_reloc_t
+bxaddr(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+ iaddr(_jit, r0, r1);
+ return jno(_jit);
+}
+
+/* r0 += i1, then branch when there is no signed overflow (jno).  An
+ * immediate that fails can_sign_extend_int_p() is loaded into a scratch
+ * register and the register form bxaddr is used. */
+static jit_reloc_t
+bxaddi(jit_state_t *_jit, int32_t r0, jit_word_t i1)
+{
+  if (!can_sign_extend_int_p(i1)) {
+    int32_t addend = jit_gpr_regno(get_temp_gpr(_jit));
+    movi(_jit, addend, i1);
+    /* The scratch register is released before bxaddr() emits the add. */
+    unget_temp_gpr(_jit);
+    return bxaddr(_jit, r0, addend);
+  }
+  iaddi(_jit, r0, i1);
+  return jno(_jit);
+}
+
+/* r0 += r1, then branch if the add produced no carry (jnc). */
+static jit_reloc_t
+bxaddr_u(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+ iaddr(_jit, r0, r1);
+ return jnc(_jit);
+}
+
+/* r0 += i1, then branch when there is no carry (jnc).  An immediate
+ * that fails can_sign_extend_int_p() is loaded into a scratch register
+ * and the register form bxaddr_u is used. */
+static jit_reloc_t
+bxaddi_u(jit_state_t *_jit, int32_t r0, jit_word_t i1)
+{
+  if (!can_sign_extend_int_p(i1)) {
+    int32_t addend = jit_gpr_regno(get_temp_gpr(_jit));
+    movi(_jit, addend, i1);
+    /* The scratch register is released before bxaddr_u() emits the add. */
+    unget_temp_gpr(_jit);
+    return bxaddr_u(_jit, r0, addend);
+  }
+  iaddi(_jit, r0, i1);
+  return jnc(_jit);
+}
+
+/* r0 -= r1, then branch if the subtraction overflowed (signed, jo). */
+static jit_reloc_t
+bosubr(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+ isubr(_jit, r0, r1);
+ return jo(_jit);
+}
+
+/* r0 -= i1, then branch on signed overflow (jo).  An immediate that
+ * fails can_sign_extend_int_p() is loaded into a scratch register and
+ * the register form bosubr is used. */
+static jit_reloc_t
+bosubi(jit_state_t *_jit, int32_t r0, jit_word_t i1)
+{
+  if (!can_sign_extend_int_p(i1)) {
+    int32_t subtrahend = jit_gpr_regno(get_temp_gpr(_jit));
+    movi(_jit, subtrahend, i1);
+    /* The scratch register is released before bosubr() emits the sub. */
+    unget_temp_gpr(_jit);
+    return bosubr(_jit, r0, subtrahend);
+  }
+  isubi(_jit, r0, i1);
+  return jo(_jit);
+}
+
+/* r0 -= r1, then branch if the subtraction borrowed (carry set, jc). */
+static jit_reloc_t
+bosubr_u(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+ isubr(_jit, r0, r1);
+ return jc(_jit);
+}
+
+/* r0 -= i1, then branch on borrow (jc).  An immediate that fails
+ * can_sign_extend_int_p() is loaded into a scratch register and the
+ * register form bosubr_u is used. */
+static jit_reloc_t
+bosubi_u(jit_state_t *_jit, int32_t r0, jit_word_t i1)
+{
+  if (!can_sign_extend_int_p(i1)) {
+    int32_t subtrahend = jit_gpr_regno(get_temp_gpr(_jit));
+    movi(_jit, subtrahend, i1);
+    /* The scratch register is released before bosubr_u() emits the sub. */
+    unget_temp_gpr(_jit);
+    return bosubr_u(_jit, r0, subtrahend);
+  }
+  isubi(_jit, r0, i1);
+  return jc(_jit);
+}
+
+/* r0 -= r1, then branch if the subtraction did NOT overflow (jno). */
+static jit_reloc_t
+bxsubr(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+ isubr(_jit, r0, r1);
+ return jno(_jit);
+}
+
+/* r0 -= i1, then branch when there is no signed overflow (jno).  An
+ * immediate that fails can_sign_extend_int_p() is loaded into a scratch
+ * register and the register form bxsubr is used. */
+static jit_reloc_t
+bxsubi(jit_state_t *_jit, int32_t r0, jit_word_t i1)
+{
+  if (!can_sign_extend_int_p(i1)) {
+    int32_t subtrahend = jit_gpr_regno(get_temp_gpr(_jit));
+    movi(_jit, subtrahend, i1);
+    /* The scratch register is released before bxsubr() emits the sub. */
+    unget_temp_gpr(_jit);
+    return bxsubr(_jit, r0, subtrahend);
+  }
+  isubi(_jit, r0, i1);
+  return jno(_jit);
+}
+
+/* r0 -= r1, then branch if the subtraction did not borrow (jnc). */
+static jit_reloc_t
+bxsubr_u(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+ isubr(_jit, r0, r1);
+ return jnc(_jit);
+}
+
+/* r0 -= i1, then branch when there is no borrow (jnc).  An immediate
+ * that fails can_sign_extend_int_p() is loaded into a scratch register
+ * and the register form bxsubr_u is used. */
+static jit_reloc_t
+bxsubi_u(jit_state_t *_jit, int32_t r0, jit_word_t i1)
+{
+  if (!can_sign_extend_int_p(i1)) {
+    int32_t subtrahend = jit_gpr_regno(get_temp_gpr(_jit));
+    movi(_jit, subtrahend, i1);
+    /* The scratch register is released before bxsubr_u() emits the sub. */
+    unget_temp_gpr(_jit);
+    return bxsubr_u(_jit, r0, subtrahend);
+  }
+  isubi(_jit, r0, i1);
+  return jnc(_jit);
+}
+
+/* Indirect call through register r0: opcode 0xFF with ModRM extension
+ * 2 (CALL r/m) in register-direct mode (mod = 3). */
+static void
+callr(jit_state_t *_jit, int32_t r0)
+{
+ rex(_jit, 0, 0, _NOREG, _NOREG, r0);
+ ic(_jit, 0xff);
+ mrm(_jit, 0x03, 0x02, r7(r0));
+}
+
+/* Call the absolute address i0.  A direct CALL rel32 (0xE8) is emitted
+ * when the displacement from the end of that 5-byte instruction fits in
+ * 32 bits; otherwise the address is loaded into a scratch register via
+ * mov_addr/jit_patch_there and called indirectly. */
+static void
+calli(jit_state_t *_jit, jit_word_t i0)
+{
+  ptrdiff_t rel32 = i0 - (_jit->pc.w + 1 + 4);
+
+  if (rel32 < INT32_MIN || rel32 > INT32_MAX) {
+    int32_t target = jit_gpr_regno(get_temp_gpr(_jit));
+    jit_patch_there(_jit, mov_addr(_jit, target), (void*)i0);
+    callr(_jit, target);
+    unget_temp_gpr(_jit);
+    return;
+  }
+  ic(_jit, 0xe8);
+  ii(_jit, rel32);
+}
+
+/* Jump-with-link to absolute address i0; on x86 this is simply a call.
+ * calli is invoked as a plain statement: a void function must not
+ * `return` an expression (ISO C 6.8.6.4). */
+static void
+jmpi_with_link(jit_state_t *_jit, jit_word_t i0)
+{
+  calli(_jit, i0);
+}
+
+/* Pop the top of the stack into JIT_LR while keeping _jit->frame_size
+ * at the value it had before the pop. */
+static void
+pop_link_register(jit_state_t *_jit)
+{
+ /* Treat this instruction as having no effect on the stack size; its
+ * effect is non-local (across functions) and handled manually. */
+
+ int saved_frame_size = _jit->frame_size;
+ popr(_jit, jit_gpr_regno (JIT_LR));
+ _jit->frame_size = saved_frame_size;
+}
+
+/* Push JIT_LR onto the stack while keeping _jit->frame_size at the
+ * value it had before the push. */
+static void
+push_link_register(jit_state_t *_jit)
+{
+ /* See comment in pop_link_register. */
+
+ int saved_frame_size = _jit->frame_size;
+ pushr(_jit, jit_gpr_regno (JIT_LR));
+ _jit->frame_size = saved_frame_size;
+}
+
+/* Indirect jump through register r0: opcode 0xFF with ModRM extension
+ * 4 (JMP r/m) in register-direct mode (mod = 3). */
+static void
+jmpr(jit_state_t *_jit, int32_t r0)
+{
+ rex(_jit, 0, WIDE, _NOREG, _NOREG, r0);
+ ic(_jit, 0xff);
+ mrm(_jit, 0x03, 0x04, r7(r0));
+}
+
+/* Unconditional jump to the absolute address i0, using the smallest
+ * encoding that reaches it:
+ *   1. JMP rel8  (0xEB), 2 bytes;
+ *   2. JMP rel32 (0xE9), 5 bytes;
+ *   3. address loaded into a scratch register, then jmpr.
+ * Displacements are measured from the end of the respective
+ * instruction. */
+static void
+jmpi(jit_state_t *_jit, jit_word_t i0)
+{
+  ptrdiff_t rel8 = i0 - (_jit->pc.w + 1 + 1);
+  ptrdiff_t rel32 = i0 - (_jit->pc.w + 1 + 4);
+
+  if (INT8_MIN <= rel8 && rel8 <= INT8_MAX) {
+    ic(_jit, 0xeb);
+    ic(_jit, rel8);
+    return;
+  }
+  if (INT32_MIN <= rel32 && rel32 <= INT32_MAX) {
+    ic(_jit, 0xe9);
+    ii(_jit, rel32);
+    return;
+  }
+  int32_t target = jit_gpr_regno(get_temp_gpr(_jit));
+  jit_patch_there(_jit, mov_addr(_jit, target), (void*)i0);
+  jmpr(_jit, target);
+  unget_temp_gpr(_jit);
+}
+
+/* Emit an unconditional JMP rel32 (0xE9) whose displacement is left to
+ * be patched; returns the relocation from emit_rel32_reloc. */
+static jit_reloc_t
+jmp(jit_state_t *_jit)
+{
+ ic(_jit, 0xe9);
+ return emit_rel32_reloc(_jit, 1);
+}
+
+/* Emit a near RET (0xC3). */
+static void
+ret(jit_state_t *_jit)
+{
+ ic(_jit, 0xc3);
+}
+
+/* Return the value held in r0: move it into RAX, then emit RET. */
+static void
+retr(jit_state_t *_jit, int32_t r0)
+{
+ movr(_jit, _RAX_REGNO, r0);
+ ret(_jit);
+}
+
+/* Return the constant i0: load it into RAX, then emit RET. */
+static void
+reti(jit_state_t *_jit, jit_word_t i0)
+{
+ movi(_jit, _RAX_REGNO, i0);
+ ret(_jit);
+}
+
+/* Fetch a callee's return value from RAX into r0 through extr_c
+ * (char-sized extension). */
+static void
+retval_c(jit_state_t *_jit, int32_t r0)
+{
+ extr_c(_jit, r0, _RAX_REGNO);
+}
+
+/* Fetch a callee's return value from RAX into r0 through extr_uc
+ * (unsigned char-sized extension). */
+static void
+retval_uc(jit_state_t *_jit, int32_t r0)
+{
+ extr_uc(_jit, r0, _RAX_REGNO);
+}
+
+/* Fetch a callee's return value from RAX into r0 through extr_s
+ * (short-sized extension). */
+static void
+retval_s(jit_state_t *_jit, int32_t r0)
+{
+ extr_s(_jit, r0, _RAX_REGNO);
+}
+
+/* Fetch a callee's return value from RAX into r0 through extr_us
+ * (unsigned short-sized extension). */
+static void
+retval_us(jit_state_t *_jit, int32_t r0)
+{
+ extr_us(_jit, r0, _RAX_REGNO);
+}
+
+/* Fetch an int-sized return value from RAX into r0.  With __X32 a plain
+ * register move is emitted; otherwise the value goes through extr_i. */
+static void
+retval_i(jit_state_t *_jit, int32_t r0)
+{
+#if __X32
+ movr(_jit, r0, _RAX_REGNO);
+#else
+ extr_i(_jit, r0, _RAX_REGNO);
+#endif
+}
+
+#if __X64
+/* Fetch an unsigned-int-sized return value from RAX into r0 through
+ * extr_ui.  x86-64 only. */
+static void
+retval_ui(jit_state_t *_jit, int32_t r0)
+{
+ extr_ui(_jit, r0, _RAX_REGNO);
+}
+
+/* Fetch a full-width return value from RAX into r0 with a plain move.
+ * x86-64 only. */
+static void
+retval_l(jit_state_t *_jit, int32_t r0)
+{
+ movr(_jit, r0, _RAX_REGNO);
+}
+#endif
+
+/* Emit the three-byte MFENCE instruction (0x0F 0xAE 0xF0). */
+static void
+mfence(jit_state_t *_jit)
+{
+ ic(_jit, 0x0f);
+ ic(_jit, 0xae);
+ ic(_jit, 0xf0);
+}
+
+/* Atomic word load: emitted as an ordinary load from the address in loc
+ * into dst (ldr_l with __X64, ldr_i otherwise); no fence is emitted. */
+static void
+ldr_atomic(jit_state_t *_jit, int32_t dst, int32_t loc)
+{
+#if __X64
+ ldr_l(_jit, dst, loc);
+#else
+ ldr_i(_jit, dst, loc);
+#endif
+}
+
+/* Atomic word store: an ordinary store of val to the address in loc
+ * (str_l with __X64, str_i otherwise) followed by an MFENCE. */
+static void
+str_atomic(jit_state_t *_jit, int32_t loc, int32_t val)
+{
+#if __X64
+ str_l(_jit, loc, val);
+#else
+ str_i(_jit, loc, val);
+#endif
+ mfence(_jit);
+}
+
+/* Atomic exchange: store val at the address in loc and leave the
+ * previous memory contents in dst.  When dst and val are the same
+ * register the exchange is done in place; otherwise val is copied to a
+ * scratch register so that it is not modified. */
+static void
+swap_atomic(jit_state_t *_jit, int32_t dst, int32_t loc, int32_t val)
+{
+  if (dst == val) {
+    xchgrm(_jit, dst, loc);
+    return;
+  }
+  int32_t scratch = jit_gpr_regno(get_temp_gpr(_jit));
+  movr(_jit, scratch, val);
+  xchgrm(_jit, scratch, loc);
+  movr(_jit, dst, scratch);
+  unget_temp_gpr(_jit);
+}
+
+/* Atomic compare-and-swap on the word addressed by loc: `expected` is
+ * placed in RAX, cmpxchgmr is emitted with `desired`, and the resulting
+ * RAX is copied to dst.  (cmpxchgmr is defined elsewhere; presumably it
+ * emits CMPXCHG, which implicitly uses RAX -- hence the shuffling.)
+ * Each branch handles one way an operand can collide with RAX and, when
+ * RAX or an operand had to be displaced, restores it afterwards from a
+ * temp register.  loc must differ from both expected and desired. */
+static void
+cas_atomic(jit_state_t *_jit, int32_t dst, int32_t loc, int32_t expected,
+ int32_t desired)
+{
+ ASSERT(loc != expected);
+ ASSERT(loc != desired);
+
+ if (dst == jit_gpr_regno(_RAX)) {
+ /* Result is wanted in RAX, so nothing needs restoring. */
+ if (loc == dst) {
+ /* The address lives in RAX: move it aside before overwriting RAX. */
+ int32_t tmp = jit_gpr_regno(get_temp_gpr(_jit));
+ movr(_jit, tmp ,loc);
+ movr(_jit, dst, expected);
+ cmpxchgmr(_jit, tmp, desired);
+ unget_temp_gpr(_jit);
+ } else {
+ movr(_jit, dst, expected);
+ cmpxchgmr(_jit, loc, desired);
+ }
+ } else if (loc == jit_gpr_regno(_RAX)) {
+ /* Address is in RAX: use a copy as the address, then put it back. */
+ int32_t tmp = jit_gpr_regno(get_temp_gpr(_jit));
+ movr(_jit, tmp, loc);
+ movr(_jit, jit_gpr_regno(_RAX), expected);
+ cmpxchgmr(_jit, tmp, desired);
+ movr(_jit, dst, jit_gpr_regno(_RAX));
+ movr(_jit, loc, tmp);
+ unget_temp_gpr(_jit);
+ } else if (expected == jit_gpr_regno(_RAX)) {
+ /* expected already sits in RAX: save it, and restore it afterwards. */
+ int32_t tmp = jit_gpr_regno(get_temp_gpr(_jit));
+ movr(_jit, tmp, expected);
+ cmpxchgmr(_jit, loc, desired);
+ movr(_jit, dst, jit_gpr_regno(_RAX));
+ movr(_jit, expected, tmp);
+ unget_temp_gpr(_jit);
+ } else if (desired == jit_gpr_regno(_RAX)) {
+ /* desired is in RAX: the copy in tmp is used as the new value. */
+ int32_t tmp = jit_gpr_regno(get_temp_gpr(_jit));
+ movr(_jit, tmp, desired);
+ movr(_jit, jit_gpr_regno(_RAX), expected);
+ cmpxchgmr(_jit, loc, tmp);
+ movr(_jit, dst, jit_gpr_regno(_RAX));
+ movr(_jit, desired, tmp);
+ unget_temp_gpr(_jit);
+ } else {
+ /* No operand is RAX: preserve the unrelated value in RAX. */
+ int32_t tmp = jit_gpr_regno(get_temp_gpr(_jit));
+ movr(_jit, tmp, jit_gpr_regno(_RAX));
+ movr(_jit, jit_gpr_regno(_RAX), expected);
+ cmpxchgmr(_jit, loc, desired);
+ movr(_jit, dst, jit_gpr_regno(_RAX));
+ movr(_jit, jit_gpr_regno(_RAX), tmp);
+ unget_temp_gpr(_jit);
+ }
+}
+
+/* Emit a one-byte INT3 breakpoint instruction (0xCC). */
+static void
+breakpoint(jit_state_t *_jit)
+{
+ ic(_jit, 0xcc);
+}
diff --git a/deps/lightening/lightening/x86-sse.c b/deps/lightening/lightening/x86-sse.c
new file mode 100644
index 0000000..ab66dc7
--- /dev/null
+++ b/deps/lightening/lightening/x86-sse.c
@@ -0,0 +1,1016 @@
+/*
+ * Copyright (C) 2012-2017, 2019 Free Software Foundation, Inc.
+ *
+ * This file is part of GNU lightning.
+ *
+ * GNU lightning is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ *
+ * GNU lightning is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+ * License for more details.
+ *
+ * Authors:
+ * Paulo Cesar Pereira de Andrade
+ */
+
+#define _XMM0_REGNO 0 /* numeric XMM register indices, passed as r0/r1 to the emitters below */
+#define _XMM1_REGNO 1
+#define _XMM2_REGNO 2
+#define _XMM3_REGNO 3
+#define _XMM4_REGNO 4
+#define _XMM5_REGNO 5
+#define _XMM6_REGNO 6
+#define _XMM7_REGNO 7
+#define _XMM8_REGNO 8
+#define _XMM9_REGNO 9
+#define _XMM10_REGNO 10
+#define _XMM11_REGNO 11
+#define _XMM12_REGNO 12
+#define _XMM13_REGNO 13
+#define _XMM14_REGNO 14
+#define _XMM15_REGNO 15
+#define X86_SSE_MOV 0x10 /* X86_SSE_*: opcode byte emitted right after the 0x0f escape (see sser/ssexr); used for loads and reg-reg moves */
+#define X86_SSE_MOV1 0x11 /* used by movssrm/movsdrm for stores to memory */
+#define X86_SSE_MOVLP 0x12
+#define X86_SSE_MOVHP 0x16
+#define X86_SSE_MOVA 0x28
+#define X86_SSE_CVTIS 0x2a /* integer -> float conversion (extr_f/extr_d) */
+#define X86_SSE_CVTTSI 0x2c /* truncating float -> integer conversion (truncr_*) */
+#define X86_SSE_CVTSI 0x2d
+#define X86_SSE_UCOMI 0x2e /* compare used by all branch emitters in this file */
+#define X86_SSE_COMI 0x2f
+#define X86_SSE_ROUND 0x3a
+#define X86_SSE_SQRT 0x51
+#define X86_SSE_RSQRT 0x52
+#define X86_SSE_RCP 0x53
+#define X86_SSE_AND 0x54
+#define X86_SSE_ANDN 0x55
+#define X86_SSE_OR 0x56
+#define X86_SSE_XOR 0x57
+#define X86_SSE_ADD 0x58
+#define X86_SSE_MUL 0x59
+#define X86_SSE_CVTSD 0x5a /* float <-> double conversion, direction chosen by the prefix byte */
+#define X86_SSE_CVTDT 0x5b
+#define X86_SSE_SUB 0x5c
+#define X86_SSE_MIN 0x5d
+#define X86_SSE_DIV 0x5e
+#define X86_SSE_MAX 0x5f
+#define X86_SSE_X2G 0x6e /* NOTE: despite the name, movdlxr/movdqxr use this to load an XMM register from a GPR (see movi_f) */
+#define X86_SSE_EQB 0x74
+#define X86_SSE_EQW 0x75
+#define X86_SSE_EQD 0x76 /* packed 32-bit compare-equal (pcmpeqlr) */
+#define X86_SSE_G2X 0x7e
+#define X86_SSE_MOV2 0xd6
+
+static void
+sser(jit_state_t *_jit, int32_t c, int32_t r0, int32_t r1)
+{ /* Emit an unprefixed register-register SSE op: rex(...), 0x0f, opcode c, then the ModRM byte. */
+ rex(_jit, 0, 0, r0, 0, r1);
+ ic(_jit, 0x0f); /* two-byte opcode escape */
+ ic(_jit, c);
+ mrm(_jit, 0x03, r7(r0), r7(r1)); /* 0x03 = register-direct mode; r7() presumably keeps the low 3 register bits -- confirm in x86-cpu.c */
+}
+
+static void
+ssexr(jit_state_t *_jit, int32_t p, int32_t c,
+ int32_t r0, int32_t r1)
+{ /* Same as sser, but first emits the mandatory prefix byte p (callers pass 0x66, 0xf2 or 0xf3). */
+ ic(_jit, p); /* prefix goes before rex() */
+ rex(_jit, 0, 0, r0, 0, r1);
+ ic(_jit, 0x0f);
+ ic(_jit, c);
+ mrm(_jit, 0x03, r7(r0), r7(r1));
+}
+
+static void
+ssexi(jit_state_t *_jit, int32_t c, int32_t r0,
+ int32_t m, int32_t i)
+{ /* Emit a 0x66-prefixed SSE op on register r0 with a trailing immediate byte i. */
+ ic(_jit, 0x66);
+ rex(_jit, 0, 0, 0, 0, r0);
+ ic(_jit, 0x0f);
+ ic(_jit, c);
+ mrm(_jit, 0x03, r7(m), r7(r0)); /* m takes the slot r0 occupies in sser; callers pass the constants 0x02 / 0x06 (opcode-extension digit) */
+ ic(_jit, i); /* immediate operand, e.g. the shift count in psrl/psrq/pslq */
+}
+
+static void
+sselxr(jit_state_t *_jit, int32_t p, int32_t c, int32_t r0, int32_t r1)
+{ /* Word-sized variant of ssexr: on __X64 the second rex() argument is 1 instead of 0 (presumably REX.W -- confirm in x86-cpu.c). */
+ if (__X64) {
+ ic(_jit, p);
+ rex(_jit, 0, 1, r0, 0, r1);
+ ic(_jit, 0x0f);
+ ic(_jit, c);
+ mrm(_jit, 0x03, r7(r0), r7(r1));
+ } else {
+ ssexr(_jit, p, c, r0, r1); /* 32-bit build: identical to the plain form */
+ }
+}
+
+static void
+ssexrx(jit_state_t *_jit, int32_t px, int32_t code, int32_t md,
+ int32_t rb, int32_t ri, int32_t ms, int32_t rd)
+{ /* Emit a prefixed SSE op between register rd and a memory operand described by md (displacement), rb (base), ri (index), ms (scale). */
+ ic(_jit, px);
+ rex(_jit, 0, 0, rd, ri, rb);
+ ic(_jit, 0x0f);
+ ic(_jit, code);
+ rx(_jit, rd, md, rb, ri, ms); /* rx presumably emits the ModRM/SIB/displacement bytes -- confirm in x86-cpu.c */
+}
+
+static void
+movdlxr(jit_state_t *_jit, int32_t r0, int32_t r1)
+{ /* 0x66 0x0f 0x6e: load XMM register r0 from GPR r1 (32-bit form); see movi_f for the calling pattern. */
+ ssexr(_jit, 0x66, X86_SSE_X2G, r0, r1);
+}
+
+static void movdqxr(jit_state_t *_jit, int32_t r0, int32_t r1) maybe_unused; /* only referenced on the __X64 path of movi_d */
+static void
+movdqxr(jit_state_t *_jit, int32_t r0, int32_t r1)
+{ /* Like movdlxr but emitted through sselxr, i.e. the word-sized form on __X64. */
+ sselxr(_jit, 0x66, X86_SSE_X2G, r0, r1);
+}
+
+static void
+movssmr(jit_state_t *_jit, int32_t md, int32_t rb, int32_t ri, int32_t ms, int32_t rd)
+{ /* Load single-precision: XMM rd <- memory [rb + ri*ms + md] (0xf3 0x0f 0x10). */
+ ssexrx(_jit, 0xf3, X86_SSE_MOV, md, rb, ri, ms, rd);
+}
+static void
+movsdmr(jit_state_t *_jit, int32_t md, int32_t rb, int32_t ri, int32_t ms, int32_t rd)
+{ /* Load double-precision: XMM rd <- memory [rb + ri*ms + md] (0xf2 0x0f 0x10). */
+ ssexrx(_jit, 0xf2, X86_SSE_MOV, md, rb, ri, ms, rd);
+}
+static void
+movssrm(jit_state_t *_jit, int32_t rs, int32_t md, int32_t mb, int32_t mi, int32_t ms)
+{ /* Store single-precision: memory [mb + mi*ms + md] <- XMM rs (0xf3 0x0f 0x11). Note the argument order differs from movssmr. */
+ ssexrx(_jit, 0xf3, X86_SSE_MOV1, md, mb, mi, ms, rs);
+}
+static void
+movsdrm(jit_state_t *_jit, int32_t rs, int32_t md, int32_t mb, int32_t mi, int32_t ms)
+{ /* Store double-precision: memory [mb + mi*ms + md] <- XMM rs (0xf2 0x0f 0x11). */
+ ssexrx(_jit, 0xf2, X86_SSE_MOV1, md, mb, mi, ms, rs);
+}
+
+static void
+movr_f(jit_state_t *_jit, int32_t r0, int32_t r1)
+{ /* Copy single-precision XMM r1 into r0; emits nothing when both are the same register. */
+ if (r0 != r1)
+ ssexr(_jit, 0xf3, X86_SSE_MOV, r0, r1);
+}
+
+static void
+movr_d(jit_state_t *_jit, int32_t r0, int32_t r1)
+{ /* Copy double-precision XMM r1 into r0; emits nothing when both are the same register. */
+ if (r0 != r1)
+ ssexr(_jit, 0xf2, X86_SSE_MOV, r0, r1);
+}
+
+static void
+addssr(jit_state_t *_jit, int32_t r0, int32_t r1)
+{ /* Two-operand single-precision add: r0 += r1 (0xf3 0x0f 0x58). */
+ ssexr(_jit, 0xf3, X86_SSE_ADD, r0, r1);
+}
+static void
+addsdr(jit_state_t *_jit, int32_t r0, int32_t r1)
+{ /* Two-operand double-precision add: r0 += r1 (0xf2 0x0f 0x58). */
+ ssexr(_jit, 0xf2, X86_SSE_ADD, r0, r1);
+}
+static void
+subssr(jit_state_t *_jit, int32_t r0, int32_t r1)
+{ /* Two-operand single-precision subtract: r0 -= r1 (0xf3 0x0f 0x5c). */
+ ssexr(_jit, 0xf3, X86_SSE_SUB, r0, r1);
+}
+static void
+subsdr(jit_state_t *_jit, int32_t r0, int32_t r1)
+{ /* Two-operand double-precision subtract: r0 -= r1 (0xf2 0x0f 0x5c). */
+ ssexr(_jit, 0xf2, X86_SSE_SUB, r0, r1);
+}
+static void
+mulssr(jit_state_t *_jit, int32_t r0, int32_t r1)
+{ /* Two-operand single-precision multiply: r0 *= r1 (0xf3 0x0f 0x59). */
+ ssexr(_jit, 0xf3, X86_SSE_MUL, r0, r1);
+}
+static void
+mulsdr(jit_state_t *_jit, int32_t r0, int32_t r1)
+{ /* Two-operand double-precision multiply: r0 *= r1 (0xf2 0x0f 0x59). */
+ ssexr(_jit, 0xf2, X86_SSE_MUL, r0, r1);
+}
+static void
+divssr(jit_state_t *_jit, int32_t r0, int32_t r1)
+{ /* Two-operand single-precision divide: r0 /= r1 (0xf3 0x0f 0x5e). */
+ ssexr(_jit, 0xf3, X86_SSE_DIV, r0, r1);
+}
+static void
+divsdr(jit_state_t *_jit, int32_t r0, int32_t r1)
+{ /* Two-operand double-precision divide: r0 /= r1 (0xf2 0x0f 0x5e). */
+ ssexr(_jit, 0xf2, X86_SSE_DIV, r0, r1);
+}
+static void
+andpsr(jit_state_t *_jit, int32_t r0, int32_t r1)
+{ /* Bitwise AND of XMM registers, unprefixed (packed-single) form: r0 &= r1. */
+ sser(_jit, X86_SSE_AND, r0, r1);
+}
+static void
+andpdr(jit_state_t *_jit, int32_t r0, int32_t r1)
+{ /* Bitwise AND of XMM registers, 0x66-prefixed (packed-double) form: r0 &= r1. */
+ ssexr(_jit, 0x66, X86_SSE_AND, r0, r1);
+}
+static void
+truncr_f_i(jit_state_t *_jit, int32_t r0, int32_t r1)
+{ /* Truncating conversion of single-precision XMM r1 to a 32-bit integer in GPR r0 (0xf3 0x0f 0x2c). */
+ ssexr(_jit, 0xf3, X86_SSE_CVTTSI, r0, r1);
+}
+static void
+truncr_d_i(jit_state_t *_jit, int32_t r0, int32_t r1)
+{ /* Truncating conversion of double-precision XMM r1 to a 32-bit integer in GPR r0 (0xf2 0x0f 0x2c). */
+ ssexr(_jit, 0xf2, X86_SSE_CVTTSI, r0, r1);
+}
+#if __X64
+static void
+truncr_f_l(jit_state_t *_jit, int32_t r0, int32_t r1)
+{ /* __X64 only: as truncr_f_i but emitted through sselxr, i.e. the word-sized form. */
+ sselxr(_jit, 0xf3, X86_SSE_CVTTSI, r0, r1);
+}
+static void
+truncr_d_l(jit_state_t *_jit, int32_t r0, int32_t r1)
+{ /* __X64 only: as truncr_d_i but emitted through sselxr, i.e. the word-sized form. */
+ sselxr(_jit, 0xf2, X86_SSE_CVTTSI, r0, r1);
+}
+#endif
+static void
+extr_f(jit_state_t *_jit, int32_t r0, int32_t r1)
+{ /* Convert the integer in GPR r1 to single precision in XMM r0 (0xf3 0x0f 0x2a, word-sized on __X64). */
+ sselxr(_jit, 0xf3, X86_SSE_CVTIS, r0, r1);
+}
+static void
+extr_d(jit_state_t *_jit, int32_t r0, int32_t r1)
+{ /* Convert the integer in GPR r1 to double precision in XMM r0 (0xf2 0x0f 0x2a, word-sized on __X64). */
+ sselxr(_jit, 0xf2, X86_SSE_CVTIS, r0, r1);
+}
+
+static void
+extr_f_d(jit_state_t *_jit, int32_t r0, int32_t r1)
+{ /* Emits 0xf3 0x0f 0x5a (CVTSS2SD): r0 receives the double-precision value of single-precision r1. */
+ ssexr(_jit, 0xf3, X86_SSE_CVTSD, r0, r1);
+}
+static void
+extr_d_f(jit_state_t *_jit, int32_t r0, int32_t r1)
+{ /* Emits 0xf2 0x0f 0x5a (CVTSD2SS): r0 receives the single-precision value of double-precision r1. */
+ ssexr(_jit, 0xf2, X86_SSE_CVTSD, r0, r1);
+}
+static void
+ucomissr(jit_state_t *_jit, int32_t r0, int32_t r1)
+{ /* Single-precision compare of r0 with r1 that only sets flags (unprefixed 0x0f 0x2e); consumed by the *_f branch emitters. */
+ sser(_jit, X86_SSE_UCOMI, r0, r1);
+}
+static void
+ucomisdr(jit_state_t *_jit, int32_t r0, int32_t r1)
+{ /* Double-precision compare of r0 with r1 that only sets flags (0x66 0x0f 0x2e); consumed by the *_d branch emitters. */
+ ssexr(_jit, 0x66, X86_SSE_UCOMI, r0, r1);
+}
+static void
+xorpsr(jit_state_t *_jit, int32_t r0, int32_t r1)
+{ /* Bitwise XOR of XMM registers, unprefixed form: r0 ^= r1 (movi_f uses r0 == r1 to produce +0.0). */
+ sser(_jit, X86_SSE_XOR, r0, r1);
+}
+static void
+xorpdr(jit_state_t *_jit, int32_t r0, int32_t r1)
+{ /* Bitwise XOR of XMM registers, 0x66-prefixed form: r0 ^= r1. */
+ ssexr(_jit, 0x66, X86_SSE_XOR, r0, r1);
+}
+static void orpdr(jit_state_t *_jit, int32_t r0, int32_t r1) maybe_unused; /* only referenced on the non-__X64 path of movi_d */
+static void
+orpdr(jit_state_t *_jit, int32_t r0, int32_t r1)
+{ /* Bitwise OR of XMM registers, 0x66-prefixed form: r0 |= r1. */
+ ssexr(_jit, 0x66, X86_SSE_OR, r0, r1);
+}
+static void
+pcmpeqlr(jit_state_t *_jit, int32_t r0, int32_t r1)
+{ /* Packed 32-bit compare-equal (0x66 0x0f 0x76); absr_f/absr_d call it with r0 == r1 to get an all-ones register. */
+ ssexr(_jit, 0x66, X86_SSE_EQD, r0, r1);
+}
+static void
+psrl(jit_state_t *_jit, int32_t r0, int32_t i0)
+{ /* Logical right shift of each 32-bit lane of r0 by immediate i0 (0x66 0x0f 0x72 /2 ib). */
+ ssexi(_jit, 0x72, r0, 0x02, i0);
+}
+static void
+psrq(jit_state_t *_jit, int32_t r0, int32_t i0)
+{ /* Logical right shift of each 64-bit lane of r0 by immediate i0 (0x66 0x0f 0x73 /2 ib). */
+ ssexi(_jit, 0x73, r0, 0x02, i0);
+}
+static void
+pslq(jit_state_t *_jit, int32_t r0, int32_t i0)
+{ /* Logical left shift of each 64-bit lane of r0 by immediate i0 (0x66 0x0f 0x73 /6 ib). */
+ ssexi(_jit, 0x73, r0, 0x06, i0);
+}
+static void
+sqrtr_f(jit_state_t *_jit, int32_t r0, int32_t r1)
+{ /* Single-precision square root: r0 = sqrt(r1) (0xf3 0x0f 0x51). */
+ ssexr(_jit, 0xf3, X86_SSE_SQRT, r0, r1);
+}
+static void
+sqrtr_d(jit_state_t *_jit, int32_t r0, int32_t r1)
+{ /* Double-precision square root: r0 = sqrt(r1) (0xf2 0x0f 0x51). */
+ ssexr(_jit, 0xf2, X86_SSE_SQRT, r0, r1);
+}
+static void
+ldr_f(jit_state_t *_jit, int32_t r0, int32_t r1)
+{ /* Load single precision: XMM r0 <- memory at the address in GPR r1 (no index, zero displacement). */
+ movssmr(_jit, 0, r1, _NOREG, _SCL1, r0);
+}
+static void
+str_f(jit_state_t *_jit, int32_t r0, int32_t r1)
+{ /* Store single precision: memory at the address in GPR r0 <- XMM r1. */
+ movssrm(_jit, r1, 0, r0, _NOREG, _SCL1);
+}
+static void
+ldr_d(jit_state_t *_jit, int32_t r0, int32_t r1)
+{ /* Load double precision: XMM r0 <- memory at the address in GPR r1 (no index, zero displacement). */
+ movsdmr(_jit, 0, r1, _NOREG, _SCL1, r0);
+}
+static void
+str_d(jit_state_t *_jit, int32_t r0, int32_t r1)
+{ /* Store double precision: memory at the address in GPR r0 <- XMM r1. */
+ movsdrm(_jit, r1, 0, r0, _NOREG, _SCL1);
+}
+
+static void
+movi_f(jit_state_t *_jit, int32_t r0, jit_float32_t i0)
+{ /* Load the single-precision constant i0 into XMM r0. */
+ union {
+ int32_t i;
+ jit_float32_t f;
+ } data; /* gives access to the bit pattern of the float */
+
+ data.f = i0;
+ if (data.f == 0.0 && !(data.i & 0x80000000)) /* positive zero only: -0.0 has the sign bit set and takes the general path */
+ xorpsr(_jit, r0, r0); /* r0 ^ r0 yields all-zero bits, i.e. +0.0 */
+ else {
+ jit_gpr_t reg = get_temp_gpr(_jit);
+ movi(_jit, jit_gpr_regno(reg), data.i); /* materialise the bit pattern in a scratch GPR ... */
+ movdlxr(_jit, r0, jit_gpr_regno(reg)); /* ... and transfer it to the XMM register */
+ unget_temp_gpr(_jit);
+ }
+}
+
+static void
+movi_d(jit_state_t *_jit, int32_t r0, jit_float64_t i0)
+{ /* Load the double-precision constant i0 into XMM r0. */
+ union {
+ int32_t ii[2];
+ jit_word_t w;
+ jit_float64_t d;
+ } data; /* bit-pattern views of the double: two 32-bit halves, or one word */
+
+ data.d = i0;
+ if (data.d == 0.0 && !(data.ii[1] & 0x80000000)) /* positive zero only; ii[1] is the half holding the sign bit on little-endian x86 */
+ xorpdr(_jit, r0, r0);
+ else {
+ jit_gpr_t ireg = get_temp_gpr(_jit);
+#if __X64
+ movi(_jit, jit_gpr_regno(ireg), data.w); /* whole bit pattern via one scratch GPR */
+ movdqxr(_jit, r0, jit_gpr_regno(ireg));
+ unget_temp_gpr(_jit);
+#else
+ jit_fpr_t freg = get_temp_fpr(_jit); /* 32-bit build: assemble the value from two 32-bit halves */
+ movi(_jit, jit_gpr_regno(ireg), data.ii[1]);
+ movdlxr(_jit, jit_fpr_regno(freg), jit_gpr_regno(ireg));
+ pslq(_jit, jit_fpr_regno(freg), 32); /* move the ii[1] half into bits 32..63 of the scratch XMM */
+ movi(_jit, jit_gpr_regno(ireg), data.ii[0]);
+ movdlxr(_jit, r0, jit_gpr_regno(ireg));
+ orpdr(_jit, r0, jit_fpr_regno(freg)); /* combine both halves in r0 */
+ unget_temp_fpr(_jit);
+ unget_temp_gpr(_jit);
+#endif
+ }
+}
+
+#if __X32
+static void
+x87rx(jit_state_t *_jit, int32_t code, int32_t md,
+ int32_t rb, int32_t ri, int32_t ms)
+{ /* Emit an x87 instruction with a memory operand; code is a two-digit octal value (see the callers below). */
+ rex(_jit, 0, 1, rb, ri, _NOREG);
+ ic(_jit, 0xd8 | (code >> 3)); /* high octal digit selects the opcode byte 0xd8..0xdf */
+ rx(_jit, (code & 7), md, rb, ri, ms); /* low octal digit is passed where the SSE emitters pass a register number */
+}
+
+/* Emit x87 opcode 0xd9 with extension digit 0 (code 010 octal: load a
+   32-bit float from memory onto the x87 stack).  x87rx returns nothing,
+   so it is called as a plain statement rather than through `return`.  */
+static void
+fldsm(jit_state_t *_jit, int32_t md, int32_t rb, int32_t ri, int32_t ms)
+{
+  x87rx(_jit, 010, md, rb, ri, ms);
+}
+
+/* Emit x87 opcode 0xd9 with extension digit 3 (code 013 octal: store the
+   x87 stack top to memory as a 32-bit float and pop it).  x87rx returns
+   nothing, so it is called as a plain statement rather than through
+   `return`.  */
+static void
+fstsm(jit_state_t *_jit, int32_t md, int32_t rb, int32_t ri, int32_t ms)
+{
+  x87rx(_jit, 013, md, rb, ri, ms);
+}
+
+/* Emit x87 opcode 0xdd with extension digit 0 (code 050 octal: load a
+   64-bit float from memory onto the x87 stack).  x87rx returns nothing,
+   so it is called as a plain statement rather than through `return`.  */
+static void
+fldlm(jit_state_t *_jit, int32_t md, int32_t rb, int32_t ri, int32_t ms)
+{
+  x87rx(_jit, 050, md, rb, ri, ms);
+}
+
+/* Emit x87 opcode 0xdd with extension digit 3 (code 053 octal: store the
+   x87 stack top to memory as a 64-bit float and pop it).  x87rx returns
+   nothing, so it is called as a plain statement rather than through
+   `return`.  */
+static void
+fstlm(jit_state_t *_jit, int32_t md, int32_t rb, int32_t ri, int32_t ms)
+{
+  x87rx(_jit, 053, md, rb, ri, ms);
+}
+#endif
+
+static void
+retval_f(jit_state_t *_jit, int32_t r0)
+{ /* Move a callee's single-precision return value into XMM r0. */
+#if __X32
+ subi(_jit, _RSP_REGNO, _RSP_REGNO, 4); /* 32-bit build: the value arrives on the x87 stack, so bounce it through 4 bytes of stack */
+ fstsm(_jit, 0, _RSP_REGNO, _NOREG, _SCL1);
+ ldr_f(_jit, r0, _RSP_REGNO);
+ addi(_jit, _RSP_REGNO, _RSP_REGNO, 4); /* release the scratch slot */
+#else
+ movr_f(_jit, r0, _XMM0_REGNO); /* 64-bit build: the value is taken from XMM0 */
+#endif
+}
+
+static void
+retval_d(jit_state_t *_jit, int32_t r0)
+{ /* Move a callee's double-precision return value into XMM r0. */
+#if __X32
+ subi(_jit, _RSP_REGNO, _RSP_REGNO, 8); /* 32-bit build: bounce the x87 value through 8 bytes of stack */
+ fstlm(_jit, 0, _RSP_REGNO, _NOREG, _SCL1);
+ ldr_d(_jit, r0, _RSP_REGNO);
+ addi(_jit, _RSP_REGNO, _RSP_REGNO, 8); /* release the scratch slot */
+#else
+ movr_d(_jit, r0, _XMM0_REGNO); /* 64-bit build: the value is taken from XMM0 */
+#endif
+}
+
+static void
+retr_f(jit_state_t *_jit, int32_t u)
+{ /* Place single-precision XMM u where the return value is expected, then emit ret. */
+#if __X32
+ subi(_jit, _RSP_REGNO, _RSP_REGNO, 4); /* 32-bit build: spill to the stack and load onto the x87 stack */
+ str_f(_jit, _RSP_REGNO, u);
+ fldsm(_jit, 0, _RSP_REGNO, _NOREG, _SCL1);
+ addi(_jit, _RSP_REGNO, _RSP_REGNO, 4); /* restore the stack pointer before returning */
+#else
+ movr_f(_jit, _XMM0_REGNO, u); /* no-op when u is already XMM0 */
+#endif
+ ret(_jit);
+}
+
+static void
+retr_d(jit_state_t *_jit, int32_t u)
+{ /* Place double-precision XMM u where the return value is expected, then emit ret. */
+#if __X32
+ subi(_jit, _RSP_REGNO, _RSP_REGNO, 8); /* 32-bit build: spill to the stack and load onto the x87 stack */
+ str_d(_jit, _RSP_REGNO, u);
+ fldlm(_jit, 0, _RSP_REGNO, _NOREG, _SCL1);
+ addi(_jit, _RSP_REGNO, _RSP_REGNO, 8); /* restore the stack pointer before returning */
+#else
+ movr_d(_jit, _XMM0_REGNO, u); /* no-op when u is already XMM0 */
+#endif
+ ret(_jit);
+}
+
+/* r0 = r1 + r2 in single precision.  ADDSS accumulates into its first
+   operand, so the destination must already hold one of the addends.  */
+static void
+addr_f(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+{
+  if (r0 != r1 && r0 == r2) {
+    /* Destination aliases only the second addend: add r1 into it.  */
+    addssr(_jit, r0, r1);
+    return;
+  }
+  /* movr_f emits nothing when r0 == r1.  */
+  movr_f(_jit, r0, r1);
+  addssr(_jit, r0, r2);
+}
+
+/* r0 = r1 + r2 in double precision.  ADDSD accumulates into its first
+   operand, so the destination must already hold one of the addends.  */
+static void
+addr_d(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+{
+  if (r0 != r1 && r0 == r2) {
+    /* Destination aliases only the second addend: add r1 into it.  */
+    addsdr(_jit, r0, r1);
+    return;
+  }
+  /* movr_d emits nothing when r0 == r1.  */
+  movr_d(_jit, r0, r1);
+  addsdr(_jit, r0, r2);
+}
+
+/* r0 = r1 - r2 in single precision.  Subtraction is not commutative, so
+   when the destination aliases only the subtrahend the subtrahend is
+   first saved in a scratch FPR.  */
+static void
+subr_f(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+{
+  if (r0 == r2 && r0 != r1) {
+    jit_fpr_t scratch = get_temp_fpr(_jit);
+    movr_f(_jit, jit_fpr_regno(scratch), r0);
+    movr_f(_jit, r0, r1);
+    subssr(_jit, r0, jit_fpr_regno(scratch));
+    unget_temp_fpr(_jit);
+    return;
+  }
+  /* movr_f emits nothing when r0 == r1.  */
+  movr_f(_jit, r0, r1);
+  subssr(_jit, r0, r2);
+}
+
+/* r0 = r1 - r2 in double precision.  Subtraction is not commutative, so
+   when the destination aliases only the subtrahend the subtrahend is
+   first saved in a scratch FPR.  */
+static void
+subr_d(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+{
+  if (r0 == r2 && r0 != r1) {
+    jit_fpr_t scratch = get_temp_fpr(_jit);
+    movr_d(_jit, jit_fpr_regno(scratch), r0);
+    movr_d(_jit, r0, r1);
+    subsdr(_jit, r0, jit_fpr_regno(scratch));
+    unget_temp_fpr(_jit);
+    return;
+  }
+  /* movr_d emits nothing when r0 == r1.  */
+  movr_d(_jit, r0, r1);
+  subsdr(_jit, r0, r2);
+}
+
+/* r0 = r1 * r2 in single precision.  MULSS accumulates into its first
+   operand, so the destination must already hold one of the factors.  */
+static void
+mulr_f(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+{
+  if (r0 != r1 && r0 == r2) {
+    /* Destination aliases only the second factor: multiply r1 into it.  */
+    mulssr(_jit, r0, r1);
+    return;
+  }
+  /* movr_f emits nothing when r0 == r1.  */
+  movr_f(_jit, r0, r1);
+  mulssr(_jit, r0, r2);
+}
+
+/* r0 = r1 * r2 in double precision.  MULSD accumulates into its first
+   operand, so the destination must already hold one of the factors.  */
+static void
+mulr_d(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+{
+  if (r0 != r1 && r0 == r2) {
+    /* Destination aliases only the second factor: multiply r1 into it.  */
+    mulsdr(_jit, r0, r1);
+    return;
+  }
+  /* movr_d emits nothing when r0 == r1.  */
+  movr_d(_jit, r0, r1);
+  mulsdr(_jit, r0, r2);
+}
+
+/* r0 = r1 / r2 in single precision.  Division is not commutative, so
+   when the destination aliases only the divisor the divisor is first
+   saved in a scratch FPR.  */
+static void
+divr_f(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+{
+  if (r0 == r2 && r0 != r1) {
+    jit_fpr_t scratch = get_temp_fpr(_jit);
+    movr_f(_jit, jit_fpr_regno(scratch), r0);
+    movr_f(_jit, r0, r1);
+    divssr(_jit, r0, jit_fpr_regno(scratch));
+    unget_temp_fpr(_jit);
+    return;
+  }
+  /* movr_f emits nothing when r0 == r1.  */
+  movr_f(_jit, r0, r1);
+  divssr(_jit, r0, r2);
+}
+
+/* r0 = r1 / r2 in double precision.  Division is not commutative, so
+   when the destination aliases only the divisor the divisor is first
+   saved in a scratch FPR.  */
+static void
+divr_d(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+{
+  if (r0 == r2 && r0 != r1) {
+    jit_fpr_t scratch = get_temp_fpr(_jit);
+    movr_d(_jit, jit_fpr_regno(scratch), r0);
+    movr_d(_jit, r0, r1);
+    divsdr(_jit, r0, jit_fpr_regno(scratch));
+    unget_temp_fpr(_jit);
+    return;
+  }
+  /* movr_d emits nothing when r0 == r1.  */
+  movr_d(_jit, r0, r1);
+  divsdr(_jit, r0, r2);
+}
+
+static void
+absr_f(jit_state_t *_jit, int32_t r0, int32_t r1)
+{ /* r0 = |r1| (single precision): AND the value with a mask whose top bit of every 32-bit lane is clear. */
+ if (r0 == r1) {
+ jit_fpr_t reg = get_temp_fpr(_jit); /* in-place: the mask has to be built in a scratch register */
+ pcmpeqlr(_jit, jit_fpr_regno(reg), jit_fpr_regno(reg)); /* register compared with itself: all bits set */
+ psrl(_jit, jit_fpr_regno(reg), 1); /* shift each 32-bit lane right by one: 0x7fffffff */
+ andpsr(_jit, r0, jit_fpr_regno(reg));
+ unget_temp_fpr(_jit);
+ }
+ else {
+ pcmpeqlr(_jit, r0, r0); /* build the mask directly in the destination */
+ psrl(_jit, r0, 1);
+ andpsr(_jit, r0, r1); /* mask & source */
+ }
+}
+
+static void
+absr_d(jit_state_t *_jit, int32_t r0, int32_t r1)
+{ /* r0 = |r1| (double precision): AND the value with a mask whose top bit of every 64-bit lane is clear. */
+ if (r0 == r1) {
+ jit_fpr_t reg = get_temp_fpr(_jit); /* in-place: the mask has to be built in a scratch register */
+ pcmpeqlr(_jit, jit_fpr_regno(reg), jit_fpr_regno(reg)); /* register compared with itself: all bits set */
+ psrq(_jit, jit_fpr_regno(reg), 1); /* shift each 64-bit lane right by one: sign bit cleared */
+ andpdr(_jit, r0, jit_fpr_regno(reg));
+ unget_temp_fpr(_jit);
+ }
+ else {
+ pcmpeqlr(_jit, r0, r0); /* build the mask directly in the destination */
+ psrq(_jit, r0, 1);
+ andpdr(_jit, r0, r1); /* mask & source */
+ }
+}
+
+static void
+negr_f(jit_state_t *_jit, int32_t r0, int32_t r1)
+{ /* r0 = -r1 (single precision): XOR the value with the sign-bit mask 0x80000000. */
+ jit_gpr_t ireg = get_temp_gpr(_jit);
+ imovi(_jit, jit_gpr_regno(ireg), 0x80000000); /* sign-bit mask in a scratch GPR */
+ if (r0 == r1) {
+ jit_fpr_t freg = get_temp_fpr(_jit); /* in-place: the mask needs its own XMM register */
+ movdlxr(_jit, jit_fpr_regno(freg), jit_gpr_regno(ireg));
+ xorpsr(_jit, r0, jit_fpr_regno(freg));
+ unget_temp_fpr(_jit);
+ } else {
+ movdlxr(_jit, r0, jit_gpr_regno(ireg)); /* put the mask in the destination, then XOR the source into it */
+ xorpsr(_jit, r0, r1);
+ }
+ unget_temp_gpr(_jit);
+}
+
+static void
+negr_d(jit_state_t *_jit, int32_t r0, int32_t r1)
+{ /* r0 = -r1 (double precision): XOR the value with a mask that has only bit 63 set. */
+ jit_gpr_t ireg = get_temp_gpr(_jit);
+ imovi(_jit, jit_gpr_regno(ireg), 0x80000000); /* bit 31 set; shifted up to bit 63 below */
+ if (r0 == r1) {
+ jit_fpr_t freg = get_temp_fpr(_jit); /* in-place: the mask needs its own XMM register */
+ movdlxr(_jit, jit_fpr_regno(freg), jit_gpr_regno(ireg));
+ pslq(_jit, jit_fpr_regno(freg), 32); /* 0x80000000 << 32: sign bit of the double */
+ xorpdr(_jit, r0, jit_fpr_regno(freg));
+ unget_temp_fpr(_jit);
+ } else {
+ movdlxr(_jit, r0, jit_gpr_regno(ireg)); /* build the mask in the destination, then XOR the source into it */
+ pslq(_jit, r0, 32);
+ xorpdr(_jit, r0, r1);
+ }
+ unget_temp_gpr(_jit);
+}
+
+/* Load a single-precision value from the absolute address i0 into XMM r0.
+   Addresses that can_sign_extend_int_p rejects are first materialised in
+   a scratch GPR.  */
+static void
+ldi_f(jit_state_t *_jit, int32_t r0, jit_word_t i0)
+{
+  if (!can_sign_extend_int_p(i0)) {
+    jit_gpr_t addr = get_temp_gpr(_jit);
+    movi(_jit, jit_gpr_regno(addr), i0);
+    ldr_f(_jit, r0, jit_gpr_regno(addr));
+    unget_temp_gpr(_jit);
+    return;
+  }
+  /* The address is usable directly as a displacement.  */
+  movssmr(_jit, i0, _NOREG, _NOREG, _SCL1, r0);
+}
+
+static void
+ldxr_f(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+{ /* Load single precision: XMM r0 <- memory at [r1 + r2] (base r1, index r2, scale _SCL1). */
+ movssmr(_jit, 0, r1, r2, _SCL1, r0);
+}
+
+/* Load a single-precision value from [r1 + i0] into XMM r0.  Offsets that
+   can_sign_extend_int_p rejects are first materialised in a scratch GPR
+   and used as an index register.  */
+static void
+ldxi_f(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0)
+{
+  if (!can_sign_extend_int_p(i0)) {
+    jit_gpr_t offset = get_temp_gpr(_jit);
+    movi(_jit, jit_gpr_regno(offset), i0);
+    ldxr_f(_jit, r0, r1, jit_gpr_regno(offset));
+    unget_temp_gpr(_jit);
+    return;
+  }
+  /* The offset is usable directly as a displacement.  */
+  movssmr(_jit, i0, r1, _NOREG, _SCL1, r0);
+}
+
+/* Store single-precision XMM r0 to the absolute address i0.  Addresses
+   that can_sign_extend_int_p rejects are first materialised in a scratch
+   GPR.  */
+static void
+sti_f(jit_state_t *_jit, jit_word_t i0, int32_t r0)
+{
+  if (!can_sign_extend_int_p(i0)) {
+    jit_gpr_t addr = get_temp_gpr(_jit);
+    movi(_jit, jit_gpr_regno(addr), i0);
+    str_f(_jit, jit_gpr_regno(addr), r0);
+    unget_temp_gpr(_jit);
+    return;
+  }
+  /* The address is usable directly as a displacement.  */
+  movssrm(_jit, r0, i0, _NOREG, _NOREG, _SCL1);
+}
+
+static void
+stxr_f(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+{ /* Store single precision: memory at [r0 + r1] <- XMM r2 (base r0, index r1, scale _SCL1). */
+ movssrm(_jit, r2, 0, r0, r1, _SCL1);
+}
+
+/* Store single-precision XMM r1 to [r0 + i0].  Offsets that
+   can_sign_extend_int_p rejects are first materialised in a scratch GPR,
+   which is then passed to stxr_f together with r0.  */
+static void
+stxi_f(jit_state_t *_jit, jit_word_t i0, int32_t r0, int32_t r1)
+{
+  if (!can_sign_extend_int_p(i0)) {
+    jit_gpr_t offset = get_temp_gpr(_jit);
+    movi(_jit, jit_gpr_regno(offset), i0);
+    stxr_f(_jit, jit_gpr_regno(offset), r0, r1);
+    unget_temp_gpr(_jit);
+    return;
+  }
+  /* The offset is usable directly as a displacement.  */
+  movssrm(_jit, r1, i0, r0, _NOREG, _SCL1);
+}
+
+static jit_reloc_t
+bltr_f(jit_state_t *_jit, int32_t r0, int32_t r1)
+{ /* Branch if r0 < r1 (single precision, ordered); returns the reloc of the emitted jump. */
+ ucomissr(_jit, r1, r0); /* operands swapped: flags describe r1 relative to r0 */
+ return ja(_jit); /* "above" = r1 > r0; not taken for unordered operands, which set CF */
+}
+
+static jit_reloc_t
+bler_f(jit_state_t *_jit, int32_t r0, int32_t r1)
+{ /* Branch if r0 <= r1 (single precision, ordered); returns the reloc of the emitted jump. */
+ ucomissr(_jit, r1, r0); /* operands swapped: flags describe r1 relative to r0 */
+ return jae(_jit); /* "above or equal" = r1 >= r0; not taken for unordered operands, which set CF */
+}
+
+static jit_reloc_t
+beqr_f(jit_state_t *_jit, int32_t r0, int32_t r1)
+{ /* Branch if r0 == r1 (single precision, ordered); returns the reloc of the je. */
+ ucomissr(_jit, r0, r1);
+ jit_reloc_t pos = jps(_jit); /* unordered also sets ZF, so skip the je when the parity flag is set */
+ jit_reloc_t ret = je(_jit);
+ jit_patch_here(_jit, pos); /* the short parity jump lands just past the je */
+ return ret;
+}
+
+static jit_reloc_t
+bger_f(jit_state_t *_jit, int32_t r0, int32_t r1)
+{ /* Branch if r0 >= r1 (single precision, ordered); returns the reloc of the emitted jump. */
+ ucomissr(_jit, r0, r1);
+ return jae(_jit); /* not taken for unordered operands, which set CF */
+}
+
+static jit_reloc_t
+bgtr_f(jit_state_t *_jit, int32_t r0, int32_t r1)
+{ /* Branch if r0 > r1 (single precision, ordered); returns the reloc of the emitted jump. */
+ ucomissr(_jit, r0, r1);
+ return ja(_jit); /* not taken for unordered operands, which set CF and ZF */
+}
+
+static jit_reloc_t
+bner_f(jit_state_t *_jit, int32_t r0, int32_t r1)
+{ /* Branch if r0 != r1 or the operands are unordered (single precision); returns the reloc of the jmp. */
+ ucomissr(_jit, r0, r1);
+ jit_reloc_t pos = jps(_jit); /* unordered: go straight to the unconditional jump */
+ jit_reloc_t zero = jzs(_jit); /* ordered and equal: skip the jump */
+ jit_patch_here(_jit, pos);
+ jit_reloc_t ret = jmp(_jit); /* reached for "unordered" and for "ordered, not equal" */
+ jit_patch_here(_jit, zero);
+ return ret;
+}
+
+static jit_reloc_t
+bunltr_f(jit_state_t *_jit, int32_t r0, int32_t r1)
+{ /* Branch if r0 < r1 or unordered (single precision). */
+ ucomissr(_jit, r0, r1);
+ return jnae(_jit); /* CF is set both for "below" and for unordered */
+}
+
+static jit_reloc_t
+bunler_f(jit_state_t *_jit, int32_t r0, int32_t r1)
+{ /* Branch if r0 <= r1 or unordered (single precision). */
+ ucomissr(_jit, r0, r1);
+ return jna(_jit); /* CF or ZF set: below, equal, or unordered */
+}
+
+static jit_reloc_t
+buneqr_f(jit_state_t *_jit, int32_t r0, int32_t r1)
+{ /* Branch if r0 == r1 or unordered (single precision). */
+ ucomissr(_jit, r0, r1);
+ return je(_jit); /* ZF is set both for equal and for unordered, so no parity check is needed */
+}
+
+static jit_reloc_t
+bunger_f(jit_state_t *_jit, int32_t r0, int32_t r1)
+{ /* Branch if r0 >= r1 or unordered (single precision). */
+ ucomissr(_jit, r1, r0); /* operands swapped: flags describe r1 relative to r0 */
+ return jna(_jit); /* r1 <= r0, or unordered */
+}
+
+static jit_reloc_t
+bungtr_f(jit_state_t *_jit, int32_t r0, int32_t r1)
+{ /* Branch if r0 > r1 or unordered (single precision). */
+ ucomissr(_jit, r1, r0); /* operands swapped: flags describe r1 relative to r0 */
+ return jnae(_jit); /* r1 < r0, or unordered */
+}
+
+static jit_reloc_t
+bltgtr_f(jit_state_t *_jit, int32_t r0, int32_t r1)
+{ /* Branch if r0 < r1 or r0 > r1, i.e. ordered and not equal (single precision). */
+ ucomissr(_jit, r0, r1);
+ return jne(_jit); /* ZF clear excludes both "equal" and "unordered" */
+}
+
+static jit_reloc_t
+bordr_f(jit_state_t *_jit, int32_t r0, int32_t r1)
+{ /* Branch if the operands are ordered, i.e. neither is NaN (single precision). */
+ ucomissr(_jit, r0, r1);
+ return jnp(_jit); /* parity flag clear = ordered */
+}
+
+static jit_reloc_t
+bunordr_f(jit_state_t *_jit, int32_t r0, int32_t r1)
+{ /* Branch if the operands are unordered, i.e. at least one is NaN (single precision). */
+ ucomissr(_jit, r0, r1);
+ return jp(_jit); /* parity flag set = unordered */
+}
+
+/* Load a double-precision value from the absolute address i0 into XMM r0.
+   Addresses that can_sign_extend_int_p rejects are first materialised in
+   a scratch GPR.  */
+static void
+ldi_d(jit_state_t *_jit, int32_t r0, jit_word_t i0)
+{
+  if (!can_sign_extend_int_p(i0)) {
+    jit_gpr_t addr = get_temp_gpr(_jit);
+    movi(_jit, jit_gpr_regno(addr), i0);
+    ldr_d(_jit, r0, jit_gpr_regno(addr));
+    unget_temp_gpr(_jit);
+    return;
+  }
+  /* The address is usable directly as a displacement.  */
+  movsdmr(_jit, i0, _NOREG, _NOREG, _SCL1, r0);
+}
+
+static void
+ldxr_d(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+{ /* Load double precision: XMM r0 <- memory at [r1 + r2] (base r1, index r2, scale _SCL1). */
+ movsdmr(_jit, 0, r1, r2, _SCL1, r0);
+}
+
+/* Load a double-precision value from [r1 + i0] into XMM r0.  Offsets that
+   can_sign_extend_int_p rejects are first materialised in a scratch GPR
+   and used as an index register.  */
+static void
+ldxi_d(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0)
+{
+  if (!can_sign_extend_int_p(i0)) {
+    jit_gpr_t offset = get_temp_gpr(_jit);
+    movi(_jit, jit_gpr_regno(offset), i0);
+    ldxr_d(_jit, r0, r1, jit_gpr_regno(offset));
+    unget_temp_gpr(_jit);
+    return;
+  }
+  /* The offset is usable directly as a displacement.  */
+  movsdmr(_jit, i0, r1, _NOREG, _SCL1, r0);
+}
+
+/* Store double-precision XMM r0 to the absolute address i0.  Addresses
+   that can_sign_extend_int_p rejects are first materialised in a scratch
+   GPR.  */
+static void
+sti_d(jit_state_t *_jit, jit_word_t i0, int32_t r0)
+{
+  if (!can_sign_extend_int_p(i0)) {
+    jit_gpr_t addr = get_temp_gpr(_jit);
+    movi(_jit, jit_gpr_regno(addr), i0);
+    str_d(_jit, jit_gpr_regno(addr), r0);
+    unget_temp_gpr(_jit);
+    return;
+  }
+  /* The address is usable directly as a displacement.  */
+  movsdrm(_jit, r0, i0, _NOREG, _NOREG, _SCL1);
+}
+
+static void
+stxr_d(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+{ /* Store double precision: memory at [r0 + r1] <- XMM r2 (base r0, index r1, scale _SCL1). */
+ movsdrm(_jit, r2, 0, r0, r1, _SCL1);
+}
+
+/* Store double-precision XMM r1 to [r0 + i0].  Offsets that
+   can_sign_extend_int_p rejects are first materialised in a scratch GPR,
+   which is then passed to stxr_d together with r0.  */
+static void
+stxi_d(jit_state_t *_jit, jit_word_t i0, int32_t r0, int32_t r1)
+{
+  if (!can_sign_extend_int_p(i0)) {
+    jit_gpr_t offset = get_temp_gpr(_jit);
+    movi(_jit, jit_gpr_regno(offset), i0);
+    stxr_d(_jit, jit_gpr_regno(offset), r0, r1);
+    unget_temp_gpr(_jit);
+    return;
+  }
+  /* The offset is usable directly as a displacement.  */
+  movsdrm(_jit, r1, i0, r0, _NOREG, _SCL1);
+}
+
+static jit_reloc_t
+bltr_d(jit_state_t *_jit, int32_t r0, int32_t r1)
+{ /* Branch if r0 < r1 (double precision, ordered); returns the reloc of the emitted jump. */
+ ucomisdr(_jit, r1, r0); /* operands swapped: flags describe r1 relative to r0 */
+ return ja(_jit); /* "above" = r1 > r0; not taken for unordered operands, which set CF */
+}
+
+static jit_reloc_t
+bler_d(jit_state_t *_jit, int32_t r0, int32_t r1)
+{ /* Branch if r0 <= r1 (double precision, ordered); returns the reloc of the emitted jump. */
+ ucomisdr(_jit, r1, r0); /* operands swapped: flags describe r1 relative to r0 */
+ return jae(_jit); /* "above or equal" = r1 >= r0; not taken for unordered operands, which set CF */
+}
+
+static jit_reloc_t
+beqr_d(jit_state_t *_jit, int32_t r0, int32_t r1)
+{ /* Branch if r0 == r1 (double precision, ordered); returns the reloc of the je. */
+ ucomisdr(_jit, r0, r1);
+ jit_reloc_t pos = jps(_jit); /* unordered also sets ZF, so skip the je when the parity flag is set */
+ jit_reloc_t ret = je(_jit);
+ jit_patch_here(_jit, pos); /* the short parity jump lands just past the je */
+ return ret;
+}
+
+static jit_reloc_t
+bger_d(jit_state_t *_jit, int32_t r0, int32_t r1)
+{ /* Branch if r0 >= r1 (double precision, ordered); returns the reloc of the emitted jump. */
+ ucomisdr(_jit, r0, r1);
+ return jae(_jit); /* not taken for unordered operands, which set CF */
+}
+
+static jit_reloc_t
+bgtr_d(jit_state_t *_jit, int32_t r0, int32_t r1)
+{ /* Branch if r0 > r1 (double precision, ordered); returns the reloc of the emitted jump. */
+ ucomisdr(_jit, r0, r1);
+ return ja(_jit); /* not taken for unordered operands, which set CF and ZF */
+}
+
+static jit_reloc_t
+bner_d(jit_state_t *_jit, int32_t r0, int32_t r1)
+{ /* Branch if r0 != r1 or the operands are unordered (double precision); returns the reloc of the jmp. */
+ ucomisdr(_jit, r0, r1);
+ jit_reloc_t pos = jps(_jit); /* unordered: go straight to the unconditional jump */
+ jit_reloc_t zero = jzs(_jit); /* ordered and equal: skip the jump */
+ jit_patch_here(_jit, pos);
+ jit_reloc_t ret = jmp(_jit); /* reached for "unordered" and for "ordered, not equal" */
+ jit_patch_here(_jit, zero);
+ return ret;
+}
+
+static jit_reloc_t
+bunltr_d(jit_state_t *_jit, int32_t r0, int32_t r1)
+{ /* Branch if r0 < r1 or unordered (double precision). */
+ ucomisdr(_jit, r0, r1);
+ return jnae(_jit); /* CF is set both for "below" and for unordered */
+}
+
+static jit_reloc_t
+bunler_d(jit_state_t *_jit, int32_t r0, int32_t r1)
+{ /* Branch if r0 <= r1 or unordered (double precision). */
+ ucomisdr(_jit, r0, r1);
+ return jna(_jit); /* CF or ZF set: below, equal, or unordered */
+}
+
+static jit_reloc_t
+buneqr_d(jit_state_t *_jit, int32_t r0, int32_t r1)
+{ /* Branch if r0 == r1 or unordered (double precision). */
+ ucomisdr(_jit, r0, r1);
+ return je(_jit); /* ZF is set both for equal and for unordered, so no parity check is needed */
+}
+
+static jit_reloc_t
+bunger_d(jit_state_t *_jit, int32_t r0, int32_t r1)
+{ /* Branch if r0 >= r1 or unordered (double precision). */
+ ucomisdr(_jit, r1, r0); /* operands swapped: flags describe r1 relative to r0 */
+ return jna(_jit); /* r1 <= r0, or unordered */
+}
+
+static jit_reloc_t
+bungtr_d(jit_state_t *_jit, int32_t r0, int32_t r1)
+{ /* Branch if r0 > r1 or unordered (double precision). */
+ ucomisdr(_jit, r1, r0); /* operands swapped: flags describe r1 relative to r0 */
+ return jnae(_jit); /* r1 < r0, or unordered */
+}
+
+static jit_reloc_t
+bltgtr_d(jit_state_t *_jit, int32_t r0, int32_t r1)
+{ /* Branch if r0 < r1 or r0 > r1, i.e. ordered and not equal (double precision). */
+ ucomisdr(_jit, r0, r1);
+ return jne(_jit); /* ZF clear excludes both "equal" and "unordered" */
+}
+
+static jit_reloc_t
+bordr_d(jit_state_t *_jit, int32_t r0, int32_t r1)
+{ /* Branch if the operands are ordered, i.e. neither is NaN (double precision). */
+ ucomisdr(_jit, r0, r1);
+ return jnp(_jit); /* parity flag clear = ordered */
+}
+
+static jit_reloc_t
+bunordr_d(jit_state_t *_jit, int32_t r0, int32_t r1)
+{ /* Branch if the operands are unordered, i.e. at least one is NaN (double precision). */
+ ucomisdr(_jit, r0, r1);
+ return jp(_jit); /* parity flag set = unordered */
+}
diff --git a/deps/lightening/lightening/x86.c b/deps/lightening/lightening/x86.c
new file mode 100644
index 0000000..f8ac4b0
--- /dev/null
+++ b/deps/lightening/lightening/x86.c
@@ -0,0 +1,407 @@
+/*
+ * Copyright (C) 2012-2020 Free Software Foundation, Inc.
+ *
+ * This file is part of GNU lightning.
+ *
+ * GNU lightning is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ *
+ * GNU lightning is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+ * License for more details.
+ *
+ * Authors:
+ * Paulo Cesar Pereira de Andrade
+ */
+
+#define _NOREG 0xffff /* placeholder passed where an addressing form has no base or index register (e.g. ldr_f, ldi_f) */
+
+typedef struct { /* CPU feature flags; filled in by jit_get_cpu from CPUID results */
+ /* x87 present */
+ uint32_t fpu : 1;
+ /* cmpxchg8b instruction */
+ uint32_t cmpxchg8b : 1;
+ /* cmov and fcmov branchless conditional mov */
+ uint32_t cmov : 1;
+ /* mmx registers/instructions available */
+ uint32_t mmx : 1;
+ /* sse registers/instructions available */
+ uint32_t sse : 1;
+ /* sse2 registers/instructions available */
+ uint32_t sse2 : 1;
+ /* sse3 instructions available */
+ uint32_t sse3 : 1;
+ /* pclmulqdq instruction */
+ uint32_t pclmulqdq : 1;
+ /* ssse3 supplemental sse3 instructions available */
+ uint32_t ssse3 : 1;
+ /* fused multiply/add using ymm state */
+ uint32_t fma : 1;
+ /* cmpxchg16b instruction */
+ uint32_t cmpxchg16b : 1;
+ /* sse4.1 instructions available */
+ uint32_t sse4_1 : 1;
+ /* sse4.2 instructions available */
+ uint32_t sse4_2 : 1;
+ /* movbe instruction available */
+ uint32_t movbe : 1;
+ /* popcnt instruction available */
+ uint32_t popcnt : 1;
+ /* aes instructions available */
+ uint32_t aes : 1;
+ /* avx instructions available */
+ uint32_t avx : 1;
+ /* lahf/sahf available in 64 bits mode */
+ uint32_t lahf : 1;
+} jit_cpu_t;
+
+static jit_cpu_t jit_cpu; /* zero-initialised (static storage) until jit_get_cpu runs */
+
+static inline jit_reloc_t
+emit_rel8_reloc (jit_state_t *_jit, uint8_t inst_start)
+{ /* Emit a one-byte zero placeholder and return a JIT_RELOC_REL8 reloc describing it. */
+ uint8_t *loc = _jit->pc.uc; /* address of the placeholder byte */
+ emit_u8 (_jit, 0);
+ return jit_reloc(_jit, JIT_RELOC_REL8, inst_start, loc, _jit->pc.uc, 0); /* _jit->pc.uc is now the address just past the placeholder */
+}
+
+static inline jit_reloc_t
+emit_rel32_reloc (jit_state_t *_jit, uint8_t inst_start)
+{ /* Emit a four-byte zero placeholder and return a JIT_RELOC_REL32 reloc describing it. */
+ uint8_t *loc = _jit->pc.uc; /* address of the placeholder word */
+ emit_u32 (_jit, 0);
+ return jit_reloc(_jit, JIT_RELOC_REL32, inst_start, loc, _jit->pc.uc, 0); /* _jit->pc.uc is now the address just past the placeholder */
+}
+
+#include "x86-cpu.c"
+#include "x86-sse.c"
+
+jit_bool_t
+jit_get_cpu(void)
+{ /* Probe the host CPU with CPUID, record the feature bits in jit_cpu, and return whether SSE2 is available. */
+ union {
+ struct {
+ uint32_t sse3 : 1;
+ uint32_t pclmulqdq : 1;
+ uint32_t dtes64 : 1; /* amd reserved */
+ uint32_t monitor : 1;
+ uint32_t ds_cpl : 1; /* amd reserved */
+ uint32_t vmx : 1; /* amd reserved */
+ uint32_t smx : 1; /* amd reserved */
+ uint32_t est : 1; /* amd reserved */
+ uint32_t tm2 : 1; /* amd reserved */
+ uint32_t ssse3 : 1;
+ uint32_t cntx_id : 1; /* amd reserved */
+ uint32_t __reserved0 : 1;
+ uint32_t fma : 1;
+ uint32_t cmpxchg16b : 1;
+ uint32_t xtpr : 1; /* amd reserved */
+ uint32_t pdcm : 1; /* amd reserved */
+ uint32_t __reserved1 : 1;
+ uint32_t pcid : 1; /* amd reserved */
+ uint32_t dca : 1; /* amd reserved */
+ uint32_t sse4_1 : 1;
+ uint32_t sse4_2 : 1;
+ uint32_t x2apic : 1; /* amd reserved */
+ uint32_t movbe : 1; /* amd reserved */
+ uint32_t popcnt : 1;
+ uint32_t tsc : 1; /* amd reserved */
+ uint32_t aes : 1;
+ uint32_t xsave : 1;
+ uint32_t osxsave : 1;
+ uint32_t avx : 1;
+ uint32_t __reserved2 : 1; /* amd F16C */
+ uint32_t __reserved3 : 1;
+ uint32_t __alwayszero : 1; /* amd RAZ */
+ } bits;
+ jit_uword_t cpuid; /* raw register value written by the asm below; bits overlays it */
+ } ecx;
+ union {
+ struct {
+ uint32_t fpu : 1;
+ uint32_t vme : 1;
+ uint32_t de : 1;
+ uint32_t pse : 1;
+ uint32_t tsc : 1;
+ uint32_t msr : 1;
+ uint32_t pae : 1;
+ uint32_t mce : 1;
+ uint32_t cmpxchg8b : 1;
+ uint32_t apic : 1;
+ uint32_t __reserved0 : 1;
+ uint32_t sep : 1;
+ uint32_t mtrr : 1;
+ uint32_t pge : 1;
+ uint32_t mca : 1;
+ uint32_t cmov : 1;
+ uint32_t pat : 1;
+ uint32_t pse36 : 1;
+ uint32_t psn : 1; /* amd reserved */
+ uint32_t clfsh : 1;
+ uint32_t __reserved1 : 1;
+ uint32_t ds : 1; /* amd reserved */
+ uint32_t acpi : 1; /* amd reserved */
+ uint32_t mmx : 1;
+ uint32_t fxsr : 1;
+ uint32_t sse : 1;
+ uint32_t sse2 : 1;
+ uint32_t ss : 1; /* amd reserved */
+ uint32_t htt : 1;
+ uint32_t tm : 1; /* amd reserved */
+ uint32_t __reserved2 : 1;
+ uint32_t pbe : 1; /* amd reserved */
+ } bits;
+ jit_uword_t cpuid; /* raw register value written by the asm below; bits overlays it */
+ } edx;
+#if __X32
+ int ac, flags;
+#endif
+ jit_uword_t eax, ebx; /* outputs of cpuid that are not inspected further */
+
+#if __X32
+ /* adapted from glibc __sysconf */
+ __asm__ volatile ("pushfl;\n\t"
+ "popl %0;\n\t"
+ "movl $0x240000, %1;\n\t"
+ "xorl %0, %1;\n\t"
+ "pushl %1;\n\t"
+ "popfl;\n\t"
+ "pushfl;\n\t"
+ "popl %1;\n\t"
+ "xorl %0, %1;\n\t"
+ "pushl %0;\n\t"
+ "popfl"
+ : "=r" (flags), "=r" (ac)); /* tries to flip EFLAGS bits 18 and 21 (0x240000); ac ends up holding the bits that actually changed, and the original flags are restored */
+
+ /* i386 or i486 without cpuid */
+ if ((ac & (1 << 21)) == 0) /* bit 21 could not be toggled */
+ /* probably without x87 as well */
+ return 0;
+#endif
+
+ /* query %eax = 1 function */
+ __asm__ volatile (
+#if __X32 || __X64_32
+ "xchgl %%ebx, %1; cpuid; xchgl %%ebx, %1"
+#else
+ "xchgq %%rbx, %1; cpuid; xchgq %%rbx, %1"
+#endif
+ : "=a" (eax), "=r" (ebx),
+ "=c" (ecx.cpuid), "=d" (edx.cpuid)
+ : "0" (1)); /* the xchg pair swaps rbx/ebx with operand %1 around cpuid, so the register holds its prior value again afterwards */
+
+ jit_cpu.fpu = edx.bits.fpu;
+ jit_cpu.cmpxchg8b = edx.bits.cmpxchg8b;
+ jit_cpu.cmov = edx.bits.cmov;
+ jit_cpu.mmx = edx.bits.mmx;
+ jit_cpu.sse = edx.bits.sse;
+ jit_cpu.sse2 = edx.bits.sse2;
+ jit_cpu.sse3 = ecx.bits.sse3;
+ jit_cpu.pclmulqdq = ecx.bits.pclmulqdq;
+ jit_cpu.ssse3 = ecx.bits.ssse3;
+ jit_cpu.fma = ecx.bits.fma;
+ jit_cpu.cmpxchg16b = ecx.bits.cmpxchg16b;
+ jit_cpu.sse4_1 = ecx.bits.sse4_1;
+ jit_cpu.sse4_2 = ecx.bits.sse4_2;
+ jit_cpu.movbe = ecx.bits.movbe;
+ jit_cpu.popcnt = ecx.bits.popcnt;
+ jit_cpu.aes = ecx.bits.aes;
+ jit_cpu.avx = ecx.bits.avx;
+
+ /* query %eax = 0x80000001 function */
+#if __X64
+ __asm__ volatile (
+# if __X64_32
+ "xchgl %%ebx, %1; cpuid; xchgl %%ebx, %1"
+# else
+ "xchgq %%rbx, %1; cpuid; xchgq %%rbx, %1"
+# endif
+ : "=a" (eax), "=r" (ebx),
+ "=c" (ecx.cpuid), "=d" (edx.cpuid)
+ : "0" (0x80000001));
+ jit_cpu.lahf = ecx.cpuid & 1; /* bit 0 of ecx from this leaf; on non-__X64 builds lahf is never assigned here */
+#endif
+
+ return jit_cpu.sse2; /* SSE2 is the feature this function reports as its result */
+}
+
+jit_bool_t
+jit_init(jit_state_t *_jit)
+{ /* Report whether SSE2 was detected; _jit is not used. The flag is only set by jit_get_cpu. */
+ return jit_cpu.sse2;
+}
+
+static const jit_gpr_t abi_gpr_args[] = { /* GPRs handed out to integer/pointer arguments by next_abi_arg, in order */
+#if __X32
+ /* No GPRs in args. */
+#elif __CYGWIN__
+ _RCX, _RDX, _R8, _R9 /* four registers; order matches the Microsoft x64 calling convention */
+#else
+ _RDI, _RSI, _RDX, _RCX, _R8, _R9 /* six registers; order matches the System V AMD64 calling convention */
+#endif
+};
+
+static const jit_fpr_t abi_fpr_args[] = { /* XMM registers handed out to floating-point arguments by next_abi_arg, in order */
+#if __X32
+ /* No FPRs in args. */
+#elif __CYGWIN__
+ _XMM0, _XMM1, _XMM2, _XMM3 /* four registers, paired position-by-position with abi_gpr_args (see next_abi_arg) */
+#else
+ _XMM0, _XMM1, _XMM2, _XMM3, _XMM4, _XMM5, _XMM6, _XMM7
+#endif
+};
+
+static const int abi_gpr_arg_count = sizeof(abi_gpr_args) / sizeof(abi_gpr_args[0]); /* number of entries in abi_gpr_args */
+static const int abi_fpr_arg_count = sizeof(abi_fpr_args) / sizeof(abi_fpr_args[0]); /* number of entries in abi_fpr_args */
+
+struct abi_arg_iterator /* cursor used by next_abi_arg to assign a location to each argument in turn */
+{
+ const jit_operand_t *args; /* argument descriptors supplied to reset_abi_arg_iterator */
+ size_t argc; /* number of entries in args */
+
+ size_t arg_idx; /* index of the next argument to place */
+ size_t gpr_idx; /* next unused slot in abi_gpr_args */
+ size_t fpr_idx; /* next unused slot in abi_fpr_args */
+ size_t stack_size; /* offset from JIT_SP for the next stack-passed argument */
+ size_t stack_padding; /* not written by the code in this section beyond the zeroing memset */
+};
+
+/* Size in bytes of a value of the given operand ABI kind.  Pointers are
+   4 or 8 bytes as chosen by CHOOSE_32_64; any kind not listed aborts.  */
+static size_t
+jit_operand_abi_sizeof(enum jit_operand_abi abi)
+{
+  switch (abi) {
+  /* One byte.  */
+  case JIT_OPERAND_ABI_UINT8:
+  case JIT_OPERAND_ABI_INT8:
+    return 1;
+  /* Two bytes.  */
+  case JIT_OPERAND_ABI_UINT16:
+  case JIT_OPERAND_ABI_INT16:
+    return 2;
+  /* Four bytes: 32-bit integers and single-precision floats.  */
+  case JIT_OPERAND_ABI_UINT32:
+  case JIT_OPERAND_ABI_INT32:
+  case JIT_OPERAND_ABI_FLOAT:
+    return 4;
+  /* Eight bytes: 64-bit integers and double-precision floats.  */
+  case JIT_OPERAND_ABI_UINT64:
+  case JIT_OPERAND_ABI_INT64:
+  case JIT_OPERAND_ABI_DOUBLE:
+    return 8;
+  /* Word-sized.  */
+  case JIT_OPERAND_ABI_POINTER:
+    return CHOOSE_32_64(4, 8);
+  default:
+    abort();
+  }
+}
+
+/* Round BYTES up to the next multiple of the machine word size (4 or 8
+   bytes, as chosen by CHOOSE_32_64).  */
+static size_t
+round_size_up_to_words(size_t bytes)
+{
+  const size_t word = CHOOSE_32_64(4, 8);
+  const size_t padded = bytes + word - 1;
+  /* Dropping the remainder is the same as dividing and multiplying back.  */
+  return padded - padded % word;
+}
+
+static size_t
+jit_initial_frame_size (void)
+{ /* Bytes already on the stack at function entry: one machine word (__WORDSIZE bits). */
+ return __WORDSIZE / 8; // Saved return address is on stack.
+}
+
+static void
+reset_abi_arg_iterator(struct abi_arg_iterator *iter, size_t argc,
+ const jit_operand_t *args)
+{ /* Initialise iter to walk the argc argument descriptors in args from the beginning. */
+ memset(iter, 0, sizeof *iter); /* all indices and sizes start at zero */
+ iter->argc = argc;
+ iter->args = args;
+#if __CYGWIN__ && __X64
+ // Reserve slots on the stack for 4 register parameters (8 bytes each).
+ iter->stack_size = 32; /* so the first stack-passed argument is placed at offset 32 */
+#endif
+}
+
+static void
+next_abi_arg(struct abi_arg_iterator *iter, jit_operand_t *arg)
+{ /* Write the location (GPR, FPR, or stack slot off JIT_SP) of the next argument into *arg and advance iter. */
+ ASSERT(iter->arg_idx < iter->argc);
+ enum jit_operand_abi abi = iter->args[iter->arg_idx].abi;
+ if (is_gpr_arg(abi) && iter->gpr_idx < abi_gpr_arg_count) { /* integer-class argument and a GPR is still free */
+ *arg = jit_operand_gpr (abi, abi_gpr_args[iter->gpr_idx++]);
+#ifdef __CYGWIN__
+ iter->fpr_idx++; /* on Cygwin a GPR argument also consumes the FPR slot at the same position */
+#endif
+ } else if (is_fpr_arg(abi) && iter->fpr_idx < abi_fpr_arg_count) { /* float-class argument and an FPR is still free */
+ *arg = jit_operand_fpr (abi, abi_fpr_args[iter->fpr_idx++]);
+#ifdef __CYGWIN__
+ iter->gpr_idx++; /* and vice versa */
+#endif
+ } else {
+ *arg = jit_operand_mem (abi, JIT_SP, iter->stack_size); /* out of registers: pass on the stack at the current offset */
+ size_t bytes = jit_operand_abi_sizeof (abi);
+ iter->stack_size += round_size_up_to_words (bytes); /* every stack argument occupies a whole number of words */
+ }
+ iter->arg_idx++;
+}
+
+static void
+jit_flush(void *fptr, void *tptr)
+{ /* Intentionally empty on x86: both arguments are ignored. */
+}
+
+static inline size_t
+jit_stack_alignment(void)
+{ /* Stack alignment used by this backend: the constant 16 (presumably bytes). */
+ return 16;
+}
+
+/*
+ * Now that the target ADDR of RELOC is known, rewind the emission pointer
+ * to the start of the instruction that carries the reloc and emit it
+ * again with the concrete target.
+ *
+ * Nothing is done when the reloc records no instruction start
+ * (inst_start_offset == 0), or when ADDR lies after the instruction start
+ * and at or before the current emission pointer, i.e. inside the bytes
+ * that would be overwritten.
+ *
+ * movi/jmpi/jcci are invoked as plain statements followed by `return;`:
+ * ISO C does not allow `return expr;` in a function returning void.
+ */
+static void
+jit_try_shorten(jit_state_t *_jit, jit_reloc_t reloc, jit_pointer_t addr)
+{
+  uint8_t *loc = _jit->start + reloc.offset;
+  uint8_t *start = loc - reloc.inst_start_offset;
+  uint8_t *end = _jit->pc.uc;
+  jit_imm_t i0 = (jit_imm_t)addr;
+
+  if (loc == start)
+    return;
+
+  if (start < (uint8_t*)addr && (uint8_t*)addr <= end)
+    return;
+
+  switch (reloc.kind)
+    {
+    case JIT_RELOC_ABSOLUTE: {
+      _jit->pc.uc = start;
+      ASSERT((loc[-1] & ~7) == 0xb8); // MOVI
+      /* Recover the destination register from the encoded instruction:
+         low three bits from the opcode byte, bit 3 from the low bit of
+         the byte before it when a prefix byte is present.  */
+      int32_t r0 = loc[-1] & 7;
+      if (start != loc - 1) {
+        ASSERT(start == loc - 2);
+        r0 |= (loc[-2] & 1) << 3;
+      }
+      movi(_jit, r0, i0);
+      return;
+    }
+    case JIT_RELOC_REL8:
+      ASSERT((loc[-1] & ~0xf) == 0x70 || loc[-1] == 0xeb); // JCCSI or JMPSI
+      /* Nothing useful to do. */
+      return;
+    case JIT_RELOC_REL32:
+      _jit->pc.uc = start;
+      if (start[0] == 0xe9) { // JMP
+        jmpi(_jit, i0);
+        return;
+      }
+      ASSERT(start[0] == 0x0f); // JCC
+      /* Second opcode byte with bit 7 cleared is what jcci receives as
+         the condition argument.  */
+      jcci(_jit, start[1] & ~0x80, i0);
+      return;
+    default:
+      /* We don't emit other kinds of reloc. */
+      abort ();
+    }
+}
+
+static void*
+bless_function_pointer(void *ptr)
+{ /* Identity on x86: returns ptr unchanged. */
+ return ptr;
+}
diff --git a/deps/lightening/lightening/x86.h b/deps/lightening/lightening/x86.h
new file mode 100644
index 0000000..4eaaf95
--- /dev/null
+++ b/deps/lightening/lightening/x86.h
@@ -0,0 +1,163 @@
+/*
+ * Copyright (C) 2012-2019 Free Software Foundation, Inc.
+ *
+ * This file is part of GNU lightning.
+ *
+ * GNU lightning is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ *
+ * GNU lightning is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+ * License for more details.
+ *
+ * Authors:
+ * Paulo Cesar Pereira de Andrade
+ */
+
+#ifndef _jit_x86_h
+#define _jit_x86_h
+
+#if __WORDSIZE == 32 /* 32-bit words: either x86-64 with 32-bit words or plain 32-bit x86 */
+# if defined(__x86_64__) /* x86-64 instruction set with 32-bit words (presumably the x32 ABI -- confirm) */
+# define __X64 1
+# define __X64_32 1
+# define __X32 0
+# else /* 32-bit x86 */
+# define __X64 0
+# define __X64_32 0
+# define __X32 1
+# endif
+#else /* every other word size is treated as 64-bit x86-64; NOTE(review): an undefined __WORDSIZE evaluates to 0 in #if and also lands here -- confirm it is always defined before this header */
+# define __X64 1
+# define __X64_32 0
+# define __X32 0
+#endif
+
+#define _RAX JIT_GPR(0) /* general-purpose registers 0-7, available in every mode; indices presumably follow the hardware register encoding -- confirm */
+#define _RCX JIT_GPR(1)
+#define _RDX JIT_GPR(2)
+#define _RBX JIT_GPR(3)
+#define _RSP JIT_GPR(4)
+#define _RBP JIT_GPR(5)
+#define _RSI JIT_GPR(6)
+#define _RDI JIT_GPR(7)
+
+#define _XMM0 JIT_FPR(0) /* floating-point registers 0-7, available in every mode */
+#define _XMM1 JIT_FPR(1)
+#define _XMM2 JIT_FPR(2)
+#define _XMM3 JIT_FPR(3)
+#define _XMM4 JIT_FPR(4)
+#define _XMM5 JIT_FPR(5)
+#define _XMM6 JIT_FPR(6)
+#define _XMM7 JIT_FPR(7)
+
+#if __X64 /* registers 8-15 are only named when targeting x86-64 */
+# define _R8 JIT_GPR(8)
+# define _R9 JIT_GPR(9)
+# define _R10 JIT_GPR(10)
+# define _R11 JIT_GPR(11)
+# define _R12 JIT_GPR(12)
+# define _R13 JIT_GPR(13)
+# define _R14 JIT_GPR(14)
+# define _R15 JIT_GPR(15)
+# define _XMM8 JIT_FPR(8)
+# define _XMM9 JIT_FPR(9)
+# define _XMM10 JIT_FPR(10)
+# define _XMM11 JIT_FPR(11)
+# define _XMM12 JIT_FPR(12)
+# define _XMM13 JIT_FPR(13)
+# define _XMM14 JIT_FPR(14)
+# define _XMM15 JIT_FPR(15)
+#endif
+
+#define JIT_SP _RSP /* stack pointer */
+#define JIT_LR JIT_TMP0 /* JIT_LR is an alias of the per-mode temporary JIT_TMP0 chosen below */
+#if __X32 /* 32-bit x86: three R, three V, seven F registers */
+# define JIT_R0 _RAX /* presumably R = caller-save and V = callee-save, following the naming used by the manual -- confirm */
+# define JIT_R1 _RCX
+# define JIT_R2 _RDX
+# define JIT_V0 _RBP
+# define JIT_V1 _RSI
+# define JIT_V2 _RDI
+# define JIT_TMP0 _RBX
+# define JIT_F0 _XMM0
+# define JIT_F1 _XMM1
+# define JIT_F2 _XMM2
+# define JIT_F3 _XMM3
+# define JIT_F4 _XMM4
+# define JIT_F5 _XMM5
+# define JIT_F6 _XMM6
+# define JIT_FTMP _XMM7 /* floating-point temporary */
+# define JIT_PLATFORM_CALLEE_SAVE_GPRS JIT_TMP0 /* only this mode lists an extra callee-save GPR (JIT_TMP0, i.e. _RBX) */
+#elif __CYGWIN__ /* x86-64 built under Cygwin (presumably the Windows x64 calling convention -- confirm) */
+# define JIT_R0 _RAX
+# define JIT_R1 _RCX
+# define JIT_R2 _RDX
+# define JIT_R3 _R8
+# define JIT_R4 _R9
+# define JIT_R5 _R10
+# define JIT_TMP0 _R11
+# define JIT_V0 _RBX
+# define JIT_V1 _RSI
+# define JIT_V2 _RDI
+# define JIT_V3 _R12
+# define JIT_V4 _R13
+# define JIT_V5 _R14
+# define JIT_V6 _R15
+# define JIT_F0 _XMM0
+# define JIT_F1 _XMM1
+# define JIT_F2 _XMM2
+# define JIT_F3 _XMM3
+# define JIT_F4 _XMM4
+# define JIT_FTMP _XMM5 /* floating-point temporary */
+# define JIT_VF0 _XMM6 /* only this mode defines JIT_VF* registers (presumably callee-save FPRs -- confirm) */
+# define JIT_VF1 _XMM7
+# define JIT_VF2 _XMM8
+# define JIT_VF3 _XMM9
+# define JIT_VF4 _XMM10
+# define JIT_VF5 _XMM11
+# define JIT_VF6 _XMM12
+# define JIT_VF7 _XMM13
+# define JIT_VF8 _XMM14
+# define JIT_VF9 _XMM15
+# define JIT_PLATFORM_CALLEE_SAVE_GPRS /**/
+#else /* every other x86-64 target (presumably the System V ABI -- confirm) */
+# define JIT_R0 _RAX
+# define JIT_R1 _RCX
+# define JIT_R2 _RDX
+# define JIT_R3 _RSI
+# define JIT_R4 _RDI
+# define JIT_R5 _R8
+# define JIT_R6 _R9
+# define JIT_R7 _R10
+# define JIT_TMP0 _R11
+# define JIT_V0 _RBX
+# define JIT_V1 _R12
+# define JIT_V2 _R13
+# define JIT_V3 _R14
+# define JIT_V4 _R15
+# define JIT_F0 _XMM0
+# define JIT_F1 _XMM1
+# define JIT_F2 _XMM2
+# define JIT_F3 _XMM3
+# define JIT_F4 _XMM4
+# define JIT_F5 _XMM5
+# define JIT_F6 _XMM6
+# define JIT_F7 _XMM7
+# define JIT_F8 _XMM8
+# define JIT_F9 _XMM9
+# define JIT_F10 _XMM10
+# define JIT_F11 _XMM11
+# define JIT_F12 _XMM12
+# define JIT_F13 _XMM13
+# define JIT_F14 _XMM14
+# define JIT_FTMP _XMM15 /* floating-point temporary */
+# define JIT_PLATFORM_CALLEE_SAVE_GPRS /**/
+#endif
+
+#define JIT_PLATFORM_CALLEE_SAVE_FPRS /* expands to nothing in every mode */
+
+#endif /* _jit_x86_h */
diff --git a/deps/lightening/lightning.texi b/deps/lightening/lightning.texi
new file mode 100644
index 0000000..88f397a
--- /dev/null
+++ b/deps/lightening/lightning.texi
@@ -0,0 +1,1760 @@
+\input texinfo.tex @c -*- texinfo -*-
+@c %**start of header (This is for running Texinfo on a region.)
+
+@setfilename lightning.info
+
+@set TITLE Using @sc{gnu} @i{lightning}
+@set TOPIC installing and using
+
+@settitle @value{TITLE}
+
+@c ---------------------------------------------------------------------
+@c Common macros
+@c ---------------------------------------------------------------------
+
+@macro bulletize{a}
+@item
+\a\
+@end macro
+
+@macro rem{a}
+@r{@i{\a\}}
+@end macro
+
+@macro gnu{}
+@sc{gnu}
+@end macro
+
+@macro lightning{}
+@gnu{} @i{lightning}
+@end macro
+
+@c ---------------------------------------------------------------------
+@c Macros for Texinfo 3.1/4.0 compatibility
+@c ---------------------------------------------------------------------
+
+@c @hlink (macro), @url and @email are used instead of @uref for Texinfo 3.1
+@c compatibility
+@macro hlink{url, link}
+\link\ (\url\)
+@end macro
+
+@c ifhtml can only be true in Texinfo 4.0, which has uref
+@ifhtml
+@unmacro hlink
+
+@macro hlink{url, link}
+@uref{\url\, \link\}
+@end macro
+
+@macro email{mail}
+@uref{mailto:\mail\, , \mail\}
+@end macro
+
+@macro url{url}
+@uref{\url\}
+@end macro
+@end ifhtml
+
+@c ---------------------------------------------------------------------
+@c References to the other half of the manual
+@c ---------------------------------------------------------------------
+
+@macro usingref{node, name}
+@ref{\node\, , \name\}
+@end macro
+
+@c ---------------------------------------------------------------------
+@c End of macro section
+@c ---------------------------------------------------------------------
+
+@set UPDATED 18 June 2018
+@set UPDATED-MONTH June 2018
+@set EDITION 2.1.2
+@set VERSION 2.1.2
+
+@ifnottex
+@dircategory Software development
+@direntry
+* lightning: (lightning). Library for dynamic code generation.
+@end direntry
+@end ifnottex
+
+@ifnottex
+@node Top
+@top @lightning{}
+
+@iftex
+@macro comma
+@verbatim{|,|}
+@end macro
+@end iftex
+
+@ifnottex
+@macro comma
+@verb{|,|}
+@end macro
+@end ifnottex
+
+This document describes @value{TOPIC} the @lightning{} library for
+dynamic code generation.
+
+@menu
+* Overview:: What GNU lightning is
+* Installation:: Configuring and installing GNU lightning
+* The instruction set:: The RISC instruction set used in GNU lightning
+* GNU lightning examples:: GNU lightning's examples
+* Reentrancy:: Re-entrant usage of GNU lightning
+* Customizations:: Advanced code generation customizations
+* Acknowledgements:: Acknowledgements for GNU lightning
+@end menu
+@end ifnottex
+
+@node Overview
+@chapter Introduction to @lightning{}
+
+@iftex
+This document describes @value{TOPIC} the @lightning{} library for
+dynamic code generation.
+@end iftex
+
+Dynamic code generation is the generation of machine code
+at runtime. It is typically used to strip a layer of interpretation
+by allowing compilation to occur at runtime. One of the most
+well-known applications of dynamic code generation is perhaps that
+of interpreters that compile source code to an intermediate bytecode
+form, which is then recompiled to machine code at run-time: this
+approach effectively combines the portability of bytecode
+representations with the speed of machine code. Another common
+application of dynamic code generation is in the field of hardware
+simulators and binary emulators, which can use the same techniques
+to translate simulated instructions to the instructions of the
+underlying machine.
+
+Yet other applications come to mind: for example, windowing
+@dfn{bitblt} operations, matrix manipulations, and network packet
+filters. Albeit very powerful and relatively well known within the
+compiler community, dynamic code generation techniques are rarely
+exploited to their full potential and, with the exception of the
+two applications described above, have remained curiosities because
+of their portability and functionality barriers: binary instructions
+are generated, so programs using dynamic code generation must be
+retargeted for each machine; in addition, coding a run-time code
+generator is a tedious and error-prone task more than a difficult one.
+
+@lightning{} provides a portable, fast and easily retargetable dynamic
+code generation system.
+
+To be portable, @lightning{} abstracts over current architectures'
+quirks and unorthogonalities. The interface that it exposes is that
+of a standardized RISC architecture loosely based on the SPARC and MIPS
+chips. There are a few general-purpose registers (six, not including
+those used to receive and pass parameters between subroutines), and
+arithmetic operations involve three operands---either three registers
+or two registers and an arbitrarily sized immediate value.
+
+On one hand, this architecture is general enough that it is possible to
+generate pretty efficient code even on CISC architectures such as the
+Intel x86 or the Motorola 68k families. On the other hand, it matches
+real architectures closely enough that, most of the time, the
+compiler's constant folding pass ends up generating code which
+assembles machine instructions without further tests.
+
+@node Installation
+@chapter Configuring and installing @lightning{}
+
+The first thing to do to use @lightning{} is to configure the
+program, picking the set of macros to be used on the host
+architecture; this configuration is automatically performed by
+the @file{configure} shell script; to run it, merely type:
+@example
+ ./configure
+@end example
+
+@lightning{} supports the @code{--enable-disassembler} option, that
+enables linking to GNU binutils and optionally printing human-readable
+disassembly of the jit code. This option can be disabled by the
+@code{--disable-disassembler} option.
+
+Another option that @file{configure} accepts is
+@code{--enable-assertions}, which enables several consistency checks in
+the run-time assemblers. These are not usually needed, so you can
+decide to simply forget about it; also remember that these consistency
+checks tend to slow down your code generator.
+
+After you've configured @lightning{}, run @file{make} as usual.
+
+@lightning{} has an extensive set of tests to validate it is working
+correctly in the build host. To test it run:
+@example
+ make check
+@end example
+
+The next important step is:
+@example
+ make install
+@end example
+
+This ends the process of installing @lightning{}.
+
+@node The instruction set
+@chapter @lightning{}'s instruction set
+
+@lightning{}'s instruction set was designed by deriving instructions
+that closely match those of most existing RISC architectures, or
+that can be easily synthesized if absent. Each instruction is composed
+of:
+@itemize @bullet
+@item
+an operation, like @code{sub} or @code{mul}
+
+@item
+most times, a register/immediate flag (@code{r} or @code{i})
+
+@item
+an unsigned modifier (@code{u}), a type identifier or two, when applicable.
+@end itemize
+
+Examples of legal mnemonics are @code{addr} (integer add, with three
+register operands) and @code{muli} (integer multiply, with two
+register operands and an immediate operand). Each instruction takes
+two or three operands; in most cases, one of them can be an immediate
+value instead of a register.
+
+Most @lightning{} integer operations are signed wordsize operations,
+with the exception of operations that convert types, or load or store
+values to/from memory. When applicable, the types and C types are as
+follows:
+
+@example
+ _c @r{signed char}
+ _uc @r{unsigned char}
+ _s @r{short}
+ _us @r{unsigned short}
+ _i @r{int}
+ _ui @r{unsigned int}
+ _l @r{long}
+ _f @r{float}
+ _d @r{double}
+@end example
+
+Most integer operations do not need a type modifier, and when loading or
+storing values to memory there is an alias to the proper operation
+using wordsize operands, that is, if omitted, the type is @r{int} on
+32-bit architectures and @r{long} on 64-bit architectures. Note
+that lightning also expects @code{sizeof(void*)} to match the wordsize.
+
+When an unsigned operation result differs from the equivalent signed
+operation, there is the @code{_u} modifier.
+
+There are at least seven integer registers, of which six are
+general-purpose, while the last is used to contain the frame pointer
+(@code{FP}). The frame pointer can be used to allocate and access local
+variables on the stack, using the @code{allocai} or @code{allocar}
+instruction.
+
+Of the general-purpose registers, at least three are guaranteed to be
+preserved across function calls (@code{V0}, @code{V1} and
+@code{V2}) and at least three are not (@code{R0}, @code{R1} and
+@code{R2}). Six registers are not very much, but this
+restriction was forced by the need to target CISC architectures
+which, like the x86, are poor of registers; anyway, backends can
+specify the actual number of available registers with the calls
+@code{JIT_R_NUM} (for caller-save registers) and @code{JIT_V_NUM}
+(for callee-save registers).
+
+There are at least six floating-point registers, named @code{F0} to
+@code{F5}. These are usually caller-save and are separate from the integer
+registers on the supported architectures; on Intel architectures,
+in 32 bit mode if SSE2 is not available or use of X87 is forced,
+the register stack is mapped to a flat register file. As for the
+integer registers, the macro @code{JIT_F_NUM} yields the number of
+floating-point registers.
+
+The complete instruction set follows; as you can see, most non-memory
+operations only take integers (either signed or unsigned) as operands;
+this was done in order to reduce the instruction set, and because most
+architectures only provide word and long word operations on registers.
+There are instructions that allow operands to be extended to fit a larger
+data type, both in a signed and in an unsigned way.
+
+@table @b
+@item Binary ALU operations
+These accept three operands; the last one can be an immediate.
+@code{addx} operations must directly follow @code{addc}, and
+@code{subx} must follow @code{subc}; otherwise, results are undefined.
+Most, if not all, architectures do not support @r{float} or @r{double}
+immediate operands; lightning emulates those operations by moving the
+immediate to a temporary register and emitting the call with only
+register operands.
+@example
+addr _f _d O1 = O2 + O3
+addi _f _d O1 = O2 + O3
+addxr O1 = O2 + (O3 + carry)
+addxi O1 = O2 + (O3 + carry)
+addcr O1 = O2 + O3, set carry
+addci O1 = O2 + O3, set carry
+subr _f _d O1 = O2 - O3
+subi _f _d O1 = O2 - O3
+subxr O1 = O2 - (O3 + carry)
+subxi O1 = O2 - (O3 + carry)
+subcr O1 = O2 - O3, set carry
+subci O1 = O2 - O3, set carry
+rsbr _f _d O1 = O3 - O2
+rsbi _f _d O1 = O3 - O2
+mulr _f _d O1 = O2 * O3
+muli _f _d O1 = O2 * O3
+divr _u _f _d O1 = O2 / O3
+divi _u _f _d O1 = O2 / O3
+remr _u O1 = O2 % O3
+remi _u O1 = O2 % O3
+andr O1 = O2 & O3
+andi O1 = O2 & O3
+orr O1 = O2 | O3
+ori O1 = O2 | O3
+xorr O1 = O2 ^ O3
+xori O1 = O2 ^ O3
+lshr O1 = O2 << O3
+lshi O1 = O2 << O3
+rshr _u O1 = O2 >> O3@footnote{The sign bit is propagated unless using the @code{_u} modifier.}
+rshi _u O1 = O2 >> O3@footnote{The sign bit is propagated unless using the @code{_u} modifier.}
+@end example
+
+@item Four operand binary ALU operations
+These accept two result registers, and two operands; the last one can
+be an immediate. The first two arguments cannot be the same register.
+
+@code{qmul} stores the low word of the result in @code{O1} and the
+high word in @code{O2}. For unsigned multiplication, @code{O2} zero
+means there was no overflow. For signed multiplication, no overflow
+check is based on sign, and can be detected if @code{O2} is zero or
+minus one.
+
+@code{qdiv} stores the quotient in @code{O1} and the remainder in
+@code{O2}. It can be used as a quick way to check if a division is
+exact, in which case the remainder is zero.
+
+@example
+qmulr _u O1 O2 = O3 * O4
+qmuli _u O1 O2 = O3 * O4
+qdivr _u O1 O2 = O3 / O4
+qdivi _u O1 O2 = O3 / O4
+@end example
+
+@item Unary ALU operations
+These accept two operands, both of which must be registers.
+@example
+negr _f _d O1 = -O2
+comr O1 = ~O2
+@end example
+
+These unary ALU operations are only defined for float operands.
+@example
+absr _f _d O1 = fabs(O2)
+sqrtr O1 = sqrt(O2)
+@end example
+
+Besides requiring the @code{r} modifier, there are no unary operations
+with an immediate operand.
+
+@item Compare instructions
+These accept three operands; again, the last can be an immediate.
+The last two operands are compared, and the first operand, that must be
+an integer register, is set to either 0 or 1, according to whether the
+given condition was met or not.
+
+The conditions given below are for the standard behavior of C,
+where the ``unordered'' comparison result is mapped to false.
+
+@example
+ltr _u _f _d O1 = (O2 < O3)
+lti _u _f _d O1 = (O2 < O3)
+ler _u _f _d O1 = (O2 <= O3)
+lei _u _f _d O1 = (O2 <= O3)
+gtr _u _f _d O1 = (O2 > O3)
+gti _u _f _d O1 = (O2 > O3)
+ger _u _f _d O1 = (O2 >= O3)
+gei _u _f _d O1 = (O2 >= O3)
+eqr _f _d O1 = (O2 == O3)
+eqi _f _d O1 = (O2 == O3)
+ner _f _d O1 = (O2 != O3)
+nei _f _d O1 = (O2 != O3)
+unltr _f _d O1 = !(O2 >= O3)
+unler _f _d O1 = !(O2 > O3)
+ungtr _f _d O1 = !(O2 <= O3)
+unger _f _d O1 = !(O2 < O3)
+uneqr _f _d O1 = !(O2 < O3) && !(O2 > O3)
+ltgtr _f _d O1 = !(O2 >= O3) || !(O2 <= O3)
+ordr _f _d O1 = (O2 == O2) && (O3 == O3)
+unordr _f _d O1 = (O2 != O2) || (O3 != O3)
+@end example
+
+@item Transfer operations
+These accept two operands; for @code{ext} both of them must be
+registers, while @code{mov} accepts an immediate value as the second
+operand.
+
+Unlike @code{movr} and @code{movi}, the other instructions are used
+to truncate a wordsize operand to a smaller integer data type or to
+convert float data types. You can also use @code{extr} to convert an
+integer to a floating point value: the usual options are @code{extr_f}
+and @code{extr_d}.
+
+@example
+movr _f _d O1 = O2
+movi _f _d O1 = O2
+extr _c _uc _s _us _i _ui _f _d O1 = O2
+truncr _f _d O1 = trunc(O2)
+@end example
+
+In 64-bit architectures it may be required to use @code{truncr_f_i},
+@code{truncr_f_l}, @code{truncr_d_i} and @code{truncr_d_l} to match
+the equivalent C code. Only the @code{_i} modifier is available in
+32-bit architectures.
+
+@example
+truncr_f_i = <int> O1 = <float> O2
+truncr_f_l = <long>O1 = <float> O2
+truncr_d_i = <int> O1 = <double>O2
+truncr_d_l = <long>O1 = <double>O2
+@end example
+
+The float conversion operations are @emph{destination first,
+source second}, but the order of the types is reversed. This happens
+for historical reasons.
+
+@example
+extr_f_d = <double>O1 = <float> O2
+extr_d_f = <float> O1 = <double>O2
+@end example
+
+@item Network extensions
+These accept two operands, both of which must be registers; these
+two instructions actually perform the same task, yet they are
+assigned to two mnemonics for the sake of convenience and
+completeness. As usual, the first operand is the destination and
+the second is the source.
+The @code{_ul} variant is only available in 64-bit architectures.
+@example
+htonr _us _ui _ul @r{Host-to-network (big endian) order}
+ntohr _us _ui _ul @r{Network-to-host order }
+@end example
+
+@item Load operations
+@code{ld} accepts two operands while @code{ldx} accepts three;
+in both cases, the last can be either a register or an immediate
+value. Values are extended (with or without sign, according to
+the data type specification) to fit a whole register.
+The @code{_ui} and @code{_l} types are only available in 64-bit
+architectures. For convenience, there is a version without a
+type modifier for integer or pointer operands that uses the
+appropriate wordsize call.
+@example
+ldr _c _uc _s _us _i _ui _l _f _d O1 = *O2
+ldi _c _uc _s _us _i _ui _l _f _d O1 = *O2
+ldxr _c _uc _s _us _i _ui _l _f _d O1 = *(O2+O3)
+ldxi _c _uc _s _us _i _ui _l _f _d O1 = *(O2+O3)
+@end example
+
+@item Store operations
+@code{st} accepts two operands while @code{stx} accepts three; in
+both cases, the first can be either a register or an immediate
+value. Values are sign-extended to fit a whole register.
+@example
+str _c _uc _s _us _i _ui _l _f _d *O1 = O2
+sti _c _uc _s _us _i _ui _l _f _d *O1 = O2
+stxr _c _uc _s _us _i _ui _l _f _d *(O1+O2) = O3
+stxi _c _uc _s _us _i _ui _l _f _d *(O1+O2) = O3
+@end example
+As for the load operations, the @code{_ui} and @code{_l} types are
+only available in 64-bit architectures, and for convenience, there
+is a version without a type modifier for integer or pointer operands
+that uses the appropriate wordsize call.
+
+@item Argument management
+These are:
+@example
+prepare (not specified)
+va_start (not specified)
+pushargr _f _d
+pushargi _f _d
+va_push (not specified)
+arg _c _uc _s _us _i _ui _l _f _d
+getarg _c _uc _s _us _i _ui _l _f _d
+va_arg _d
+putargr _f _d
+putargi _f _d
+ret (not specified)
+retr _f _d
+reti _f _d
+va_end (not specified)
+retval _c _uc _s _us _i _ui _l _f _d
+epilog (not specified)
+@end example
+As with other operations that use a type modifier, the @code{_ui} and
+@code{_l} types are only available in 64-bit architectures, but there
+are operations without a type modifier that alias to the appropriate
+integer operation with wordsize operands.
+
+@code{prepare}, @code{pusharg}, and @code{retval} are used by the caller,
+while @code{arg}, @code{getarg} and @code{ret} are used by the callee.
+A code snippet that wants to call another procedure and has to pass
+arguments must, in order: use the @code{prepare} instruction and use
+the @code{pushargr} or @code{pushargi} to push the arguments @strong{in
+left to right order}; and use @code{finish} or @code{call} (explained below)
+to perform the actual call.
+
+@code{va_start} returns a @code{C} compatible @code{va_list}. To fetch
+arguments, use @code{va_arg} for integers and @code{va_arg_d} for doubles.
+@code{va_push} is required when passing a @code{va_list} to another function,
+because not all architectures expect it as a single pointer. A known case
+is DEC Alpha, which requires it as a structure passed by value.
+
+@code{arg}, @code{getarg} and @code{putarg} are used by the callee.
+@code{arg} is different from other instructions in that it does not
+actually generate any code: instead, it is a function which returns
+a value to be passed to @code{getarg} or @code{putarg}. @footnote{``Return
+a value'' means that @lightning{} code that compiles these
+instructions returns a value when expanded.} You should call
+@code{arg} as soon as possible, before any function call or, more
+easily, right after the @code{prolog} instructions
+(which is treated later).
+
+@code{getarg} accepts a register argument and a value returned by
+@code{arg}, and will move that argument to the register, extending
+it (with or without sign, according to the data type specification)
+to fit a whole register. These instructions are more intimately
+related to the usage of the @lightning{} instruction set in code
+that generates other code, so they will be treated more
+specifically in @ref{GNU lightning examples, , Generating code at
+run-time}.
+
+@code{putarg} is a mix of @code{getarg} and @code{pusharg} in that
+it accepts as first argument a register or immediate, and as
+second argument a value returned by @code{arg}. It allows changing,
+or restoring an argument to the current function, and is a
+construct required to implement tail call optimization. Note that
+arguments in registers are very cheap, but will be overwritten
+at any moment, including on some operations, for example division,
+that on several ports is implemented as a function call.
+
+Finally, the @code{retval} instruction fetches the return value of a
+called function in a register. The @code{retval} instruction takes a
+register argument and copies the return value of the previously called
+function in that register. A function with a return value should use
+@code{retr} or @code{reti} to put the return value in the return register
+before returning. @xref{Fibonacci, the Fibonacci numbers}, for an example.
+
+@code{epilog} is an optional call, that marks the end of a function
+body. It is automatically generated by @lightning{} if starting a new
+function (what should be done after a @code{ret} call) or finishing
+generating jit.
+It is very important to note that the fact that @code{epilog} is
+optional may cause a common mistake. Consider this:
+@example
+fun1:
+ prolog
+ ...
+ ret
+fun2:
+ prolog
+@end example
+Because @code{epilog} is added when finding a new @code{prolog},
+this will cause the @code{fun2} label to actually be before the
+return from @code{fun1}. That is, @lightning{} will actually
+understand it as:
+@example
+fun1:
+ prolog
+ ...
+ ret
+fun2:
+ epilog
+ prolog
+@end example
+
+You should observe a few rules when using these macros. First of
+all, if calling a varargs function, you should use the @code{ellipsis}
+call to mark the position of the ellipsis in the C prototype.
+
+You should not nest calls to @code{prepare} inside a
+@code{prepare/finish} block. Doing this will result in undefined
+behavior. Note that for functions with zero arguments you can use
+just @code{call}.
+
+@item Branch instructions
+Like @code{arg}, these also return a value which, in this case,
+is to be used to compile forward branches as explained in
+@ref{Fibonacci, , Fibonacci numbers}. They accept two operands to be
+compared; of these, the last can be either a register or an immediate.
+They are:
+@example
+bltr _u _f _d @r{if }(O2 < O3)@r{ goto }O1
+blti _u _f _d @r{if }(O2 < O3)@r{ goto }O1
+bler _u _f _d @r{if }(O2 <= O3)@r{ goto }O1
+blei _u _f _d @r{if }(O2 <= O3)@r{ goto }O1
+bgtr _u _f _d @r{if }(O2 > O3)@r{ goto }O1
+bgti _u _f _d @r{if }(O2 > O3)@r{ goto }O1
+bger _u _f _d @r{if }(O2 >= O3)@r{ goto }O1
+bgei _u _f _d @r{if }(O2 >= O3)@r{ goto }O1
+beqr _f _d @r{if }(O2 == O3)@r{ goto }O1
+beqi _f _d @r{if }(O2 == O3)@r{ goto }O1
+bner _f _d @r{if }(O2 != O3)@r{ goto }O1
+bnei _f _d @r{if }(O2 != O3)@r{ goto }O1
+
+bunltr _f _d @r{if }!(O2 >= O3)@r{ goto }O1
+bunler _f _d @r{if }!(O2 > O3)@r{ goto }O1
+bungtr _f _d @r{if }!(O2 <= O3)@r{ goto }O1
+bunger _f _d @r{if }!(O2 < O3)@r{ goto }O1
+buneqr _f _d @r{if }!(O2 < O3) && !(O2 > O3)@r{ goto }O1
+bltgtr _f _d @r{if }!(O2 >= O3) || !(O2 <= O3)@r{ goto }O1
+bordr _f _d @r{if } (O2 == O2) && (O3 == O3)@r{ goto }O1
+bunordr _f _d @r{if } (O2 != O2) || (O3 != O3)@r{ goto }O1
+
+bmsr @r{if }O2 & O3@r{ goto }O1
+bmsi @r{if }O2 & O3@r{ goto }O1
+bmcr @r{if }!(O2 & O3)@r{ goto }O1
+bmci @r{if }!(O2 & O3)@r{ goto }O1@footnote{These mnemonics mean, respectively, @dfn{branch if mask set} and @dfn{branch if mask cleared}.}
+boaddr _u O2 += O3@r{, goto }O1@r{ if overflow}
+boaddi _u O2 += O3@r{, goto }O1@r{ if overflow}
+bxaddr _u O2 += O3@r{, goto }O1@r{ if no overflow}
+bxaddi _u O2 += O3@r{, goto }O1@r{ if no overflow}
+bosubr _u O2 -= O3@r{, goto }O1@r{ if overflow}
+bosubi _u O2 -= O3@r{, goto }O1@r{ if overflow}
+bxsubr _u O2 -= O3@r{, goto }O1@r{ if no overflow}
+bxsubi _u O2 -= O3@r{, goto }O1@r{ if no overflow}
+@end example
+
+@item Jump and return operations
+These accept one argument except @code{ret} and @code{jmpi} which
+have none; the difference between @code{finishi} and @code{calli}
+is that the latter does not clean the stack from pushed parameters
+(if any) and the former must @strong{always} follow a @code{prepare}
+instruction.
+@example
+callr (not specified) @r{function call to register O1}
+calli (not specified) @r{function call to immediate O1}
+finishr (not specified) @r{function call to register O1}
+finishi (not specified) @r{function call to immediate O1}
+jmpr (not specified) @r{unconditional jump to register}
+jmpi (not specified) @r{unconditional jump}
+ret (not specified) @r{return from subroutine}
+retr _c _uc _s _us _i _ui _l _f _d
+reti _c _uc _s _us _i _ui _l _f _d
+retval _c _uc _s _us _i _ui _l _f _d @r{move return value}
+ @r{to register}
+@end example
+
+Like branch instructions, @code{jmpi} also returns a value which is to
+be used to compile forward branches. @xref{Fibonacci, , Fibonacci
+numbers}.
+
+@item Labels
+There are 3 @lightning{} instructions to create labels:
+@example
+label (not specified) @r{simple label}
+forward (not specified) @r{forward label}
+indirect (not specified) @r{special simple label}
+@end example
+
+@code{label} is normally used as @code{patch_at} argument for backward
+jumps.
+
+@example
+ jit_node_t *jump, *label;
+label = jit_label();
+ ...
+ jump = jit_beqr(JIT_R0, JIT_R1);
+ jit_patch_at(jump, label);
+@end example
+
+@code{forward} is used to patch code generation before the actual
+position of the label is known.
+
+@example
+ jit_node_t *jump, *label;
+label = jit_forward();
+ jump = jit_beqr(JIT_R0, JIT_R1);
+ jit_patch_at(jump, label);
+ ...
+ jit_link(label);
+@end example
+
+@code{indirect} is useful when creating jump tables, and tells
+@lightning{} to not optimize out a label that is not the target of
+any jump, because an indirect jump may land where it is defined.
+
+@example
+ jit_node_t *jump, *label;
+ ...
+ jmpr(JIT_R0); @rem{/* may jump to label */}
+ ...
+label = jit_indirect();
+@end example
+
+@code{indirect} is a special case of @code{note} and @code{name}
+because it is a valid argument to @code{address}.
+
+Note that the usual idiom to write the previous example is
+@example
+ jit_node_t *addr, *jump;
+addr = jit_movi(JIT_R0, 0); @rem{/* immediate is ignored */}
+ ...
+ jmpr(JIT_R0);
+ ...
+ jit_patch(addr); @rem{/* implicit label added */}
+@end example
+
+that automatically binds the implicit label added by @code{patch} with
+the @code{movi}, but on some special conditions it is required to create
+an "unbound" label.
+
+@item Function prolog
+
+These macros are used to set up a function prolog. The @code{allocai}
+call accepts a single integer argument and returns an offset value
+for stack storage access. The @code{allocar} call accepts two register
+arguments: the first is set to the offset for stack access, and the
+second holds the size in bytes.
+
+@example
+prolog (not specified) @r{function prolog}
+allocai (not specified) @r{reserve space on the stack}
+allocar (not specified) @r{allocate space on the stack}
+@end example
+
+@code{allocai} receives the number of bytes to allocate and returns
+the offset from the frame pointer register @code{FP} to the base of
+the area.
+
+@code{allocar} receives two register arguments. The first is where
+to store the offset from the frame pointer register @code{FP} to the
+base of the area. The second argument is the size in bytes. Note
+that @code{allocar} is dynamic allocation, and special attention
+should be taken when using it. If called in a loop, every iteration
+will allocate stack space. Stack space is aligned from 8 to 64 bytes
+depending on backend requirements, even if allocating only one byte.
+It is advisable to not use it with @code{frame} and @code{tramp}; it
+should work with @code{frame} with special care to call only once,
+but is not supported if used in @code{tramp}, even if called only
+once.
+
+As a small appetizer, here is a small function that adds 1 to the input
+parameter (an @code{int}). I'm using an assembly-like syntax here which
+is a bit different from the one used when writing real subroutines with
+@lightning{}; the real syntax will be introduced in @ref{GNU lightning
+examples, , Generating code at run-time}.
+
+@example
+incr:
+ prolog
+in = arg @rem{! We have an integer argument}
+ getarg R0, in @rem{! Move it to R0}
+ addi R0, R0, 1 @rem{! Add 1}
+ retr R0 @rem{! And return the result}
+@end example
+
+And here is another function which uses the @code{printf} function from
+the standard C library to write a number in hexadecimal notation:
+
+@example
+printhex:
+ prolog
+in = arg @rem{! Same as above}
+ getarg R0, in
+ prepare @rem{! Begin call sequence for printf}
+ pushargi "%x" @rem{! Push format string}
+ ellipsis @rem{! Varargs start here}
+ pushargr R0 @rem{! Push second argument}
+ finishi printf @rem{! Call printf}
+ ret @rem{! Return to caller}
+@end example
+
+@item Trampolines, continuations and tail call optimization
+
+Frequently it is required to generate jit code that must jump to
+code generated later, possibly from another @code{jit_context_t}.
+These require compatible stack frames.
+
+@lightning{} provides two primitives from where trampolines,
+continuations and tail call optimization can be implemented.
+
+@example
+frame (not specified) @r{create stack frame}
+tramp (not specified) @r{assume stack frame}
+@end example
+
+@code{frame} receives an integer argument@footnote{It is not
+automatically computed because it does not know about the
+requirement of later generated code.} that defines the size in
+bytes for the stack frame of the current, @code{C} callable,
+jit function. To calculate this value, a good formula is maximum
+number of arguments to any called native function times
+eight@footnote{Times eight so that it works for double arguments.
+And would not need conditionals for ports that pass arguments in
+the stack.}, plus the sum of the arguments to any call to
+@code{jit_allocai}. @lightning{} automatically adjusts this value
+for any backend specific stack memory it may need, or any
+alignment constraint.
+
+@code{frame} also instructs @lightning{} to save all callee
+save registers in the prolog and reload in the epilog.
+
+@example
+main: @rem{! jit entry point}
+ prolog @rem{! function prolog}
+ frame 256 @rem{! save all callee save registers and}
+ @rem{! reserve at least 256 bytes in stack}
+main_loop:
+ ...
+ jmpi handler @rem{! jumps to external code}
+ ...
+ ret @rem{! return to the caller}
+@end example
+
+@code{tramp} differs from @code{frame} only in that a prolog and epilog
+will not be generated. Note that @code{prolog} must still be used.
+The code under @code{tramp} must be ready to be entered with a jump
+at the prolog position, and instead of a return, it must end with
+a non conditional jump. @code{tramp} exists solely for the fact
+that it allows optimizing out prolog and epilog code that would
+never be executed.
+
+@example
+handler: @rem{! handler entry point}
+ prolog @rem{! function prolog}
+ tramp 256 @rem{! assumes all callee save registers}
+ @rem{! are saved and there is at least}
+ @rem{! 256 bytes in stack}
+ ...
+ jmpi main_loop @rem{! return to the main loop}
+@end example
+
+@lightning{} only supports Tail Call Optimization using the
+@code{tramp} construct. Any other way is not guaranteed to
+work on all ports.
+
+An example of a simple (recursive) tail call optimization:
+
+@example
+factorial: @rem{! Entry point of the factorial function}
+ prolog
+in = arg @rem{! Receive an integer argument}
+ getarg R0, in @rem{! Move argument to R0}
+ prepare
+ pushargi 1 @rem{! This is the accumulator}
+ pushargr R0 @rem{! This is the argument}
+ finishi fact @rem{! Call the tail call optimized function}
+ retval R0 @rem{! Fetch the result}
+ retr R0 @rem{! Return it}
+ epilog @rem{! Epilog *before* label before prolog}
+
+fact: @rem{! Entry point of the helper function}
+ prolog
+ frame 16 @rem{! Reserve 16 bytes in the stack}
+fact_entry: @rem{! This is the tail call entry point}
+ac = arg @rem{! The accumulator is the first argument}
+in = arg @rem{! The factorial argument}
+ getarg R0, ac @rem{! Move the accumulator to R0}
+ getarg R1, in @rem{! Move the argument to R1}
+ blei fact_out, R1, 1 @rem{! Done if argument is one or less}
+ mulr R0, R0, R1 @rem{! accumulator *= argument}
+ putargr R0, ac @rem{! Update the accumulator}
+ subi R1, R1, 1 @rem{! argument -= 1}
+ putargr R1, in @rem{! Update the argument}
+ jmpi fact_entry @rem{! Tail Call Optimize it!}
+fact_out:
+ retr R0 @rem{! Return the accumulator}
+@end example
+
+@item Predicates
+@example
+forward_p (not specified) @r{forward label predicate}
+indirect_p (not specified) @r{indirect label predicate}
+target_p (not specified) @r{used label predicate}
+arg_register_p (not specified) @r{argument kind predicate}
+callee_save_p (not specified) @r{callee save predicate}
+pointer_p (not specified) @r{pointer predicate}
+@end example
+
+@code{forward_p} expects a @code{jit_node_t*} argument, and
+returns non zero if it is a forward label reference, that is,
+a label returned by @code{forward}, that still needs a
+@code{link} call.
+
+@code{indirect_p} expects a @code{jit_node_t*} argument, and returns
+non zero if it is an indirect label reference, that is, a label that
+was returned by @code{indirect}.
+
+@code{target_p} expects a @code{jit_node_t*} argument, that is any
+kind of label, and will return non zero if there is at least one
+jump or move referencing it.
+
+@code{arg_register_p} expects a @code{jit_node_t*} argument, that must
+have been returned by @code{arg}, @code{arg_f} or @code{arg_d}, and
+will return non zero if the argument lives in a register. This call
+is useful to know the live range of register arguments, as those
+are very fast to read and write, but have volatile values.
+
+@code{callee_save_p} expects a valid @code{JIT_Rn}, @code{JIT_Vn}, or
+@code{JIT_Fn}, and will return non zero if the register is callee
+save. This call is useful because on several ports, the @code{JIT_Rn}
+and @code{JIT_Fn} registers are actually callee save; no need
+to save and load the values when making function calls.
+
+@code{pointer_p} expects a pointer argument, and will return non
+zero if the pointer is inside the generated jit code. Must be
+called after @code{jit_emit} and before @code{jit_destroy_state}.
+@end table
+
+@node GNU lightning examples
+@chapter Generating code at run-time
+
+To use @lightning{}, you should include the @file{lightning.h} file that
+is put in your include directory by the @samp{make install} command.
+
+Each of the instructions above translates to a macro or function call.
+All you have to do is prepend @code{jit_} (lowercase) to opcode names
+and @code{JIT_} (uppercase) to register names. Of course, parameters
+are to be put between parentheses.
+
+This small tutorial presents four examples:
+
+@iftex
+@itemize @bullet
+@item
+The @code{incr} function found in @ref{The instruction set, ,
+@lightning{}'s instruction set}:
+
+@item
+A simple function call to @code{printf}
+
+@item
+An RPN calculator.
+
+@item
+Fibonacci numbers
+@end itemize
+@end iftex
+@ifnottex
+@menu
+* incr:: A function which increments a number by one
+* printf:: A simple function call to printf
+* RPN calculator:: A more complex example, an RPN calculator
+* Fibonacci:: Calculating Fibonacci numbers
+@end menu
+@end ifnottex
+
+@node incr
+@section A function which increments a number by one
+
+Let's see how to create and use the sample @code{incr} function created
+in @ref{The instruction set, , @lightning{}'s instruction set}:
+
+@example
+#include <stdio.h>
+#include <lightning.h>
+
+static jit_state_t *_jit;
+
+typedef int (*pifi)(int); @rem{/* Pointer to Int Function of Int */}
+
+int main(int argc, char *argv[])
+@{
+ jit_node_t *in;
+ pifi incr;
+
+ init_jit(argv[0]);
+ _jit = jit_new_state();
+
+ jit_prolog(); @rem{/* @t{ prolog } */}
+ in = jit_arg(); @rem{/* @t{ in = arg } */}
+ jit_getarg(JIT_R0, in); @rem{/* @t{ getarg R0 } */}
+ jit_addi(JIT_R0, JIT_R0, 1); @rem{/* @t{ addi R0@comma{} R0@comma{} 1 } */}
+ jit_retr(JIT_R0); @rem{/* @t{ retr R0 } */}
+
+ incr = jit_emit();
+ jit_clear_state();
+
+ @rem{/* call the generated code@comma{} passing 5 as an argument */}
+ printf("%d + 1 = %d\n", 5, incr(5));
+
+ jit_destroy_state();
+ finish_jit();
+ return 0;
+@}
+@end example
+
+Let's examine the code line by line (well, almost@dots{}):
+
+@table @t
+@item #include <lightning.h>
+You already know about this. It defines all of @lightning{}'s macros.
+
+@item static jit_state_t *_jit;
+You might wonder about what is @code{jit_state_t}. It is a structure
+that stores jit code generation information. The name @code{_jit} is
+special, because since multiple jit generators can run at the same
+time, you must either @r{#define _jit my_jit_state} or name it
+@code{_jit}.
+
+@item typedef int (*pifi)(int);
+Just a handy typedef for a pointer to a function that takes an
+@code{int} and returns another.
+
+@item jit_node_t *in;
+Declares a variable to hold an identifier for a function argument. It
+is an opaque pointer, that will hold the return of a call to @code{arg}
+and be used as argument to @code{getarg}.
+
+@item pifi incr;
+Declares a function pointer variable to a function that receives an
+@code{int} and returns an @code{int}.
+
+@item init_jit(argv[0]);
+You must call this function before creating a @code{jit_state_t}
+object. This function does global state initialization, and may need
+to detect CPU or Operating System features. It receives a string
+argument that is later used to read symbols from a shared object using
+GNU binutils if disassembly was enabled at configure time. If no
+disassembly will be performed a NULL pointer can be used as argument.
+
+@item _jit = jit_new_state();
+This call initializes a @lightning{} jit state.
+
+@item jit_prolog();
+Ok, so we start generating code for our beloved function@dots{}
+
+@item in = jit_arg();
+@itemx jit_getarg(JIT_R0, in);
+We retrieve the first (and only) argument, an integer, and store it
+into the general-purpose register @code{R0}.
+
+@item jit_addi(JIT_R0, JIT_R0, 1);
+We add one to the content of the register.
+
+@item jit_retr(JIT_R0);
+This instruction generates a standard function epilog that returns
+the contents of the @code{R0} register.
+
+@item incr = jit_emit();
+This instruction is very important. It actually translates the
+@lightning{} macros used before to machine code, flushes the generated
+code area out of the processor's instruction cache and returns a
+pointer to the start of the code.
+
+@item jit_clear_state();
+This call cleans up any data not required for jit execution. Note
+that it must be called after any call to @code{jit_print} or
+@code{jit_address}, as this call destroys the @lightning{}
+intermediate representation.
+
+@item printf("%d + 1 = %d", 5, incr(5));
+Calling our function is this simple---it is not distinguishable from
+a normal C function call, the only difference being that @code{incr}
+is a variable.
+
+@item jit_destroy_state();
+Releases all memory associated with the jit context. It should be
+called once it is known that the jit code will no longer be called.
+
+@item finish_jit();
+This call cleans up any global state held by @lightning{}, and it is
+advisable to call it once jit code will no longer be generated.
+@end table
+
+@lightning{} abstracts two phases of dynamic code generation: selecting
+instructions that map the standard representation, and emitting binary
+code for these instructions. The client program has the responsibility
+of describing the code to be generated using the standard @lightning{}
+instruction set.
+
+Let's examine the code generated for @code{incr} on the SPARC and x86_64
+architectures (on the right is the code that an assembly-language
+programmer would write):
+
+@table @b
+@item SPARC
+@example
+ save %sp, -112, %sp
+ mov %i0, %g2 retl
+ inc %g2 inc %o0
+ mov %g2, %i0
+ restore
+ retl
+ nop
+@end example
+In this case, @lightning{} introduces overhead to create a register
+window (not knowing that the procedure is a leaf procedure) and to
+move the argument to the general purpose register @code{R0} (which
+maps to @code{%g2} on the SPARC).
+@end table
+
+@table @b
+@item x86_64
+@example
+ sub $0x30,%rsp
+ mov %rbp,(%rsp)
+ mov %rsp,%rbp
+ sub $0x18,%rsp
+ mov %rdi,%rax mov %rdi, %rax
+ add $0x1,%rax inc %rax
+ mov %rbp,%rsp
+ mov (%rsp),%rbp
+ add $0x30,%rsp
+ retq retq
+@end example
+In this case, the main overhead is due to the function's prolog and
+epilog, and stack alignment after reserving stack space for word
+to/from float conversions or moving data from/to x87 to/from SSE.
+Note that besides allocating space to save callee saved registers,
+no registers are saved/restored because @lightning{} notices those
+registers are not modified. There is currently no logic to detect
+if it needs to allocate stack space for type conversions, nor
+proper leaf function detection, but these are subject to change
+(FIXME).
+@end table
+
+@node printf
+@section A simple function call to @code{printf}
+
+Again, here is the code for the example:
+
+@example
+#include <stdio.h>
+#include <lightning.h>
+
+static jit_state_t *_jit;
+
+typedef void (*pvfi)(int); @rem{/* Pointer to Void Function of Int */}
+
+int main(int argc, char *argv[])
+@{
+ pvfi myFunction; @rem{/* ptr to generated code */}
+ jit_node_t *start, *end; @rem{/* a couple of labels */}
+ jit_node_t *in; @rem{/* to get the argument */}
+
+ init_jit(argv[0]);
+ _jit = jit_new_state();
+
+ start = jit_note(__FILE__, __LINE__);
+ jit_prolog();
+ in = jit_arg();
+ jit_getarg(JIT_R1, in);
+ jit_prepare();
+ jit_pushargi((jit_word_t)"generated %d bytes\n");
+ jit_ellipsis();
+ jit_pushargr(JIT_R1);
+ jit_finishi(printf);
+ jit_ret();
+ jit_epilog();
+ end = jit_note(__FILE__, __LINE__);
+
+ myFunction = jit_emit();
+
+ @rem{/* call the generated code@comma{} passing its size as argument */}
+ myFunction((char*)jit_address(end) - (char*)jit_address(start));
+ jit_clear_state();
+
+ jit_disassemble();
+
+ jit_destroy_state();
+ finish_jit();
+ return 0;
+@}
+@end example
+
+The function shows how many bytes were generated. Most of the code
+is not very interesting, as it resembles very closely the program
+presented in @ref{incr, , A function which increments a number by one}.
+
+For this reason, we're going to concentrate on just a few statements.
+
+@table @t
+@item start = jit_note(__FILE__, __LINE__);
+@itemx @r{@dots{}}
+@itemx end = jit_note(__FILE__, __LINE__);
+These two instructions call the @code{jit_note} macro, which creates
+a note in the jit code; arguments to @code{jit_note} usually are a
+filename string and line number integer, but using NULL for the
+string argument is perfectly valid if you only need to create a simple
+marker in the code.
+
+@item jit_ellipsis();
+@code{ellipsis} usually is only required if calling varargs functions
+with double arguments, but it is a good practice to properly describe
+the @r{@dots{}} in the call sequence.
+
+@item jit_pushargi((jit_word_t)"generated %d bytes\n");
+Note the use of the @code{(jit_word_t)} cast, that is used only
+to avoid a compiler warning, due to using a pointer where a
+wordsize integer type was expected.
+
+@item jit_prepare();
+@itemx @r{@dots{}}
+@itemx jit_finishi(printf);
+Once the arguments to @code{printf} have been pushed, which means
+moving them to stack or register arguments, the @code{printf}
+function is called and the stack cleaned. Note how @lightning{}
+abstracts the differences between different architectures and
+ABI's -- the client program does not know how parameter passing
+works on the host architecture.
+
+@item jit_epilog();
+Usually it is not required to call @code{epilog}, but because it
+is implicitly called when noticing the end of a function, if the
+@code{end} variable was set with a @code{note} call after the
+@code{ret}, it would not consider the function epilog.
+
+@item myFunction((char*)jit_address(end) - (char*)jit_address(start));
+This calls the generated jit function passing as argument the offset
+difference from the @code{start} and @code{end} notes. The @code{address}
+call must be done after the @code{emit} call or either a fatal error
+will happen (if @lightning{} is built with assertions enabled) or an
+undefined value will be returned.
+
+@item jit_clear_state();
+Note that @code{jit_clear_state} was called after executing jit in
+this example. It was done because it must be called after any call
+to @code{jit_address} or @code{jit_print}.
+
+@item jit_disassemble();
+@code{disassemble} will dump the generated code to standard output,
+unless @lightning{} was built with the disassembler disabled, in which
+case no output will be shown.
+@end table
+
+@node RPN calculator
+@section A more complex example, an RPN calculator
+
+We create a small stack-based RPN calculator which applies a series
+of operators to a given parameter and to other numeric operands.
+Unlike previous examples, the code generator is fully parameterized
+and is able to compile different formulas to different functions.
+Here is the code for the expression compiler; a sample usage will
+follow.
+
+Since @lightning{} does not provide push/pop instructions, this
+example uses a stack-allocated area to store the data. Such an
+area can be allocated using the macro @code{allocai}, which
+receives the number of bytes to allocate and returns the offset
+from the frame pointer register @code{FP} to the base of the
+area.
+
+Usually, you will use the @code{ldxi} and @code{stxi} instructions
+to access stack-allocated variables. However, it is possible to
+use operations such as @code{add} to compute the address of the
+variables, and pass the address around.
+
+@example
+#include <stdio.h>
+#include <lightning.h>
+
+typedef int (*pifi)(int); @rem{/* Pointer to Int Function of Int */}
+
+static jit_state_t *_jit;
+
+void stack_push(int reg, int *sp)
+@{
+ jit_stxi_i (*sp, JIT_FP, reg);
+ *sp += sizeof (int);
+@}
+
+void stack_pop(int reg, int *sp)
+@{
+ *sp -= sizeof (int);
+ jit_ldxi_i (reg, JIT_FP, *sp);
+@}
+
+jit_node_t *compile_rpn(char *expr)
+@{
+ jit_node_t *in, *fn;
+ int stack_base, stack_ptr;
+
+ fn = jit_note(NULL, 0);
+ jit_prolog();
+ in = jit_arg();
+ stack_ptr = stack_base = jit_allocai (32 * sizeof (int));
+
+ jit_getarg_i(JIT_R2, in);
+
+ while (*expr) @{
+ char buf[32];
+ int n;
+ if (sscanf(expr, "%[0-9]%n", buf, &n)) @{
+ expr += n - 1;
+ stack_push(JIT_R0, &stack_ptr);
+ jit_movi(JIT_R0, atoi(buf));
+ @} else if (*expr == 'x') @{
+ stack_push(JIT_R0, &stack_ptr);
+ jit_movr(JIT_R0, JIT_R2);
+ @} else if (*expr == '+') @{
+ stack_pop(JIT_R1, &stack_ptr);
+ jit_addr(JIT_R0, JIT_R1, JIT_R0);
+ @} else if (*expr == '-') @{
+ stack_pop(JIT_R1, &stack_ptr);
+ jit_subr(JIT_R0, JIT_R1, JIT_R0);
+ @} else if (*expr == '*') @{
+ stack_pop(JIT_R1, &stack_ptr);
+ jit_mulr(JIT_R0, JIT_R1, JIT_R0);
+ @} else if (*expr == '/') @{
+ stack_pop(JIT_R1, &stack_ptr);
+ jit_divr(JIT_R0, JIT_R1, JIT_R0);
+ @} else @{
+ fprintf(stderr, "cannot compile: %s\n", expr);
+ abort();
+ @}
+ ++expr;
+ @}
+ jit_retr(JIT_R0);
+ jit_epilog();
+ return fn;
+@}
+@end example
+
+The principle on which the calculator is based is easy: the stack top
+is held in R0, while the remaining items of the stack are held in the
+memory area that we allocate with @code{allocai}. Compiling a numeric
+operand or the argument @code{x} pushes the old stack top onto the
+stack and moves the operand into R0; compiling an operator pops the
+second operand off the stack into R1, and compiles the operation so
+that the result goes into R0, thus becoming the new stack top.
+
+This example allocates a fixed area for 32 @code{int}s. This is not
+a problem when the function is a leaf like in this case; in a full-blown
+compiler you will want to analyze the input and determine the number
+of needed stack slots---a very simple example of register allocation.
+The area is then managed like a stack using @code{stack_push} and
+@code{stack_pop}.
+
+Source code for the client (which lies in the same source file) follows:
+
+@example
+int main(int argc, char *argv[])
+@{
+ jit_node_t *nc, *nf;
+ pifi c2f, f2c;
+ int i;
+
+ init_jit(argv[0]);
+ _jit = jit_new_state();
+
+ nc = compile_rpn("32x9*5/+");
+ nf = compile_rpn("x32-5*9/");
+ (void)jit_emit();
+ c2f = (pifi)jit_address(nc);
+ f2c = (pifi)jit_address(nf);
+ jit_clear_state();
+
+ printf("\nC:");
+ for (i = 0; i <= 100; i += 10) printf("%3d ", i);
+ printf("\nF:");
+ for (i = 0; i <= 100; i += 10) printf("%3d ", c2f(i));
+ printf("\n");
+
+ printf("\nF:");
+ for (i = 32; i <= 212; i += 18) printf("%3d ", i);
+ printf("\nC:");
+ for (i = 32; i <= 212; i += 18) printf("%3d ", f2c(i));
+ printf("\n");
+
+ jit_destroy_state();
+ finish_jit();
+ return 0;
+@}
+@end example
+
+The client displays a conversion table between Celsius and Fahrenheit
+degrees (both Celsius-to-Fahrenheit and Fahrenheit-to-Celsius). The
+formulas are, @math{F(c) = c*9/5+32} and @math{C(f) = (f-32)*5/9},
+respectively.
+
+Providing the formula as an argument to @code{compile_rpn} effectively
+parameterizes code generation, making it possible to use the same code
+to compile different functions; this is what makes dynamic code
+generation so powerful.
+
+@node Fibonacci
+@section Fibonacci numbers
+
+The code in this section calculates the Fibonacci sequence. That is
+modeled by the recurrence relation:
+@display
+ f(0) = 0
+ f(1) = f(2) = 1
+ f(n) = f(n-1) + f(n-2)
+@end display
+
+The purpose of this example is to introduce branches. There are two
+kinds of branches: backward branches and forward branches. We'll
+present the calculation in a recursive and iterative form; the
+former only uses forward branches, while the latter uses both.
+
+@example
+#include <stdio.h>
+#include <lightning.h>
+
+static jit_state_t *_jit;
+
+typedef int (*pifi)(int); @rem{/* Pointer to Int Function of Int */}
+
+int main(int argc, char *argv[])
+@{
+ pifi fib;
+ jit_node_t *label;
+ jit_node_t *call;
+ jit_node_t *in; @rem{/* offset of the argument */}
+ jit_node_t *ref; @rem{/* to patch the forward reference */}
+ jit_node_t *zero; @rem{/* to patch the forward reference */}
+
+ init_jit(argv[0]);
+ _jit = jit_new_state();
+
+ label = jit_label();
+ jit_prolog ();
+ in = jit_arg ();
+ jit_getarg (JIT_R0, in); @rem{/* R0 = n */}
+ zero = jit_beqi (JIT_R0, 0);
+ jit_movr (JIT_V0, JIT_R0); /* V0 = R0 */
+ jit_movi (JIT_R0, 1);
+ ref = jit_blei (JIT_V0, 2);
+ jit_subi (JIT_V1, JIT_V0, 1); @rem{/* V1 = n-1 */}
+ jit_subi (JIT_V2, JIT_V0, 2); @rem{/* V2 = n-2 */}
+ jit_prepare();
+ jit_pushargr(JIT_V1);
+ call = jit_finishi(NULL);
+ jit_patch_at(call, label);
+ jit_retval(JIT_V1); @rem{/* V1 = fib(n-1) */}
+ jit_prepare();
+ jit_pushargr(JIT_V2);
+ call = jit_finishi(NULL);
+ jit_patch_at(call, label);
+ jit_retval(JIT_R0); @rem{/* R0 = fib(n-2) */}
+ jit_addr(JIT_R0, JIT_R0, JIT_V1); @rem{/* R0 = R0 + V1 */}
+
+ jit_patch(ref); @rem{/* patch jump */}
+ jit_patch(zero); @rem{/* patch jump */}
+ jit_retr(JIT_R0);
+
+ @rem{/* call the generated code@comma{} passing 32 as an argument */}
+ fib = jit_emit();
+ jit_clear_state();
+ printf("fib(%d) = %d\n", 32, fib(32));
+ jit_destroy_state();
+ finish_jit();
+ return 0;
+@}
+@end example
+
+As said above, this is the first example of dynamically compiling
+branches. Branch instructions have two operands containing the
+values to be compared, and return a @code{jit_node_t *} object
+to be patched.
+
+Because the final addresses of labels are only known after calling
+@code{emit}, it is required to call @code{patch} or @code{patch_at},
+which tells @lightning{} that the target to patch is actually a pointer
+to a @code{jit_node_t *} object; otherwise, it would assume that it is
+a pointer to a C function. Note that conditional branches do not
+receive a label argument, so they must be patched.
+
+You need to call @code{patch_at} on the return value of @code{calli}
+or @code{finishi} if it is actually referencing a label
+in the jit code. Branch instructions do not receive a label
+argument. Note that @code{movi} is a special case, and patching it
+is usually done to get the final address of a label, usually to later
+call @code{jmpr}.
+
+Now, here is the iterative version:
+
+@example
+#include <stdio.h>
+#include <lightning.h>
+
+static jit_state_t *_jit;
+
+typedef int (*pifi)(int); @rem{/* Pointer to Int Function of Int */}
+
+int main(int argc, char *argv[])
+@{
+ pifi fib;
+ jit_node_t *in; @rem{/* offset of the argument */}
+ jit_node_t *ref; @rem{/* to patch the forward reference */}
+ jit_node_t *zero; @rem{/* to patch the forward reference */}
+ jit_node_t *jump; @rem{/* jump to start of loop */}
+ jit_node_t *loop; @rem{/* start of the loop */}
+
+ init_jit(argv[0]);
+ _jit = jit_new_state();
+
+ jit_prolog ();
+ in = jit_arg ();
+ jit_getarg (JIT_R0, in); @rem{/* R0 = n */}
+ zero = jit_beqi (JIT_R0, 0);
+ jit_movr (JIT_R1, JIT_R0);
+ jit_movi (JIT_R0, 1);
+ ref = jit_blei (JIT_R1, 2);
+ jit_subi (JIT_R2, JIT_R1, 2);
+ jit_movr (JIT_R1, JIT_R0);
+
+ loop= jit_label();
+ jit_subi (JIT_R2, JIT_R2, 1); @rem{/* decr. counter */}
+ jit_movr (JIT_V0, JIT_R0); /* V0 = R0 */
+ jit_addr (JIT_R0, JIT_R0, JIT_R1); /* R0 = R0 + R1 */
+ jit_movr (JIT_R1, JIT_V0); /* R1 = V0 */
+ jump= jit_bnei (JIT_R2, 0); /* if (R2) goto loop; */
+ jit_patch_at(jump, loop);
+
+ jit_patch(ref); @rem{/* patch forward jump */}
+ jit_patch(zero); @rem{/* patch forward jump */}
+ jit_retr (JIT_R0);
+
+ @rem{/* call the generated code@comma{} passing 36 as an argument */}
+ fib = jit_emit();
+ jit_clear_state();
+ printf("fib(%d) = %d\n", 36, fib(36));
+ jit_destroy_state();
+ finish_jit();
+ return 0;
+@}
+@end example
+
+This code calculates the recurrence relation using iteration (a
+@code{for} loop in high-level languages). There are no function
+calls anymore: instead, there is a backward jump (the @code{bnei} at
+the end of the loop).
+
+Note that the program must remember the address for backward jumps;
+for forward jumps it is only required to remember the jump code,
+and call @code{patch} for the implicit label.
+
+@node Reentrancy
+@chapter Re-entrant usage of @lightning{}
+
+@lightning{} uses the special @code{_jit} identifier. To be able
+to use multiple jit generation states at the same
+time, it is required to use code similar to:
+
+@example
+ jit_state_t *lightning;
+ #define _jit lightning
+@end example
+
+This will cause the symbol defined to @code{_jit} to be passed as
+the first argument to the underlying @lightning{} implementation,
+that is usually a function with an @code{_} (underscore) prefix
+and with an argument named @code{_jit}, in the pattern:
+
+@example
+ static void _jit_mnemonic(jit_state_t *, jit_gpr_t, jit_gpr_t);
+ #define jit_mnemonic(u, v) _jit_mnemonic(_jit, u, v);
+@end example
+
+The reason for this is to use the same syntax as the initial lightning
+implementation and to avoid needing the user to keep adding an extra
+argument to every call, as multiple jit states generating code in
+parallel should be very uncommon.
+
+@section Registers
+@chapter Accessing the whole register file
+
+As mentioned earlier in this chapter, all @lightning{} back-ends are
+guaranteed to have at least six general-purpose integer registers and
+six floating-point registers, but many back-ends will have more.
+
+To access the entire register files, you can use the
+@code{JIT_R}, @code{JIT_V} and @code{JIT_F} macros. They
+accept a parameter that identifies the register number, which
+must be strictly less than @code{JIT_R_NUM}, @code{JIT_V_NUM}
+and @code{JIT_F_NUM} respectively; the number need not be
+constant. Of course, expressions like @code{JIT_R0} and
+@code{JIT_R(0)} denote the same register, and likewise for
+integer callee-saved, or floating-point, registers.
+
+@node Customizations
+@chapter Customizations
+
+Frequently it is desirable to have more control over how code is
+generated or how memory is used during jit generation or execution.
+
+@section Memory functions
+To aid in complete control of memory allocation and deallocation
+@lightning{} provides wrappers that default to standard @code{malloc},
+@code{realloc} and @code{free}. These are loosely based on the
+GNU GMP counterparts, with the difference that they use the same
+prototype of the system allocation functions, that is, no @code{size}
+for @code{free} or @code{old_size} for @code{realloc}.
+
+@deftypefun void jit_set_memory_functions (@* void *(*@var{alloc_func_ptr}) (size_t), @* void *(*@var{realloc_func_ptr}) (void *, size_t), @* void (*@var{free_func_ptr}) (void *))
+@lightning{} guarantees that memory is only allocated or released
+using these wrapped functions, but you must note that if lightning
+was linked to GNU binutils, malloc will probably be called multiple
+times from there when initializing the disassembler.
+
+Because @code{init_jit} may call memory functions, if you need to call
+@code{jit_set_memory_functions}, it must be called before @code{init_jit},
+otherwise, when calling @code{finish_jit}, a pointer allocated with the
+previous or default wrappers will be passed.
+@end deftypefun
+
+@deftypefun void jit_get_memory_functions (@* void *(**@var{alloc_func_ptr}) (size_t), @* void *(**@var{realloc_func_ptr}) (void *, size_t), @* void (**@var{free_func_ptr}) (void *))
+Get the current memory allocation functions. Also, unlike the GNU GMP
+counterpart, it is an error to pass @code{NULL} pointers as arguments.
+@end deftypefun
+
+@section Alternate code buffer
+To instruct @lightning{} to use an alternate code buffer it is required
+to call @code{jit_realize} before @code{jit_emit}, and then query states
+and customize as appropriate.
+
+@deftypefun void jit_realize ()
+Must be called once, before @code{jit_emit}, to instruct @lightning{}
+that no other @code{jit_xyz} call will be made.
+@end deftypefun
+
+@deftypefun jit_pointer_t jit_get_code (jit_word_t *@var{code_size})
+Returns NULL or the previous value set with @code{jit_set_code}, and
+sets the @var{code_size} argument to an appropriate value.
+If @code{jit_get_code} is called before @code{jit_emit}, the
+@var{code_size} argument is set to the expected amount of bytes
+required to generate code.
+If @code{jit_get_code} is called after @code{jit_emit}, the
+@var{code_size} argument is set to the exact amount of bytes used
+by the code.
+@end deftypefun
+
+@deftypefun void jit_set_code (jit_pointer_t @var{code}, jit_word_t @var{size})
+Instructs @lightning{} to output to the @var{code} argument and
+use @var{size} as a guard to not write to invalid memory. If during
+@code{jit_emit} @lightning{} finds out that the code would not fit
+in @var{size} bytes, it halts code emit and returns @code{NULL}.
+@end deftypefun
+
+A simple example of a loop using an alternate buffer is:
+
+@example
+ jit_uint8_t *code;
+ int (*func)(int); @rem{/* function pointer */}
+ jit_word_t code_size;
+ jit_word_t real_code_size;
+ @rem{...}
+ jit_realize(); @rem{/* ready to generate code */}
+ jit_get_code(&code_size); @rem{/* get expected code size */}
+ code_size = (code_size + 4095) & -4096;
+ do @{
+ code = mmap(NULL, code_size, PROT_EXEC | PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANON, -1, 0);
+ jit_set_code(code, code_size);
+ if ((func = jit_emit()) == NULL) @{
+ munmap(code, code_size);
+ code_size += 4096;
+ @}
+ @} while (func == NULL);
+ jit_get_code(&real_code_size); @rem{/* query exact size of the code */}
+@end example
+
+The first call to @code{jit_get_code} should return @code{NULL} and set
+the @code{code_size} argument to the expected amount of bytes required
+to emit code.
+The second call to @code{jit_get_code} is after a successful call to
+@code{jit_emit}, and will return the value previously set with
+@code{jit_set_code} and set the @code{real_code_size} argument to the
+exact amount of bytes used to emit the code.
+
+@section Alternate data buffer
+Sometimes it may be desirable to customize how, or to prevent
+@lightning{} from using an extra buffer for constants or debug
+annotation. This is usually done when also using an alternate code buffer.
+
+@deftypefun jit_pointer_t jit_get_data (jit_word_t *@var{data_size}, jit_word_t *@var{note_size})
+Returns @code{NULL} or the previous value set with @code{jit_set_data},
+and sets the @var{data_size} argument to how many bytes are required
+for the constants data buffer, and @var{note_size} to how many bytes
+are required to store the debug note information.
+Note that it always preallocates one debug note entry even if
+@code{jit_name} or @code{jit_note} are never called, but will return
+zero in the @var{data_size} argument if no constant is required;
+constants are only used for the @code{float} and @code{double} operations
+that have an immediate argument, and not in all @lightning{} ports.
+@end deftypefun
+
+@deftypefun void jit_set_data (jit_pointer_t @var{data}, jit_word_t @var{size}, jit_word_t @var{flags})
+
+@var{data} can be NULL if disabling constants and annotations, otherwise,
+a valid pointer must be passed. An assertion is done that the data will
+fit in @var{size} bytes (but that is a noop if @lightning{} was built
+with @code{-DNDEBUG}).
+
+@var{size} tells the space in bytes available in @var{data}.
+
+@var{flags} can be zero to tell to just use the alternate data buffer,
+or a composition of @code{JIT_DISABLE_DATA} and @code{JIT_DISABLE_NOTE}:
+
+@table @t
+@item JIT_DISABLE_DATA
+@cindex JIT_DISABLE_DATA
+Instructs @lightning{} to not use a constant table, but to use an
+alternate method to synthesize those, usually with a larger code
+sequence using stack space to transfer the value from a GPR to a
+FPR register.
+
+@item JIT_DISABLE_NOTE
+@cindex JIT_DISABLE_NOTE
+Instructs @lightning{} to not store file or function name, and
+line numbers in the constant buffer.
+@end table
+@end deftypefun
+
+A simple example of preventing the use of a data buffer is:
+
+@example
+ @rem{...}
+ jit_realize(); @rem{/* ready to generate code */}
+ jit_get_data(NULL, NULL);
+ jit_set_data(NULL, 0, JIT_DISABLE_DATA | JIT_DISABLE_NOTE);
+ @rem{...}
+@end example
+
+Or to only use a data buffer, if required:
+
+@example
+ jit_uint8_t *data;
+ jit_word_t data_size;
+ @rem{...}
+ jit_realize(); @rem{/* ready to generate code */}
+ jit_get_data(&data_size, NULL);
+ if (data_size)
+ data = malloc(data_size);
+ else
+ data = NULL;
+ jit_set_data(data, data_size, JIT_DISABLE_NOTE);
+ @rem{...}
+ if (data)
+ free(data);
+ @rem{...}
+@end example
+
+@node Acknowledgements
+@chapter Acknowledgements
+
+As far as I know, the first general-purpose portable dynamic code
+generator is @sc{dcg}, by Dawson R.@: Engler and T.@: A.@: Proebsting.
+Further work by Dawson R. Engler resulted in the @sc{vcode} system;
+unlike @sc{dcg}, @sc{vcode} used no intermediate representation and
+directly inspired @lightning{}.
+
+Thanks go to Ian Piumarta, who kindly accepted to release his own
+program @sc{ccg} under the GNU General Public License, thereby allowing
+@lightning{} to use the run-time assemblers he had written for @sc{ccg}.
+@sc{ccg} provides a way of dynamically assembling programs written in the
+underlying architecture's assembly language. So it is not portable,
+yet very interesting.
+
+I also thank Steve Byrne for writing GNU Smalltalk, since @lightning{}
+was first developed as a tool to be used in GNU Smalltalk's dynamic
+translator from bytecodes to native code.
+
+@c %**end of header (This is for running Texinfo on a region.)
+
+@c ***********************************************************************
+
+@bye
diff --git a/deps/lightening/tests/Makefile b/deps/lightening/tests/Makefile
new file mode 100644
index 0000000..793f225
--- /dev/null
+++ b/deps/lightening/tests/Makefile
@@ -0,0 +1,87 @@
+TESTS ?= $(sort $(basename $(wildcard *.c)))
+TARGETS ?= native ia32 aarch64 armv7 mips64el mipsel ppc64le
+
+# Suitable values of cross-compiler variables for Debian:
+#
+# make check CC_IA32=i686-linux-gnu-gcc CC_AARCH64=aarch64-linux-gnu-gcc
+#
+# The relevant packages that you need to run this:
+#
+# dpkg --add-architecture i386
+# dpkg --add-architecture arm64
+# apt-get update -qq
+# apt-get install -y \
+# libc6-dev:amd64 gcc make \
+# qemu binfmt-support qemu-user-static \
+# gcc-i686-linux-gnu libc6-dev-i386-cross libc6:i386 \
+# gcc-aarch64-linux-gnu libc6-dev-arm64-cross libc6:arm64
+#
+CC = gcc
+CC_IA32=guix environment --pure -s i686-linux --ad-hoc gcc-toolchain -- gcc
+CC_AARCH64=guix environment --pure -s aarch64-linux --ad-hoc gcc-toolchain -- gcc
+CC_ARMv7=guix environment --pure -s armhf-linux --ad-hoc gcc-toolchain -- gcc
+CC_MIPS64EL=guix environment --pure -s mips64el-linux --ad-hoc gcc-toolchain -- gcc
+CC_MIPSEL=guix environment --pure -s mipsel-linux --ad-hoc gcc-toolchain -- gcc
+CC_PPC64LE=guix environment --pure -s powerpc64le-linux --ad-hoc gcc-toolchain -- gcc
+CFLAGS = -Wall -O0 -g $(DEBUG)
+LDFLAGS = -lpthread
+RUNNER =
+
+all: $(foreach TARGET,$(TARGETS),$(addprefix test-$(TARGET)-,$(TESTS)))
+
+check: $(addprefix test-,$(TARGETS)) # literal "test-" prefix: TARGET is only bound inside foreach, so it must not appear here
+
+test-vg-%: $(addprefix test-%-,$(TESTS))
+ @echo "Running unit tests..."
+ @set -e; for test in $?; do \
+ echo "Testing: $$test"; \
+ valgrind -q --error-exitcode=1 ./$$test; \
+ done
+ @echo "Success."
+
+test-%: $(addprefix test-%-,$(TESTS))
+ @echo "Running unit tests..."
+ @set -e; for test in $?; do \
+ echo "Testing: $$test"; \
+ ./$$test; \
+ done
+ @echo "Success."
+
+.PHONY: test check
+
+lightening-%.o: ../lightening.h ../lightening/*.c ../lightening/*.h
+ $(CC) $(CFLAGS) $(CPPFLAGS) -I.. -o $@ -c ../lightening/lightening.c
+
+test-native-%: %.c lightening-native.o test.h
+ $(CC) $(CFLAGS) $(CPPFLAGS) -I.. -o $@ lightening-native.o $< $(LDFLAGS)
+
+test-ia32-%: CC = $(CC_IA32)
+test-ia32-%: %.c lightening-ia32.o test.h
+ $(CC) $(CFLAGS) $(CPPFLAGS) -I.. -o $@ lightening-ia32.o $< $(LDFLAGS)
+
+test-aarch64-%: CC = $(CC_AARCH64)
+test-aarch64-%: %.c lightening-aarch64.o test.h
+ $(CC) $(CFLAGS) $(CPPFLAGS) -I.. -o $@ lightening-aarch64.o $< $(LDFLAGS)
+
+test-armv7-%: CC = $(CC_ARMv7)
+test-armv7-%: %.c lightening-armv7.o test.h
+ $(CC) $(CFLAGS) $(CPPFLAGS) -I.. -o $@ lightening-armv7.o $< $(LDFLAGS)
+
+test-mips64el-%: CC = $(CC_MIPS64EL)
+test-mips64el-%: %.c lightening-mips64el.o test.h
+ $(CC) $(CFLAGS) $(CPPFLAGS) -I.. -o $@ lightening-mips64el.o $< $(LDFLAGS)
+
+test-mipsel-%: CC = $(CC_MIPSEL)
+test-mipsel-%: %.c lightening-mipsel.o test.h
+ $(CC) $(CFLAGS) $(CPPFLAGS) -I.. -o $@ lightening-mipsel.o $< $(LDFLAGS)
+
+test-ppc64le-%: CC = $(CC_PPC64LE)
+test-ppc64le-%: %.c lightening-ppc64le.o test.h
+ $(CC) $(CFLAGS) $(CPPFLAGS) -I.. -o $@ lightening-ppc64le.o $< $(LDFLAGS)
+
+.PRECIOUS: $(foreach TARGET,$(TARGETS),$(addprefix test-$(TARGET)-,$(TESTS)))
+.PRECIOUS: $(foreach TARGET,$(TARGETS),lightening-$(TARGET).o)
+
+clean:
+ rm -f $(foreach TARGET,$(TARGETS),$(addprefix test-$(TARGET)-,$(TESTS)))
+ rm -f $(foreach TARGET,$(TARGETS),lightening-$(TARGET).o)
diff --git a/deps/lightening/tests/absr_d.c b/deps/lightening/tests/absr_d.c
new file mode 100644
index 0000000..00b8fa4
--- /dev/null
+++ b/deps/lightening/tests/absr_d.c
@@ -0,0 +1,26 @@
+#include "test.h"
+
+static void
+run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size)
+{
+ jit_begin(j, arena_base, arena_size);
+ size_t align = jit_enter_jit_abi(j, 0, 0, 0);
+ jit_load_args_1(j, jit_operand_fpr (JIT_OPERAND_ABI_DOUBLE, JIT_F0));
+
+ jit_absr_d(j, JIT_F0, JIT_F0);
+ jit_leave_jit_abi(j, 0, 0, align);
+ jit_retr_d(j, JIT_F0);
+
+ double (*f)(double) = jit_end(j, NULL);
+
+ ASSERT(f(0.0) == 0.0);
+ ASSERT(f(-0.0) == 0.0);
+ ASSERT(f(0.5) == 0.5);
+ ASSERT(f(-0.5) == 0.5);
+}
+
+int
+main (int argc, char *argv[])
+{
+ return main_helper(argc, argv, run_test);
+}
diff --git a/deps/lightening/tests/absr_f.c b/deps/lightening/tests/absr_f.c
new file mode 100644
index 0000000..e019b5f
--- /dev/null
+++ b/deps/lightening/tests/absr_f.c
@@ -0,0 +1,26 @@
+#include "test.h"
+
+static void
+run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size)
+{
+ jit_begin(j, arena_base, arena_size);
+ size_t align = jit_enter_jit_abi(j, 0, 0, 0);
+ jit_load_args_1(j, jit_operand_fpr (JIT_OPERAND_ABI_FLOAT, JIT_F0));
+
+ jit_absr_f(j, JIT_F0, JIT_F0);
+ jit_leave_jit_abi(j, 0, 0, align);
+ jit_retr_f(j, JIT_F0);
+
+ float (*f)(float) = jit_end(j, NULL);
+
+ ASSERT(f(0.0) == 0.0);
+ ASSERT(f(-0.0) == 0.0);
+ ASSERT(f(0.5) == 0.5);
+ ASSERT(f(-0.5) == 0.5);
+}
+
+int
+main (int argc, char *argv[])
+{
+ return main_helper(argc, argv, run_test);
+}
diff --git a/deps/lightening/tests/addi.c b/deps/lightening/tests/addi.c
new file mode 100644
index 0000000..756d070
--- /dev/null
+++ b/deps/lightening/tests/addi.c
@@ -0,0 +1,25 @@
+#include "test.h"
+
+static void
+run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size)
+{
+ jit_begin(j, arena_base, arena_size);
+ size_t align = jit_enter_jit_abi(j, 0, 0, 0);
+ jit_load_args_1(j, jit_operand_gpr (JIT_OPERAND_ABI_WORD, JIT_R0));
+
+ jit_addi(j, JIT_R0, JIT_R0, 69);
+ jit_leave_jit_abi(j, 0, 0, align);
+ jit_retr(j, JIT_R0);
+
+ size_t size = 0;
+ void* ret = jit_end(j, &size);
+
+ int (*f)(int) = ret;
+ ASSERT(f(42) == 111);
+}
+
+int
+main (int argc, char *argv[])
+{
+ return main_helper(argc, argv, run_test);
+}
diff --git a/deps/lightening/tests/addr.c b/deps/lightening/tests/addr.c
new file mode 100644
index 0000000..6ee76e2
--- /dev/null
+++ b/deps/lightening/tests/addr.c
@@ -0,0 +1,26 @@
+#include "test.h"
+
+static void
+run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size)
+{
+ jit_begin(j, arena_base, arena_size);
+ size_t align = jit_enter_jit_abi(j, 0, 0, 0);
+ jit_load_args_2(j, jit_operand_gpr (JIT_OPERAND_ABI_WORD, JIT_R0),
+ jit_operand_gpr (JIT_OPERAND_ABI_WORD, JIT_R1));
+
+ jit_addr(j, JIT_R0, JIT_R0, JIT_R1);
+ jit_leave_jit_abi(j, 0, 0, align);
+ jit_retr(j, JIT_R0);
+
+ size_t size = 0;
+ void* ret = jit_end(j, &size);
+
+ int (*f)(int, int) = ret;
+ ASSERT(f(42, 69) == 111);
+}
+
+int
+main (int argc, char *argv[])
+{
+ return main_helper(argc, argv, run_test);
+}
diff --git a/deps/lightening/tests/addr_d.c b/deps/lightening/tests/addr_d.c
new file mode 100644
index 0000000..1121620
--- /dev/null
+++ b/deps/lightening/tests/addr_d.c
@@ -0,0 +1,27 @@
+#include "test.h"
+
+static void
+run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size)
+{
+ jit_begin(j, arena_base, arena_size);
+ size_t align = jit_enter_jit_abi(j, 0, 0, 0);
+ jit_load_args_2(j, jit_operand_fpr (JIT_OPERAND_ABI_DOUBLE, JIT_F0),
+ jit_operand_fpr (JIT_OPERAND_ABI_DOUBLE, JIT_F1));
+
+ jit_addr_d(j, JIT_F0, JIT_F0, JIT_F1);
+ jit_leave_jit_abi(j, 0, 0, align);
+ jit_retr_d(j, JIT_F0);
+
+ size_t size = 0;
+ void* ret = jit_end(j, &size);
+
+ double (*f)(double, double) = ret;
+ ASSERT(f(42., 69.) == 111.);
+ ASSERT(f(42.5, 69.5) == 112.);
+}
+
+int
+main (int argc, char *argv[])
+{
+ return main_helper(argc, argv, run_test);
+}
diff --git a/deps/lightening/tests/addr_f.c b/deps/lightening/tests/addr_f.c
new file mode 100644
index 0000000..4317dfe
--- /dev/null
+++ b/deps/lightening/tests/addr_f.c
@@ -0,0 +1,27 @@
+#include "test.h"
+
+static void
+run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size)
+{
+ jit_begin(j, arena_base, arena_size);
+ size_t align = jit_enter_jit_abi(j, 0, 0, 0);
+ jit_load_args_2(j, jit_operand_fpr (JIT_OPERAND_ABI_FLOAT, JIT_F0),
+ jit_operand_fpr (JIT_OPERAND_ABI_FLOAT, JIT_F1));
+
+ jit_addr_f(j, JIT_F0, JIT_F0, JIT_F1);
+ jit_leave_jit_abi(j, 0, 0, align);
+ jit_retr_f(j, JIT_F0);
+
+ size_t size = 0;
+ void* ret = jit_end(j, &size);
+
+ float (*f)(float, float) = ret;
+ ASSERT(f(42.f, 69.f) == 111.f);
+ ASSERT(f(42.5f, 69.5f) == 112.f);
+}
+
+int
+main (int argc, char *argv[])
+{
+ return main_helper(argc, argv, run_test);
+}
diff --git a/deps/lightening/tests/addx.c b/deps/lightening/tests/addx.c
new file mode 100644
index 0000000..417cd1a
--- /dev/null
+++ b/deps/lightening/tests/addx.c
@@ -0,0 +1,63 @@
+#include "test.h"
+
+static void
+run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size)
+{
+ jit_begin(j, arena_base, arena_size);
+ size_t align = jit_enter_jit_abi(j, 0, 0, 0);
+ jit_load_args_2(j, jit_operand_gpr (JIT_OPERAND_ABI_WORD, JIT_R0),
+ jit_operand_gpr (JIT_OPERAND_ABI_WORD, JIT_R1));
+
+ jit_movi(j, JIT_R2, 0);
+ jit_addcr(j, JIT_R0, JIT_R0, JIT_R1);
+ jit_addxi(j, JIT_R2, JIT_R2, 0); /* per the assertions below, R2 ends up 1 only when the add above carried out of the word */
+ jit_leave_jit_abi(j, 0, 0, align);
+ jit_retr(j, JIT_R2);
+
+ size_t size = 0;
+ void* ret = jit_end(j, &size);
+
+ jit_word_t (*f)(jit_word_t, jit_word_t) = ret;
+
+ ASSERT(f(0, 0) == 0);
+
+#if __WORDSIZE == 32
+ /* carry */
+ ASSERT(f(0xffffffff, 0xffffffff) == 1);
+ /* overflow */
+ ASSERT(f(0x7fffffff, 1) == 0);
+ /* overflow */
+ ASSERT(f(0x7fffffff, 0x7fffffff) == 0);
+ /* nothing */
+ ASSERT(f(0x7fffffff, 0x80000000) == 0);
+ /* carry+overflow */
+ ASSERT(f(0x80000000, 0x80000000) == 1);
+#else
+ /* nothing */
+ ASSERT(f(0xffffffff, 0xffffffff) == 0);
+ /* nothing */
+ ASSERT(f(0x7fffffff, 1) == 0);
+ /* nothing */
+ ASSERT(f(0x7fffffff, 0x7fffffff) == 0);
+ /* nothing */
+ ASSERT(f(0x7fffffff, 0x80000000) == 0);
+ /* nothing */
+ ASSERT(f(0x80000000, 0x80000000) == 0);
+ /* carry */
+ ASSERT(f(0xffffffffffffffff, 0xffffffffffffffff) == 1);
+ /* overflow */
+ ASSERT(f(0x7fffffffffffffff, 1) == 0);
+ /* overflow */
+ ASSERT(f(0x7fffffffffffffff, 0x7fffffffffffffff) == 0);
+ /* nothing */
+ ASSERT(f(0x7fffffffffffffff, 0x8000000000000000) == 0);
+ /* carry+overflow */
+ ASSERT(f(0x8000000000000000, 0x8000000000000000) == 1);
+#endif
+}
+
+int
+main (int argc, char *argv[])
+{
+ return main_helper(argc, argv, run_test);
+}
diff --git a/deps/lightening/tests/andi.c b/deps/lightening/tests/andi.c
new file mode 100644
index 0000000..c6f39d7
--- /dev/null
+++ b/deps/lightening/tests/andi.c
@@ -0,0 +1,31 @@
+#include "test.h"
+
+static void
+run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size)
+{
+ jit_begin(j, arena_base, arena_size);
+ size_t align = jit_enter_jit_abi(j, 0, 0, 0);
+ jit_load_args_1(j, jit_operand_gpr (JIT_OPERAND_ABI_WORD, JIT_R0));
+
+ jit_andi(j, JIT_R0, JIT_R0, 1);
+ jit_leave_jit_abi(j, 0, 0, align);
+ jit_retr(j, JIT_R0);
+
+ size_t size = 0;
+ void* ret = jit_end(j, &size);
+
+ jit_word_t (*f)(jit_word_t) = ret;
+
+ ASSERT(f(0x7fffffff) == 1);
+ ASSERT(f(0x80000000) == 0);
+#if __WORDSIZE == 64
+ ASSERT(f(0x7fffffffffffffff) == 1);
+ ASSERT(f(0x8000000000000000) == 0);
+#endif
+}
+
+int
+main (int argc, char *argv[])
+{
+ return main_helper(argc, argv, run_test);
+}
diff --git a/deps/lightening/tests/andr.c b/deps/lightening/tests/andr.c
new file mode 100644
index 0000000..1114ef9
--- /dev/null
+++ b/deps/lightening/tests/andr.c
@@ -0,0 +1,48 @@
+#include "test.h"
+
+static void
+run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size)
+{
+ jit_begin(j, arena_base, arena_size);
+ size_t align = jit_enter_jit_abi(j, 0, 0, 0);
+ jit_load_args_2(j, jit_operand_gpr (JIT_OPERAND_ABI_WORD, JIT_R0),
+ jit_operand_gpr (JIT_OPERAND_ABI_WORD, JIT_R1));
+
+ jit_andr(j, JIT_R0, JIT_R0, JIT_R1);
+ jit_leave_jit_abi(j, 0, 0, align);
+ jit_retr(j, JIT_R0);
+
+ size_t size = 0;
+ void* ret = jit_end(j, &size);
+
+ jit_word_t (*f)(jit_word_t, jit_word_t) = ret;
+
+ ASSERT(f(0x7fffffff, 1) == 1);
+ ASSERT(f(1, 0x7fffffff) == 1);
+ ASSERT(f(0x80000000, 1) == 0);
+ ASSERT(f(1, 0x80000000) == 0);
+ ASSERT(f(0x7fffffff, 0x80000000) == 0);
+ ASSERT(f(0x80000000, 0x7fffffff) == 0);
+ ASSERT(f(0x7fffffff, 0xffffffff) == 0x7fffffff);
+ ASSERT(f(0xffffffff, 0x7fffffff) == 0x7fffffff);
+ ASSERT(f(0xffffffff, 0xffffffff) == 0xffffffff);
+ ASSERT(f(0x7fffffff, 0) == 0);
+ ASSERT(f(0, 0x7fffffff) == 0);
+#if __WORDSIZE == 64
+ ASSERT(f(0x7fffffffffffffff, 1) == 1);
+ ASSERT(f(1, 0x7fffffffffffffff) == 1);
+ ASSERT(f(0x8000000000000000, 1) == 0);
+ ASSERT(f(1, 0x8000000000000000) == 0);
+ ASSERT(f(0x7fffffffffffffff, 0x8000000000000000) == 0);
+ ASSERT(f(0x8000000000000000, 0x7fffffffffffffff) == 0);
+ ASSERT(f(0x7fffffffffffffff, 0xffffffffffffffff) == 0x7fffffffffffffff);
+ ASSERT(f(0xffffffffffffffff, 0x7fffffffffffffff) == 0x7fffffffffffffff);
+ ASSERT(f(0xffffffffffffffff, 0xffffffffffffffff) == 0xffffffffffffffff);
+#endif
+}
+
+int
+main (int argc, char *argv[])
+{
+ return main_helper(argc, argv, run_test);
+}
diff --git a/deps/lightening/tests/beqi.c b/deps/lightening/tests/beqi.c
new file mode 100644
index 0000000..dcb012f
--- /dev/null
+++ b/deps/lightening/tests/beqi.c
@@ -0,0 +1,32 @@
+#include "test.h"
+
+static void
+run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size)
+{
+ jit_begin(j, arena_base, arena_size);
+ size_t align = jit_enter_jit_abi(j, 0, 0, 0);
+ jit_load_args_1(j, jit_operand_gpr (JIT_OPERAND_ABI_WORD, JIT_R0));
+
+ jit_reloc_t r = jit_beqi(j, JIT_R0, -1);
+ jit_leave_jit_abi(j, 0, 0, align);
+ jit_reti(j, 0);
+ jit_patch_here(j, r);
+ jit_leave_jit_abi(j, 0, 0, align);
+ jit_reti(j, 1);
+
+ jit_word_t (*f)(jit_word_t) = jit_end(j, NULL);
+
+ ASSERT(f(0) == 0);
+ ASSERT(f(1) == 0);
+ ASSERT(f(-1) == 1);
+
+#if __WORDSIZE == 64
+ ASSERT(f(0xffffffffff) == 0);
+#endif
+}
+
+int
+main (int argc, char *argv[])
+{
+ return main_helper(argc, argv, run_test);
+}
diff --git a/deps/lightening/tests/beqr.c b/deps/lightening/tests/beqr.c
new file mode 100644
index 0000000..0100400
--- /dev/null
+++ b/deps/lightening/tests/beqr.c
@@ -0,0 +1,36 @@
+#include "test.h"
+
+static void
+run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size)
+{
+ jit_begin(j, arena_base, arena_size);
+ size_t align = jit_enter_jit_abi(j, 0, 0, 0);
+ jit_load_args_2(j, jit_operand_gpr (JIT_OPERAND_ABI_WORD, JIT_R0),
+ jit_operand_gpr (JIT_OPERAND_ABI_WORD, JIT_R1));
+
+ jit_reloc_t r = jit_beqr(j, JIT_R0, JIT_R1);
+ jit_leave_jit_abi(j, 0, 0, align);
+ jit_reti(j, 0);
+ jit_patch_here(j, r);
+ jit_leave_jit_abi(j, 0, 0, align);
+ jit_reti(j, 1);
+
+ jit_word_t (*f)(jit_word_t, jit_word_t) = jit_end(j, NULL);
+
+ ASSERT(f(0, 0) == 1);
+ ASSERT(f(0, 1) == 0);
+ ASSERT(f(1, 0) == 0);
+ ASSERT(f(-1, 0) == 0);
+ ASSERT(f(0, -1) == 0);
+ ASSERT(f(1, 1) == 1);
+
+#if __WORDSIZE == 64
+ ASSERT(f(0xffffffffff, -1) == 0);
+#endif
+}
+
+int
+main (int argc, char *argv[])
+{
+ return main_helper(argc, argv, run_test);
+}
diff --git a/deps/lightening/tests/beqr_d.c b/deps/lightening/tests/beqr_d.c
new file mode 100644
index 0000000..a84b6a7
--- /dev/null
+++ b/deps/lightening/tests/beqr_d.c
@@ -0,0 +1,35 @@
+#include "test.h"
+
+static void
+run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size)
+{
+ jit_begin(j, arena_base, arena_size);
+ size_t align = jit_enter_jit_abi(j, 0, 0, 0);
+ jit_load_args_2(j, jit_operand_fpr (JIT_OPERAND_ABI_DOUBLE, JIT_F0),
+ jit_operand_fpr (JIT_OPERAND_ABI_DOUBLE, JIT_F1));
+
+ jit_reloc_t r = jit_beqr_d(j, JIT_F0, JIT_F1);
+ jit_leave_jit_abi(j, 0, 0, align);
+ jit_reti(j, 0);
+ jit_patch_here(j, r);
+ jit_leave_jit_abi(j, 0, 0, align);
+ jit_reti(j, 1);
+
+ jit_word_t (*f)(double, double) = jit_end(j, NULL);
+
+ ASSERT(f(0, 0) == 1);
+ ASSERT(f(0, 1) == 0);
+ ASSERT(f(1, 0) == 0);
+ ASSERT(f(-1, 0) == 0);
+ ASSERT(f(0, -1) == 0);
+ ASSERT(f(1, 1) == 1);
+
+ ASSERT(f(0, 0.0/0.0) == 0);
+ ASSERT(f(0.0/0.0, 0) == 0);
+}
+
+int
+main (int argc, char *argv[])
+{
+ return main_helper(argc, argv, run_test);
+}
diff --git a/deps/lightening/tests/beqr_f.c b/deps/lightening/tests/beqr_f.c
new file mode 100644
index 0000000..7b5cc27
--- /dev/null
+++ b/deps/lightening/tests/beqr_f.c
@@ -0,0 +1,35 @@
+#include "test.h"
+
+static void
+run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size)
+{
+ jit_begin(j, arena_base, arena_size);
+ size_t align = jit_enter_jit_abi(j, 0, 0, 0);
+ jit_load_args_2(j, jit_operand_fpr (JIT_OPERAND_ABI_FLOAT, JIT_F0),
+ jit_operand_fpr (JIT_OPERAND_ABI_FLOAT, JIT_F1));
+
+ jit_reloc_t r = jit_beqr_f(j, JIT_F0, JIT_F1);
+ jit_leave_jit_abi(j, 0, 0, align);
+ jit_reti(j, 0);
+ jit_patch_here(j, r);
+ jit_leave_jit_abi(j, 0, 0, align);
+ jit_reti(j, 1);
+
+ jit_word_t (*f)(float, float) = jit_end(j, NULL);
+
+ ASSERT(f(0, 0) == 1);
+ ASSERT(f(0, 1) == 0);
+ ASSERT(f(1, 0) == 0);
+ ASSERT(f(-1, 0) == 0);
+ ASSERT(f(0, -1) == 0);
+ ASSERT(f(1, 1) == 1);
+
+ ASSERT(f(0, 0.0/0.0) == 0);
+ ASSERT(f(0.0/0.0, 0) == 0);
+}
+
+int
+main (int argc, char *argv[])
+{
+ return main_helper(argc, argv, run_test);
+}
diff --git a/deps/lightening/tests/bgei.c b/deps/lightening/tests/bgei.c
new file mode 100644
index 0000000..f30901e
--- /dev/null
+++ b/deps/lightening/tests/bgei.c
@@ -0,0 +1,32 @@
+#include "test.h"
+
+static void
+run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size)
+{
+ jit_begin(j, arena_base, arena_size);
+ size_t align = jit_enter_jit_abi(j, 0, 0, 0);
+ jit_load_args_1(j, jit_operand_gpr (JIT_OPERAND_ABI_WORD, JIT_R0));
+
+ jit_reloc_t r = jit_bgei(j, JIT_R0, 0);
+ jit_leave_jit_abi(j, 0, 0, align);
+ jit_reti(j, 0);
+ jit_patch_here(j, r);
+ jit_leave_jit_abi(j, 0, 0, align);
+ jit_reti(j, 1);
+
+ jit_word_t (*f)(jit_word_t) = jit_end(j, NULL);
+
+ ASSERT(f(0) == 1);
+ ASSERT(f(1) == 1);
+ ASSERT(f(-1) == 0);
+
+#if __WORDSIZE == 64
+ ASSERT(f(0xffffffffff) == 1);
+#endif
+}
+
+int
+main (int argc, char *argv[])
+{
+ return main_helper(argc, argv, run_test);
+}
diff --git a/deps/lightening/tests/bgei_u.c b/deps/lightening/tests/bgei_u.c
new file mode 100644
index 0000000..d61089f
--- /dev/null
+++ b/deps/lightening/tests/bgei_u.c
@@ -0,0 +1,32 @@
+#include "test.h"
+
+static void
+run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size)
+{
+ jit_begin(j, arena_base, arena_size);
+ size_t align = jit_enter_jit_abi(j, 0, 0, 0);
+ jit_load_args_1(j, jit_operand_gpr (JIT_OPERAND_ABI_WORD, JIT_R0));
+
+ jit_reloc_t r = jit_bgei_u(j, JIT_R0, 1);
+ jit_leave_jit_abi(j, 0, 0, align);
+ jit_reti(j, 0);
+ jit_patch_here(j, r);
+ jit_leave_jit_abi(j, 0, 0, align);
+ jit_reti(j, 1);
+
+ jit_word_t (*f)(jit_word_t) = jit_end(j, NULL);
+
+ ASSERT(f(0) == 0);
+ ASSERT(f(1) == 1);
+ ASSERT(f(-1) == 1);
+
+#if __WORDSIZE == 64
+ ASSERT(f(0xff00000000) == 1);
+#endif
+}
+
+int
+main (int argc, char *argv[])
+{
+ return main_helper(argc, argv, run_test);
+}
diff --git a/deps/lightening/tests/bger.c b/deps/lightening/tests/bger.c
new file mode 100644
index 0000000..920e820
--- /dev/null
+++ b/deps/lightening/tests/bger.c
@@ -0,0 +1,34 @@
+#include "test.h"
+
+static void
+run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size)
+{
+ jit_begin(j, arena_base, arena_size);
+ size_t align = jit_enter_jit_abi(j, 0, 0, 0);
+ jit_load_args_2(j, jit_operand_gpr (JIT_OPERAND_ABI_WORD, JIT_R0),
+ jit_operand_gpr (JIT_OPERAND_ABI_WORD, JIT_R1));
+
+ jit_reloc_t r = jit_bger(j, JIT_R0, JIT_R1);
+ jit_leave_jit_abi(j, 0, 0, align);
+ jit_reti(j, 0);
+ jit_patch_here(j, r);
+ jit_leave_jit_abi(j, 0, 0, align);
+ jit_reti(j, 1);
+
+ jit_word_t (*f)(jit_word_t, jit_word_t) = jit_end(j, NULL);
+
+ ASSERT(f(0, 0) == 1);
+ ASSERT(f(0, 1) == 0);
+ ASSERT(f(1, 0) == 1);
+ ASSERT(f(-1, 0) == 0);
+ ASSERT(f(0, -1) == 1);
+#if __WORDSIZE == 64
+ ASSERT(f(0xffffffffff, 1) == 1);
+#endif
+}
+
+int
+main (int argc, char *argv[])
+{
+ return main_helper(argc, argv, run_test);
+}
diff --git a/deps/lightening/tests/bger_d.c b/deps/lightening/tests/bger_d.c
new file mode 100644
index 0000000..712b118
--- /dev/null
+++ b/deps/lightening/tests/bger_d.c
@@ -0,0 +1,34 @@
+#include "test.h"
+
+static void
+run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size)
+{
+ jit_begin(j, arena_base, arena_size);
+ size_t align = jit_enter_jit_abi(j, 0, 0, 0);
+ jit_load_args_2(j, jit_operand_fpr (JIT_OPERAND_ABI_DOUBLE, JIT_F0),
+ jit_operand_fpr (JIT_OPERAND_ABI_DOUBLE, JIT_F1));
+
+ jit_reloc_t r = jit_bger_d(j, JIT_F0, JIT_F1);
+ jit_leave_jit_abi(j, 0, 0, align);
+ jit_reti(j, 0);
+ jit_patch_here(j, r);
+ jit_leave_jit_abi(j, 0, 0, align);
+ jit_reti(j, 1);
+
+ jit_word_t (*f)(double, double) = jit_end(j, NULL);
+
+ ASSERT(f(0, 0) == 1);
+ ASSERT(f(0, 1) == 0);
+ ASSERT(f(1, 0) == 1);
+ ASSERT(f(-1, 0) == 0);
+ ASSERT(f(0, -1) == 1);
+
+ ASSERT(f(0, 0.0/0.0) == 0);
+ ASSERT(f(0.0/0.0, 0) == 0);
+}
+
+int
+main (int argc, char *argv[])
+{
+ return main_helper(argc, argv, run_test);
+}
diff --git a/deps/lightening/tests/bger_f.c b/deps/lightening/tests/bger_f.c
new file mode 100644
index 0000000..b9d5478
--- /dev/null
+++ b/deps/lightening/tests/bger_f.c
@@ -0,0 +1,34 @@
+#include "test.h"
+
+static void
+run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size)
+{
+ jit_begin(j, arena_base, arena_size);
+ size_t align = jit_enter_jit_abi(j, 0, 0, 0);
+ jit_load_args_2(j, jit_operand_fpr (JIT_OPERAND_ABI_FLOAT, JIT_F0),
+ jit_operand_fpr (JIT_OPERAND_ABI_FLOAT, JIT_F1));
+
+ jit_reloc_t r = jit_bger_f(j, JIT_F0, JIT_F1);
+ jit_leave_jit_abi(j, 0, 0, align);
+ jit_reti(j, 0);
+ jit_patch_here(j, r);
+ jit_leave_jit_abi(j, 0, 0, align);
+ jit_reti(j, 1);
+
+ jit_word_t (*f)(float, float) = jit_end(j, NULL);
+
+ ASSERT(f(0, 0) == 1);
+ ASSERT(f(0, 1) == 0);
+ ASSERT(f(1, 0) == 1);
+ ASSERT(f(-1, 0) == 0);
+ ASSERT(f(0, -1) == 1);
+
+ ASSERT(f(0, 0.0/0.0) == 0);
+ ASSERT(f(0.0/0.0, 0) == 0);
+}
+
+int
+main (int argc, char *argv[])
+{
+ return main_helper(argc, argv, run_test);
+}
diff --git a/deps/lightening/tests/bger_u.c b/deps/lightening/tests/bger_u.c
new file mode 100644
index 0000000..1978765
--- /dev/null
+++ b/deps/lightening/tests/bger_u.c
@@ -0,0 +1,35 @@
+#include "test.h"
+
+static void
+run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size)
+{
+ jit_begin(j, arena_base, arena_size);
+ size_t align = jit_enter_jit_abi(j, 0, 0, 0);
+ jit_load_args_2(j, jit_operand_gpr (JIT_OPERAND_ABI_WORD, JIT_R0),
+ jit_operand_gpr (JIT_OPERAND_ABI_WORD, JIT_R1));
+
+ jit_reloc_t r = jit_bger_u(j, JIT_R0, JIT_R1);
+ jit_leave_jit_abi(j, 0, 0, align);
+ jit_reti(j, 0);
+ jit_patch_here(j, r);
+ jit_leave_jit_abi(j, 0, 0, align);
+ jit_reti(j, 1);
+
+ jit_word_t (*f)(jit_word_t, jit_word_t) = jit_end(j, NULL);
+
+ ASSERT(f(0, 0) == 1);
+ ASSERT(f(0, 1) == 0);
+ ASSERT(f(1, 0) == 1);
+ ASSERT(f(-1, 0) == 1);
+ ASSERT(f(0, -1) == 0);
+
+#if __WORDSIZE == 64
+ ASSERT(f(0xff00000000, 1) == 1);
+#endif
+}
+
+int
+main (int argc, char *argv[])
+{
+ return main_helper(argc, argv, run_test);
+}
diff --git a/deps/lightening/tests/bgti.c b/deps/lightening/tests/bgti.c
new file mode 100644
index 0000000..89eecae
--- /dev/null
+++ b/deps/lightening/tests/bgti.c
@@ -0,0 +1,33 @@
+#include "test.h"
+
+static void
+run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size)
+{
+ jit_begin(j, arena_base, arena_size);
+ size_t align = jit_enter_jit_abi(j, 0, 0, 0);
+ jit_load_args_1(j, jit_operand_gpr (JIT_OPERAND_ABI_WORD, JIT_R0));
+
+ jit_reloc_t r = jit_bgti(j, JIT_R0, 1);
+ jit_leave_jit_abi(j, 0, 0, align);
+ jit_reti(j, 0);
+ jit_patch_here(j, r);
+ jit_leave_jit_abi(j, 0, 0, align);
+ jit_reti(j, 1);
+
+ jit_word_t (*f)(jit_word_t) = jit_end(j, NULL);
+
+ ASSERT(f(0) == 0);
+ ASSERT(f(1) == 0);
+ ASSERT(f(2) == 1);
+ ASSERT(f(-1) == 0);
+
+#if __WORDSIZE == 64
+ ASSERT(f(0xffffffffff) == 1);
+#endif
+}
+
+int
+main (int argc, char *argv[])
+{
+ return main_helper(argc, argv, run_test);
+}
diff --git a/deps/lightening/tests/bgti_u.c b/deps/lightening/tests/bgti_u.c
new file mode 100644
index 0000000..51bb754
--- /dev/null
+++ b/deps/lightening/tests/bgti_u.c
@@ -0,0 +1,31 @@
+#include "test.h"
+
+static void
+run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size)
+{
+ jit_begin(j, arena_base, arena_size);
+ size_t align = jit_enter_jit_abi(j, 0, 0, 0);
+ jit_load_args_1(j, jit_operand_gpr (JIT_OPERAND_ABI_WORD, JIT_R0));
+
+ jit_reloc_t r = jit_bgti_u(j, JIT_R0, 1);
+ jit_leave_jit_abi(j, 0, 0, align);
+ jit_reti(j, 0);
+ jit_patch_here(j, r);
+ jit_leave_jit_abi(j, 0, 0, align);
+ jit_reti(j, 1);
+
+ jit_word_t (*f)(jit_word_t) = jit_end(j, NULL);
+
+ ASSERT(f(0) == 0);
+ ASSERT(f(1) == 0);
+ ASSERT(f(-1) == 1);
+#if __WORDSIZE == 64
+ ASSERT(f(0xff00000000) == 1);
+#endif
+}
+
+int
+main (int argc, char *argv[])
+{
+ return main_helper(argc, argv, run_test);
+}
diff --git a/deps/lightening/tests/bgtr.c b/deps/lightening/tests/bgtr.c
new file mode 100644
index 0000000..c4dcd51
--- /dev/null
+++ b/deps/lightening/tests/bgtr.c
@@ -0,0 +1,34 @@
+#include "test.h"
+
+static void
+run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size)
+{
+ jit_begin(j, arena_base, arena_size);
+ size_t align = jit_enter_jit_abi(j, 0, 0, 0);
+ jit_load_args_2(j, jit_operand_gpr (JIT_OPERAND_ABI_WORD, JIT_R0),
+ jit_operand_gpr (JIT_OPERAND_ABI_WORD, JIT_R1));
+
+ jit_reloc_t r = jit_bgtr(j, JIT_R0, JIT_R1);
+ jit_leave_jit_abi(j, 0, 0, align);
+ jit_reti(j, 0);
+ jit_patch_here(j, r);
+ jit_leave_jit_abi(j, 0, 0, align);
+ jit_reti(j, 1);
+
+ jit_word_t (*f)(jit_word_t, jit_word_t) = jit_end(j, NULL);
+
+ ASSERT(f(0, 0) == 0);
+ ASSERT(f(0, 1) == 0);
+ ASSERT(f(1, 0) == 1);
+ ASSERT(f(-1, 0) == 0);
+ ASSERT(f(0, -1) == 1);
+#if __WORDSIZE == 64
+ ASSERT(f(0xffffffffff, 1) == 1);
+#endif
+}
+
+int
+main (int argc, char *argv[])
+{
+ return main_helper(argc, argv, run_test);
+}
diff --git a/deps/lightening/tests/bgtr_d.c b/deps/lightening/tests/bgtr_d.c
new file mode 100644
index 0000000..d3c2436
--- /dev/null
+++ b/deps/lightening/tests/bgtr_d.c
@@ -0,0 +1,34 @@
+#include "test.h"
+
+static void
+run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size)
+{
+ jit_begin(j, arena_base, arena_size);
+ size_t align = jit_enter_jit_abi(j, 0, 0, 0);
+ jit_load_args_2(j, jit_operand_fpr (JIT_OPERAND_ABI_DOUBLE, JIT_F0),
+ jit_operand_fpr (JIT_OPERAND_ABI_DOUBLE, JIT_F1));
+
+ jit_reloc_t r = jit_bgtr_d(j, JIT_F0, JIT_F1);
+ jit_leave_jit_abi(j, 0, 0, align);
+ jit_reti(j, 0);
+ jit_patch_here(j, r);
+ jit_leave_jit_abi(j, 0, 0, align);
+ jit_reti(j, 1);
+
+ jit_word_t (*f)(double, double) = jit_end(j, NULL);
+
+ ASSERT(f(0, 0) == 0);
+ ASSERT(f(0, 1) == 0);
+ ASSERT(f(1, 0) == 1);
+ ASSERT(f(-1, 0) == 0);
+ ASSERT(f(0, -1) == 1);
+
+ ASSERT(f(0, 0.0/0.0) == 0);
+ ASSERT(f(0.0/0.0, 0) == 0);
+}
+
+int
+main (int argc, char *argv[])
+{
+ return main_helper(argc, argv, run_test);
+}
diff --git a/deps/lightening/tests/bgtr_f.c b/deps/lightening/tests/bgtr_f.c
new file mode 100644
index 0000000..91cb8c0
--- /dev/null
+++ b/deps/lightening/tests/bgtr_f.c
@@ -0,0 +1,34 @@
+#include "test.h"
+
+static void
+run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size)
+{
+ jit_begin(j, arena_base, arena_size);
+ size_t align = jit_enter_jit_abi(j, 0, 0, 0);
+ jit_load_args_2(j, jit_operand_fpr (JIT_OPERAND_ABI_FLOAT, JIT_F0),
+ jit_operand_fpr (JIT_OPERAND_ABI_FLOAT, JIT_F1));
+
+ jit_reloc_t r = jit_bgtr_f(j, JIT_F0, JIT_F1);
+ jit_leave_jit_abi(j, 0, 0, align);
+ jit_reti(j, 0);
+ jit_patch_here(j, r);
+ jit_leave_jit_abi(j, 0, 0, align);
+ jit_reti(j, 1);
+
+ jit_word_t (*f)(float, float) = jit_end(j, NULL);
+
+ ASSERT(f(0, 0) == 0);
+ ASSERT(f(0, 1) == 0);
+ ASSERT(f(1, 0) == 1);
+ ASSERT(f(-1, 0) == 0);
+ ASSERT(f(0, -1) == 1);
+
+ ASSERT(f(0, 0.0/0.0) == 0);
+ ASSERT(f(0.0/0.0, 0) == 0);
+}
+
+int
+main (int argc, char *argv[])
+{
+ return main_helper(argc, argv, run_test);
+}
diff --git a/deps/lightening/tests/bgtr_u.c b/deps/lightening/tests/bgtr_u.c
new file mode 100644
index 0000000..34ad257
--- /dev/null
+++ b/deps/lightening/tests/bgtr_u.c
@@ -0,0 +1,34 @@
+#include "test.h"
+
+static void
+run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size)
+{
+ jit_begin(j, arena_base, arena_size);
+ size_t align = jit_enter_jit_abi(j, 0, 0, 0);
+ jit_load_args_2(j, jit_operand_gpr (JIT_OPERAND_ABI_WORD, JIT_R0),
+ jit_operand_gpr (JIT_OPERAND_ABI_WORD, JIT_R1));
+
+ jit_reloc_t r = jit_bgtr_u(j, JIT_R0, JIT_R1);
+ jit_leave_jit_abi(j, 0, 0, align);
+ jit_reti(j, 0);
+ jit_patch_here(j, r);
+ jit_leave_jit_abi(j, 0, 0, align);
+ jit_reti(j, 1);
+
+ jit_word_t (*f)(jit_word_t, jit_word_t) = jit_end(j, NULL);
+
+ ASSERT(f(0, 0) == 0);
+ ASSERT(f(0, 1) == 0);
+ ASSERT(f(1, 0) == 1);
+ ASSERT(f(-1, 0) == 1);
+ ASSERT(f(0, -1) == 0);
+#if __WORDSIZE == 64
+ ASSERT(f(0xff00000000, 1) == 1);
+#endif
+}
+
+int
+main (int argc, char *argv[])
+{
+ return main_helper(argc, argv, run_test);
+}
diff --git a/deps/lightening/tests/blei.c b/deps/lightening/tests/blei.c
new file mode 100644
index 0000000..5725032
--- /dev/null
+++ b/deps/lightening/tests/blei.c
@@ -0,0 +1,31 @@
+#include "test.h"
+
+static void
+run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size)
+{
+ jit_begin(j, arena_base, arena_size);
+ size_t align = jit_enter_jit_abi(j, 0, 0, 0);
+ jit_load_args_1(j, jit_operand_gpr (JIT_OPERAND_ABI_WORD, JIT_R0));
+
+ jit_reloc_t r = jit_blei(j, JIT_R0, 0);
+ jit_leave_jit_abi(j, 0, 0, align);
+ jit_reti(j, 0);
+ jit_patch_here(j, r);
+ jit_leave_jit_abi(j, 0, 0, align);
+ jit_reti(j, 1);
+
+ jit_word_t (*f)(jit_word_t) = jit_end(j, NULL);
+
+ ASSERT(f(0) == 1);
+ ASSERT(f(1) == 0);
+ ASSERT(f(-1) == 1);
+#if __WORDSIZE == 64
+ ASSERT(f(0xffffffffff) == 0);
+#endif
+}
+
+int
+main (int argc, char *argv[])
+{
+ return main_helper(argc, argv, run_test);
+}
diff --git a/deps/lightening/tests/blei_u.c b/deps/lightening/tests/blei_u.c
new file mode 100644
index 0000000..f6862ad
--- /dev/null
+++ b/deps/lightening/tests/blei_u.c
@@ -0,0 +1,31 @@
+#include "test.h"
+
+static void
+run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size)
+{
+ jit_begin(j, arena_base, arena_size);
+ size_t align = jit_enter_jit_abi(j, 0, 0, 0);
+ jit_load_args_1(j, jit_operand_gpr (JIT_OPERAND_ABI_WORD, JIT_R0));
+
+ jit_reloc_t r = jit_blei_u(j, JIT_R0, 1);
+ jit_leave_jit_abi(j, 0, 0, align);
+ jit_reti(j, 0);
+ jit_patch_here(j, r);
+ jit_leave_jit_abi(j, 0, 0, align);
+ jit_reti(j, 1);
+
+ jit_word_t (*f)(jit_word_t) = jit_end(j, NULL);
+
+ ASSERT(f(0) == 1);
+ ASSERT(f(1) == 1);
+ ASSERT(f(-1) == 0);
+#if __WORDSIZE == 64
+ ASSERT(f(0xff00000000) == 0);
+#endif
+}
+
+int
+main (int argc, char *argv[])
+{
+ return main_helper(argc, argv, run_test);
+}
diff --git a/deps/lightening/tests/bler.c b/deps/lightening/tests/bler.c
new file mode 100644
index 0000000..0b37785
--- /dev/null
+++ b/deps/lightening/tests/bler.c
@@ -0,0 +1,34 @@
+#include "test.h"
+
+static void
+run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size)
+{
+ jit_begin(j, arena_base, arena_size);
+ size_t align = jit_enter_jit_abi(j, 0, 0, 0);
+ jit_load_args_2(j, jit_operand_gpr (JIT_OPERAND_ABI_WORD, JIT_R0),
+ jit_operand_gpr (JIT_OPERAND_ABI_WORD, JIT_R1));
+
+ jit_reloc_t r = jit_bler(j, JIT_R0, JIT_R1);
+ jit_leave_jit_abi(j, 0, 0, align);
+ jit_reti(j, 0);
+ jit_patch_here(j, r);
+ jit_leave_jit_abi(j, 0, 0, align);
+ jit_reti(j, 1);
+
+ jit_word_t (*f)(jit_word_t, jit_word_t) = jit_end(j, NULL);
+
+ ASSERT(f(0, 0) == 1);
+ ASSERT(f(0, 1) == 1);
+ ASSERT(f(1, 0) == 0);
+ ASSERT(f(-1, 0) == 1);
+ ASSERT(f(0, -1) == 0);
+#if __WORDSIZE == 64
+ ASSERT(f(0xffffffffff, 1) == 0);
+#endif
+}
+
+int
+main (int argc, char *argv[])
+{
+ return main_helper(argc, argv, run_test);
+}
diff --git a/deps/lightening/tests/bler_d.c b/deps/lightening/tests/bler_d.c
new file mode 100644
index 0000000..507dac5
--- /dev/null
+++ b/deps/lightening/tests/bler_d.c
@@ -0,0 +1,34 @@
+#include "test.h"
+
+static void
+run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size)
+{
+ jit_begin(j, arena_base, arena_size);
+ size_t align = jit_enter_jit_abi(j, 0, 0, 0);
+ jit_load_args_2(j, jit_operand_fpr (JIT_OPERAND_ABI_DOUBLE, JIT_F0),
+ jit_operand_fpr (JIT_OPERAND_ABI_DOUBLE, JIT_F1));
+
+ jit_reloc_t r = jit_bler_d(j, JIT_F0, JIT_F1);
+ jit_leave_jit_abi(j, 0, 0, align);
+ jit_reti(j, 0);
+ jit_patch_here(j, r);
+ jit_leave_jit_abi(j, 0, 0, align);
+ jit_reti(j, 1);
+
+ jit_word_t (*f)(double, double) = jit_end(j, NULL);
+
+ ASSERT(f(0, 0) == 1);
+ ASSERT(f(0, 1) == 1);
+ ASSERT(f(1, 0) == 0);
+ ASSERT(f(-1, 0) == 1);
+ ASSERT(f(0, -1) == 0);
+
+ ASSERT(f(0, 0.0/0.0) == 0);
+ ASSERT(f(0.0/0.0, 0) == 0);
+}
+
+int
+main (int argc, char *argv[])
+{
+ return main_helper(argc, argv, run_test);
+}
diff --git a/deps/lightening/tests/bler_f.c b/deps/lightening/tests/bler_f.c
new file mode 100644
index 0000000..191b649
--- /dev/null
+++ b/deps/lightening/tests/bler_f.c
@@ -0,0 +1,34 @@
+#include "test.h"
+
+static void /* Builds a two-float-argument function around jit_bler_f and asserts its results. */
+run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size)
+{
+ jit_begin(j, arena_base, arena_size);
+ size_t align = jit_enter_jit_abi(j, 0, 0, 0);
+ jit_load_args_2(j, jit_operand_fpr (JIT_OPERAND_ABI_FLOAT, JIT_F0),
+ jit_operand_fpr (JIT_OPERAND_ABI_FLOAT, JIT_F1));
+
+ jit_reloc_t r = jit_bler_f(j, JIT_F0, JIT_F1); /* branch under test on F0, F1 */
+ jit_leave_jit_abi(j, 0, 0, align);
+ jit_reti(j, 0); /* code placed right after the branch returns 0 */
+ jit_patch_here(j, r); /* r is patched to this position (presumably the taken-branch target) */
+ jit_leave_jit_abi(j, 0, 0, align);
+ jit_reti(j, 1); /* code at the patched position returns 1 */
+
+ jit_word_t (*f)(float, float) = jit_end(j, NULL);
+
+ ASSERT(f(0, 0) == 1); /* equal operands */
+ ASSERT(f(0, 1) == 1);
+ ASSERT(f(1, 0) == 0);
+ ASSERT(f(-1, 0) == 1);
+ ASSERT(f(0, -1) == 0);
+
+ ASSERT(f(0, 0.0/0.0) == 0); /* 0.0/0.0 is NaN under IEEE 754; expects 0 */
+ ASSERT(f(0.0/0.0, 0) == 0); /* NaN as the first operand; expects 0 */
+}
+
+int
+main (int argc, char *argv[])
+{
+ return main_helper(argc, argv, run_test); /* pass run_test to main_helper and return its result */
+}
diff --git a/deps/lightening/tests/bler_u.c b/deps/lightening/tests/bler_u.c
new file mode 100644
index 0000000..0830668
--- /dev/null
+++ b/deps/lightening/tests/bler_u.c
@@ -0,0 +1,34 @@
+#include "test.h"
+
+static void /* Builds a two-word-argument function around jit_bler_u and asserts its results. */
+run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size)
+{
+ jit_begin(j, arena_base, arena_size);
+ size_t align = jit_enter_jit_abi(j, 0, 0, 0);
+ jit_load_args_2(j, jit_operand_gpr (JIT_OPERAND_ABI_WORD, JIT_R0),
+ jit_operand_gpr (JIT_OPERAND_ABI_WORD, JIT_R1));
+
+ jit_reloc_t r = jit_bler_u(j, JIT_R0, JIT_R1); /* branch under test on R0, R1 */
+ jit_leave_jit_abi(j, 0, 0, align);
+ jit_reti(j, 0); /* code placed right after the branch returns 0 */
+ jit_patch_here(j, r); /* r is patched to this position (presumably the taken-branch target) */
+ jit_leave_jit_abi(j, 0, 0, align);
+ jit_reti(j, 1); /* code at the patched position returns 1 */
+
+ jit_word_t (*f)(jit_word_t, jit_word_t) = jit_end(j, NULL);
+
+ ASSERT(f(0, 0) == 1); /* equal operands */
+ ASSERT(f(0, 1) == 1);
+ ASSERT(f(1, 0) == 0);
+ ASSERT(f(-1, 0) == 0); /* -1 (all bits set) is expected to compare above 0 */
+ ASSERT(f(0, -1) == 1); /* 0 is expected to compare at or below -1 (all bits set) */
+#if __WORDSIZE == 64
+ ASSERT(f(0xff00000000, 1) == 0); /* first operand has only bits above bit 31 set */
+#endif
+}
+
+int
+main (int argc, char *argv[])
+{
+ return main_helper(argc, argv, run_test); /* pass run_test to main_helper and return its result */
+}
diff --git a/deps/lightening/tests/bltgtr_d.c b/deps/lightening/tests/bltgtr_d.c
new file mode 100644
index 0000000..3d8835d
--- /dev/null
+++ b/deps/lightening/tests/bltgtr_d.c
@@ -0,0 +1,36 @@
+#include "test.h"
+
+static void /* Builds a two-double-argument function around jit_bltgtr_d and asserts its results. */
+run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size)
+{
+ jit_begin(j, arena_base, arena_size);
+ size_t align = jit_enter_jit_abi(j, 0, 0, 0);
+ jit_load_args_2(j, jit_operand_fpr (JIT_OPERAND_ABI_DOUBLE, JIT_F0),
+ jit_operand_fpr (JIT_OPERAND_ABI_DOUBLE, JIT_F1));
+
+ jit_reloc_t r = jit_bltgtr_d(j, JIT_F0, JIT_F1); /* branch under test on F0, F1 */
+ jit_leave_jit_abi(j, 0, 0, align);
+ jit_reti(j, 0); /* code placed right after the branch returns 0 */
+ jit_patch_here(j, r); /* r is patched to this position (presumably the taken-branch target) */
+ jit_leave_jit_abi(j, 0, 0, align);
+ jit_reti(j, 1); /* code at the patched position returns 1 */
+
+ jit_word_t (*f)(double, double) = jit_end(j, NULL);
+
+ ASSERT(f(0, 0) == 0); /* equal operands */
+ ASSERT(f(0, 1) == 1); /* first below second */
+ ASSERT(f(1, 0) == 1); /* first above second */
+ ASSERT(f(-1, 0) == 1);
+ ASSERT(f(0, -1) == 1);
+ ASSERT(f(1, 1) == 0); /* equal operands */
+
+ ASSERT(f(0, 0.0/0.0) == 0); /* 0.0/0.0 is NaN under IEEE 754; expects 0 */
+ ASSERT(f(0.0/0.0, 0) == 0);
+ ASSERT(f(0.0/0.0, 0.0/0.0) == 0); /* both operands NaN */
+}
+
+int
+main (int argc, char *argv[])
+{
+ return main_helper(argc, argv, run_test); /* pass run_test to main_helper and return its result */
+}
diff --git a/deps/lightening/tests/bltgtr_f.c b/deps/lightening/tests/bltgtr_f.c
new file mode 100644
index 0000000..fbdbc3b
--- /dev/null
+++ b/deps/lightening/tests/bltgtr_f.c
@@ -0,0 +1,36 @@
+#include "test.h"
+
+static void /* Builds a two-float-argument function around jit_bltgtr_f and asserts its results. */
+run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size)
+{
+ jit_begin(j, arena_base, arena_size);
+ size_t align = jit_enter_jit_abi(j, 0, 0, 0);
+ jit_load_args_2(j, jit_operand_fpr (JIT_OPERAND_ABI_FLOAT, JIT_F0),
+ jit_operand_fpr (JIT_OPERAND_ABI_FLOAT, JIT_F1));
+
+ jit_reloc_t r = jit_bltgtr_f(j, JIT_F0, JIT_F1); /* branch under test on F0, F1 */
+ jit_leave_jit_abi(j, 0, 0, align);
+ jit_reti(j, 0); /* code placed right after the branch returns 0 */
+ jit_patch_here(j, r); /* r is patched to this position (presumably the taken-branch target) */
+ jit_leave_jit_abi(j, 0, 0, align);
+ jit_reti(j, 1); /* code at the patched position returns 1 */
+
+ jit_word_t (*f)(float, float) = jit_end(j, NULL);
+
+ ASSERT(f(0, 0) == 0); /* equal operands */
+ ASSERT(f(0, 1) == 1); /* first below second */
+ ASSERT(f(1, 0) == 1); /* first above second */
+ ASSERT(f(-1, 0) == 1);
+ ASSERT(f(0, -1) == 1);
+ ASSERT(f(1, 1) == 0); /* equal operands */
+
+ ASSERT(f(0, 0.0/0.0) == 0); /* 0.0/0.0 is NaN under IEEE 754; expects 0 */
+ ASSERT(f(0.0/0.0, 0) == 0);
+ ASSERT(f(0.0/0.0, 0.0/0.0) == 0); /* both operands NaN */
+}
+
+int
+main (int argc, char *argv[])
+{
+ return main_helper(argc, argv, run_test); /* pass run_test to main_helper and return its result */
+}
diff --git a/deps/lightening/tests/blti.c b/deps/lightening/tests/blti.c
new file mode 100644
index 0000000..d073337
--- /dev/null
+++ b/deps/lightening/tests/blti.c
@@ -0,0 +1,31 @@
+#include "test.h"
+
+static void /* Builds a one-word-argument function around jit_blti with immediate 0 and asserts its results. */
+run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size)
+{
+ jit_begin(j, arena_base, arena_size);
+ size_t align = jit_enter_jit_abi(j, 0, 0, 0);
+ jit_load_args_1(j, jit_operand_gpr (JIT_OPERAND_ABI_WORD, JIT_R0));
+
+ jit_reloc_t r = jit_blti(j, JIT_R0, 0); /* branch under test: R0 against immediate 0 */
+ jit_leave_jit_abi(j, 0, 0, align);
+ jit_reti(j, 0); /* code placed right after the branch returns 0 */
+ jit_patch_here(j, r); /* r is patched to this position (presumably the taken-branch target) */
+ jit_leave_jit_abi(j, 0, 0, align);
+ jit_reti(j, 1); /* code at the patched position returns 1 */
+
+ jit_word_t (*f)(jit_word_t) = jit_end(j, NULL);
+
+ ASSERT(f(0) == 0); /* argument equal to the immediate */
+ ASSERT(f(1) == 0);
+ ASSERT(f(-1) == 1); /* only the negative argument expects 1 */
+#if __WORDSIZE == 64
+ ASSERT(f(0xffffffffff) == 0); /* positive value that does not fit in 32 bits */
+#endif
+}
+
+int
+main (int argc, char *argv[])
+{
+ return main_helper(argc, argv, run_test); /* pass run_test to main_helper and return its result */
+}
diff --git a/deps/lightening/tests/blti_u.c b/deps/lightening/tests/blti_u.c
new file mode 100644
index 0000000..04a7037
--- /dev/null
+++ b/deps/lightening/tests/blti_u.c
@@ -0,0 +1,31 @@
+#include "test.h"
+
+static void /* Builds a one-word-argument function around jit_blti_u with immediate 1 and asserts its results. */
+run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size)
+{
+ jit_begin(j, arena_base, arena_size);
+ size_t align = jit_enter_jit_abi(j, 0, 0, 0);
+ jit_load_args_1(j, jit_operand_gpr (JIT_OPERAND_ABI_WORD, JIT_R0));
+
+ jit_reloc_t r = jit_blti_u(j, JIT_R0, 1); /* branch under test: R0 against immediate 1 */
+ jit_leave_jit_abi(j, 0, 0, align);
+ jit_reti(j, 0); /* code placed right after the branch returns 0 */
+ jit_patch_here(j, r); /* r is patched to this position (presumably the taken-branch target) */
+ jit_leave_jit_abi(j, 0, 0, align);
+ jit_reti(j, 1); /* code at the patched position returns 1 */
+
+ jit_word_t (*f)(jit_word_t) = jit_end(j, NULL);
+
+ ASSERT(f(0) == 1); /* 0 is the only tested argument expecting 1 */
+ ASSERT(f(1) == 0); /* argument equal to the immediate */
+ ASSERT(f(-1) == 0); /* -1 (all bits set) is expected not to compare below 1 */
+#if __WORDSIZE == 64
+ ASSERT(f(0xff00000000) == 0); /* argument has only bits above bit 31 set */
+#endif
+}
+
+int
+main (int argc, char *argv[])
+{
+ return main_helper(argc, argv, run_test); /* pass run_test to main_helper and return its result */
+}
diff --git a/deps/lightening/tests/bltr.c b/deps/lightening/tests/bltr.c
new file mode 100644
index 0000000..a928fab
--- /dev/null
+++ b/deps/lightening/tests/bltr.c
@@ -0,0 +1,34 @@
+#include "test.h"
+
+static void /* Builds a two-word-argument function around jit_bltr and asserts its results. */
+run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size)
+{
+ jit_begin(j, arena_base, arena_size);
+ size_t align = jit_enter_jit_abi(j, 0, 0, 0);
+ jit_load_args_2(j, jit_operand_gpr (JIT_OPERAND_ABI_WORD, JIT_R0),
+ jit_operand_gpr (JIT_OPERAND_ABI_WORD, JIT_R1));
+
+ jit_reloc_t r = jit_bltr(j, JIT_R0, JIT_R1); /* branch under test on R0, R1 */
+ jit_leave_jit_abi(j, 0, 0, align);
+ jit_reti(j, 0); /* code placed right after the branch returns 0 */
+ jit_patch_here(j, r); /* r is patched to this position (presumably the taken-branch target) */
+ jit_leave_jit_abi(j, 0, 0, align);
+ jit_reti(j, 1); /* code at the patched position returns 1 */
+
+ jit_word_t (*f)(jit_word_t, jit_word_t) = jit_end(j, NULL);
+
+ ASSERT(f(0, 0) == 0); /* equal operands */
+ ASSERT(f(0, 1) == 1);
+ ASSERT(f(1, 0) == 0);
+ ASSERT(f(-1, 0) == 1); /* -1 is expected to compare below 0 */
+ ASSERT(f(0, -1) == 0);
+#if __WORDSIZE == 64
+ ASSERT(f(0xffffffffffff, 0) == 0); /* positive first operand that does not fit in 32 bits */
+#endif
+}
+
+int
+main (int argc, char *argv[])
+{
+ return main_helper(argc, argv, run_test); /* pass run_test to main_helper and return its result */
+}
diff --git a/deps/lightening/tests/bltr_d.c b/deps/lightening/tests/bltr_d.c
new file mode 100644
index 0000000..2d62609
--- /dev/null
+++ b/deps/lightening/tests/bltr_d.c
@@ -0,0 +1,34 @@
+#include "test.h"
+
+static void /* Builds a two-double-argument function around jit_bltr_d and asserts its results. */
+run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size)
+{
+ jit_begin(j, arena_base, arena_size);
+ size_t align = jit_enter_jit_abi(j, 0, 0, 0);
+ jit_load_args_2(j, jit_operand_fpr (JIT_OPERAND_ABI_DOUBLE, JIT_F0),
+ jit_operand_fpr (JIT_OPERAND_ABI_DOUBLE, JIT_F1));
+
+ jit_reloc_t r = jit_bltr_d(j, JIT_F0, JIT_F1); /* branch under test on F0, F1 */
+ jit_leave_jit_abi(j, 0, 0, align);
+ jit_reti(j, 0); /* code placed right after the branch returns 0 */
+ jit_patch_here(j, r); /* r is patched to this position (presumably the taken-branch target) */
+ jit_leave_jit_abi(j, 0, 0, align);
+ jit_reti(j, 1); /* code at the patched position returns 1 */
+
+ jit_word_t (*f)(double, double) = jit_end(j, NULL);
+
+ ASSERT(f(0, 0) == 0); /* equal operands */
+ ASSERT(f(0, 1) == 1);
+ ASSERT(f(1, 0) == 0);
+ ASSERT(f(-1, 0) == 1);
+ ASSERT(f(0, -1) == 0);
+
+ ASSERT(f(0, 0.0/0.0) == 0); /* 0.0/0.0 is NaN under IEEE 754; expects 0 */
+ ASSERT(f(0.0/0.0, 0) == 0); /* NaN as the first operand; expects 0 */
+}
+
+int
+main (int argc, char *argv[])
+{
+ return main_helper(argc, argv, run_test); /* pass run_test to main_helper and return its result */
+}
diff --git a/deps/lightening/tests/bltr_f.c b/deps/lightening/tests/bltr_f.c
new file mode 100644
index 0000000..eebd3da
--- /dev/null
+++ b/deps/lightening/tests/bltr_f.c
@@ -0,0 +1,34 @@
+#include "test.h"
+
+static void /* Builds a two-float-argument function around jit_bltr_f and asserts its results. */
+run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size)
+{
+ jit_begin(j, arena_base, arena_size);
+ size_t align = jit_enter_jit_abi(j, 0, 0, 0);
+ jit_load_args_2(j, jit_operand_fpr (JIT_OPERAND_ABI_FLOAT, JIT_F0),
+ jit_operand_fpr (JIT_OPERAND_ABI_FLOAT, JIT_F1));
+
+ jit_reloc_t r = jit_bltr_f(j, JIT_F0, JIT_F1); /* branch under test on F0, F1 */
+ jit_leave_jit_abi(j, 0, 0, align);
+ jit_reti(j, 0); /* code placed right after the branch returns 0 */
+ jit_patch_here(j, r); /* r is patched to this position (presumably the taken-branch target) */
+ jit_leave_jit_abi(j, 0, 0, align);
+ jit_reti(j, 1); /* code at the patched position returns 1 */
+
+ jit_word_t (*f)(float, float) = jit_end(j, NULL);
+
+ ASSERT(f(0, 0) == 0); /* equal operands */
+ ASSERT(f(0, 1) == 1);
+ ASSERT(f(1, 0) == 0);
+ ASSERT(f(-1, 0) == 1);
+ ASSERT(f(0, -1) == 0);
+
+ ASSERT(f(0, 0.0/0.0) == 0); /* 0.0/0.0 is NaN under IEEE 754; expects 0 */
+ ASSERT(f(0.0/0.0, 0) == 0); /* NaN as the first operand; expects 0 */
+}
+
+int
+main (int argc, char *argv[])
+{
+ return main_helper(argc, argv, run_test); /* pass run_test to main_helper and return its result */
+}
diff --git a/deps/lightening/tests/bltr_u.c b/deps/lightening/tests/bltr_u.c
new file mode 100644
index 0000000..c66f3d5
--- /dev/null
+++ b/deps/lightening/tests/bltr_u.c
@@ -0,0 +1,34 @@
+#include "test.h"
+
+static void /* Builds a two-word-argument function around jit_bltr_u and asserts its results. */
+run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size)
+{
+ jit_begin(j, arena_base, arena_size);
+ size_t align = jit_enter_jit_abi(j, 0, 0, 0);
+ jit_load_args_2(j, jit_operand_gpr (JIT_OPERAND_ABI_WORD, JIT_R0),
+ jit_operand_gpr (JIT_OPERAND_ABI_WORD, JIT_R1));
+
+ jit_reloc_t r = jit_bltr_u(j, JIT_R0, JIT_R1); /* branch under test on R0, R1 */
+ jit_leave_jit_abi(j, 0, 0, align);
+ jit_reti(j, 0); /* code placed right after the branch returns 0 */
+ jit_patch_here(j, r); /* r is patched to this position (presumably the taken-branch target) */
+ jit_leave_jit_abi(j, 0, 0, align);
+ jit_reti(j, 1); /* code at the patched position returns 1 */
+
+ jit_word_t (*f)(jit_word_t, jit_word_t) = jit_end(j, NULL);
+
+ ASSERT(f(0, 0) == 0); /* equal operands */
+ ASSERT(f(0, 1) == 1);
+ ASSERT(f(1, 0) == 0);
+ ASSERT(f(-1, 0) == 0); /* -1 (all bits set) is expected not to compare below 0 */
+ ASSERT(f(0, -1) == 1); /* 0 is expected to compare below -1 (all bits set) */
+#if __WORDSIZE == 64
+ ASSERT(f(0xff00000000, 1) == 0); /* first operand has only bits above bit 31 set */
+#endif
+}
+
+int
+main (int argc, char *argv[])
+{
+ return main_helper(argc, argv, run_test); /* pass run_test to main_helper and return its result */
+}
diff --git a/deps/lightening/tests/bmci.c b/deps/lightening/tests/bmci.c
new file mode 100644
index 0000000..e6c355a
--- /dev/null
+++ b/deps/lightening/tests/bmci.c
@@ -0,0 +1,36 @@
+#include "test.h"
+
+static void /* Builds a one-word-argument function around jit_bmci with a constant mask and asserts its results. */
+run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size)
+{
+ jit_begin(j, arena_base, arena_size);
+ size_t align = jit_enter_jit_abi(j, 0, 0, 0);
+ jit_load_args_1(j, jit_operand_gpr (JIT_OPERAND_ABI_WORD, JIT_R0));
+
+#if __WORDSIZE == 64
+ jit_reloc_t r = jit_bmci(j, JIT_R0, 0xff00000001); /* mask: bit 0 plus bits 32..39 */
+#else
+ jit_reloc_t r = jit_bmci(j, JIT_R0, 1); /* mask: bit 0 only */
+#endif
+ jit_leave_jit_abi(j, 0, 0, align);
+ jit_reti(j, 0); /* code placed right after the branch returns 0 */
+ jit_patch_here(j, r); /* r is patched to this position (presumably the taken-branch target) */
+ jit_leave_jit_abi(j, 0, 0, align);
+ jit_reti(j, 1); /* code at the patched position returns 1 */
+
+ jit_word_t (*f)(jit_word_t) = jit_end(j, NULL);
+
+ ASSERT(f(0) == 1); /* no bit in common with the mask */
+ ASSERT(f(1) == 0); /* bit 0 is in the mask */
+ ASSERT(f(-1) == 0); /* all bits set, so every mask bit is set */
+ ASSERT(f(2) == 1); /* bit 1 is not in the mask */
+#if __WORDSIZE == 64
+ ASSERT(f(0xfffffffff0) == 0); /* overlaps the mask only in bits 32..39 */
+#endif
+}
+
+int
+main (int argc, char *argv[])
+{
+ return main_helper(argc, argv, run_test); /* pass run_test to main_helper and return its result */
+}
diff --git a/deps/lightening/tests/bmcr.c b/deps/lightening/tests/bmcr.c
new file mode 100644
index 0000000..64c1ee6
--- /dev/null
+++ b/deps/lightening/tests/bmcr.c
@@ -0,0 +1,38 @@
+#include "test.h"
+
+static void /* Builds a two-word-argument function around jit_bmcr and asserts its results. */
+run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size)
+{
+ jit_begin(j, arena_base, arena_size);
+ size_t align = jit_enter_jit_abi(j, 0, 0, 0);
+ jit_load_args_2(j, jit_operand_gpr (JIT_OPERAND_ABI_WORD, JIT_R0),
+ jit_operand_gpr (JIT_OPERAND_ABI_WORD, JIT_R1));
+
+ jit_reloc_t r = jit_bmcr(j, JIT_R0, JIT_R1); /* branch under test on R0, R1 */
+ jit_leave_jit_abi(j, 0, 0, align);
+ jit_reti(j, 0); /* code placed right after the branch returns 0 */
+ jit_patch_here(j, r); /* r is patched to this position (presumably the taken-branch target) */
+ jit_leave_jit_abi(j, 0, 0, align);
+ jit_reti(j, 1); /* code at the patched position returns 1 */
+
+ jit_word_t (*f)(jit_word_t, jit_word_t) = jit_end(j, NULL);
+
+ ASSERT(f(0, 0) == 1); /* operands with no common set bit expect 1 */
+ ASSERT(f(0, 1) == 1);
+ ASSERT(f(1, 0) == 1);
+ ASSERT(f(-1, 0) == 1);
+ ASSERT(f(0, -1) == 1);
+ ASSERT(f(1, 1) == 0); /* operands sharing bit 0 expect 0 */
+ ASSERT(f(1, -1) == 0);
+ ASSERT(f(-1, 1) == 0);
+ ASSERT(f(-1, -1) == 0);
+#if __WORDSIZE == 64
+ ASSERT(f(0xffffffffff, 0xff00000000) == 0); /* operands share only bits 32..39 */
+#endif
+}
+
+int
+main (int argc, char *argv[])
+{
+ return main_helper(argc, argv, run_test); /* pass run_test to main_helper and return its result */
+}
diff --git a/deps/lightening/tests/bmsi.c b/deps/lightening/tests/bmsi.c
new file mode 100644
index 0000000..d0919f7
--- /dev/null
+++ b/deps/lightening/tests/bmsi.c
@@ -0,0 +1,36 @@
+#include "test.h"
+
+static void /* Builds a one-word-argument function around jit_bmsi with a constant mask and asserts its results. */
+run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size)
+{
+ jit_begin(j, arena_base, arena_size);
+ size_t align = jit_enter_jit_abi(j, 0, 0, 0);
+ jit_load_args_1(j, jit_operand_gpr (JIT_OPERAND_ABI_WORD, JIT_R0));
+
+#if __WORDSIZE == 64
+ jit_reloc_t r = jit_bmsi(j, JIT_R0, 0xff00000001); /* mask: bit 0 plus bits 32..39 */
+#else
+ jit_reloc_t r = jit_bmsi(j, JIT_R0, 1); /* mask: bit 0 only */
+#endif
+ jit_leave_jit_abi(j, 0, 0, align);
+ jit_reti(j, 0); /* code placed right after the branch returns 0 */
+ jit_patch_here(j, r); /* r is patched to this position (presumably the taken-branch target) */
+ jit_leave_jit_abi(j, 0, 0, align);
+ jit_reti(j, 1); /* code at the patched position returns 1 */
+
+ jit_word_t (*f)(jit_word_t) = jit_end(j, NULL);
+
+ ASSERT(f(0) == 0); /* no bit in common with the mask */
+ ASSERT(f(1) == 1); /* bit 0 is in the mask */
+ ASSERT(f(-1) == 1); /* all bits set, so every mask bit is set */
+ ASSERT(f(2) == 0); /* bit 1 is not in the mask */
+#if __WORDSIZE == 64
+ ASSERT(f(0xfffffffff0) == 1); /* overlaps the mask only in bits 32..39 */
+#endif
+}
+
+int
+main (int argc, char *argv[])
+{
+ return main_helper(argc, argv, run_test); /* pass run_test to main_helper and return its result */
+}
diff --git a/deps/lightening/tests/bmsr.c b/deps/lightening/tests/bmsr.c
new file mode 100644
index 0000000..b92eb6e
--- /dev/null
+++ b/deps/lightening/tests/bmsr.c
@@ -0,0 +1,38 @@
+#include "test.h"
+
+static void /* Builds a two-word-argument function around jit_bmsr and asserts its results. */
+run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size)
+{
+ jit_begin(j, arena_base, arena_size);
+ size_t align = jit_enter_jit_abi(j, 0, 0, 0);
+ jit_load_args_2(j, jit_operand_gpr (JIT_OPERAND_ABI_WORD, JIT_R0),
+ jit_operand_gpr (JIT_OPERAND_ABI_WORD, JIT_R1));
+
+ jit_reloc_t r = jit_bmsr(j, JIT_R0, JIT_R1); /* branch under test on R0, R1 */
+ jit_leave_jit_abi(j, 0, 0, align);
+ jit_reti(j, 0); /* code placed right after the branch returns 0 */
+ jit_patch_here(j, r); /* r is patched to this position (presumably the taken-branch target) */
+ jit_leave_jit_abi(j, 0, 0, align);
+ jit_reti(j, 1); /* code at the patched position returns 1 */
+
+ jit_word_t (*f)(jit_word_t, jit_word_t) = jit_end(j, NULL);
+
+ ASSERT(f(0, 0) == 0); /* operands with no common set bit expect 0 */
+ ASSERT(f(0, 1) == 0);
+ ASSERT(f(1, 0) == 0);
+ ASSERT(f(-1, 0) == 0);
+ ASSERT(f(0, -1) == 0);
+ ASSERT(f(1, 1) == 1); /* operands sharing bit 0 expect 1 */
+ ASSERT(f(1, -1) == 1);
+ ASSERT(f(-1, 1) == 1);
+ ASSERT(f(-1, -1) == 1);
+#if __WORDSIZE == 64
+ ASSERT(f(0xfffffffff0, 0xff00000001) == 1); /* operands share only bits 32..39 */
+#endif
+}
+
+int
+main (int argc, char *argv[])
+{
+ return main_helper(argc, argv, run_test); /* pass run_test to main_helper and return its result */
+}
diff --git a/deps/lightening/tests/bnei.c b/deps/lightening/tests/bnei.c
new file mode 100644
index 0000000..ee077e3
--- /dev/null
+++ b/deps/lightening/tests/bnei.c
@@ -0,0 +1,31 @@
+#include "test.h"
+
+static void /* Builds a one-word-argument function around jit_bnei with immediate 0 and asserts its results. */
+run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size)
+{
+ jit_begin(j, arena_base, arena_size);
+ size_t align = jit_enter_jit_abi(j, 0, 0, 0);
+ jit_load_args_1(j, jit_operand_gpr (JIT_OPERAND_ABI_WORD, JIT_R0));
+
+ jit_reloc_t r = jit_bnei(j, JIT_R0, 0); /* branch under test: R0 against immediate 0 */
+ jit_leave_jit_abi(j, 0, 0, align);
+ jit_reti(j, 0); /* code placed right after the branch returns 0 */
+ jit_patch_here(j, r); /* r is patched to this position (presumably the taken-branch target) */
+ jit_leave_jit_abi(j, 0, 0, align);
+ jit_reti(j, 1); /* code at the patched position returns 1 */
+
+ jit_word_t (*f)(jit_word_t) = jit_end(j, NULL);
+
+ ASSERT(f(0) == 0); /* argument equal to the immediate */
+ ASSERT(f(1) == 1);
+ ASSERT(f(-1) == 1);
+#if __WORDSIZE == 64
+ ASSERT(f(0xff00000000) == 1); /* differs from 0 only in bits above bit 31 */
+#endif
+}
+
+int
+main (int argc, char *argv[])
+{
+ return main_helper(argc, argv, run_test); /* pass run_test to main_helper and return its result */
+}
diff --git a/deps/lightening/tests/bner.c b/deps/lightening/tests/bner.c
new file mode 100644
index 0000000..7a8cd0f
--- /dev/null
+++ b/deps/lightening/tests/bner.c
@@ -0,0 +1,34 @@
+#include "test.h"
+
+static void /* Builds a two-word-argument function around jit_bner and asserts its results. */
+run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size)
+{
+ jit_begin(j, arena_base, arena_size);
+ size_t align = jit_enter_jit_abi(j, 0, 0, 0);
+ jit_load_args_2(j, jit_operand_gpr (JIT_OPERAND_ABI_WORD, JIT_R0),
+ jit_operand_gpr (JIT_OPERAND_ABI_WORD, JIT_R1));
+
+ jit_reloc_t r = jit_bner(j, JIT_R0, JIT_R1); /* branch under test on R0, R1 */
+ jit_leave_jit_abi(j, 0, 0, align);
+ jit_reti(j, 0); /* code placed right after the branch returns 0 */
+ jit_patch_here(j, r); /* r is patched to this position (presumably the taken-branch target) */
+ jit_leave_jit_abi(j, 0, 0, align);
+ jit_reti(j, 1); /* code at the patched position returns 1 */
+
+ jit_word_t (*f)(jit_word_t, jit_word_t) = jit_end(j, NULL);
+
+ ASSERT(f(0, 0) == 0); /* equal operands */
+ ASSERT(f(0, 1) == 1);
+ ASSERT(f(1, 0) == 1);
+ ASSERT(f(-1, 0) == 1);
+ ASSERT(f(0, -1) == 1);
+#if __WORDSIZE == 64
+ ASSERT(f(0xff00000000, 0x1000000000) == 1); /* operands differ only in bits above bit 31 */
+#endif
+}
+
+int
+main (int argc, char *argv[])
+{
+ return main_helper(argc, argv, run_test); /* pass run_test to main_helper and return its result */
+}
diff --git a/deps/lightening/tests/bner_d.c b/deps/lightening/tests/bner_d.c
new file mode 100644
index 0000000..079fda4
--- /dev/null
+++ b/deps/lightening/tests/bner_d.c
@@ -0,0 +1,36 @@
+#include "test.h"
+
+static void /* Builds a two-double-argument function around jit_bner_d and asserts its results. */
+run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size)
+{
+ jit_begin(j, arena_base, arena_size);
+ size_t align = jit_enter_jit_abi(j, 0, 0, 0);
+ jit_load_args_2(j, jit_operand_fpr (JIT_OPERAND_ABI_DOUBLE, JIT_F0),
+ jit_operand_fpr (JIT_OPERAND_ABI_DOUBLE, JIT_F1));
+
+ jit_reloc_t r = jit_bner_d(j, JIT_F0, JIT_F1); /* branch under test on F0, F1 */
+ jit_leave_jit_abi(j, 0, 0, align);
+ jit_reti(j, 0); /* code placed right after the branch returns 0 */
+ jit_patch_here(j, r); /* r is patched to this position (presumably the taken-branch target) */
+ jit_leave_jit_abi(j, 0, 0, align);
+ jit_reti(j, 1); /* code at the patched position returns 1 */
+
+ jit_word_t (*f)(double, double) = jit_end(j, NULL);
+
+ ASSERT(f(0, 0) == 0); /* equal operands */
+ ASSERT(f(0, 1) == 1);
+ ASSERT(f(1, 0) == 1);
+ ASSERT(f(-1, 0) == 1);
+ ASSERT(f(0, -1) == 1);
+ ASSERT(f(1, 1) == 0); /* equal operands */
+
+ ASSERT(f(0, 0.0/0.0) == 1); /* 0.0/0.0 is NaN under IEEE 754; expects 1 */
+ ASSERT(f(0.0/0.0, 0) == 1);
+ ASSERT(f(0.0/0.0, 0.0/0.0) == 1); /* both operands NaN still expects 1 */
+}
+
+int
+main (int argc, char *argv[])
+{
+ return main_helper(argc, argv, run_test); /* pass run_test to main_helper and return its result */
+}
diff --git a/deps/lightening/tests/bner_f.c b/deps/lightening/tests/bner_f.c
new file mode 100644
index 0000000..011df67
--- /dev/null
+++ b/deps/lightening/tests/bner_f.c
@@ -0,0 +1,36 @@
+#include "test.h"
+
+static void /* Builds a two-float-argument function around jit_bner_f and asserts its results. */
+run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size)
+{
+ jit_begin(j, arena_base, arena_size);
+ size_t align = jit_enter_jit_abi(j, 0, 0, 0);
+ jit_load_args_2(j, jit_operand_fpr (JIT_OPERAND_ABI_FLOAT, JIT_F0),
+ jit_operand_fpr (JIT_OPERAND_ABI_FLOAT, JIT_F1));
+
+ jit_reloc_t r = jit_bner_f(j, JIT_F0, JIT_F1); /* branch under test on F0, F1 */
+ jit_leave_jit_abi(j, 0, 0, align);
+ jit_reti(j, 0); /* code placed right after the branch returns 0 */
+ jit_patch_here(j, r); /* r is patched to this position (presumably the taken-branch target) */
+ jit_leave_jit_abi(j, 0, 0, align);
+ jit_reti(j, 1); /* code at the patched position returns 1 */
+
+ jit_word_t (*f)(float, float) = jit_end(j, NULL);
+
+ ASSERT(f(0, 0) == 0); /* equal operands */
+ ASSERT(f(0, 1) == 1);
+ ASSERT(f(1, 0) == 1);
+ ASSERT(f(-1, 0) == 1);
+ ASSERT(f(0, -1) == 1);
+ ASSERT(f(1, 1) == 0); /* equal operands */
+
+ ASSERT(f(0, 0.0/0.0) == 1); /* 0.0/0.0 is NaN under IEEE 754; expects 1 */
+ ASSERT(f(0.0/0.0, 0) == 1);
+ ASSERT(f(0.0/0.0, 0.0/0.0) == 1); /* both operands NaN still expects 1 */
+}
+
+int
+main (int argc, char *argv[])
+{
+ return main_helper(argc, argv, run_test); /* pass run_test to main_helper and return its result */
+}
diff --git a/deps/lightening/tests/boaddi.c b/deps/lightening/tests/boaddi.c
new file mode 100644
index 0000000..1e47297
--- /dev/null
+++ b/deps/lightening/tests/boaddi.c
@@ -0,0 +1,41 @@
+#include "test.h"
+
+static const jit_word_t overflowed = 0xcabba9e5; /* sentinel loaded into R0 on the patched path */
+
+static void /* Builds a one-word-argument function around jit_boaddi with immediate 1 and asserts its results. */
+run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size)
+{
+ jit_begin(j, arena_base, arena_size);
+ size_t align = jit_enter_jit_abi(j, 0, 0, 0);
+ jit_load_args_1(j, jit_operand_gpr (JIT_OPERAND_ABI_WORD, JIT_R0));
+
+ jit_reloc_t r = jit_boaddi(j, JIT_R0, 1); /* operation under test: R0 with immediate 1 */
+ jit_leave_jit_abi(j, 0, 0, align);
+ jit_retr(j, JIT_R0); /* code placed right after the branch returns R0 */
+ jit_patch_here(j, r); /* r is patched to this position (presumably the taken-branch target) */
+ jit_movi(j, JIT_R0, overflowed); /* patched path replaces R0 with the sentinel */
+ jit_leave_jit_abi(j, 0, 0, align);
+ jit_retr(j, JIT_R0);
+
+ jit_word_t (*f)(jit_word_t) = jit_end(j, NULL);
+
+ ASSERT(f(-1) == 0); /* expects the plain sum -1 + 1 */
+ ASSERT(f(0) == 1);
+ ASSERT(f(1) == 2);
+
+#if __WORDSIZE == 32
+ ASSERT(f(0x7fffffff) == overflowed); /* largest positive 32-bit value plus 1 expects the sentinel */
+ ASSERT(f(0x80000000) == 0x80000001);
+ ASSERT(f(0xffffffff) == 0); /* all bits set plus 1 expects 0, not the sentinel */
+#else
+ ASSERT(f(0x7fffffffffffffff) == overflowed); /* largest positive 64-bit value plus 1 expects the sentinel */
+ ASSERT(f(0x8000000000000000) == 0x8000000000000001);
+ ASSERT(f(0xffffffffffffffff) == 0); /* all bits set plus 1 expects 0, not the sentinel */
+#endif
+}
+
+int
+main (int argc, char *argv[])
+{
+ return main_helper(argc, argv, run_test); /* pass run_test to main_helper and return its result */
+}
diff --git a/deps/lightening/tests/boaddi_u.c b/deps/lightening/tests/boaddi_u.c
new file mode 100644
index 0000000..21c71df
--- /dev/null
+++ b/deps/lightening/tests/boaddi_u.c
@@ -0,0 +1,41 @@
+#include "test.h"
+
+static const jit_word_t overflowed = 0xcabba9e5; /* sentinel loaded into R0 on the patched path */
+
+static void /* Builds a one-word-argument function around jit_boaddi_u with immediate 1 and asserts its results. */
+run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size)
+{
+ jit_begin(j, arena_base, arena_size);
+ size_t align = jit_enter_jit_abi(j, 0, 0, 0);
+ jit_load_args_1(j, jit_operand_gpr (JIT_OPERAND_ABI_WORD, JIT_R0));
+
+ jit_reloc_t r = jit_boaddi_u(j, JIT_R0, 1); /* operation under test: R0 with immediate 1 */
+ jit_leave_jit_abi(j, 0, 0, align);
+ jit_retr(j, JIT_R0); /* code placed right after the branch returns R0 */
+ jit_patch_here(j, r); /* r is patched to this position (presumably the taken-branch target) */
+ jit_movi(j, JIT_R0, overflowed); /* patched path replaces R0 with the sentinel */
+ jit_leave_jit_abi(j, 0, 0, align);
+ jit_retr(j, JIT_R0);
+
+ jit_word_t (*f)(jit_word_t) = jit_end(j, NULL);
+
+ ASSERT(f(-1) == overflowed); /* -1 (all bits set) plus 1 expects the sentinel */
+ ASSERT(f(0) == 1);
+ ASSERT(f(1) == 2);
+
+#if __WORDSIZE == 32
+ ASSERT(f(0x7fffffff) == 0x80000000); /* crossing into the top bit expects the plain sum */
+ ASSERT(f(0x80000000) == 0x80000001);
+ ASSERT(f(0xffffffff) == overflowed); /* all bits set plus 1 expects the sentinel */
+#else
+ ASSERT(f(0x7fffffffffffffff) == 0x8000000000000000); /* crossing into the top bit expects the plain sum */
+ ASSERT(f(0x8000000000000000) == 0x8000000000000001);
+ ASSERT(f(0xffffffffffffffff) == overflowed); /* all bits set plus 1 expects the sentinel */
+#endif
+}
+
+int
+main (int argc, char *argv[])
+{
+ return main_helper(argc, argv, run_test); /* pass run_test to main_helper and return its result */
+}
diff --git a/deps/lightening/tests/boaddr.c b/deps/lightening/tests/boaddr.c
new file mode 100644
index 0000000..8bab91e
--- /dev/null
+++ b/deps/lightening/tests/boaddr.c
@@ -0,0 +1,51 @@
+#include "test.h"
+
+static const jit_word_t overflowed = 0xcabba9e5; /* sentinel loaded into R0 on the patched path */
+
+static void /* Builds a two-word-argument function around jit_boaddr and asserts its results. */
+run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size)
+{
+ jit_begin(j, arena_base, arena_size);
+ size_t align = jit_enter_jit_abi(j, 0, 0, 0);
+ jit_load_args_2(j, jit_operand_gpr (JIT_OPERAND_ABI_WORD, JIT_R0),
+ jit_operand_gpr (JIT_OPERAND_ABI_WORD, JIT_R1));
+
+ jit_reloc_t r = jit_boaddr(j, JIT_R0, JIT_R1); /* operation under test on R0, R1 */
+ jit_leave_jit_abi(j, 0, 0, align);
+ jit_retr(j, JIT_R0); /* code placed right after the branch returns R0 */
+ jit_patch_here(j, r); /* r is patched to this position (presumably the taken-branch target) */
+ jit_movi(j, JIT_R0, overflowed); /* patched path replaces R0 with the sentinel */
+ jit_leave_jit_abi(j, 0, 0, align);
+ jit_retr(j, JIT_R0);
+
+ jit_word_t (*f)(jit_word_t, jit_word_t) = jit_end(j, NULL);
+
+ ASSERT(f(0, 0) == 0); /* small operands expect the plain sum */
+ ASSERT(f(1, 1) == 2);
+
+#if __WORDSIZE == 32
+ ASSERT(f(0xffffffff, 0xffffffff) == -2); /* two all-bits-set operands expect -2 */
+ ASSERT(f(0x7fffffff, 0) == 0x7fffffff);
+ ASSERT(f(0x7fffffff, 1) == overflowed); /* largest positive value plus 1 expects the sentinel */
+ ASSERT(f(0x7fffffff, 0x7fffffff) == overflowed);
+ ASSERT(f(0x7fffffff, 0x80000000) == -1);
+ ASSERT(f(0x80000000, 0x80000000) == overflowed); /* two top-bit-only operands expect the sentinel */
+#else
+ ASSERT(f(0xffffffff, 0xffffffff) == 0xffffffffull + 0xffffffffull); /* 32-bit-range operands expect the plain sum on 64-bit words */
+ ASSERT(f(0x7fffffff, 1) == 0x80000000);
+ ASSERT(f(0x7fffffff, 0x7fffffff) == 0x7fffffffull + 0x7fffffffull);
+ ASSERT(f(0x7fffffff, 0x80000000) == 0xffffffff);
+ ASSERT(f(0x80000000, 0x80000000) == 0x100000000);
+ ASSERT(f(0xffffffffffffffff, 0xffffffffffffffff) == -2); /* two all-bits-set operands expect -2 */
+ ASSERT(f(0x7fffffffffffffff, 1) == overflowed); /* largest positive value plus 1 expects the sentinel */
+ ASSERT(f(0x7fffffffffffffff, 0x7fffffffffffffff) == overflowed);
+ ASSERT(f(0x7fffffffffffffff, 0x8000000000000000) == -1);
+ ASSERT(f(0x8000000000000000, 0x8000000000000000) == overflowed); /* two top-bit-only operands expect the sentinel */
+#endif
+}
+
+int
+main (int argc, char *argv[])
+{
+ return main_helper(argc, argv, run_test); /* pass run_test to main_helper and return its result */
+}
diff --git a/deps/lightening/tests/boaddr_u.c b/deps/lightening/tests/boaddr_u.c
new file mode 100644
index 0000000..f4bacde
--- /dev/null
+++ b/deps/lightening/tests/boaddr_u.c
@@ -0,0 +1,51 @@
+#include "test.h"
+
+static const jit_word_t overflowed = 0xcabba9e5; /* sentinel loaded into R0 on the patched path */
+
+static void /* Builds a two-word-argument function around jit_boaddr_u and asserts its results. */
+run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size)
+{
+ jit_begin(j, arena_base, arena_size);
+ size_t align = jit_enter_jit_abi(j, 0, 0, 0);
+ jit_load_args_2(j, jit_operand_gpr (JIT_OPERAND_ABI_WORD, JIT_R0),
+ jit_operand_gpr (JIT_OPERAND_ABI_WORD, JIT_R1));
+
+ jit_reloc_t r = jit_boaddr_u(j, JIT_R0, JIT_R1); /* operation under test on R0, R1 */
+ jit_leave_jit_abi(j, 0, 0, align);
+ jit_retr(j, JIT_R0); /* code placed right after the branch returns R0 */
+ jit_patch_here(j, r); /* r is patched to this position (presumably the taken-branch target) */
+ jit_movi(j, JIT_R0, overflowed); /* patched path replaces R0 with the sentinel */
+ jit_leave_jit_abi(j, 0, 0, align);
+ jit_retr(j, JIT_R0);
+
+ jit_word_t (*f)(jit_word_t, jit_word_t) = jit_end(j, NULL);
+
+ ASSERT(f(0, 0) == 0); /* small operands expect the plain sum */
+ ASSERT(f(1, 1) == 2);
+
+#if __WORDSIZE == 32
+ ASSERT(f(0xffffffff, 0xffffffff) == overflowed); /* two all-bits-set operands expect the sentinel */
+ ASSERT(f(0x7fffffff, 0) == 0x7fffffff);
+ ASSERT(f(0x7fffffff, 1) == 0x80000000); /* crossing into the top bit expects the plain sum */
+ ASSERT(f(0x7fffffff, 0x7fffffff) == 0x7fffffffu + 0x7fffffffu);
+ ASSERT(f(0x7fffffff, 0x80000000) == 0xffffffff);
+ ASSERT(f(0x80000000, 0x80000000) == overflowed); /* two top-bit-only operands expect the sentinel */
+#else
+ ASSERT(f(0xffffffff, 0xffffffff) == 0xffffffffull + 0xffffffffull); /* 32-bit-range operands expect the plain sum on 64-bit words */
+ ASSERT(f(0x7fffffff, 1) == 0x80000000);
+ ASSERT(f(0x7fffffff, 0x7fffffff) == 0x7fffffffull + 0x7fffffffull);
+ ASSERT(f(0x7fffffff, 0x80000000) == 0xffffffff);
+ ASSERT(f(0x80000000, 0x80000000) == 0x100000000);
+ ASSERT(f(0xffffffffffffffff, 0xffffffffffffffff) == overflowed); /* two all-bits-set operands expect the sentinel */
+ ASSERT(f(0x7fffffffffffffff, 1) == 0x8000000000000000); /* crossing into the top bit expects the plain sum */
+ ASSERT(f(0x7fffffffffffffff, 0x7fffffffffffffff) == -2);
+ ASSERT(f(0x7fffffffffffffff, 0x8000000000000000) == -1);
+ ASSERT(f(0x8000000000000000, 0x8000000000000000) == overflowed); /* two top-bit-only operands expect the sentinel */
+#endif
+}
+
+int
+main (int argc, char *argv[])
+{
+ return main_helper(argc, argv, run_test); /* pass run_test to main_helper and return its result */
+}
diff --git a/deps/lightening/tests/bordr_d.c b/deps/lightening/tests/bordr_d.c
new file mode 100644
index 0000000..9227f22
--- /dev/null
+++ b/deps/lightening/tests/bordr_d.c
@@ -0,0 +1,36 @@
+#include "test.h"
+
+static void /* Builds a two-double-argument function around jit_bordr_d and asserts its results. */
+run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size)
+{
+ jit_begin(j, arena_base, arena_size);
+ size_t align = jit_enter_jit_abi(j, 0, 0, 0);
+ jit_load_args_2(j, jit_operand_fpr (JIT_OPERAND_ABI_DOUBLE, JIT_F0),
+ jit_operand_fpr (JIT_OPERAND_ABI_DOUBLE, JIT_F1));
+
+ jit_reloc_t r = jit_bordr_d(j, JIT_F0, JIT_F1); /* branch under test on F0, F1 */
+ jit_leave_jit_abi(j, 0, 0, align);
+ jit_reti(j, 0); /* code placed right after the branch returns 0 */
+ jit_patch_here(j, r); /* r is patched to this position (presumably the taken-branch target) */
+ jit_leave_jit_abi(j, 0, 0, align);
+ jit_reti(j, 1); /* code at the patched position returns 1 */
+
+ jit_word_t (*f)(double, double) = jit_end(j, NULL);
+
+ ASSERT(f(0, 0) == 1); /* every pair without a NaN expects 1 */
+ ASSERT(f(0, 1) == 1);
+ ASSERT(f(1, 0) == 1);
+ ASSERT(f(-1, 0) == 1);
+ ASSERT(f(0, -1) == 1);
+ ASSERT(f(1, 1) == 1);
+
+ ASSERT(f(0, 0.0/0.0) == 0); /* 0.0/0.0 is NaN under IEEE 754; any NaN operand expects 0 */
+ ASSERT(f(0.0/0.0, 0) == 0);
+ ASSERT(f(0.0/0.0, 0.0/0.0) == 0);
+}
+
+int
+main (int argc, char *argv[])
+{
+ return main_helper(argc, argv, run_test); /* pass run_test to main_helper and return its result */
+}
diff --git a/deps/lightening/tests/bordr_f.c b/deps/lightening/tests/bordr_f.c
new file mode 100644
index 0000000..25808e5
--- /dev/null
+++ b/deps/lightening/tests/bordr_f.c
@@ -0,0 +1,36 @@
+#include "test.h"
+
+static void /* Builds a two-float-argument function around jit_bordr_f and asserts its results. */
+run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size)
+{
+ jit_begin(j, arena_base, arena_size);
+ size_t align = jit_enter_jit_abi(j, 0, 0, 0);
+ jit_load_args_2(j, jit_operand_fpr (JIT_OPERAND_ABI_FLOAT, JIT_F0),
+ jit_operand_fpr (JIT_OPERAND_ABI_FLOAT, JIT_F1));
+
+ jit_reloc_t r = jit_bordr_f(j, JIT_F0, JIT_F1); /* branch under test on F0, F1 */
+ jit_leave_jit_abi(j, 0, 0, align);
+ jit_reti(j, 0); /* code placed right after the branch returns 0 */
+ jit_patch_here(j, r); /* r is patched to this position (presumably the taken-branch target) */
+ jit_leave_jit_abi(j, 0, 0, align);
+ jit_reti(j, 1); /* code at the patched position returns 1 */
+
+ jit_word_t (*f)(float, float) = jit_end(j, NULL);
+
+ ASSERT(f(0, 0) == 1); /* every pair without a NaN expects 1 */
+ ASSERT(f(0, 1) == 1);
+ ASSERT(f(1, 0) == 1);
+ ASSERT(f(-1, 0) == 1);
+ ASSERT(f(0, -1) == 1);
+ ASSERT(f(1, 1) == 1);
+
+ ASSERT(f(0, 0.0/0.0) == 0); /* 0.0/0.0 is NaN under IEEE 754; any NaN operand expects 0 */
+ ASSERT(f(0.0/0.0, 0) == 0);
+ ASSERT(f(0.0/0.0, 0.0/0.0) == 0);
+}
+
+int
+main (int argc, char *argv[])
+{
+ return main_helper(argc, argv, run_test); /* pass run_test to main_helper and return its result */
+}
diff --git a/deps/lightening/tests/bosubi.c b/deps/lightening/tests/bosubi.c
new file mode 100644
index 0000000..f10d90a
--- /dev/null
+++ b/deps/lightening/tests/bosubi.c
@@ -0,0 +1,41 @@
+#include "test.h"
+
+static const jit_word_t overflowed = 0xcabba9e5; /* sentinel loaded into R0 on the patched path */
+
+static void /* Builds a one-word-argument function around jit_bosubi with immediate 1 and asserts its results. */
+run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size)
+{
+ jit_begin(j, arena_base, arena_size);
+ size_t align = jit_enter_jit_abi(j, 0, 0, 0);
+ jit_load_args_1(j, jit_operand_gpr (JIT_OPERAND_ABI_WORD, JIT_R0));
+
+ jit_reloc_t r = jit_bosubi(j, JIT_R0, 1); /* operation under test: R0 with immediate 1 */
+ jit_leave_jit_abi(j, 0, 0, align);
+ jit_retr(j, JIT_R0); /* code placed right after the branch returns R0 */
+ jit_patch_here(j, r); /* r is patched to this position (presumably the taken-branch target) */
+ jit_movi(j, JIT_R0, overflowed); /* patched path replaces R0 with the sentinel */
+ jit_leave_jit_abi(j, 0, 0, align);
+ jit_retr(j, JIT_R0);
+
+ jit_word_t (*f)(jit_word_t) = jit_end(j, NULL);
+
+ ASSERT(f(-1) == -2); /* small arguments expect the plain difference */
+ ASSERT(f(0) == -1);
+ ASSERT(f(1) == 0);
+
+#if __WORDSIZE == 32
+ ASSERT(f(0x7fffffff) == 0x7ffffffe);
+ ASSERT(f(0x80000000) == overflowed); /* top-bit-only value minus 1 expects the sentinel */
+ ASSERT(f(0x80000001) == 0x80000000);
+#else
+ ASSERT(f(0x7fffffffffffffff) == 0x7ffffffffffffffe);
+ ASSERT(f(0x8000000000000000) == overflowed); /* top-bit-only value minus 1 expects the sentinel */
+ ASSERT(f(0x8000000000000001) == 0x8000000000000000);
+#endif
+}
+
+int
+main (int argc, char *argv[])
+{
+ return main_helper(argc, argv, run_test); /* pass run_test to main_helper and return its result */
+}
diff --git a/deps/lightening/tests/bosubi_u.c b/deps/lightening/tests/bosubi_u.c
new file mode 100644
index 0000000..50af6ad
--- /dev/null
+++ b/deps/lightening/tests/bosubi_u.c
@@ -0,0 +1,37 @@
+#include "test.h"
+
+static const jit_word_t overflowed = 0xcabba9e5; /* sentinel loaded into R0 on the patched path */
+
+static void /* Builds a one-word-argument function around jit_bosubi_u with immediate 1 and asserts its results. */
+run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size)
+{
+ jit_begin(j, arena_base, arena_size);
+ size_t align = jit_enter_jit_abi(j, 0, 0, 0);
+ jit_load_args_1(j, jit_operand_gpr (JIT_OPERAND_ABI_WORD, JIT_R0));
+
+ jit_reloc_t r = jit_bosubi_u(j, JIT_R0, 1); /* operation under test: R0 with immediate 1 */
+ jit_leave_jit_abi(j, 0, 0, align);
+ jit_retr(j, JIT_R0); /* code placed right after the branch returns R0 */
+ jit_patch_here(j, r); /* r is patched to this position (presumably the taken-branch target) */
+ jit_movi(j, JIT_R0, overflowed); /* patched path replaces R0 with the sentinel */
+ jit_leave_jit_abi(j, 0, 0, align);
+ jit_retr(j, JIT_R0);
+
+ jit_word_t (*f)(jit_word_t) = jit_end(j, NULL);
+
+ ASSERT(f(-1) == -2); /* -1 (all bits set) minus 1 expects the plain difference */
+ ASSERT(f(0) == overflowed); /* 0 minus 1 expects the sentinel */
+ ASSERT(f(1) == 0);
+
+#if __WORDSIZE == 32
+ ASSERT(f(0x80000000) == 0x7fffffff); /* top-bit-only value minus 1 expects the plain difference */
+#else
+ ASSERT(f(0x8000000000000000) == 0x7fffffffffffffff); /* top-bit-only value minus 1 expects the plain difference */
+#endif
+}
+
+int
+main (int argc, char *argv[])
+{
+ return main_helper(argc, argv, run_test); /* pass run_test to main_helper and return its result */
+}
diff --git a/deps/lightening/tests/bosubr.c b/deps/lightening/tests/bosubr.c
new file mode 100644
index 0000000..cf68ad6
--- /dev/null
+++ b/deps/lightening/tests/bosubr.c
@@ -0,0 +1,48 @@
+#include "test.h"
+
+static const jit_word_t overflowed = 0xcabba9e5; /* sentinel loaded into R0 on the patched path */
+
+static void /* Builds a two-word-argument function around jit_bosubr and asserts its results. */
+run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size)
+{
+ jit_begin(j, arena_base, arena_size);
+ size_t align = jit_enter_jit_abi(j, 0, 0, 0);
+ jit_load_args_2(j, jit_operand_gpr (JIT_OPERAND_ABI_WORD, JIT_R0),
+ jit_operand_gpr (JIT_OPERAND_ABI_WORD, JIT_R1));
+
+ jit_reloc_t r = jit_bosubr(j, JIT_R0, JIT_R1); /* operation under test on R0, R1 */
+ jit_leave_jit_abi(j, 0, 0, align);
+ jit_retr(j, JIT_R0); /* code placed right after the branch returns R0 */
+ jit_patch_here(j, r); /* r is patched to this position (presumably the taken-branch target) */
+ jit_movi(j, JIT_R0, overflowed); /* patched path replaces R0 with the sentinel */
+ jit_leave_jit_abi(j, 0, 0, align);
+ jit_retr(j, JIT_R0);
+
+ jit_word_t (*f)(jit_word_t, jit_word_t) = jit_end(j, NULL);
+
+ ASSERT(f(0, 0) == 0); /* small operands expect the plain difference */
+ ASSERT(f(0, 1) == -1);
+ ASSERT(f(1, 1) == 0);
+ ASSERT(f(1, -1) == 2);
+
+#if __WORDSIZE == 32
+ ASSERT(f(0xffffffff, 0xffffffff) == 0);
+ ASSERT(f(0x7fffffff, 0) == 0x7fffffff);
+ ASSERT(f(0x7fffffff, 1) == 0x7ffffffe);
+ ASSERT(f(0x7fffffff, 0x7fffffff) == 0);
+ ASSERT(f(0x80000000, 0x7fffffff) == overflowed); /* top-bit-only minus largest positive expects the sentinel */
+ ASSERT(f(0x7fffffff, 0x80000000) == overflowed); /* largest positive minus top-bit-only expects the sentinel */
+ ASSERT(f(0x80000000, 0x80000000) == 0);
+#else
+ ASSERT(f(0x7fffffffffffffff, 0x7fffffffffffffff) == 0);
+ ASSERT(f(0x7fffffffffffffff, 0x8000000000000000) == overflowed); /* largest positive minus top-bit-only expects the sentinel */
+ ASSERT(f(0x8000000000000000, 0x7fffffffffffffff) == overflowed); /* top-bit-only minus largest positive expects the sentinel */
+ ASSERT(f(0x8000000000000000, 0x8000000000000000) == 0);
+#endif
+}
+
+int
+main (int argc, char *argv[])
+{
+ return main_helper(argc, argv, run_test); /* pass run_test to main_helper and return its result */
+}
diff --git a/deps/lightening/tests/bosubr_u.c b/deps/lightening/tests/bosubr_u.c
new file mode 100644
index 0000000..b5e6b39
--- /dev/null
+++ b/deps/lightening/tests/bosubr_u.c
@@ -0,0 +1,47 @@
+#include "test.h"
+
+static const jit_word_t overflowed = 0xcabba9e5; /* sentinel loaded into R0 on the patched path */
+
+static void /* Builds a two-word-argument function around jit_bosubr_u and asserts its results. */
+run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size)
+{
+ jit_begin(j, arena_base, arena_size);
+ size_t align = jit_enter_jit_abi(j, 0, 0, 0);
+ jit_load_args_2(j, jit_operand_gpr (JIT_OPERAND_ABI_WORD, JIT_R0),
+ jit_operand_gpr (JIT_OPERAND_ABI_WORD, JIT_R1));
+
+ jit_reloc_t r = jit_bosubr_u(j, JIT_R0, JIT_R1); /* operation under test on R0, R1 */
+ jit_leave_jit_abi(j, 0, 0, align);
+ jit_retr(j, JIT_R0); /* code placed right after the branch returns R0 */
+ jit_patch_here(j, r); /* r is patched to this position (presumably the taken-branch target) */
+ jit_movi(j, JIT_R0, overflowed); /* patched path replaces R0 with the sentinel */
+ jit_leave_jit_abi(j, 0, 0, align);
+ jit_retr(j, JIT_R0);
+
+ jit_word_t (*f)(jit_word_t, jit_word_t) = jit_end(j, NULL);
+
+ ASSERT(f(0, 0) == 0);
+ ASSERT(f(1, 1) == 0);
+ ASSERT(f(0, 1) == overflowed); /* 0 minus 1 expects the sentinel */
+ ASSERT(f(1, 0) == 1);
+
+#if __WORDSIZE == 32
+ ASSERT(f(0xffffffff, 0xffffffff) == 0);
+ ASSERT(f(0x7fffffff, 0) == 0x7fffffff);
+ ASSERT(f(0x7fffffff, 1) == 0x7ffffffe);
+ ASSERT(f(0x7fffffff, 0x7fffffff) == 0);
+ ASSERT(f(0x7fffffff, 0x80000000) == overflowed); /* second operand has the top bit set and the first does not */
+ ASSERT(f(0x80000000, 0x80000000) == 0);
+#else
+ ASSERT(f(0xffffffffffffffff, 0xffffffffffffffff) == 0);
+ ASSERT(f(0x7fffffffffffffff, 0x7fffffffffffffff) == 0);
+ ASSERT(f(0x7fffffffffffffff, 0x8000000000000000) == overflowed); /* second operand has the top bit set and the first does not */
+ ASSERT(f(0x8000000000000000, 0x8000000000000000) == 0);
+#endif
+}
+
+int
+main (int argc, char *argv[])
+{
+ return main_helper(argc, argv, run_test); /* pass run_test to main_helper and return its result */
+}
diff --git a/deps/lightening/tests/bswapr_ui.c b/deps/lightening/tests/bswapr_ui.c
new file mode 100644
index 0000000..c1eb9fd
--- /dev/null
+++ b/deps/lightening/tests/bswapr_ui.c
@@ -0,0 +1,28 @@
+#include "test.h"
+
+static void
+run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size) // emits f(x) = result of jit_bswapr_ui on x and checks sample values
+{
+ jit_begin(j, arena_base, arena_size);
+ size_t align = jit_enter_jit_abi(j, 0, 0, 0);
+ jit_load_args_1(j, jit_operand_gpr (JIT_OPERAND_ABI_WORD, JIT_R1));
+
+ jit_bswapr_ui(j, JIT_R0, JIT_R1); // instruction under test: source R1, destination R0
+ jit_leave_jit_abi(j, 0, 0, align);
+ jit_retr(j, JIT_R0);
+
+ jit_uword_t (*f)(jit_uword_t) = jit_end(j, NULL);
+
+ ASSERT(f(0) == 0);
+ ASSERT(f(0x12345678) == 0x78563412);
+#if __WORDSIZE > 32 // inputs wider than 32 bits only exist on wider targets
+ ASSERT(f(0xff12345678) == 0x78563412); // bits above the low 32 are expected not to reach the result
+ ASSERT(f(0xff00000000) == 0);
+#endif
+}
+
+int
+main (int argc, char *argv[])
+{
+ return main_helper(argc, argv, run_test); // delegate to main_helper with this file's run_test; its result is the exit status
+}
diff --git a/deps/lightening/tests/bswapr_ul.c b/deps/lightening/tests/bswapr_ul.c
new file mode 100644
index 0000000..a3a11b3
--- /dev/null
+++ b/deps/lightening/tests/bswapr_ul.c
@@ -0,0 +1,27 @@
+#include "test.h"
+
+static void
+run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size) // checks jit_bswapr_ul; does nothing at all on 32-bit targets
+{
+#if __WORDSIZE > 32 // the whole body is compiled only when the word is wider than 32 bits
+ jit_begin(j, arena_base, arena_size);
+ size_t align = jit_enter_jit_abi(j, 0, 0, 0);
+ jit_load_args_1(j, jit_operand_gpr (JIT_OPERAND_ABI_WORD, JIT_R1));
+
+ jit_bswapr_ul(j, JIT_R0, JIT_R1); // instruction under test: source R1, destination R0
+ jit_leave_jit_abi(j, 0, 0, align);
+ jit_retr(j, JIT_R0);
+
+ jit_uword_t (*f)(jit_uword_t) = jit_end(j, NULL);
+
+ ASSERT(f(0) == 0);
+ ASSERT(f(0x12345678) == 0x7856341200000000); // all eight bytes are reversed
+ ASSERT(f(0xff12345678) == 0x78563412ff000000);
+#endif
+}
+
+int
+main (int argc, char *argv[])
+{
+ return main_helper(argc, argv, run_test); // delegate to main_helper with this file's run_test; its result is the exit status
+}
diff --git a/deps/lightening/tests/bswapr_us.c b/deps/lightening/tests/bswapr_us.c
new file mode 100644
index 0000000..0ff777e
--- /dev/null
+++ b/deps/lightening/tests/bswapr_us.c
@@ -0,0 +1,24 @@
+#include "test.h"
+
+static void
+run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size) // emits f(x) = result of jit_bswapr_us on x and checks sample values
+{
+ jit_begin(j, arena_base, arena_size);
+ size_t align = jit_enter_jit_abi(j, 0, 0, 0);
+ jit_load_args_1(j, jit_operand_gpr (JIT_OPERAND_ABI_WORD, JIT_R1));
+
+ jit_bswapr_us(j, JIT_R0, JIT_R1); // instruction under test: source R1, destination R0
+ jit_leave_jit_abi(j, 0, 0, align);
+ jit_retr(j, JIT_R0);
+
+ jit_uword_t (*f)(jit_uword_t) = jit_end(j, NULL);
+
+ ASSERT(f(0) == 0);
+ ASSERT(f(0x12345678) == 0x7856); // only the low 16 bits (0x5678) are swapped; nothing above them survives
+}
+
+int
+main (int argc, char *argv[])
+{
+ return main_helper(argc, argv, run_test); // delegate to main_helper with this file's run_test; its result is the exit status
+}
diff --git a/deps/lightening/tests/buneqr_d.c b/deps/lightening/tests/buneqr_d.c
new file mode 100644
index 0000000..1d08e32
--- /dev/null
+++ b/deps/lightening/tests/buneqr_d.c
@@ -0,0 +1,35 @@
+#include "test.h"
+
+static void
+run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size) // builds f(a, b) around jit_buneqr_d and checks which inputs take the branch
+{
+ jit_begin(j, arena_base, arena_size);
+ size_t align = jit_enter_jit_abi(j, 0, 0, 0);
+ jit_load_args_2(j, jit_operand_fpr (JIT_OPERAND_ABI_DOUBLE, JIT_F0),
+ jit_operand_fpr (JIT_OPERAND_ABI_DOUBLE, JIT_F1));
+
+ jit_reloc_t r = jit_buneqr_d(j, JIT_F0, JIT_F1); // branch under test; target patched below
+ jit_leave_jit_abi(j, 0, 0, align);
+ jit_reti(j, 0); // fall-through: branch not taken, asserts expect 0
+ jit_patch_here(j, r); // branch target
+ jit_leave_jit_abi(j, 0, 0, align);
+ jit_reti(j, 1); // branch taken, asserts expect 1
+
+ jit_word_t (*f)(double, double) = jit_end(j, NULL);
+
+ ASSERT(f(0, 0) == 1);
+ ASSERT(f(0, 1) == 0);
+ ASSERT(f(1, 0) == 0);
+ ASSERT(f(-1, 0) == 0);
+ ASSERT(f(0, -1) == 0);
+ ASSERT(f(1, 1) == 1);
+
+ ASSERT(f(0, 0.0/0.0) == 1); // 0.0/0.0 is NaN under IEEE 754; an unordered pair is expected to take the branch
+ ASSERT(f(0.0/0.0, 0) == 1);
+}
+
+int
+main (int argc, char *argv[])
+{
+ return main_helper(argc, argv, run_test); // delegate to main_helper with this file's run_test; its result is the exit status
+}
diff --git a/deps/lightening/tests/buneqr_f.c b/deps/lightening/tests/buneqr_f.c
new file mode 100644
index 0000000..49d9062
--- /dev/null
+++ b/deps/lightening/tests/buneqr_f.c
@@ -0,0 +1,35 @@
+#include "test.h"
+
+static void
+run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size) // builds f(a, b) around jit_buneqr_f and checks which inputs take the branch
+{
+ jit_begin(j, arena_base, arena_size);
+ size_t align = jit_enter_jit_abi(j, 0, 0, 0);
+ jit_load_args_2(j, jit_operand_fpr (JIT_OPERAND_ABI_FLOAT, JIT_F0),
+ jit_operand_fpr (JIT_OPERAND_ABI_FLOAT, JIT_F1));
+
+ jit_reloc_t r = jit_buneqr_f(j, JIT_F0, JIT_F1); // branch under test; target patched below
+ jit_leave_jit_abi(j, 0, 0, align);
+ jit_reti(j, 0); // fall-through: branch not taken, asserts expect 0
+ jit_patch_here(j, r); // branch target
+ jit_leave_jit_abi(j, 0, 0, align);
+ jit_reti(j, 1); // branch taken, asserts expect 1
+
+ jit_word_t (*f)(float, float) = jit_end(j, NULL);
+
+ ASSERT(f(0, 0) == 1);
+ ASSERT(f(0, 1) == 0);
+ ASSERT(f(1, 0) == 0);
+ ASSERT(f(-1, 0) == 0);
+ ASSERT(f(0, -1) == 0);
+ ASSERT(f(1, 1) == 1);
+
+ ASSERT(f(0, 0.0/0.0) == 1); // 0.0/0.0 is NaN under IEEE 754; an unordered pair is expected to take the branch
+ ASSERT(f(0.0/0.0, 0) == 1);
+}
+
+int
+main (int argc, char *argv[])
+{
+ return main_helper(argc, argv, run_test); // delegate to main_helper with this file's run_test; its result is the exit status
+}
diff --git a/deps/lightening/tests/bunger_d.c b/deps/lightening/tests/bunger_d.c
new file mode 100644
index 0000000..57888af
--- /dev/null
+++ b/deps/lightening/tests/bunger_d.c
@@ -0,0 +1,34 @@
+#include "test.h"
+
+static void
+run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size) // builds f(a, b) around jit_bunger_d and checks which inputs take the branch
+{
+ jit_begin(j, arena_base, arena_size);
+ size_t align = jit_enter_jit_abi(j, 0, 0, 0);
+ jit_load_args_2(j, jit_operand_fpr (JIT_OPERAND_ABI_DOUBLE, JIT_F0),
+ jit_operand_fpr (JIT_OPERAND_ABI_DOUBLE, JIT_F1));
+
+ jit_reloc_t r = jit_bunger_d(j, JIT_F0, JIT_F1); // branch under test; target patched below
+ jit_leave_jit_abi(j, 0, 0, align);
+ jit_reti(j, 0); // fall-through: branch not taken, asserts expect 0
+ jit_patch_here(j, r); // branch target
+ jit_leave_jit_abi(j, 0, 0, align);
+ jit_reti(j, 1); // branch taken, asserts expect 1
+
+ jit_word_t (*f)(double, double) = jit_end(j, NULL);
+
+ ASSERT(f(0, 0) == 1);
+ ASSERT(f(0, 1) == 0);
+ ASSERT(f(1, 0) == 1);
+ ASSERT(f(-1, 0) == 0);
+ ASSERT(f(0, -1) == 1);
+
+ ASSERT(f(0, 0.0/0.0) == 1); // 0.0/0.0 is NaN under IEEE 754; an unordered pair is expected to take the branch
+ ASSERT(f(0.0/0.0, 0) == 1);
+}
+
+int
+main (int argc, char *argv[])
+{
+ return main_helper(argc, argv, run_test); // delegate to main_helper with this file's run_test; its result is the exit status
+}
diff --git a/deps/lightening/tests/bunger_f.c b/deps/lightening/tests/bunger_f.c
new file mode 100644
index 0000000..f3103dc
--- /dev/null
+++ b/deps/lightening/tests/bunger_f.c
@@ -0,0 +1,34 @@
+#include "test.h"
+
+static void
+run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size) // builds f(a, b) around jit_bunger_f and checks which inputs take the branch
+{
+ jit_begin(j, arena_base, arena_size);
+ size_t align = jit_enter_jit_abi(j, 0, 0, 0);
+ jit_load_args_2(j, jit_operand_fpr (JIT_OPERAND_ABI_FLOAT, JIT_F0),
+ jit_operand_fpr (JIT_OPERAND_ABI_FLOAT, JIT_F1));
+
+ jit_reloc_t r = jit_bunger_f(j, JIT_F0, JIT_F1); // branch under test; target patched below
+ jit_leave_jit_abi(j, 0, 0, align);
+ jit_reti(j, 0); // fall-through: branch not taken, asserts expect 0
+ jit_patch_here(j, r); // branch target
+ jit_leave_jit_abi(j, 0, 0, align);
+ jit_reti(j, 1); // branch taken, asserts expect 1
+
+ jit_word_t (*f)(float, float) = jit_end(j, NULL);
+
+ ASSERT(f(0, 0) == 1);
+ ASSERT(f(0, 1) == 0);
+ ASSERT(f(1, 0) == 1);
+ ASSERT(f(-1, 0) == 0);
+ ASSERT(f(0, -1) == 1);
+
+ ASSERT(f(0, 0.0/0.0) == 1); // 0.0/0.0 is NaN under IEEE 754; an unordered pair is expected to take the branch
+ ASSERT(f(0.0/0.0, 0) == 1);
+}
+
+int
+main (int argc, char *argv[])
+{
+ return main_helper(argc, argv, run_test); // delegate to main_helper with this file's run_test; its result is the exit status
+}
diff --git a/deps/lightening/tests/bungtr_d.c b/deps/lightening/tests/bungtr_d.c
new file mode 100644
index 0000000..649d61f
--- /dev/null
+++ b/deps/lightening/tests/bungtr_d.c
@@ -0,0 +1,34 @@
+#include "test.h"
+
+static void
+run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size) // builds f(a, b) around jit_bungtr_d and checks which inputs take the branch
+{
+ jit_begin(j, arena_base, arena_size);
+ size_t align = jit_enter_jit_abi(j, 0, 0, 0);
+ jit_load_args_2(j, jit_operand_fpr (JIT_OPERAND_ABI_DOUBLE, JIT_F0),
+ jit_operand_fpr (JIT_OPERAND_ABI_DOUBLE, JIT_F1));
+
+ jit_reloc_t r = jit_bungtr_d(j, JIT_F0, JIT_F1); // branch under test; target patched below
+ jit_leave_jit_abi(j, 0, 0, align);
+ jit_reti(j, 0); // fall-through: branch not taken, asserts expect 0
+ jit_patch_here(j, r); // branch target
+ jit_leave_jit_abi(j, 0, 0, align);
+ jit_reti(j, 1); // branch taken, asserts expect 1
+
+ jit_word_t (*f)(double, double) = jit_end(j, NULL);
+
+ ASSERT(f(0, 0) == 0);
+ ASSERT(f(0, 1) == 0);
+ ASSERT(f(1, 0) == 1);
+ ASSERT(f(-1, 0) == 0);
+ ASSERT(f(0, -1) == 1);
+
+ ASSERT(f(0, 0.0/0.0) == 1); // 0.0/0.0 is NaN under IEEE 754; an unordered pair is expected to take the branch
+ ASSERT(f(0.0/0.0, 0) == 1);
+}
+
+int
+main (int argc, char *argv[])
+{
+ return main_helper(argc, argv, run_test); // delegate to main_helper with this file's run_test; its result is the exit status
+}
diff --git a/deps/lightening/tests/bungtr_f.c b/deps/lightening/tests/bungtr_f.c
new file mode 100644
index 0000000..fea66dc
--- /dev/null
+++ b/deps/lightening/tests/bungtr_f.c
@@ -0,0 +1,34 @@
+#include "test.h"
+
+static void
+run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size) // builds f(a, b) around jit_bungtr_f and checks which inputs take the branch
+{
+ jit_begin(j, arena_base, arena_size);
+ size_t align = jit_enter_jit_abi(j, 0, 0, 0);
+ jit_load_args_2(j, jit_operand_fpr (JIT_OPERAND_ABI_FLOAT, JIT_F0),
+ jit_operand_fpr (JIT_OPERAND_ABI_FLOAT, JIT_F1));
+
+ jit_reloc_t r = jit_bungtr_f(j, JIT_F0, JIT_F1); // branch under test; target patched below
+ jit_leave_jit_abi(j, 0, 0, align);
+ jit_reti(j, 0); // fall-through: branch not taken, asserts expect 0
+ jit_patch_here(j, r); // branch target
+ jit_leave_jit_abi(j, 0, 0, align);
+ jit_reti(j, 1); // branch taken, asserts expect 1
+
+ jit_word_t (*f)(float, float) = jit_end(j, NULL);
+
+ ASSERT(f(0, 0) == 0);
+ ASSERT(f(0, 1) == 0);
+ ASSERT(f(1, 0) == 1);
+ ASSERT(f(-1, 0) == 0);
+ ASSERT(f(0, -1) == 1);
+
+ ASSERT(f(0, 0.0/0.0) == 1); // 0.0/0.0 is NaN under IEEE 754; an unordered pair is expected to take the branch
+ ASSERT(f(0.0/0.0, 0) == 1);
+}
+
+int
+main (int argc, char *argv[])
+{
+ return main_helper(argc, argv, run_test); // delegate to main_helper with this file's run_test; its result is the exit status
+}
diff --git a/deps/lightening/tests/bunler_d.c b/deps/lightening/tests/bunler_d.c
new file mode 100644
index 0000000..e59382c
--- /dev/null
+++ b/deps/lightening/tests/bunler_d.c
@@ -0,0 +1,34 @@
+#include "test.h"
+
+static void
+run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size) // builds f(a, b) around jit_bunler_d and checks which inputs take the branch
+{
+ jit_begin(j, arena_base, arena_size);
+ size_t align = jit_enter_jit_abi(j, 0, 0, 0);
+ jit_load_args_2(j, jit_operand_fpr (JIT_OPERAND_ABI_DOUBLE, JIT_F0),
+ jit_operand_fpr (JIT_OPERAND_ABI_DOUBLE, JIT_F1));
+
+ jit_reloc_t r = jit_bunler_d(j, JIT_F0, JIT_F1); // branch under test; target patched below
+ jit_leave_jit_abi(j, 0, 0, align);
+ jit_reti(j, 0); // fall-through: branch not taken, asserts expect 0
+ jit_patch_here(j, r); // branch target
+ jit_leave_jit_abi(j, 0, 0, align);
+ jit_reti(j, 1); // branch taken, asserts expect 1
+
+ jit_word_t (*f)(double, double) = jit_end(j, NULL);
+
+ ASSERT(f(0, 0) == 1);
+ ASSERT(f(0, 1) == 1);
+ ASSERT(f(1, 0) == 0);
+ ASSERT(f(-1, 0) == 1);
+ ASSERT(f(0, -1) == 0);
+
+ ASSERT(f(0, 0.0/0.0) == 1); // 0.0/0.0 is NaN under IEEE 754; an unordered pair is expected to take the branch
+ ASSERT(f(0.0/0.0, 0) == 1);
+}
+
+int
+main (int argc, char *argv[])
+{
+ return main_helper(argc, argv, run_test); // delegate to main_helper with this file's run_test; its result is the exit status
+}
diff --git a/deps/lightening/tests/bunler_f.c b/deps/lightening/tests/bunler_f.c
new file mode 100644
index 0000000..fddce6b
--- /dev/null
+++ b/deps/lightening/tests/bunler_f.c
@@ -0,0 +1,34 @@
+#include "test.h"
+
+static void
+run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size) // builds f(a, b) around jit_bunler_f and checks which inputs take the branch
+{
+ jit_begin(j, arena_base, arena_size);
+ size_t align = jit_enter_jit_abi(j, 0, 0, 0);
+ jit_load_args_2(j, jit_operand_fpr (JIT_OPERAND_ABI_FLOAT, JIT_F0),
+ jit_operand_fpr (JIT_OPERAND_ABI_FLOAT, JIT_F1));
+
+ jit_reloc_t r = jit_bunler_f(j, JIT_F0, JIT_F1); // branch under test; target patched below
+ jit_leave_jit_abi(j, 0, 0, align);
+ jit_reti(j, 0); // fall-through: branch not taken, asserts expect 0
+ jit_patch_here(j, r); // branch target
+ jit_leave_jit_abi(j, 0, 0, align);
+ jit_reti(j, 1); // branch taken, asserts expect 1
+
+ jit_word_t (*f)(float, float) = jit_end(j, NULL);
+
+ ASSERT(f(0, 0) == 1);
+ ASSERT(f(0, 1) == 1);
+ ASSERT(f(1, 0) == 0);
+ ASSERT(f(-1, 0) == 1);
+ ASSERT(f(0, -1) == 0);
+
+ ASSERT(f(0, 0.0/0.0) == 1); // 0.0/0.0 is NaN under IEEE 754; an unordered pair is expected to take the branch
+ ASSERT(f(0.0/0.0, 0) == 1);
+}
+
+int
+main (int argc, char *argv[])
+{
+ return main_helper(argc, argv, run_test); // delegate to main_helper with this file's run_test; its result is the exit status
+}
diff --git a/deps/lightening/tests/bunltr_d.c b/deps/lightening/tests/bunltr_d.c
new file mode 100644
index 0000000..2ab0051
--- /dev/null
+++ b/deps/lightening/tests/bunltr_d.c
@@ -0,0 +1,34 @@
+#include "test.h"
+
+static void
+run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size) // builds f(a, b) around jit_bunltr_d and checks which inputs take the branch
+{
+ jit_begin(j, arena_base, arena_size);
+ size_t align = jit_enter_jit_abi(j, 0, 0, 0);
+ jit_load_args_2(j, jit_operand_fpr (JIT_OPERAND_ABI_DOUBLE, JIT_F0),
+ jit_operand_fpr (JIT_OPERAND_ABI_DOUBLE, JIT_F1));
+
+ jit_reloc_t r = jit_bunltr_d(j, JIT_F0, JIT_F1); // branch under test; target patched below
+ jit_leave_jit_abi(j, 0, 0, align);
+ jit_reti(j, 0); // fall-through: branch not taken, asserts expect 0
+ jit_patch_here(j, r); // branch target
+ jit_leave_jit_abi(j, 0, 0, align);
+ jit_reti(j, 1); // branch taken, asserts expect 1
+
+ jit_word_t (*f)(double, double) = jit_end(j, NULL);
+
+ ASSERT(f(0, 0) == 0);
+ ASSERT(f(0, 1) == 1);
+ ASSERT(f(1, 0) == 0);
+ ASSERT(f(-1, 0) == 1);
+ ASSERT(f(0, -1) == 0);
+
+ ASSERT(f(0, 0.0/0.0) == 1); // 0.0/0.0 is NaN under IEEE 754; an unordered pair is expected to take the branch
+ ASSERT(f(0.0/0.0, 0) == 1);
+}
+
+int
+main (int argc, char *argv[])
+{
+ return main_helper(argc, argv, run_test); // delegate to main_helper with this file's run_test; its result is the exit status
+}
diff --git a/deps/lightening/tests/bunltr_f.c b/deps/lightening/tests/bunltr_f.c
new file mode 100644
index 0000000..ade228b
--- /dev/null
+++ b/deps/lightening/tests/bunltr_f.c
@@ -0,0 +1,34 @@
+#include "test.h"
+
+static void
+run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size) // builds f(a, b) around jit_bunltr_f and checks which inputs take the branch
+{
+ jit_begin(j, arena_base, arena_size);
+ size_t align = jit_enter_jit_abi(j, 0, 0, 0);
+ jit_load_args_2(j, jit_operand_fpr (JIT_OPERAND_ABI_FLOAT, JIT_F0),
+ jit_operand_fpr (JIT_OPERAND_ABI_FLOAT, JIT_F1));
+
+ jit_reloc_t r = jit_bunltr_f(j, JIT_F0, JIT_F1); // branch under test; target patched below
+ jit_leave_jit_abi(j, 0, 0, align);
+ jit_reti(j, 0); // fall-through: branch not taken, asserts expect 0
+ jit_patch_here(j, r); // branch target
+ jit_leave_jit_abi(j, 0, 0, align);
+ jit_reti(j, 1); // branch taken, asserts expect 1
+
+ jit_word_t (*f)(float, float) = jit_end(j, NULL);
+
+ ASSERT(f(0, 0) == 0);
+ ASSERT(f(0, 1) == 1);
+ ASSERT(f(1, 0) == 0);
+ ASSERT(f(-1, 0) == 1);
+ ASSERT(f(0, -1) == 0);
+
+ ASSERT(f(0, 0.0/0.0) == 1); // 0.0/0.0 is NaN under IEEE 754; an unordered pair is expected to take the branch
+ ASSERT(f(0.0/0.0, 0) == 1);
+}
+
+int
+main (int argc, char *argv[])
+{
+ return main_helper(argc, argv, run_test); // delegate to main_helper with this file's run_test; its result is the exit status
+}
diff --git a/deps/lightening/tests/bunordr_d.c b/deps/lightening/tests/bunordr_d.c
new file mode 100644
index 0000000..6b04f0e
--- /dev/null
+++ b/deps/lightening/tests/bunordr_d.c
@@ -0,0 +1,36 @@
+#include "test.h"
+
+static void
+run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size) // builds f(a, b) around jit_bunordr_d and checks which inputs take the branch
+{
+ jit_begin(j, arena_base, arena_size);
+ size_t align = jit_enter_jit_abi(j, 0, 0, 0);
+ jit_load_args_2(j, jit_operand_fpr (JIT_OPERAND_ABI_DOUBLE, JIT_F0),
+ jit_operand_fpr (JIT_OPERAND_ABI_DOUBLE, JIT_F1));
+
+ jit_reloc_t r = jit_bunordr_d(j, JIT_F0, JIT_F1); // branch under test; target patched below
+ jit_leave_jit_abi(j, 0, 0, align);
+ jit_reti(j, 0); // fall-through: branch not taken, asserts expect 0
+ jit_patch_here(j, r); // branch target
+ jit_leave_jit_abi(j, 0, 0, align);
+ jit_reti(j, 1); // branch taken, asserts expect 1
+
+ jit_word_t (*f)(double, double) = jit_end(j, NULL);
+
+ ASSERT(f(0, 0) == 0); // every ordered pair below is expected to fall through
+ ASSERT(f(0, 1) == 0);
+ ASSERT(f(1, 0) == 0);
+ ASSERT(f(-1, 0) == 0);
+ ASSERT(f(0, -1) == 0);
+ ASSERT(f(1, 1) == 0);
+
+ ASSERT(f(0, 0.0/0.0) == 1); // 0.0/0.0 is NaN under IEEE 754; any NaN operand is expected to take the branch
+ ASSERT(f(0.0/0.0, 0) == 1);
+ ASSERT(f(0.0/0.0, 0.0/0.0) == 1);
+}
+
+int
+main (int argc, char *argv[])
+{
+ return main_helper(argc, argv, run_test); // delegate to main_helper with this file's run_test; its result is the exit status
+}
diff --git a/deps/lightening/tests/bunordr_f.c b/deps/lightening/tests/bunordr_f.c
new file mode 100644
index 0000000..ce4fc7b
--- /dev/null
+++ b/deps/lightening/tests/bunordr_f.c
@@ -0,0 +1,36 @@
+#include "test.h"
+
+static void
+run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size) // builds f(a, b) around jit_bunordr_f and checks which inputs take the branch
+{
+ jit_begin(j, arena_base, arena_size);
+ size_t align = jit_enter_jit_abi(j, 0, 0, 0);
+ jit_load_args_2(j, jit_operand_fpr (JIT_OPERAND_ABI_FLOAT, JIT_F0),
+ jit_operand_fpr (JIT_OPERAND_ABI_FLOAT, JIT_F1));
+
+ jit_reloc_t r = jit_bunordr_f(j, JIT_F0, JIT_F1); // branch under test; target patched below
+ jit_leave_jit_abi(j, 0, 0, align);
+ jit_reti(j, 0); // fall-through: branch not taken, asserts expect 0
+ jit_patch_here(j, r); // branch target
+ jit_leave_jit_abi(j, 0, 0, align);
+ jit_reti(j, 1); // branch taken, asserts expect 1
+
+ jit_word_t (*f)(float, float) = jit_end(j, NULL);
+
+ ASSERT(f(0, 0) == 0); // every ordered pair below is expected to fall through
+ ASSERT(f(0, 1) == 0);
+ ASSERT(f(1, 0) == 0);
+ ASSERT(f(-1, 0) == 0);
+ ASSERT(f(0, -1) == 0);
+ ASSERT(f(1, 1) == 0);
+
+ ASSERT(f(0, 0.0/0.0) == 1); // 0.0/0.0 is NaN under IEEE 754; any NaN operand is expected to take the branch
+ ASSERT(f(0.0/0.0, 0) == 1);
+ ASSERT(f(0.0/0.0, 0.0/0.0) == 1);
+}
+
+int
+main (int argc, char *argv[])
+{
+ return main_helper(argc, argv, run_test); // delegate to main_helper with this file's run_test; its result is the exit status
+}
diff --git a/deps/lightening/tests/bxaddi.c b/deps/lightening/tests/bxaddi.c
new file mode 100644
index 0000000..6e872da
--- /dev/null
+++ b/deps/lightening/tests/bxaddi.c
@@ -0,0 +1,39 @@
+#include "test.h"
+
+static const jit_word_t overflowed = 0xcabba9e5; // sentinel the generated code returns on its overflow path
+
+static void
+run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size) // emits f(x) around jit_bxaddi with immediate 1 and checks it against a value table
+{
+ jit_begin(j, arena_base, arena_size);
+ size_t align = jit_enter_jit_abi(j, 0, 0, 0);
+ jit_load_args_1(j, jit_operand_gpr (JIT_OPERAND_ABI_WORD, JIT_R0));
+
+ jit_reloc_t r = jit_bxaddi(j, JIT_R0, 1); // instruction under test; its branch target is patched below
+ jit_movi(j, JIT_R0, overflowed); // reached only when the branch is not taken: overwrite the result with the sentinel
+ jit_patch_here(j, r); // branch lands here, skipping the sentinel load
+ jit_leave_jit_abi(j, 0, 0, align);
+ jit_retr(j, JIT_R0);
+
+ jit_word_t (*f)(jit_word_t) = jit_end(j, NULL);
+
+ ASSERT(f(-1) == 0);
+ ASSERT(f(0) == 1);
+ ASSERT(f(1) == 2);
+
+#if __WORDSIZE == 32
+ ASSERT(f(0x7fffffff) == overflowed); // largest signed word + 1 overflows
+ ASSERT(f(0x80000000) == 0x80000001);
+ ASSERT(f(0xffffffff) == 0);
+#else
+ ASSERT(f(0x7fffffffffffffff) == overflowed); // largest signed word + 1 overflows
+ ASSERT(f(0x8000000000000000) == 0x8000000000000001);
+ ASSERT(f(0xffffffffffffffff) == 0);
+#endif
+}
+
+int
+main (int argc, char *argv[])
+{
+ return main_helper(argc, argv, run_test); // delegate to main_helper with this file's run_test; its result is the exit status
+}
diff --git a/deps/lightening/tests/bxaddi_u.c b/deps/lightening/tests/bxaddi_u.c
new file mode 100644
index 0000000..e71aeb7
--- /dev/null
+++ b/deps/lightening/tests/bxaddi_u.c
@@ -0,0 +1,39 @@
+#include "test.h"
+
+static const jit_word_t overflowed = 0xcabba9e5; // sentinel the generated code returns on its overflow path
+
+static void
+run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size) // emits f(x) around jit_bxaddi_u with immediate 1 and checks it against a value table
+{
+ jit_begin(j, arena_base, arena_size);
+ size_t align = jit_enter_jit_abi(j, 0, 0, 0);
+ jit_load_args_1(j, jit_operand_gpr (JIT_OPERAND_ABI_WORD, JIT_R0));
+
+ jit_reloc_t r = jit_bxaddi_u(j, JIT_R0, 1); // instruction under test; its branch target is patched below
+ jit_movi(j, JIT_R0, overflowed); // reached only when the branch is not taken: overwrite the result with the sentinel
+ jit_patch_here(j, r); // branch lands here, skipping the sentinel load
+ jit_leave_jit_abi(j, 0, 0, align);
+ jit_retr(j, JIT_R0);
+
+ jit_word_t (*f)(jit_word_t) = jit_end(j, NULL);
+
+ ASSERT(f(-1) == overflowed); // all-ones + 1 carries out when treated as unsigned
+ ASSERT(f(0) == 1);
+ ASSERT(f(1) == 2);
+
+#if __WORDSIZE == 32
+ ASSERT(f(0x7fffffff) == 0x80000000); // crossing the signed boundary is not an unsigned overflow
+ ASSERT(f(0x80000000) == 0x80000001);
+ ASSERT(f(0xffffffff) == overflowed);
+#else
+ ASSERT(f(0x7fffffffffffffff) == 0x8000000000000000); // crossing the signed boundary is not an unsigned overflow
+ ASSERT(f(0x8000000000000000) == 0x8000000000000001);
+ ASSERT(f(0xffffffffffffffff) == overflowed);
+#endif
+}
+
+int
+main (int argc, char *argv[])
+{
+ return main_helper(argc, argv, run_test); // delegate to main_helper with this file's run_test; its result is the exit status
+}
diff --git a/deps/lightening/tests/bxaddr.c b/deps/lightening/tests/bxaddr.c
new file mode 100644
index 0000000..c1f6f23
--- /dev/null
+++ b/deps/lightening/tests/bxaddr.c
@@ -0,0 +1,49 @@
+#include "test.h"
+
+static const jit_word_t overflowed = 0xcabba9e5; // sentinel the generated code returns on its overflow path
+
+static void
+run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size) // emits f(a, b) around jit_bxaddr and checks it against a value table
+{
+ jit_begin(j, arena_base, arena_size);
+ size_t align = jit_enter_jit_abi(j, 0, 0, 0);
+ jit_load_args_2(j, jit_operand_gpr (JIT_OPERAND_ABI_WORD, JIT_R0),
+ jit_operand_gpr (JIT_OPERAND_ABI_WORD, JIT_R1));
+
+ jit_reloc_t r = jit_bxaddr(j, JIT_R0, JIT_R1); // instruction under test; its branch target is patched below
+ jit_movi(j, JIT_R0, overflowed); // reached only when the branch is not taken: overwrite the result with the sentinel
+ jit_patch_here(j, r); // branch lands here, skipping the sentinel load
+ jit_leave_jit_abi(j, 0, 0, align);
+ jit_retr(j, JIT_R0);
+
+ jit_word_t (*f)(jit_word_t, jit_word_t) = jit_end(j, NULL);
+
+ ASSERT(f(0, 0) == 0);
+ ASSERT(f(1, 1) == 2);
+
+#if __WORDSIZE == 32
+ ASSERT(f(0xffffffff, 0xffffffff) == -2);
+ ASSERT(f(0x7fffffff, 0) == 0x7fffffff);
+ ASSERT(f(0x7fffffff, 1) == overflowed);
+ ASSERT(f(0x7fffffff, 0x7fffffff) == overflowed);
+ ASSERT(f(0x7fffffff, 0x80000000) == -1);
+ ASSERT(f(0x80000000, 0x80000000) == overflowed);
+#else
+ ASSERT(f(0xffffffff, 0xffffffff) == 0xffffffffull + 0xffffffffull); // 32-bit boundary values must not overflow a 64-bit word
+ ASSERT(f(0x7fffffff, 1) == 0x80000000);
+ ASSERT(f(0x7fffffff, 0x7fffffff) == 0x7fffffffull + 0x7fffffffull);
+ ASSERT(f(0x7fffffff, 0x80000000) == 0xffffffff);
+ ASSERT(f(0x80000000, 0x80000000) == 0x100000000);
+ ASSERT(f(0xffffffffffffffff, 0xffffffffffffffff) == -2);
+ ASSERT(f(0x7fffffffffffffff, 1) == overflowed);
+ ASSERT(f(0x7fffffffffffffff, 0x7fffffffffffffff) == overflowed);
+ ASSERT(f(0x7fffffffffffffff, 0x8000000000000000) == -1);
+ ASSERT(f(0x8000000000000000, 0x8000000000000000) == overflowed);
+#endif
+}
+
+int
+main (int argc, char *argv[])
+{
+ return main_helper(argc, argv, run_test); // delegate to main_helper with this file's run_test; its result is the exit status
+}
diff --git a/deps/lightening/tests/bxaddr_u.c b/deps/lightening/tests/bxaddr_u.c
new file mode 100644
index 0000000..d674f82
--- /dev/null
+++ b/deps/lightening/tests/bxaddr_u.c
@@ -0,0 +1,49 @@
+#include "test.h"
+
+static const jit_word_t overflowed = 0xcabba9e5; // sentinel the generated code returns on its overflow path
+
+static void
+run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size) // emits f(a, b) around jit_bxaddr_u and checks it against a value table
+{
+ jit_begin(j, arena_base, arena_size);
+ size_t align = jit_enter_jit_abi(j, 0, 0, 0);
+ jit_load_args_2(j, jit_operand_gpr (JIT_OPERAND_ABI_WORD, JIT_R0),
+ jit_operand_gpr (JIT_OPERAND_ABI_WORD, JIT_R1));
+
+ jit_reloc_t r = jit_bxaddr_u(j, JIT_R0, JIT_R1); // instruction under test; its branch target is patched below
+ jit_movi(j, JIT_R0, overflowed); // reached only when the branch is not taken: overwrite the result with the sentinel
+ jit_patch_here(j, r); // branch lands here, skipping the sentinel load
+ jit_leave_jit_abi(j, 0, 0, align);
+ jit_retr(j, JIT_R0);
+
+ jit_word_t (*f)(jit_word_t, jit_word_t) = jit_end(j, NULL);
+
+ ASSERT(f(0, 0) == 0);
+ ASSERT(f(1, 1) == 2);
+
+#if __WORDSIZE == 32
+ ASSERT(f(0xffffffff, 0xffffffff) == overflowed);
+ ASSERT(f(0x7fffffff, 0) == 0x7fffffff);
+ ASSERT(f(0x7fffffff, 1) == 0x80000000);
+ ASSERT(f(0x7fffffff, 0x7fffffff) == 0x7fffffffu + 0x7fffffffu);
+ ASSERT(f(0x7fffffff, 0x80000000) == 0xffffffff);
+ ASSERT(f(0x80000000, 0x80000000) == overflowed);
+#else
+ ASSERT(f(0xffffffff, 0xffffffff) == 0xffffffffull + 0xffffffffull); // 32-bit boundary values must not overflow a 64-bit word
+ ASSERT(f(0x7fffffff, 1) == 0x80000000);
+ ASSERT(f(0x7fffffff, 0x7fffffff) == 0x7fffffffull + 0x7fffffffull);
+ ASSERT(f(0x7fffffff, 0x80000000) == 0xffffffff);
+ ASSERT(f(0x80000000, 0x80000000) == 0x100000000);
+ ASSERT(f(0xffffffffffffffff, 0xffffffffffffffff) == overflowed);
+ ASSERT(f(0x7fffffffffffffff, 1) == 0x8000000000000000);
+ ASSERT(f(0x7fffffffffffffff, 0x7fffffffffffffff) == -2); // no unsigned carry: the sum is 0xfffffffffffffffe
+ ASSERT(f(0x7fffffffffffffff, 0x8000000000000000) == -1);
+ ASSERT(f(0x8000000000000000, 0x8000000000000000) == overflowed);
+#endif
+}
+
+int
+main (int argc, char *argv[])
+{
+ return main_helper(argc, argv, run_test); // delegate to main_helper with this file's run_test; its result is the exit status
+}
diff --git a/deps/lightening/tests/bxsubi.c b/deps/lightening/tests/bxsubi.c
new file mode 100644
index 0000000..1b642c7
--- /dev/null
+++ b/deps/lightening/tests/bxsubi.c
@@ -0,0 +1,39 @@
+#include "test.h"
+
+static const jit_word_t overflowed = 0xcabba9e5; // sentinel the generated code returns on its overflow path
+
+static void
+run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size) // emits f(x) around jit_bxsubi with immediate 1 and checks it against a value table
+{
+ jit_begin(j, arena_base, arena_size);
+ size_t align = jit_enter_jit_abi(j, 0, 0, 0);
+ jit_load_args_1(j, jit_operand_gpr (JIT_OPERAND_ABI_WORD, JIT_R0));
+
+ jit_reloc_t r = jit_bxsubi(j, JIT_R0, 1); // instruction under test; its branch target is patched below
+ jit_movi(j, JIT_R0, overflowed); // reached only when the branch is not taken: overwrite the result with the sentinel
+ jit_patch_here(j, r); // branch lands here, skipping the sentinel load
+ jit_leave_jit_abi(j, 0, 0, align);
+ jit_retr(j, JIT_R0);
+
+ jit_word_t (*f)(jit_word_t) = jit_end(j, NULL);
+
+ ASSERT(f(-1) == -2);
+ ASSERT(f(0) == -1);
+ ASSERT(f(1) == 0);
+
+#if __WORDSIZE == 32
+ ASSERT(f(0x7fffffff) == 0x7ffffffe);
+ ASSERT(f(0x80000000) == overflowed); // smallest signed word - 1 overflows
+ ASSERT(f(0x80000001) == 0x80000000);
+#else
+ ASSERT(f(0x7fffffffffffffff) == 0x7ffffffffffffffe);
+ ASSERT(f(0x8000000000000000) == overflowed); // smallest signed word - 1 overflows
+ ASSERT(f(0x8000000000000001) == 0x8000000000000000);
+#endif
+}
+
+int
+main (int argc, char *argv[])
+{
+ return main_helper(argc, argv, run_test); // delegate to main_helper with this file's run_test; its result is the exit status
+}
diff --git a/deps/lightening/tests/bxsubi_u.c b/deps/lightening/tests/bxsubi_u.c
new file mode 100644
index 0000000..1345bd2
--- /dev/null
+++ b/deps/lightening/tests/bxsubi_u.c
@@ -0,0 +1,35 @@
+#include "test.h"
+
+static const jit_word_t overflowed = 0xcabba9e5; // sentinel the generated code returns on its overflow path
+
+static void
+run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size) // emits f(x) around jit_bxsubi_u with immediate 1 and checks it against a value table
+{
+ jit_begin(j, arena_base, arena_size);
+ size_t align = jit_enter_jit_abi(j, 0, 0, 0);
+ jit_load_args_1(j, jit_operand_gpr (JIT_OPERAND_ABI_WORD, JIT_R0));
+
+ jit_reloc_t r = jit_bxsubi_u(j, JIT_R0, 1); // instruction under test; its branch target is patched below
+ jit_movi(j, JIT_R0, overflowed); // reached only when the branch is not taken: overwrite the result with the sentinel
+ jit_patch_here(j, r); // branch lands here, skipping the sentinel load
+ jit_leave_jit_abi(j, 0, 0, align);
+ jit_retr(j, JIT_R0);
+
+ jit_word_t (*f)(jit_word_t) = jit_end(j, NULL);
+
+ ASSERT(f(-1) == -2);
+ ASSERT(f(0) == overflowed); // 0 - 1 borrows when treated as unsigned
+ ASSERT(f(1) == 0);
+
+#if __WORDSIZE == 32
+ ASSERT(f(0x80000000) == 0x7fffffff); // crossing the signed boundary is not an unsigned overflow
+#else
+ ASSERT(f(0x8000000000000000) == 0x7fffffffffffffff); // crossing the signed boundary is not an unsigned overflow
+#endif
+}
+
+int
+main (int argc, char *argv[])
+{
+ return main_helper(argc, argv, run_test); // delegate to main_helper with this file's run_test; its result is the exit status
+}
diff --git a/deps/lightening/tests/bxsubr.c b/deps/lightening/tests/bxsubr.c
new file mode 100644
index 0000000..d40d182
--- /dev/null
+++ b/deps/lightening/tests/bxsubr.c
@@ -0,0 +1,46 @@
+#include "test.h"
+
+static const jit_word_t overflowed = 0xcabba9e5; // sentinel the generated code returns on its overflow path
+
+static void
+run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size) // emits f(a, b) around jit_bxsubr and checks it against a value table
+{
+ jit_begin(j, arena_base, arena_size);
+ size_t align = jit_enter_jit_abi(j, 0, 0, 0);
+ jit_load_args_2(j, jit_operand_gpr (JIT_OPERAND_ABI_WORD, JIT_R0),
+ jit_operand_gpr (JIT_OPERAND_ABI_WORD, JIT_R1));
+
+ jit_reloc_t r = jit_bxsubr(j, JIT_R0, JIT_R1); // instruction under test; its branch target is patched below
+ jit_movi(j, JIT_R0, overflowed); // reached only when the branch is not taken: overwrite the result with the sentinel
+ jit_patch_here(j, r); // branch lands here, skipping the sentinel load
+ jit_leave_jit_abi(j, 0, 0, align);
+ jit_retr(j, JIT_R0);
+
+ jit_word_t (*f)(jit_word_t, jit_word_t) = jit_end(j, NULL);
+
+ ASSERT(f(0, 0) == 0);
+ ASSERT(f(0, 1) == -1);
+ ASSERT(f(1, 1) == 0);
+ ASSERT(f(1, -1) == 2);
+
+#if __WORDSIZE == 32
+ ASSERT(f(0xffffffff, 0xffffffff) == 0);
+ ASSERT(f(0x7fffffff, 0) == 0x7fffffff);
+ ASSERT(f(0x7fffffff, 1) == 0x7ffffffe);
+ ASSERT(f(0x7fffffff, 0x7fffffff) == 0);
+ ASSERT(f(0x80000000, 0x7fffffff) == overflowed); // most negative minus most positive overflows
+ ASSERT(f(0x7fffffff, 0x80000000) == overflowed); // most positive minus most negative overflows
+ ASSERT(f(0x80000000, 0x80000000) == 0);
+#else
+ ASSERT(f(0x7fffffffffffffff, 0x7fffffffffffffff) == 0);
+ ASSERT(f(0x7fffffffffffffff, 0x8000000000000000) == overflowed); // most positive minus most negative overflows
+ ASSERT(f(0x8000000000000000, 0x7fffffffffffffff) == overflowed); // most negative minus most positive overflows
+ ASSERT(f(0x8000000000000000, 0x8000000000000000) == 0);
+#endif
+}
+
+int
+main (int argc, char *argv[])
+{
+ return main_helper(argc, argv, run_test); // delegate to main_helper with this file's run_test; its result is the exit status
+}
diff --git a/deps/lightening/tests/bxsubr_u.c b/deps/lightening/tests/bxsubr_u.c
new file mode 100644
index 0000000..54a8d28
--- /dev/null
+++ b/deps/lightening/tests/bxsubr_u.c
@@ -0,0 +1,45 @@
+#include "test.h"
+
+static const jit_word_t overflowed = 0xcabba9e5; // sentinel the generated code returns on its overflow path
+
+static void
+run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size) // emits f(a, b) around jit_bxsubr_u and checks it against a value table
+{
+ jit_begin(j, arena_base, arena_size);
+ size_t align = jit_enter_jit_abi(j, 0, 0, 0);
+ jit_load_args_2(j, jit_operand_gpr (JIT_OPERAND_ABI_WORD, JIT_R0),
+ jit_operand_gpr (JIT_OPERAND_ABI_WORD, JIT_R1));
+
+ jit_reloc_t r = jit_bxsubr_u(j, JIT_R0, JIT_R1); // instruction under test; its branch target is patched below
+ jit_movi(j, JIT_R0, overflowed); // reached only when the branch is not taken: overwrite the result with the sentinel
+ jit_patch_here(j, r); // branch lands here, skipping the sentinel load
+ jit_leave_jit_abi(j, 0, 0, align);
+ jit_retr(j, JIT_R0);
+
+ jit_word_t (*f)(jit_word_t, jit_word_t) = jit_end(j, NULL);
+
+ ASSERT(f(0, 0) == 0);
+ ASSERT(f(1, 1) == 0);
+ ASSERT(f(0, 1) == overflowed); // 0 - 1 borrows when treated as unsigned
+ ASSERT(f(1, 0) == 1);
+
+#if __WORDSIZE == 32
+ ASSERT(f(0xffffffff, 0xffffffff) == 0);
+ ASSERT(f(0x7fffffff, 0) == 0x7fffffff);
+ ASSERT(f(0x7fffffff, 1) == 0x7ffffffe);
+ ASSERT(f(0x7fffffff, 0x7fffffff) == 0);
+ ASSERT(f(0x7fffffff, 0x80000000) == overflowed);
+ ASSERT(f(0x80000000, 0x80000000) == 0);
+#else
+ ASSERT(f(0xffffffffffffffff, 0xffffffffffffffff) == 0);
+ ASSERT(f(0x7fffffffffffffff, 0x7fffffffffffffff) == 0);
+ ASSERT(f(0x7fffffffffffffff, 0x8000000000000000) == overflowed);
+ ASSERT(f(0x8000000000000000, 0x8000000000000000) == 0);
+#endif
+}
+
+int
+main (int argc, char *argv[])
+{
+ return main_helper(argc, argv, run_test); // delegate to main_helper with this file's run_test; its result is the exit status
+}
diff --git a/deps/lightening/tests/call_10.c b/deps/lightening/tests/call_10.c
new file mode 100644
index 0000000..d99bcb8
--- /dev/null
+++ b/deps/lightening/tests/call_10.c
@@ -0,0 +1,54 @@
+#include "test.h"
+
+static int32_t f(int32_t a, int32_t b, int32_t c, int32_t d, int32_t e, // C callee: verifies that each of its ten arguments equals its position
+ int32_t f, int32_t g, int32_t h, int32_t i, int32_t j) {
+ ASSERT(a == 0);
+ ASSERT(b == 1);
+ ASSERT(c == 2);
+ ASSERT(d == 3);
+ ASSERT(e == 4);
+ ASSERT(f == 5); // here f is the sixth parameter, which shadows the function's own name
+ ASSERT(g == 6);
+ ASSERT(h == 7);
+ ASSERT(i == 8);
+ ASSERT(j == 9);
+ return 42; // distinctive value that run_test checks for
+}
+
+static void
+run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size) // emits code that calls the static f with ten int32 arguments read from an array
+{
+ jit_begin(j, arena_base, arena_size);
+ size_t align = jit_enter_jit_abi(j, 0, 0, 0);
+ jit_load_args_1(j, jit_operand_gpr (JIT_OPERAND_ABI_POINTER, JIT_R0)); // R0 = pointer to the int32 array
+
+ jit_operand_t args[10] = { // operand k is the int32 at byte offset k * sizeof(int32_t) from R0
+ jit_operand_mem(JIT_OPERAND_ABI_INT32, JIT_R0, 0 * sizeof(int32_t)),
+ jit_operand_mem(JIT_OPERAND_ABI_INT32, JIT_R0, 1 * sizeof(int32_t)),
+ jit_operand_mem(JIT_OPERAND_ABI_INT32, JIT_R0, 2 * sizeof(int32_t)),
+ jit_operand_mem(JIT_OPERAND_ABI_INT32, JIT_R0, 3 * sizeof(int32_t)),
+ jit_operand_mem(JIT_OPERAND_ABI_INT32, JIT_R0, 4 * sizeof(int32_t)),
+ jit_operand_mem(JIT_OPERAND_ABI_INT32, JIT_R0, 5 * sizeof(int32_t)),
+ jit_operand_mem(JIT_OPERAND_ABI_INT32, JIT_R0, 6 * sizeof(int32_t)),
+ jit_operand_mem(JIT_OPERAND_ABI_INT32, JIT_R0, 7 * sizeof(int32_t)),
+ jit_operand_mem(JIT_OPERAND_ABI_INT32, JIT_R0, 8 * sizeof(int32_t)),
+ jit_operand_mem(JIT_OPERAND_ABI_INT32, JIT_R0, 9 * sizeof(int32_t))
+ };
+ jit_calli(j, f, 10, args); // f is still the static C helper at this point
+ jit_leave_jit_abi(j, 0, 0, align);
+ jit_ret(j); // NOTE(review): no explicit return value; the assert below relies on the callee's 42 passing through - confirm
+
+ size_t size = 0;
+ void* ret = jit_end(j, &size);
+
+ int32_t (*f)(int32_t*) = ret; // from here on f names the generated code and shadows the static helper
+
+ int32_t iargs[10] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9 };
+ ASSERT(f(iargs) == 42);
+}
+
+int
+main (int argc, char *argv[])
+{
+ return main_helper(argc, argv, run_test); // delegate to main_helper with this file's run_test; its result is the exit status
+}
diff --git a/deps/lightening/tests/call_double.c b/deps/lightening/tests/call_double.c
new file mode 100644
index 0000000..2aad1d0
--- /dev/null
+++ b/deps/lightening/tests/call_double.c
@@ -0,0 +1,38 @@
+#include "test.h"
+
+static double f(int32_t a, double b) { // C callee for the generated code: mixes an integer and a double argument
+ return b + a; // a is converted to double for the addition
+}
+
+static void
+run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size) // emits code that calls the static f(int32_t, double) with operands read through two pointers
+{
+ jit_begin(j, arena_base, arena_size);
+ size_t align = jit_enter_jit_abi(j, 0, 0, 0);
+ jit_load_args_2(j, jit_operand_gpr (JIT_OPERAND_ABI_POINTER, JIT_R0),
+ jit_operand_gpr (JIT_OPERAND_ABI_POINTER, JIT_R1));
+
+ jit_operand_t args[2] = { // both call operands are memory operands at offset 0 of the incoming pointers
+ jit_operand_mem(JIT_OPERAND_ABI_INT32, JIT_R0, 0),
+ jit_operand_mem(JIT_OPERAND_ABI_DOUBLE, JIT_R1, 0)
+ };
+ jit_calli(j, f, 2, args); // f is still the static C helper at this point
+ jit_retval_d(j, JIT_F0); // presumably moves the callee's double result into F0 - confirm against lightening.h
+ jit_leave_jit_abi(j, 0, 0, align);
+ jit_retr_d(j, JIT_F0);
+
+ size_t size = 0;
+ void* ret = jit_end(j, &size);
+
+ double (*f)(int32_t*, double*) = ret; // from here on f names the generated code and shadows the static helper
+
+ double d = 22.0f;
+ int32_t i = 20;
+ ASSERT(f(&i, &d) == 42.0f); // 20 + 22.0
+}
+
+int
+main (int argc, char *argv[])
+{
+ return main_helper(argc, argv, run_test); // delegate to main_helper with this file's run_test; its result is the exit status
+}
diff --git a/deps/lightening/tests/call_float.c b/deps/lightening/tests/call_float.c
new file mode 100644
index 0000000..e9bbd71
--- /dev/null
+++ b/deps/lightening/tests/call_float.c
@@ -0,0 +1,38 @@
+#include "test.h"
+
+static float f(int32_t a, float b) { // C callee for the generated code: mixes an integer and a float argument
+ return b + a; // a is converted to float for the addition
+}
+
+static void
+run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size) // emits code that calls the static f(int32_t, float) with operands read through two pointers
+{
+ jit_begin(j, arena_base, arena_size);
+ size_t align = jit_enter_jit_abi(j, 0, 0, 0);
+ jit_load_args_2(j, jit_operand_gpr (JIT_OPERAND_ABI_POINTER, JIT_R0),
+ jit_operand_gpr (JIT_OPERAND_ABI_POINTER, JIT_R1));
+
+ jit_operand_t args[2] = { // both call operands are memory operands at offset 0 of the incoming pointers
+ jit_operand_mem(JIT_OPERAND_ABI_INT32, JIT_R0, 0),
+ jit_operand_mem(JIT_OPERAND_ABI_FLOAT, JIT_R1, 0)
+ };
+ jit_calli(j, f, 2, args); // f is still the static C helper at this point
+ jit_retval_f(j, JIT_F0); // presumably moves the callee's float result into F0 - confirm against lightening.h
+ jit_leave_jit_abi(j, 0, 0, align);
+ jit_retr_f(j, JIT_F0);
+
+ size_t size = 0;
+ void* ret = jit_end(j, &size);
+
+ float (*f)(int32_t*, float*) = ret; // from here on f names the generated code and shadows the static helper
+
+ float d = 22.0f;
+ int32_t i = 20;
+ ASSERT(f(&i, &d) == 42.0f); // 20 + 22.0f
+}
+
+int
+main (int argc, char *argv[])
+{
+ return main_helper(argc, argv, run_test); // delegate to main_helper with this file's run_test; its result is the exit status
+}
diff --git a/deps/lightening/tests/callee_9.c b/deps/lightening/tests/callee_9.c
new file mode 100644
index 0000000..b7f1a46
--- /dev/null
+++ b/deps/lightening/tests/callee_9.c
@@ -0,0 +1,68 @@
+#include "test.h"
+
+struct args // one field per non-pointer argument that the generated function in run_test receives
+{
+ int8_t a;
+ int16_t b;
+ int32_t c;
+ jit_word_t d;
+ uint16_t e;
+ float f;
+ double g;
+ float h;
+};
+
+static void
+run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size) // emits a nine-argument function that stores arguments 2..9 into the struct given as argument 1
+{
+ jit_begin(j, arena_base, arena_size);
+ size_t align = jit_enter_jit_abi(j, 3, 0, 0); // NOTE(review): 3 presumably covers JIT_V0..JIT_V2 used below - confirm parameter meaning
+
+ jit_operand_t args[9] = { // destination register and ABI type for each incoming argument, in order
+ jit_operand_gpr(JIT_OPERAND_ABI_POINTER, JIT_R0),
+ jit_operand_gpr(JIT_OPERAND_ABI_INT8, JIT_R1),
+ jit_operand_gpr(JIT_OPERAND_ABI_INT16, JIT_R2),
+ jit_operand_gpr(JIT_OPERAND_ABI_INT32, JIT_V0),
+ jit_operand_gpr(JIT_OPERAND_ABI_WORD, JIT_V1),
+ jit_operand_gpr(JIT_OPERAND_ABI_UINT16, JIT_V2),
+ jit_operand_fpr(JIT_OPERAND_ABI_FLOAT, JIT_F0),
+ jit_operand_fpr(JIT_OPERAND_ABI_DOUBLE, JIT_F1),
+ jit_operand_fpr(JIT_OPERAND_ABI_FLOAT, JIT_F2),
+ };
+ jit_load_args(j, 9, args);
+ jit_stxi_c(j, offsetof(struct args, a), JIT_R0, JIT_R1); // a
+ jit_stxi_s(j, offsetof(struct args, b), JIT_R0, JIT_R2); // b
+ jit_stxi_i(j, offsetof(struct args, c), JIT_R0, JIT_V0); // c
+ jit_stxi(j, offsetof(struct args, d), JIT_R0, JIT_V1); // d
+ jit_stxi_s(j, offsetof(struct args, e), JIT_R0, JIT_V2); // e
+ jit_stxi_f(j, offsetof(struct args, f), JIT_R0, JIT_F0); // f
+ jit_stxi_d(j, offsetof(struct args, g), JIT_R0, JIT_F1); // g
+ jit_stxi_f(j, offsetof(struct args, h), JIT_R0, JIT_F2); // h
+
+ jit_leave_jit_abi(j, 3, 0, align);
+ jit_retr(j, JIT_R0); // return the struct pointer that was passed in
+
+ size_t size = 0;
+ void* ret = jit_end(j, &size);
+
+ struct args* (*f)(struct args*, int8_t, int16_t, int32_t, jit_word_t,
+ uint16_t, float, double, float) = ret;
+
+ struct args in = { 0, 1, 2, 3, 4, 5, 6, 7 }; // fields a..h in declaration order
+ struct args out;
+ ASSERT(f(&out, in.a, in.b, in.c, in.d, in.e, in.f, in.g, in.h) == &out);
+ ASSERT(in.a == out.a); // every field must have been stored unchanged
+ ASSERT(in.b == out.b);
+ ASSERT(in.c == out.c);
+ ASSERT(in.d == out.d);
+ ASSERT(in.e == out.e);
+ ASSERT(in.f == out.f);
+ ASSERT(in.g == out.g);
+ ASSERT(in.h == out.h);
+}
+
+int
+main (int argc, char *argv[])
+{
+ return main_helper(argc, argv, run_test); // delegate to main_helper with this file's run_test; its result is the exit status
+}
diff --git a/deps/lightening/tests/cas_atomic.c b/deps/lightening/tests/cas_atomic.c
new file mode 100644
index 0000000..11c9a22
--- /dev/null
+++ b/deps/lightening/tests/cas_atomic.c
@@ -0,0 +1,33 @@
+#include "test.h"
+
+static long data[] = { 0x12121212, 0x00000000, 0x34343434 }; // data[1] is the word operated on; data[0] and data[2] are checked to be left untouched
+
+static void
+run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size) // Emits a function built around jit_cas_atomic and asserts it changes data[1] from 0 to 0x0f0f0f0f.
+{
+ jit_begin(j, arena_base, arena_size);
+ size_t align = jit_enter_jit_abi(j, 0, 0, 0); // align is handed back to jit_leave_jit_abi below
+ jit_load_args_3(j, jit_operand_gpr (JIT_OPERAND_ABI_POINTER, JIT_R0),
+ jit_operand_gpr (JIT_OPERAND_ABI_INT32, JIT_R1),
+ jit_operand_gpr (JIT_OPERAND_ABI_INT32, JIT_R2));
+
+ jit_cas_atomic(j, JIT_R0, JIT_R0, JIT_R1, JIT_R2); // presumably (dst, location, expected, desired); consistent with the call f(&data[1], 0, 0x0f0f0f0f) below — TODO confirm operand order
+ jit_leave_jit_abi(j, 0, 0, align);
+ jit_ret(j);
+
+ void (*f)(void*, int32_t, int32_t) = jit_end(j, NULL); // generated code is called through this pointer
+
+ ASSERT(data[0] == 0x12121212); // pre-conditions: array still holds its initial values
+ ASSERT(data[1] == 0x00);
+ ASSERT(data[2] == 0x34343434);
+ f(&data[1], 0, 0x0f0f0f0f);
+ ASSERT(data[0] == 0x12121212); // neighbouring words must be unchanged
+ ASSERT(data[1] == 0x0f0f0f0f); // target word now holds the new value
+ ASSERT(data[2] == 0x34343434);
+}
+
+int
+main (int argc, char *argv[]) // Entry point: delegates to main_helper with run_test as the test body.
+{
+ return main_helper(argc, argv, run_test); // process exit status is main_helper's return value
+}
diff --git a/deps/lightening/tests/comr.c b/deps/lightening/tests/comr.c
new file mode 100644
index 0000000..c2e7d18
--- /dev/null
+++ b/deps/lightening/tests/comr.c
@@ -0,0 +1,41 @@
+#include "test.h"
+
+static void
+run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size) // Emits a one-argument function using jit_comr and asserts it returns the bitwise complement of its argument.
+{
+ jit_begin(j, arena_base, arena_size);
+ size_t align = jit_enter_jit_abi(j, 0, 0, 0); // align is handed back to jit_leave_jit_abi below
+ jit_load_args_1(j, jit_operand_gpr (JIT_OPERAND_ABI_WORD, JIT_R0));
+
+ jit_comr(j, JIT_R0, JIT_R0); // same register used as destination and source
+ jit_leave_jit_abi(j, 0, 0, align);
+ jit_retr(j, JIT_R0);
+
+ jit_word_t (*f)(jit_word_t) = jit_end(j, NULL); // generated code is called through this pointer
+
+#if __WORDSIZE == 32
+ ASSERT(f(0) == 0xffffffff); // 32-bit expectations: every bit of the word is flipped
+ ASSERT(f(1) == 0xfffffffe);
+ ASSERT(f(0xffffffff) == 0);
+ ASSERT(f(0x80000000) == 0x7fffffff);
+ ASSERT(f(0x7fffffff) == 0x80000000);
+ ASSERT(f(0x80000001) == 0x7ffffffe);
+#else
+ ASSERT(f(0) == 0xffffffffffffffff); // wider-word expectations: the upper 32 bits flip as well
+ ASSERT(f(1) == 0xfffffffffffffffe);
+ ASSERT(f(0xffffffff) == 0xffffffff00000000);
+ ASSERT(f(0x80000000) == 0xffffffff7fffffff);
+ ASSERT(f(0x7fffffff) == 0xffffffff80000000);
+ ASSERT(f(0x80000001) == 0xffffffff7ffffffe);
+ ASSERT(f(0xffffffffffffffff) == 0);
+ ASSERT(f(0x8000000000000000) == 0x7fffffffffffffff);
+ ASSERT(f(0x7fffffffffffffff) == 0x8000000000000000);
+ ASSERT(f(0x8000000000000001) == 0x7ffffffffffffffe);
+#endif
+}
+
+int
+main (int argc, char *argv[]) // Entry point: delegates to main_helper with run_test as the test body.
+{
+ return main_helper(argc, argv, run_test); // process exit status is main_helper's return value
+}
diff --git a/deps/lightening/tests/divr.c b/deps/lightening/tests/divr.c
new file mode 100644
index 0000000..399d70d
--- /dev/null
+++ b/deps/lightening/tests/divr.c
@@ -0,0 +1,60 @@
+#include "test.h"
+
+static void
+run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size) // Emits a two-argument function using jit_divr and asserts signed-division quotients for boundary operands.
+{
+ jit_begin(j, arena_base, arena_size);
+ size_t align = jit_enter_jit_abi(j, 0, 0, 0); // align is handed back to jit_leave_jit_abi below
+ jit_load_args_2(j, jit_operand_gpr (JIT_OPERAND_ABI_WORD, JIT_R0),
+ jit_operand_gpr (JIT_OPERAND_ABI_WORD, JIT_R1));
+
+ jit_divr(j, JIT_R0, JIT_R0, JIT_R1); // R0 = R0 / R1, per the assertions below
+ jit_leave_jit_abi(j, 0, 0, align);
+ jit_retr(j, JIT_R0);
+
+ size_t size = 0;
+ void* ret = jit_end(j, &size); // size is filled in by jit_end but not inspected afterwards
+
+ jit_word_t (*f)(jit_word_t, jit_word_t) = ret;
+
+ ASSERT(f(0x7fffffff, 1) == 0x7fffffff); // cases whose expected result is the same for both word sizes
+ ASSERT(f(1, 0x7fffffff) == 0);
+ ASSERT(f(0x80000000, 1) == 0x80000000);
+ ASSERT(f(1, 0x80000000) == 0);
+ ASSERT(f(0x7fffffff, 2) == 0x3fffffff);
+ ASSERT(f(2, 0x7fffffff) == 0);
+ ASSERT(f(2, 0x80000000) == 0);
+ ASSERT(f(0x7fffffff, 0x80000000) == 0);
+ ASSERT(f(0, 0x7fffffff) == 0);
+ ASSERT(f(0xffffffff, 0xffffffff) == 1);
+#if __WORDSIZE == 32
+ ASSERT(f(0x80000000, 2) == 0xc0000000); // 32-bit words: 0x80000000 and 0xffffffff have the sign bit set, so the quotient is signed
+ ASSERT(f(0x80000000, 0x7fffffff) == 0xffffffff);
+ ASSERT(f(0x7fffffff, 0xffffffff) == 0x80000001);
+ ASSERT(f(0xffffffff, 0x7fffffff) == 0);
+#else
+ ASSERT(f(0x80000000, 2) == 0x40000000); // wider words: the same 32-bit patterns are positive values
+ ASSERT(f(0x80000000, 0x7fffffff) == 1);
+ ASSERT(f(0x7fffffff, 0xffffffff) == 0);
+ ASSERT(f(0xffffffff, 0x7fffffff) == 2);
+ ASSERT(f(0x7fffffffffffffff, 1) == 0x7fffffffffffffff); // 64-bit boundary operands
+ ASSERT(f(1, 0x7fffffffffffffff) == 0);
+ ASSERT(f(0x8000000000000000, 1) == 0x8000000000000000);
+ ASSERT(f(1, 0x8000000000000000) == 0);
+ ASSERT(f(0x7fffffffffffffff, 2) == 0x3fffffffffffffff);
+ ASSERT(f(2, 0x7fffffffffffffff) == 0);
+ ASSERT(f(0x8000000000000000, 2) == 0xc000000000000000);
+ ASSERT(f(2, 0x8000000000000000) == 0);
+ ASSERT(f(0x7fffffffffffffff, 0x8000000000000000) == 0);
+ ASSERT(f(0x8000000000000000, 0x7fffffffffffffff) == 0xffffffffffffffff);
+ ASSERT(f(0x7fffffffffffffff, 0xffffffffffffffff) == 0x8000000000000001);
+ ASSERT(f(0xffffffffffffffff, 0x7fffffffffffffff) == 0);
+ ASSERT(f(0xffffffffffffffff, 0xffffffffffffffff) == 1);
+#endif
+}
+
+int
+main (int argc, char *argv[]) // Entry point: delegates to main_helper with run_test as the test body.
+{
+ return main_helper(argc, argv, run_test); // process exit status is main_helper's return value
+}
diff --git a/deps/lightening/tests/divr_d.c b/deps/lightening/tests/divr_d.c
new file mode 100644
index 0000000..9d21cb5
--- /dev/null
+++ b/deps/lightening/tests/divr_d.c
@@ -0,0 +1,27 @@
+#include "test.h"
+
+static void
+run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size) // Emits a function using jit_divr_d on two double arguments and asserts two quotients.
+{
+ jit_begin(j, arena_base, arena_size);
+ size_t align = jit_enter_jit_abi(j, 0, 0, 0); // align is handed back to jit_leave_jit_abi below
+ jit_load_args_2(j, jit_operand_fpr (JIT_OPERAND_ABI_DOUBLE, JIT_F0),
+ jit_operand_fpr (JIT_OPERAND_ABI_DOUBLE, JIT_F1));
+
+ jit_divr_d(j, JIT_F0, JIT_F0, JIT_F1); // F0 = F0 / F1, per the assertions below
+ jit_leave_jit_abi(j, 0, 0, align);
+ jit_retr_d(j, JIT_F0);
+
+ size_t size = 0;
+ void* ret = jit_end(j, &size); // size is filled in by jit_end but not inspected afterwards
+
+ double (*f)(double, double) = ret;
+ ASSERT(f(-0.5f, 0.5f) == -1.0f); // float literals are converted to double at the call; all values here are exactly representable
+ ASSERT(f(1.25f, 0.5f) == 2.5f);
+}
+
+int
+main (int argc, char *argv[]) // Entry point: delegates to main_helper with run_test as the test body.
+{
+ return main_helper(argc, argv, run_test); // process exit status is main_helper's return value
+}
diff --git a/deps/lightening/tests/divr_f.c b/deps/lightening/tests/divr_f.c
new file mode 100644
index 0000000..de519dc
--- /dev/null
+++ b/deps/lightening/tests/divr_f.c
@@ -0,0 +1,27 @@
+#include "test.h"
+
+static void
+run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size) // Emits a function using jit_divr_f on two float arguments and asserts two quotients.
+{
+ jit_begin(j, arena_base, arena_size);
+ size_t align = jit_enter_jit_abi(j, 0, 0, 0); // align is handed back to jit_leave_jit_abi below
+ jit_load_args_2(j, jit_operand_fpr (JIT_OPERAND_ABI_FLOAT, JIT_F0),
+ jit_operand_fpr (JIT_OPERAND_ABI_FLOAT, JIT_F1));
+
+ jit_divr_f(j, JIT_F0, JIT_F0, JIT_F1); // F0 = F0 / F1, per the assertions below
+ jit_leave_jit_abi(j, 0, 0, align);
+ jit_retr_f(j, JIT_F0);
+
+ size_t size = 0;
+ void* ret = jit_end(j, &size); // size is filled in by jit_end but not inspected afterwards
+
+ float (*f)(float, float) = ret;
+ ASSERT(f(-0.5f, 0.5f) == -1.0f); // operands and results are exactly representable, so == is safe
+ ASSERT(f(1.25f, 0.5f) == 2.5f);
+}
+
+int
+main (int argc, char *argv[]) // Entry point: delegates to main_helper with run_test as the test body.
+{
+ return main_helper(argc, argv, run_test); // process exit status is main_helper's return value
+}
diff --git a/deps/lightening/tests/divr_u.c b/deps/lightening/tests/divr_u.c
new file mode 100644
index 0000000..b8305f7
--- /dev/null
+++ b/deps/lightening/tests/divr_u.c
@@ -0,0 +1,55 @@
+#include "test.h"
+
+static void
+run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size) // Emits a two-argument function using jit_divr_u and asserts unsigned-division quotients for boundary operands.
+{
+ jit_begin(j, arena_base, arena_size);
+ size_t align = jit_enter_jit_abi(j, 0, 0, 0); // align is handed back to jit_leave_jit_abi below
+ jit_load_args_2(j, jit_operand_gpr (JIT_OPERAND_ABI_WORD, JIT_R0),
+ jit_operand_gpr (JIT_OPERAND_ABI_WORD, JIT_R1));
+
+ jit_divr_u(j, JIT_R0, JIT_R0, JIT_R1); // R0 = R0 / R1 with operands treated as unsigned, per the assertions below
+ jit_leave_jit_abi(j, 0, 0, align);
+ jit_retr(j, JIT_R0);
+
+ size_t size = 0;
+ void* ret = jit_end(j, &size); // size is filled in by jit_end but not inspected afterwards
+
+ jit_word_t (*f)(jit_word_t, jit_word_t) = ret;
+
+ ASSERT(f(0x7fffffff, 1) == 0x7fffffff); // these expectations hold for every word size because no operand is treated as negative
+ ASSERT(f(1, 0x7fffffff) == 0);
+ ASSERT(f(0x80000000, 1) == 0x80000000);
+ ASSERT(f(1, 0x80000000) == 0);
+ ASSERT(f(0x7fffffff, 2) == 0x3fffffff);
+ ASSERT(f(2, 0x7fffffff) == 0);
+ ASSERT(f(0x80000000, 2) == 0x40000000);
+ ASSERT(f(2, 0x80000000) == 0);
+ ASSERT(f(0x7fffffff, 0x80000000) == 0);
+ ASSERT(f(0x80000000, 0x7fffffff) == 1);
+ ASSERT(f(0, 0x7fffffff) == 0);
+ ASSERT(f(0x7fffffff, 0xffffffff) == 0);
+ ASSERT(f(0xffffffff, 0x7fffffff) == 2);
+ ASSERT(f(0xffffffff, 0xffffffff) == 1);
+#if __WORDSIZE != 32
+ ASSERT(f(0x7fffffffffffffff, 1) == 0x7fffffffffffffff); // 64-bit boundary operands, only compiled when the word is wider than 32 bits
+ ASSERT(f(1, 0x7fffffffffffffff) == 0);
+ ASSERT(f(0x8000000000000000, 1) == 0x8000000000000000);
+ ASSERT(f(1, 0x8000000000000000) == 0);
+ ASSERT(f(0x7fffffffffffffff, 2) == 0x3fffffffffffffff);
+ ASSERT(f(2, 0x7fffffffffffffff) == 0);
+ ASSERT(f(0x8000000000000000, 2) == 0x4000000000000000);
+ ASSERT(f(2, 0x8000000000000000) == 0);
+ ASSERT(f(0x7fffffffffffffff, 0x8000000000000000) == 0);
+ ASSERT(f(0x8000000000000000, 0x7fffffffffffffff) == 1);
+ ASSERT(f(0x7fffffffffffffff, 0xffffffffffffffff) == 0);
+ ASSERT(f(0xffffffffffffffff, 0x7fffffffffffffff) == 2);
+ ASSERT(f(0xffffffffffffffff, 0xffffffffffffffff) == 1);
+#endif
+}
+
+int
+main (int argc, char *argv[]) // Entry point: delegates to main_helper with run_test as the test body.
+{
+ return main_helper(argc, argv, run_test); // process exit status is main_helper's return value
+}
diff --git a/deps/lightening/tests/extr_c.c b/deps/lightening/tests/extr_c.c
new file mode 100644
index 0000000..043068d
--- /dev/null
+++ b/deps/lightening/tests/extr_c.c
@@ -0,0 +1,27 @@
+#include "test.h"
+
+static void
+run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size) // Emits a function using jit_extr_c and asserts it sign-extends the low 8 bits of its argument.
+{
+ jit_begin(j, arena_base, arena_size);
+ size_t align = jit_enter_jit_abi(j, 0, 0, 0); // align is handed back to jit_leave_jit_abi below
+ jit_load_args_1(j, jit_operand_gpr (JIT_OPERAND_ABI_WORD, JIT_R1));
+
+ jit_extr_c(j, JIT_R0, JIT_R1); // argument arrives in R1, result is produced in R0
+ jit_leave_jit_abi(j, 0, 0, align);
+ jit_retr(j, JIT_R0);
+
+ jit_uword_t (*f)(jit_uword_t) = jit_end(j, NULL); // generated code is called through this pointer
+
+ ASSERT(f(0) == 0);
+ ASSERT(f(1) == 1);
+ ASSERT(f(0xf) == 0xf);
+ ASSERT(f(0xff) == -1); // bit 7 set: result is all ones (-1 converts to the unsigned return type for the comparison)
+ ASSERT(f(0xfff) == -1); // bits above the low byte are ignored
+}
+
+int
+main (int argc, char *argv[]) // Entry point: delegates to main_helper with run_test as the test body.
+{
+ return main_helper(argc, argv, run_test); // process exit status is main_helper's return value
+}
diff --git a/deps/lightening/tests/extr_d.c b/deps/lightening/tests/extr_d.c
new file mode 100644
index 0000000..af0fe91
--- /dev/null
+++ b/deps/lightening/tests/extr_d.c
@@ -0,0 +1,25 @@
+#include "test.h"
+
+static void
+run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size) // Emits a function using jit_extr_d and asserts it converts a word-sized integer to the equal double.
+{
+ jit_begin(j, arena_base, arena_size);
+ size_t align = jit_enter_jit_abi(j, 0, 0, 0); // align is handed back to jit_leave_jit_abi below
+ jit_load_args_1(j, jit_operand_gpr (JIT_OPERAND_ABI_WORD, JIT_R0));
+
+ jit_extr_d(j, JIT_F0, JIT_R0); // integer register in, floating-point register out
+ jit_leave_jit_abi(j, 0, 0, align);
+ jit_retr_d(j, JIT_F0);
+
+ double (*f)(jit_word_t) = jit_end(j, NULL); // generated code is called through this pointer
+
+ ASSERT(f(0) == 0.0);
+ ASSERT(f(1) == 1.0);
+ ASSERT(f(-100) == -100.0); // negative input checks that the conversion is signed
+}
+
+int
+main (int argc, char *argv[]) // Entry point: delegates to main_helper with run_test as the test body.
+{
+ return main_helper(argc, argv, run_test); // process exit status is main_helper's return value
+}
diff --git a/deps/lightening/tests/extr_d_f.c b/deps/lightening/tests/extr_d_f.c
new file mode 100644
index 0000000..049eb5f
--- /dev/null
+++ b/deps/lightening/tests/extr_d_f.c
@@ -0,0 +1,26 @@
+#include "test.h"
+
+static void
+run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size) // Emits a function using jit_extr_d_f and asserts it converts a double argument to the equal float.
+{
+ jit_begin(j, arena_base, arena_size);
+ size_t align = jit_enter_jit_abi(j, 0, 0, 0); // align is handed back to jit_leave_jit_abi below
+ jit_load_args_1(j, jit_operand_fpr (JIT_OPERAND_ABI_DOUBLE, JIT_F0));
+
+ jit_extr_d_f(j, JIT_F0, JIT_F0); // same register used as destination and source
+ jit_leave_jit_abi(j, 0, 0, align);
+ jit_retr_f(j, JIT_F0);
+
+ float (*f)(double) = jit_end(j, NULL); // generated code is called through this pointer
+
+ ASSERT(f(0.0) == 0.0f);
+ ASSERT(f(0.5) == 0.5f);
+ ASSERT(f(1.0 / 0.0) == 1.0f / 0.0f); // infinity must convert to infinity
+ ASSERT(f(1.25) == 1.25f);
+}
+
+int
+main (int argc, char *argv[]) // Entry point: delegates to main_helper with run_test as the test body.
+{
+ return main_helper(argc, argv, run_test); // process exit status is main_helper's return value
+}
diff --git a/deps/lightening/tests/extr_f.c b/deps/lightening/tests/extr_f.c
new file mode 100644
index 0000000..b57830c
--- /dev/null
+++ b/deps/lightening/tests/extr_f.c
@@ -0,0 +1,25 @@
+#include "test.h"
+
+static void
+run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size) // Emits a function using jit_extr_f and asserts it converts a word-sized integer to the equal float.
+{
+ jit_begin(j, arena_base, arena_size);
+ size_t align = jit_enter_jit_abi(j, 0, 0, 0); // align is handed back to jit_leave_jit_abi below
+ jit_load_args_1(j, jit_operand_gpr (JIT_OPERAND_ABI_WORD, JIT_R0));
+
+ jit_extr_f(j, JIT_F0, JIT_R0); // integer register in, floating-point register out
+ jit_leave_jit_abi(j, 0, 0, align);
+ jit_retr_f(j, JIT_F0);
+
+ float (*f)(jit_word_t) = jit_end(j, NULL); // generated code is called through this pointer
+
+ ASSERT(f(0) == 0.0f);
+ ASSERT(f(1) == 1.0f);
+ ASSERT(f(-100) == -100.0f); // negative input checks that the conversion is signed
+}
+
+int
+main (int argc, char *argv[]) // Entry point: delegates to main_helper with run_test as the test body.
+{
+ return main_helper(argc, argv, run_test); // process exit status is main_helper's return value
+}
diff --git a/deps/lightening/tests/extr_f_d.c b/deps/lightening/tests/extr_f_d.c
new file mode 100644
index 0000000..5fa5007
--- /dev/null
+++ b/deps/lightening/tests/extr_f_d.c
@@ -0,0 +1,26 @@
+#include "test.h"
+
+static void
+run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size) // Emits a function using jit_extr_f_d and asserts it converts a float argument to the equal double.
+{
+ jit_begin(j, arena_base, arena_size);
+ size_t align = jit_enter_jit_abi(j, 0, 0, 0); // align is handed back to jit_leave_jit_abi below
+ jit_load_args_1(j, jit_operand_fpr (JIT_OPERAND_ABI_FLOAT, JIT_F0));
+
+ jit_extr_f_d(j, JIT_F0, JIT_F0); // same register used as destination and source
+ jit_leave_jit_abi(j, 0, 0, align);
+ jit_retr_d(j, JIT_F0);
+
+ double (*f)(float) = jit_end(j, NULL); // generated code is called through this pointer
+
+ ASSERT(f(0.0f) == 0.0);
+ ASSERT(f(0.5f) == 0.5);
+ ASSERT(f(1.0f / 0.0f) == 1.0 / 0.0); // infinity must convert to infinity
+ ASSERT(f(1.25f) == 1.25);
+}
+
+int
+main (int argc, char *argv[]) // Entry point: delegates to main_helper with run_test as the test body.
+{
+ return main_helper(argc, argv, run_test); // process exit status is main_helper's return value
+}
diff --git a/deps/lightening/tests/extr_i.c b/deps/lightening/tests/extr_i.c
new file mode 100644
index 0000000..d26a576
--- /dev/null
+++ b/deps/lightening/tests/extr_i.c
@@ -0,0 +1,30 @@
+#include "test.h"
+
+static void
+run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size) // On words wider than 32 bits, emits a function using jit_extr_i and asserts it sign-extends the low 32 bits; otherwise does nothing.
+{
+#if __WORDSIZE > 32
+ jit_begin(j, arena_base, arena_size);
+ size_t align = jit_enter_jit_abi(j, 0, 0, 0); // align is handed back to jit_leave_jit_abi below
+ jit_load_args_1(j, jit_operand_gpr (JIT_OPERAND_ABI_WORD, JIT_R1));
+
+ jit_extr_i(j, JIT_R0, JIT_R1); // argument arrives in R1, result is produced in R0
+ jit_leave_jit_abi(j, 0, 0, align);
+ jit_retr(j, JIT_R0);
+
+ jit_uword_t (*f)(jit_uword_t) = jit_end(j, NULL); // generated code is called through this pointer
+
+ ASSERT(f(0) == 0);
+ ASSERT(f(1) == 1);
+ ASSERT(f(0xfffffff) == 0xfffffff);
+ ASSERT(f(0xffffffff) == -1); // bit 31 set: result is all ones
+ ASSERT(f(0xfffffffff) == -1); // bits above the low 32 are ignored
+ ASSERT(f(0xf00000000) == 0);
+#endif
+}
+
+int
+main (int argc, char *argv[]) // Entry point: delegates to main_helper with run_test as the test body.
+{
+ return main_helper(argc, argv, run_test); // process exit status is main_helper's return value
+}
diff --git a/deps/lightening/tests/extr_s.c b/deps/lightening/tests/extr_s.c
new file mode 100644
index 0000000..5b39af3
--- /dev/null
+++ b/deps/lightening/tests/extr_s.c
@@ -0,0 +1,28 @@
+#include "test.h"
+
+static void
+run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size) // Emits a function using jit_extr_s and asserts it sign-extends the low 16 bits of its argument.
+{
+ jit_begin(j, arena_base, arena_size);
+ size_t align = jit_enter_jit_abi(j, 0, 0, 0); // align is handed back to jit_leave_jit_abi below
+ jit_load_args_1(j, jit_operand_gpr (JIT_OPERAND_ABI_WORD, JIT_R1));
+
+ jit_extr_s(j, JIT_R0, JIT_R1); // argument arrives in R1, result is produced in R0
+ jit_leave_jit_abi(j, 0, 0, align);
+ jit_retr(j, JIT_R0);
+
+ jit_uword_t (*f)(jit_uword_t) = jit_end(j, NULL); // generated code is called through this pointer
+
+ ASSERT(f(0) == 0);
+ ASSERT(f(1) == 1);
+ ASSERT(f(0xfff) == 0xfff);
+ ASSERT(f(0xffff) == -1); // bit 15 set: result is all ones
+ ASSERT(f(0xfffff) == -1); // bits above the low 16 are ignored
+ ASSERT(f(0xf0000) == 0);
+}
+
+int
+main (int argc, char *argv[]) // Entry point: delegates to main_helper with run_test as the test body.
+{
+ return main_helper(argc, argv, run_test); // process exit status is main_helper's return value
+}
diff --git a/deps/lightening/tests/extr_uc.c b/deps/lightening/tests/extr_uc.c
new file mode 100644
index 0000000..a42e603
--- /dev/null
+++ b/deps/lightening/tests/extr_uc.c
@@ -0,0 +1,27 @@
+#include "test.h"
+
+static void
+run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size) // Emits a function using jit_extr_uc and asserts it keeps only the low 8 bits of its argument (zero-extended).
+{
+ jit_begin(j, arena_base, arena_size);
+ size_t align = jit_enter_jit_abi(j, 0, 0, 0); // align is handed back to jit_leave_jit_abi below
+ jit_load_args_1(j, jit_operand_gpr (JIT_OPERAND_ABI_WORD, JIT_R1));
+
+ jit_extr_uc(j, JIT_R0, JIT_R1); // argument arrives in R1, result is produced in R0
+ jit_leave_jit_abi(j, 0, 0, align);
+ jit_retr(j, JIT_R0);
+
+ jit_uword_t (*f)(jit_uword_t) = jit_end(j, NULL); // generated code is called through this pointer
+
+ ASSERT(f(0) == 0);
+ ASSERT(f(1) == 1);
+ ASSERT(f(0xff) == 0xff); // bit 7 set but no sign extension
+ ASSERT(f(0xfff) == 0xff); // bits above the low byte are dropped
+ ASSERT(f(0xf00) == 0);
+}
+
+int
+main (int argc, char *argv[]) // Entry point: delegates to main_helper with run_test as the test body.
+{
+ return main_helper(argc, argv, run_test); // process exit status is main_helper's return value
+}
diff --git a/deps/lightening/tests/extr_ui.c b/deps/lightening/tests/extr_ui.c
new file mode 100644
index 0000000..37964da
--- /dev/null
+++ b/deps/lightening/tests/extr_ui.c
@@ -0,0 +1,29 @@
+#include "test.h"
+
+static void
+run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size) // On words wider than 32 bits, emits a function using jit_extr_ui and asserts it keeps only the low 32 bits; otherwise does nothing.
+{
+#if __WORDSIZE > 32
+ jit_begin(j, arena_base, arena_size);
+ size_t align = jit_enter_jit_abi(j, 0, 0, 0); // align is handed back to jit_leave_jit_abi below
+ jit_load_args_1(j, jit_operand_gpr (JIT_OPERAND_ABI_WORD, JIT_R1));
+
+ jit_extr_ui(j, JIT_R0, JIT_R1); // argument arrives in R1, result is produced in R0
+ jit_leave_jit_abi(j, 0, 0, align);
+ jit_retr(j, JIT_R0);
+
+ jit_uword_t (*f)(jit_uword_t) = jit_end(j, NULL); // generated code is called through this pointer
+
+ ASSERT(f(0) == 0);
+ ASSERT(f(1) == 1);
+ ASSERT(f(0xffffffff) == 0xffffffff); // bit 31 set but no sign extension
+ ASSERT(f(0xfffffffff) == 0xffffffff); // bits above the low 32 are dropped
+ ASSERT(f(0xf00000000) == 0);
+#endif
+}
+
+int
+main (int argc, char *argv[]) // Entry point: delegates to main_helper with run_test as the test body.
+{
+ return main_helper(argc, argv, run_test); // process exit status is main_helper's return value
+}
diff --git a/deps/lightening/tests/extr_us.c b/deps/lightening/tests/extr_us.c
new file mode 100644
index 0000000..38a7c39
--- /dev/null
+++ b/deps/lightening/tests/extr_us.c
@@ -0,0 +1,27 @@
+#include "test.h"
+
+static void
+run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size) // Emits a function using jit_extr_us and asserts it keeps only the low 16 bits of its argument (zero-extended).
+{
+ jit_begin(j, arena_base, arena_size);
+ size_t align = jit_enter_jit_abi(j, 0, 0, 0); // align is handed back to jit_leave_jit_abi below
+ jit_load_args_1(j, jit_operand_gpr (JIT_OPERAND_ABI_WORD, JIT_R1));
+
+ jit_extr_us(j, JIT_R0, JIT_R1); // argument arrives in R1, result is produced in R0
+ jit_leave_jit_abi(j, 0, 0, align);
+ jit_retr(j, JIT_R0);
+
+ jit_uword_t (*f)(jit_uword_t) = jit_end(j, NULL); // generated code is called through this pointer
+
+ ASSERT(f(0) == 0);
+ ASSERT(f(1) == 1);
+ ASSERT(f(0xffff) == 0xffff); // bit 15 set but no sign extension
+ ASSERT(f(0xfffff) == 0xffff); // bits above the low 16 are dropped
+ ASSERT(f(0xf0000) == 0);
+}
+
+int
+main (int argc, char *argv[]) // Entry point: delegates to main_helper with run_test as the test body.
+{
+ return main_helper(argc, argv, run_test); // process exit status is main_helper's return value
+}
diff --git a/deps/lightening/tests/jmp0.c b/deps/lightening/tests/jmp0.c
new file mode 100644
index 0000000..261a399
--- /dev/null
+++ b/deps/lightening/tests/jmp0.c
@@ -0,0 +1,24 @@
+#include "test.h"
+
+static void
+run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size) // Emits a jump whose target is the very next emitted position and asserts the argument is returned unchanged.
+{
+ jit_begin(j, arena_base, arena_size);
+ size_t align = jit_enter_jit_abi(j, 0, 0, 0); // align is handed back to jit_leave_jit_abi below
+ jit_load_args_1(j, jit_operand_gpr (JIT_OPERAND_ABI_WORD, JIT_R0));
+
+ jit_reloc_t r = jit_jmp(j);
+ jit_patch_here(j, r); // patched immediately, with nothing emitted in between
+ jit_leave_jit_abi(j, 0, 0, align);
+ jit_retr(j, JIT_R0);
+
+ jit_word_t (*f)(jit_word_t) = jit_end(j, NULL); // generated code is called through this pointer
+ ASSERT(f(42) == 42);
+ ASSERT(f(-1) == -1);
+}
+
+int
+main (int argc, char *argv[]) // Entry point: delegates to main_helper with run_test as the test body.
+{
+ return main_helper(argc, argv, run_test); // process exit status is main_helper's return value
+}
diff --git a/deps/lightening/tests/jmp_table.c b/deps/lightening/tests/jmp_table.c
new file mode 100644
index 0000000..f90ab16
--- /dev/null
+++ b/deps/lightening/tests/jmp_table.c
@@ -0,0 +1,61 @@
+#include "test.h"
+
+#define NTARGETS ((size_t) 4) // number of jump-table entries
+
+static void
+run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size) // Emits a jump-table dispatch: in-range indices return i*i, every other argument returns 42.
+{
+ jit_begin(j, arena_base, arena_size);
+ size_t align = jit_enter_jit_abi(j, 0, 0, 0); // align is handed back to jit_leave_jit_abi below
+ jit_load_args_1(j, jit_operand_gpr (JIT_OPERAND_ABI_POINTER, JIT_R0));
+
+ jit_reloc_t default_target = jit_bgei_u(j, JIT_R0, NTARGETS); // out-of-range index branches to the default case; negative arguments also land there per the asserts below
+
+ // FIXME: need ldxr with word stride, then can eliminate lshi.
+ jit_lshi(j, JIT_R0, JIT_R0, sizeof(intptr_t) == 4 ? 2 : 3); // scale the index by sizeof(intptr_t)
+ jit_reloc_t table = jit_mov_addr(j, JIT_R1); // patched below to the start of the table
+ jit_ldxr(j, JIT_R1, JIT_R1, JIT_R0);
+ jit_jmpr(j, JIT_R1);
+
+ jit_begin_data (j, (NTARGETS + 1) * sizeof(intptr_t)); // one slot more than NTARGETS — presumably headroom for the alignment on the next line
+ jit_align(j, sizeof(intptr_t));
+ jit_patch_here(j, table);
+ jit_reloc_t targets[NTARGETS];
+ jit_reloc_t tails[NTARGETS];
+ for (size_t i = 0; i < NTARGETS; i++) {
+ targets[i] = jit_emit_addr(j); // one address slot per target, patched in the next loop
+ }
+ jit_end_data (j);
+
+ for (size_t i = 0; i < NTARGETS; i++) {
+ jit_patch_here(j, targets[i]);
+ jit_movi(j, JIT_R0, i * i); // case i yields i squared
+ tails[i] = jit_jmp(j); // jump to the common exit, patched below
+ }
+
+ jit_patch_here(j, default_target);
+ jit_movi(j, JIT_R0, 42); // default case value
+ for (int i = 0; i < NTARGETS; i++) { // NOTE(review): int compared against size_t here; the two loops above use size_t
+ jit_patch_here(j, tails[i]);
+ }
+ jit_leave_jit_abi(j, 0, 0, align);
+ jit_retr(j, JIT_R0);
+
+ jit_word_t (*f)(jit_word_t) = jit_end(j, NULL); // generated code is called through this pointer
+
+ for (int i = -2; i < ((int) NTARGETS) + 2; i++) { // probes two values below and two above the valid index range
+ if (i < 0) {
+ ASSERT(f(i) == 42);
+ } else if (i < NTARGETS) {
+ ASSERT(f(i) == i * i);
+ } else {
+ ASSERT(f(i) == 42);
+ }
+ }
+}
+
+int
+main (int argc, char *argv[]) // Entry point: delegates to main_helper with run_test as the test body.
+{
+ return main_helper(argc, argv, run_test); // process exit status is main_helper's return value
+}
diff --git a/deps/lightening/tests/jmpi.c b/deps/lightening/tests/jmpi.c
new file mode 100644
index 0000000..e73ace0
--- /dev/null
+++ b/deps/lightening/tests/jmpi.c
@@ -0,0 +1,41 @@
+#include "test.h"
+
+void *tail; // NOTE(review): not referenced anywhere else in this file
+
+static void *target; // address inside the code emitted by make_target; jumped to by run_test
+
+static void
+run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size) // Emits code that loads 42 into R0 and jumps to `target` with jit_jmpi, then asserts the call returns 42.
+{
+ jit_begin(j, arena_base, arena_size);
+ jit_enter_jit_abi(j, 0, 0, 0); // return value unused: no jit_leave_jit_abi is emitted in this function
+ jit_movi(j, JIT_R0, 42);
+ jit_jmpi(j, target);
+ // Unreachable.
+ jit_breakpoint(j);
+ int (*f)(void) = jit_end(j, NULL); // generated code is called through this pointer
+ ASSERT(f() == 42);
+}
+
+// Make the tail-call target via a separate main_helper because probably the new
+// arena will be allocated farther away, forcing nonlocal jumps.
+static void
+make_target(jit_state_t *j, uint8_t *arena_base, size_t arena_size) // Emits the jump target, records its address in `target`, then runs run_test through a nested main_helper call.
+{
+ jit_begin(j, arena_base, arena_size);
+ size_t align = jit_enter_jit_abi(j, 0, 0, 0); // align is handed back to jit_leave_jit_abi below
+ // Tail call target assumes tail caller called enter_jit_abi with compatible
+ // parameters.
+ target = jit_address(j); // recorded after the enter sequence, so a jump here skips it
+ jit_leave_jit_abi(j, 0, 0, align);
+ jit_retr(j, JIT_R0);
+ jit_end(j, NULL); // returned entry pointer is not needed; only `target` is used
+
+ main_helper(0, NULL, run_test); // NOTE(review): the nested main_helper return value is ignored
+}
+
+int
+main (int argc, char *argv[]) // Entry point: delegates to main_helper with make_target, which in turn runs run_test.
+{
+ return main_helper(argc, argv, make_target); // process exit status is main_helper's return value
+}
diff --git a/deps/lightening/tests/jmpi_local.c b/deps/lightening/tests/jmpi_local.c
new file mode 100644
index 0000000..49e4507
--- /dev/null
+++ b/deps/lightening/tests/jmpi_local.c
@@ -0,0 +1,25 @@
+#include "test.h"
+
+static void
+run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size) // Emits a jit_jmpi to an address recorded earlier in the same code and asserts the function returns 1.
+{
+ jit_begin(j, arena_base, arena_size);
+
+ jit_reloc_t r = jit_jmp (j); // forward jump, patched below to land on the jit_jmpi
+ jit_reti (j, 0); // skipped by the forward jump
+ jit_pointer_t addr = jit_address (j); // address of the "return 1" sequence emitted next
+ jit_reti (j, 1);
+ jit_patch_here (j, r);
+ jit_jmpi (j, addr); // jumps back to the recorded address
+ jit_reti (j, 2); // emitted after the jit_jmpi; the assertion below expects it never runs
+
+ int (*f)(void) = jit_end(j, NULL); // generated code is called through this pointer
+
+ ASSERT(f() == 1);
+}
+
+int
+main (int argc, char *argv[]) // Entry point: delegates to main_helper with run_test as the test body.
+{
+ return main_helper(argc, argv, run_test); // process exit status is main_helper's return value
+}
diff --git a/deps/lightening/tests/jmpr.c b/deps/lightening/tests/jmpr.c
new file mode 100644
index 0000000..8840897
--- /dev/null
+++ b/deps/lightening/tests/jmpr.c
@@ -0,0 +1,23 @@
+#include "test.h"
+
+static int tail(void) { return 42; } // Plain C function used as the jump target; always returns 42.
+
+static void
+run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size) // Emits code that jumps to the address passed as its argument (jit_jmpr) and asserts the callee's value comes back.
+{
+ jit_begin(j, arena_base, arena_size);
+ size_t align = jit_enter_jit_abi(j, 0, 0, 0); // align is handed back to jit_leave_jit_abi below
+ jit_load_args_1(j, jit_operand_gpr (JIT_OPERAND_ABI_POINTER, JIT_R0));
+ jit_leave_jit_abi(j, 0, 0, align); // leave sequence is emitted before the jump, not after it
+
+ jit_jmpr(j, JIT_R0);
+
+ int (*f)(void*) = jit_end(j, NULL); // generated code is called through this pointer
+ ASSERT(f(tail) == 42); // 42 is what tail() returns
+}
+
+int
+main (int argc, char *argv[]) // Entry point: delegates to main_helper with run_test as the test body.
+{
+ return main_helper(argc, argv, run_test); // process exit status is main_helper's return value
+}
diff --git a/deps/lightening/tests/ldi_c.c b/deps/lightening/tests/ldi_c.c
new file mode 100644
index 0000000..9d5de82
--- /dev/null
+++ b/deps/lightening/tests/ldi_c.c
@@ -0,0 +1,24 @@
+#include "test.h"
+
+static uint8_t data[] = { 0xff, 0x00, 0x42 }; // only data[0] is read by this test
+
+static void
+run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size) // Emits a function using jit_ldi_c on &data[0] and asserts the byte 0xff is loaded sign-extended.
+{
+ jit_begin(j, arena_base, arena_size);
+ size_t align = jit_enter_jit_abi(j, 0, 0, 0); // align is handed back to jit_leave_jit_abi below
+
+ jit_ldi_c(j, JIT_R0, &data[0]); // address is supplied at emit time rather than in a register
+ jit_leave_jit_abi(j, 0, 0, align);
+ jit_retr(j, JIT_R0);
+
+ jit_uword_t (*f)(void) = jit_end(j, NULL); // generated code is called through this pointer
+
+ ASSERT(f() == -1); // 0xff sign-extends to all ones (-1 converts to the unsigned return type)
+}
+
+int
+main (int argc, char *argv[]) // Entry point: delegates to main_helper with run_test as the test body.
+{
+ return main_helper(argc, argv, run_test); // process exit status is main_helper's return value
+}
diff --git a/deps/lightening/tests/ldi_d.c b/deps/lightening/tests/ldi_d.c
new file mode 100644
index 0000000..b72cdda
--- /dev/null
+++ b/deps/lightening/tests/ldi_d.c
@@ -0,0 +1,24 @@
+#include "test.h"
+
+static double data = -1.5; // value the generated code is expected to load
+
+static void
+run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size) // Emits a function using jit_ldi_d on &data and asserts it returns the stored double.
+{
+ jit_begin(j, arena_base, arena_size);
+ size_t align = jit_enter_jit_abi(j, 0, 0, 0); // align is handed back to jit_leave_jit_abi below
+
+ jit_ldi_d(j, JIT_F0, &data); // address is supplied at emit time rather than in a register
+ jit_leave_jit_abi(j, 0, 0, align);
+ jit_retr_d(j, JIT_F0);
+
+ double (*f)(void) = jit_end(j, NULL); // generated code is called through this pointer
+
+ ASSERT(f() == data);
+}
+
+int
+main (int argc, char *argv[]) // Entry point: delegates to main_helper with run_test as the test body.
+{
+ return main_helper(argc, argv, run_test); // process exit status is main_helper's return value
+}
diff --git a/deps/lightening/tests/ldi_f.c b/deps/lightening/tests/ldi_f.c
new file mode 100644
index 0000000..13e5fd4
--- /dev/null
+++ b/deps/lightening/tests/ldi_f.c
@@ -0,0 +1,24 @@
+#include "test.h"
+
+static float data = -1.5f; // value the generated code is expected to load
+
+static void
+run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size) // Emits a function using jit_ldi_f on &data and asserts it returns the stored float.
+{
+ jit_begin(j, arena_base, arena_size);
+ size_t align = jit_enter_jit_abi(j, 0, 0, 0); // align is handed back to jit_leave_jit_abi below
+
+ jit_ldi_f(j, JIT_F0, &data); // address is supplied at emit time rather than in a register
+ jit_leave_jit_abi(j, 0, 0, align);
+ jit_retr_f(j, JIT_F0);
+
+ float (*f)(void) = jit_end(j, NULL); // generated code is called through this pointer
+
+ ASSERT(f() == data);
+}
+
+int
+main (int argc, char *argv[]) // Entry point: delegates to main_helper with run_test as the test body.
+{
+ return main_helper(argc, argv, run_test); // process exit status is main_helper's return value
+}
diff --git a/deps/lightening/tests/ldi_i.c b/deps/lightening/tests/ldi_i.c
new file mode 100644
index 0000000..e389788
--- /dev/null
+++ b/deps/lightening/tests/ldi_i.c
@@ -0,0 +1,24 @@
+#include "test.h"
+
+static uint32_t data = 0xffffffff; // all-ones 32-bit pattern
+
+static void
+run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size) // Emits a function using jit_ldi_i on &data and asserts the 32-bit value is returned as an all-ones word.
+{
+ jit_begin(j, arena_base, arena_size);
+ size_t align = jit_enter_jit_abi(j, 0, 0, 0); // align is handed back to jit_leave_jit_abi below
+
+ jit_ldi_i(j, JIT_R0, &data); // address is supplied at emit time rather than in a register
+ jit_leave_jit_abi(j, 0, 0, align);
+ jit_retr(j, JIT_R0);
+
+ jit_uword_t (*f)(void) = jit_end(j, NULL); // generated code is called through this pointer
+
+ ASSERT(f() == -1); // on words wider than 32 bits this requires the load to sign-extend
+}
+
+int
+main (int argc, char *argv[]) // Entry point: delegates to main_helper with run_test as the test body.
+{
+ return main_helper(argc, argv, run_test); // process exit status is main_helper's return value
+}
diff --git a/deps/lightening/tests/ldi_l.c b/deps/lightening/tests/ldi_l.c
new file mode 100644
index 0000000..f3fa729
--- /dev/null
+++ b/deps/lightening/tests/ldi_l.c
@@ -0,0 +1,26 @@
+#include "test.h"
+
+static void
+run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size) // On words wider than 32 bits, emits a function using jit_ldi_l and asserts it loads an all-ones 64-bit value; otherwise does nothing.
+{
+#if __WORDSIZE > 32
+ static uint64_t data = 0xffffffffffffffff; // static, so its address is still valid when the generated code runs
+
+ jit_begin(j, arena_base, arena_size);
+ size_t align = jit_enter_jit_abi(j, 0, 0, 0); // align is handed back to jit_leave_jit_abi below
+
+ jit_ldi_l(j, JIT_R0, &data); // address is supplied at emit time rather than in a register
+ jit_leave_jit_abi(j, 0, 0, align);
+ jit_retr(j, JIT_R0);
+
+ jit_uword_t (*f)(void) = jit_end(j, NULL); // generated code is called through this pointer
+
+ ASSERT(f() == -1);
+#endif
+}
+
+int
+main (int argc, char *argv[]) // Entry point: delegates to main_helper with run_test as the test body.
+{
+ return main_helper(argc, argv, run_test); // process exit status is main_helper's return value
+}
diff --git a/deps/lightening/tests/ldi_s.c b/deps/lightening/tests/ldi_s.c
new file mode 100644
index 0000000..d9d1c47
--- /dev/null
+++ b/deps/lightening/tests/ldi_s.c
@@ -0,0 +1,24 @@
+#include "test.h"
+
+static uint16_t data = 0xffff; // all-ones 16-bit pattern
+
+static void
+run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size) // Emits a function using jit_ldi_s on &data and asserts the 16-bit value is loaded sign-extended.
+{
+ jit_begin(j, arena_base, arena_size);
+ size_t align = jit_enter_jit_abi(j, 0, 0, 0); // align is handed back to jit_leave_jit_abi below
+
+ jit_ldi_s(j, JIT_R0, &data); // address is supplied at emit time rather than in a register
+ jit_leave_jit_abi(j, 0, 0, align);
+ jit_retr(j, JIT_R0);
+
+ jit_uword_t (*f)(void) = jit_end(j, NULL); // generated code is called through this pointer
+
+ ASSERT(f() == -1); // 0xffff sign-extends to all ones
+}
+
+int
+main (int argc, char *argv[]) // Entry point: delegates to main_helper with run_test as the test body.
+{
+ return main_helper(argc, argv, run_test); // process exit status is main_helper's return value
+}
diff --git a/deps/lightening/tests/ldi_uc.c b/deps/lightening/tests/ldi_uc.c
new file mode 100644
index 0000000..12f18bf
--- /dev/null
+++ b/deps/lightening/tests/ldi_uc.c
@@ -0,0 +1,24 @@
+#include "test.h"
+
+static uint8_t data[] = { 0xff, 0x00, 0x42 }; // only data[0] is read by this test
+
+static void
+run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size) // Emits a function using jit_ldi_uc on data and asserts the byte 0xff is loaded without sign extension.
+{
+ jit_begin(j, arena_base, arena_size);
+ size_t align = jit_enter_jit_abi(j, 0, 0, 0); // align is handed back to jit_leave_jit_abi below
+
+ jit_ldi_uc(j, JIT_R0, data); // address is supplied at emit time rather than in a register
+ jit_leave_jit_abi(j, 0, 0, align);
+ jit_retr(j, JIT_R0);
+
+ jit_uword_t (*f)(void) = jit_end(j, NULL); // generated code is called through this pointer
+
+ ASSERT(f() == 0xff); // zero-extended: upper bits must be clear
+}
+
+int
+main (int argc, char *argv[]) // Entry point: delegates to main_helper with run_test as the test body.
+{
+ return main_helper(argc, argv, run_test); // process exit status is main_helper's return value
+}
diff --git a/deps/lightening/tests/ldi_ui.c b/deps/lightening/tests/ldi_ui.c
new file mode 100644
index 0000000..d233694
--- /dev/null
+++ b/deps/lightening/tests/ldi_ui.c
@@ -0,0 +1,26 @@
+#include "test.h"
+
+static void
+run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size) // On words wider than 32 bits, emits a function using jit_ldi_ui and asserts the 32-bit value is loaded without sign extension; otherwise does nothing.
+{
+#if __WORDSIZE > 32
+ static uint32_t data[] = { 0xffffffff, 0x00000000, 0x42424242 }; // only data[0] is read by this test
+
+ jit_begin(j, arena_base, arena_size);
+ size_t align = jit_enter_jit_abi(j, 0, 0, 0); // align is handed back to jit_leave_jit_abi below
+
+ jit_ldi_ui(j, JIT_R0, data); // address is supplied at emit time rather than in a register
+ jit_leave_jit_abi(j, 0, 0, align);
+ jit_retr(j, JIT_R0);
+
+ jit_uword_t (*f)(void) = jit_end(j, NULL); // generated code is called through this pointer
+
+ ASSERT(f() == data[0]); // data[0] is unsigned, so the comparison requires the upper bits to be clear
+#endif
+}
+
+int
+main (int argc, char *argv[]) // Entry point: delegates to main_helper with run_test as the test body.
+{
+ return main_helper(argc, argv, run_test); // process exit status is main_helper's return value
+}
diff --git a/deps/lightening/tests/ldi_us.c b/deps/lightening/tests/ldi_us.c
new file mode 100644
index 0000000..70eb4a0
--- /dev/null
+++ b/deps/lightening/tests/ldi_us.c
@@ -0,0 +1,24 @@
+#include "test.h"
+
+static uint16_t data[] = { 0xffff, 0x0000, 0x4242 }; // only data[0] is read by this test
+
+static void
+run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size) // Emits a function using jit_ldi_us on data and asserts the 16-bit value is loaded without sign extension.
+{
+ jit_begin(j, arena_base, arena_size);
+ size_t align = jit_enter_jit_abi(j, 0, 0, 0); // align is handed back to jit_leave_jit_abi below
+
+ jit_ldi_us(j, JIT_R0, data); // address is supplied at emit time rather than in a register
+ jit_leave_jit_abi(j, 0, 0, align);
+ jit_retr(j, JIT_R0);
+
+ jit_uword_t (*f)(void) = jit_end(j, NULL); // generated code is called through this pointer
+
+ ASSERT(f() == data[0]); // data[0] promotes to 0xffff, so the upper bits of the result must be clear
+}
+
+int
+main (int argc, char *argv[]) // Entry point: delegates to main_helper with run_test as the test body.
+{
+ return main_helper(argc, argv, run_test); // process exit status is main_helper's return value
+}
diff --git a/deps/lightening/tests/ldr_atomic.c b/deps/lightening/tests/ldr_atomic.c
new file mode 100644
index 0000000..73a8c0f
--- /dev/null
+++ b/deps/lightening/tests/ldr_atomic.c
@@ -0,0 +1,28 @@
+#include "test.h"
+
+static long data[] = { 0x0f0f0f0f, 0x00000000, 0x42424242 }; // three values, each loaded once below
+
+static void
+run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size) // Emits a function using jit_ldr_atomic on its pointer argument and asserts it returns the pointed-to value.
+{
+ jit_begin(j, arena_base, arena_size);
+ size_t align = jit_enter_jit_abi(j, 0, 0, 0); // align is handed back to jit_leave_jit_abi below
+ jit_load_args_1(j, jit_operand_gpr (JIT_OPERAND_ABI_POINTER, JIT_R1));
+
+ /* At the moment this does not test for actual atomicity, only that no segfaults etc. happen. */
+ jit_ldr_atomic(j, JIT_R0, JIT_R1); // pointer arrives in R1, loaded value is produced in R0
+ jit_leave_jit_abi(j, 0, 0, align);
+ jit_retr(j, JIT_R0);
+
+ jit_uword_t (*f)(void*) = jit_end(j, NULL); // generated code is called through this pointer
+
+ ASSERT(f(&data[0]) == 0x0f0f0f0f);
+ ASSERT(f(&data[1]) == 0);
+ ASSERT(f(&data[2]) == 0x42424242);
+}
+
+int
+main (int argc, char *argv[]) // Entry point: delegates to main_helper with run_test as the test body.
+{
+ return main_helper(argc, argv, run_test); // process exit status is main_helper's return value
+}
diff --git a/deps/lightening/tests/ldr_c.c b/deps/lightening/tests/ldr_c.c
new file mode 100644
index 0000000..07a5931
--- /dev/null
+++ b/deps/lightening/tests/ldr_c.c
@@ -0,0 +1,27 @@
+#include "test.h"
+
+static uint8_t data[] = { 0xff, 0x00, 0x42 }; // one byte with bit 7 set, one zero, one small positive
+
+static void
+run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size) // Emits a function using jit_ldr_c on its pointer argument and asserts bytes are loaded sign-extended.
+{
+ jit_begin(j, arena_base, arena_size);
+ size_t align = jit_enter_jit_abi(j, 0, 0, 0); // align is handed back to jit_leave_jit_abi below
+ jit_load_args_1(j, jit_operand_gpr (JIT_OPERAND_ABI_POINTER, JIT_R1));
+
+ jit_ldr_c(j, JIT_R0, JIT_R1); // pointer arrives in R1, loaded value is produced in R0
+ jit_leave_jit_abi(j, 0, 0, align);
+ jit_retr(j, JIT_R0);
+
+ jit_uword_t (*f)(void*) = jit_end(j, NULL); // generated code is called through this pointer
+
+ ASSERT(f(&data[0]) == -1); // 0xff sign-extends to all ones
+ ASSERT(f(&data[1]) == 0);
+ ASSERT(f(&data[2]) == 0x42);
+}
+
+int
+main (int argc, char *argv[]) // Entry point: delegates to main_helper with run_test as the test body.
+{
+ return main_helper(argc, argv, run_test); // process exit status is main_helper's return value
+}
diff --git a/deps/lightening/tests/ldr_d.c b/deps/lightening/tests/ldr_d.c
new file mode 100644
index 0000000..37c75f0
--- /dev/null
+++ b/deps/lightening/tests/ldr_d.c
@@ -0,0 +1,27 @@
+#include "test.h"
+
+static double data[] = { -1.0, 0.0, 0.5 }; // three values, each loaded once below
+
+static void
+run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size) // Emits a function using jit_ldr_d on its pointer argument and asserts it returns the pointed-to double.
+{
+ jit_begin(j, arena_base, arena_size);
+ size_t align = jit_enter_jit_abi(j, 0, 0, 0); // align is handed back to jit_leave_jit_abi below
+ jit_load_args_1(j, jit_operand_gpr (JIT_OPERAND_ABI_POINTER, JIT_R1));
+
+ jit_ldr_d(j, JIT_F0, JIT_R1); // pointer arrives in R1, loaded value is produced in F0
+ jit_leave_jit_abi(j, 0, 0, align);
+ jit_retr_d(j, JIT_F0);
+
+ double (*f)(void*) = jit_end(j, NULL); // generated code is called through this pointer
+
+ ASSERT(f(&data[0]) == data[0]);
+ ASSERT(f(&data[1]) == data[1]);
+ ASSERT(f(&data[2]) == data[2]);
+}
+
+int
+main (int argc, char *argv[]) // Entry point: delegates to main_helper with run_test as the test body.
+{
+ return main_helper(argc, argv, run_test); // process exit status is main_helper's return value
+}
diff --git a/deps/lightening/tests/ldr_f.c b/deps/lightening/tests/ldr_f.c
new file mode 100644
index 0000000..bb68278
--- /dev/null
+++ b/deps/lightening/tests/ldr_f.c
@@ -0,0 +1,27 @@
+#include "test.h"
+
+static float data[] = { -1.0, 0.0, 0.5 }; // three values, each loaded once below
+
+static void
+run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size) // Emits a function using jit_ldr_f on its pointer argument and asserts it returns the pointed-to float.
+{
+ jit_begin(j, arena_base, arena_size);
+ size_t align = jit_enter_jit_abi(j, 0, 0, 0); // align is handed back to jit_leave_jit_abi below
+ jit_load_args_1(j, jit_operand_gpr (JIT_OPERAND_ABI_POINTER, JIT_R1));
+
+ jit_ldr_f(j, JIT_F0, JIT_R1); // pointer arrives in R1, loaded value is produced in F0
+ jit_leave_jit_abi(j, 0, 0, align);
+ jit_retr_f(j, JIT_F0);
+
+ float (*f)(void*) = jit_end(j, NULL); // generated code is called through this pointer
+
+ ASSERT(f(&data[0]) == data[0]);
+ ASSERT(f(&data[1]) == data[1]);
+ ASSERT(f(&data[2]) == data[2]);
+}
+
+int
+main (int argc, char *argv[]) // Entry point: delegates to main_helper with run_test as the test body.
+{
+ return main_helper(argc, argv, run_test); // process exit status is main_helper's return value
+}
diff --git a/deps/lightening/tests/ldr_i.c b/deps/lightening/tests/ldr_i.c
new file mode 100644
index 0000000..3de9e5f
--- /dev/null
+++ b/deps/lightening/tests/ldr_i.c
@@ -0,0 +1,27 @@
+#include "test.h"
+
+static uint32_t data[] = { 0xffffffff, 0x00000000, 0x42424242 }; // one value with bit 31 set, one zero, one positive
+
+static void
+run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size) // Emits a function using jit_ldr_i on its pointer argument and asserts 32-bit values are loaded as expected.
+{
+ jit_begin(j, arena_base, arena_size);
+ size_t align = jit_enter_jit_abi(j, 0, 0, 0); // align is handed back to jit_leave_jit_abi below
+ jit_load_args_1(j, jit_operand_gpr (JIT_OPERAND_ABI_POINTER, JIT_R1));
+
+ jit_ldr_i(j, JIT_R0, JIT_R1); // pointer arrives in R1, loaded value is produced in R0
+ jit_leave_jit_abi(j, 0, 0, align);
+ jit_retr(j, JIT_R0);
+
+ jit_uword_t (*f)(void*) = jit_end(j, NULL); // generated code is called through this pointer
+
+ ASSERT(f(&data[0]) == -1); // on words wider than 32 bits this requires the load to sign-extend
+ ASSERT(f(&data[1]) == 0);
+ ASSERT(f(&data[2]) == 0x42424242);
+}
+
+int
+main (int argc, char *argv[]) // Entry point: delegates to main_helper with run_test as the test body.
+{
+ return main_helper(argc, argv, run_test); // process exit status is main_helper's return value
+}
diff --git a/deps/lightening/tests/ldr_l.c b/deps/lightening/tests/ldr_l.c
new file mode 100644
index 0000000..15f0080
--- /dev/null
+++ b/deps/lightening/tests/ldr_l.c
@@ -0,0 +1,29 @@
+#include "test.h"
+
+static void
+run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size)
+{
+#if __WORDSIZE > 32
+ static uint64_t data[] = { 0xffffffffffffffff, 0, 0x4242424212345678 };
+
+ jit_begin(j, arena_base, arena_size);
+ size_t align = jit_enter_jit_abi(j, 0, 0, 0);
+ jit_load_args_1(j, jit_operand_gpr (JIT_OPERAND_ABI_POINTER, JIT_R1));
+
+ jit_ldr_l(j, JIT_R0, JIT_R1);
+ jit_leave_jit_abi(j, 0, 0, align);
+ jit_retr(j, JIT_R0);
+
+ jit_uword_t (*f)(void*) = jit_end(j, NULL);
+
+ ASSERT(f(&data[0]) == -1);
+ ASSERT(f(&data[1]) == 0);
+ ASSERT(f(&data[2]) == data[2]);
+#endif
+}
+
+int
+main (int argc, char *argv[])
+{
+ return main_helper(argc, argv, run_test);
+}
diff --git a/deps/lightening/tests/ldr_s.c b/deps/lightening/tests/ldr_s.c
new file mode 100644
index 0000000..cf668d5
--- /dev/null
+++ b/deps/lightening/tests/ldr_s.c
@@ -0,0 +1,27 @@
+#include "test.h"
+
+static uint16_t data[] = { 0xffff, 0x0000, 0x4242 };
+
+static void
+run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size)
+{
+ jit_begin(j, arena_base, arena_size);
+ size_t align = jit_enter_jit_abi(j, 0, 0, 0);
+ jit_load_args_1(j, jit_operand_gpr (JIT_OPERAND_ABI_POINTER, JIT_R1));
+
+ jit_ldr_s(j, JIT_R0, JIT_R1);
+ jit_leave_jit_abi(j, 0, 0, align);
+ jit_retr(j, JIT_R0);
+
+ jit_uword_t (*f)(void*) = jit_end(j, NULL);
+
+ ASSERT(f(&data[0]) == -1);
+ ASSERT(f(&data[1]) == 0);
+ ASSERT(f(&data[2]) == 0x4242);
+}
+
+int
+main (int argc, char *argv[])
+{
+ return main_helper(argc, argv, run_test);
+}
diff --git a/deps/lightening/tests/ldr_uc.c b/deps/lightening/tests/ldr_uc.c
new file mode 100644
index 0000000..a48f370
--- /dev/null
+++ b/deps/lightening/tests/ldr_uc.c
@@ -0,0 +1,27 @@
+#include "test.h"
+
+static uint8_t data[] = { 0xff, 0x00, 0x42 };
+
+static void
+run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size)
+{
+ jit_begin(j, arena_base, arena_size);
+ size_t align = jit_enter_jit_abi(j, 0, 0, 0);
+ jit_load_args_1(j, jit_operand_gpr (JIT_OPERAND_ABI_POINTER, JIT_R1));
+
+ jit_ldr_uc(j, JIT_R0, JIT_R1);
+ jit_leave_jit_abi(j, 0, 0, align);
+ jit_retr(j, JIT_R0);
+
+ jit_uword_t (*f)(void*) = jit_end(j, NULL);
+
+ ASSERT(f(&data[0]) == 0xff);
+ ASSERT(f(&data[1]) == 0);
+ ASSERT(f(&data[2]) == 0x42);
+}
+
+int
+main (int argc, char *argv[])
+{
+ return main_helper(argc, argv, run_test);
+}
diff --git a/deps/lightening/tests/ldr_ui.c b/deps/lightening/tests/ldr_ui.c
new file mode 100644
index 0000000..7668778
--- /dev/null
+++ b/deps/lightening/tests/ldr_ui.c
@@ -0,0 +1,29 @@
+#include "test.h"
+
+static void
+run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size)
+{
+#if __WORDSIZE > 32
+ static uint32_t data[] = { 0xffffffff, 0x00000000, 0x42424242 };
+
+ jit_begin(j, arena_base, arena_size);
+ size_t align = jit_enter_jit_abi(j, 0, 0, 0);
+ jit_load_args_1(j, jit_operand_gpr (JIT_OPERAND_ABI_POINTER, JIT_R1));
+
+ jit_ldr_ui(j, JIT_R0, JIT_R1);
+ jit_leave_jit_abi(j, 0, 0, align);
+ jit_retr(j, JIT_R0);
+
+ jit_uword_t (*f)(void*) = jit_end(j, NULL);
+
+ ASSERT(f(&data[0]) == data[0]);
+ ASSERT(f(&data[1]) == data[1]);
+ ASSERT(f(&data[2]) == data[2]);
+#endif
+}
+
+int
+main (int argc, char *argv[])
+{
+ return main_helper(argc, argv, run_test);
+}
diff --git a/deps/lightening/tests/ldr_us.c b/deps/lightening/tests/ldr_us.c
new file mode 100644
index 0000000..bb9928b
--- /dev/null
+++ b/deps/lightening/tests/ldr_us.c
@@ -0,0 +1,27 @@
+#include "test.h"
+
+static uint16_t data[] = { 0xffff, 0x0000, 0x4242 };
+
+static void
+run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size)
+{
+ jit_begin(j, arena_base, arena_size);
+ size_t align = jit_enter_jit_abi(j, 0, 0, 0);
+ jit_load_args_1(j, jit_operand_gpr (JIT_OPERAND_ABI_POINTER, JIT_R1));
+
+ jit_ldr_us(j, JIT_R0, JIT_R1);
+ jit_leave_jit_abi(j, 0, 0, align);
+ jit_retr(j, JIT_R0);
+
+ jit_uword_t (*f)(void*) = jit_end(j, NULL);
+
+ ASSERT(f(&data[0]) == data[0]);
+ ASSERT(f(&data[1]) == data[1]);
+ ASSERT(f(&data[2]) == data[2]);
+}
+
+int
+main (int argc, char *argv[])
+{
+ return main_helper(argc, argv, run_test);
+}
diff --git a/deps/lightening/tests/ldxi_c.c b/deps/lightening/tests/ldxi_c.c
new file mode 100644
index 0000000..4271f97
--- /dev/null
+++ b/deps/lightening/tests/ldxi_c.c
@@ -0,0 +1,27 @@
+#include "test.h"
+
+static uint8_t data[] = { 0xff, 0x00, 0x42 };
+
+static void
+run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size)
+{
+ jit_begin(j, arena_base, arena_size);
+ size_t align = jit_enter_jit_abi(j, 0, 0, 0);
+ jit_load_args_1(j, jit_operand_gpr (JIT_OPERAND_ABI_WORD, JIT_R0));
+
+ jit_ldxi_c(j, JIT_R0, JIT_R0, (uintptr_t)&data);
+ jit_leave_jit_abi(j, 0, 0, align);
+ jit_retr(j, JIT_R0);
+
+ jit_uword_t (*f)(jit_uword_t) = jit_end(j, NULL);
+
+ ASSERT(f(0) == -1);
+ ASSERT(f(1) == 0);
+ ASSERT(f(2) == 0x42);
+}
+
+int
+main (int argc, char *argv[])
+{
+ return main_helper(argc, argv, run_test);
+}
diff --git a/deps/lightening/tests/ldxi_d.c b/deps/lightening/tests/ldxi_d.c
new file mode 100644
index 0000000..6bcf632
--- /dev/null
+++ b/deps/lightening/tests/ldxi_d.c
@@ -0,0 +1,27 @@
+#include "test.h"
+
+static double data[] = { -1.0, 0.0, 0.5 };
+
+static void
+run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size)
+{
+ jit_begin(j, arena_base, arena_size);
+ size_t align = jit_enter_jit_abi(j, 0, 0, 0);
+ jit_load_args_1(j, jit_operand_gpr (JIT_OPERAND_ABI_WORD, JIT_R0));
+
+ jit_ldxi_d(j, JIT_F0, JIT_R0, (uintptr_t)data);
+ jit_leave_jit_abi(j, 0, 0, align);
+ jit_retr_d(j, JIT_F0);
+
+ double (*f)(jit_uword_t) = jit_end(j, NULL);
+
+ ASSERT(f(0) == data[0]);
+ ASSERT(f(8) == data[1]);
+ ASSERT(f(16) == data[2]);
+}
+
+int
+main (int argc, char *argv[])
+{
+ return main_helper(argc, argv, run_test);
+}
diff --git a/deps/lightening/tests/ldxi_f.c b/deps/lightening/tests/ldxi_f.c
new file mode 100644
index 0000000..9e65321
--- /dev/null
+++ b/deps/lightening/tests/ldxi_f.c
@@ -0,0 +1,27 @@
+#include "test.h"
+
+static float data[] = { -1.0, 0.0, 0.5 };
+
+static void
+run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size)
+{
+ jit_begin(j, arena_base, arena_size);
+ size_t align = jit_enter_jit_abi(j, 0, 0, 0);
+ jit_load_args_1(j, jit_operand_gpr (JIT_OPERAND_ABI_WORD, JIT_R0));
+
+ jit_ldxi_f(j, JIT_F0, JIT_R0, (uintptr_t)data);
+ jit_leave_jit_abi(j, 0, 0, align);
+ jit_retr_f(j, JIT_F0);
+
+ float (*f)(jit_uword_t) = jit_end(j, NULL);
+
+ ASSERT(f(0) == data[0]);
+ ASSERT(f(4) == data[1]);
+ ASSERT(f(8) == data[2]);
+}
+
+int
+main (int argc, char *argv[])
+{
+ return main_helper(argc, argv, run_test);
+}
diff --git a/deps/lightening/tests/ldxi_i.c b/deps/lightening/tests/ldxi_i.c
new file mode 100644
index 0000000..d1f7b56
--- /dev/null
+++ b/deps/lightening/tests/ldxi_i.c
@@ -0,0 +1,27 @@
+#include "test.h"
+
+static uint32_t data[] = { 0xffffffff, 0x00000000, 0x42424242 };
+
+static void
+run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size)
+{
+ jit_begin(j, arena_base, arena_size);
+ size_t align = jit_enter_jit_abi(j, 0, 0, 0);
+ jit_load_args_1(j, jit_operand_gpr (JIT_OPERAND_ABI_POINTER, JIT_R0));
+
+ jit_ldxi_i(j, JIT_R0, JIT_R0, (uintptr_t)data);
+ jit_leave_jit_abi(j, 0, 0, align);
+ jit_retr(j, JIT_R0);
+
+ jit_uword_t (*f)(jit_uword_t) = jit_end(j, NULL);
+
+ ASSERT(f(0) == -1);
+ ASSERT(f(4) == 0);
+ ASSERT(f(8) == 0x42424242);
+}
+
+int
+main (int argc, char *argv[])
+{
+ return main_helper(argc, argv, run_test);
+}
diff --git a/deps/lightening/tests/ldxi_l.c b/deps/lightening/tests/ldxi_l.c
new file mode 100644
index 0000000..bb1a8b2
--- /dev/null
+++ b/deps/lightening/tests/ldxi_l.c
@@ -0,0 +1,29 @@
+#include "test.h"
+
+static void
+run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size)
+{
+#if __WORDSIZE > 32
+ static uint64_t data[] = { 0xffffffffffffffff, 0, 0x4242424212345678 };
+
+ jit_begin(j, arena_base, arena_size);
+ size_t align = jit_enter_jit_abi(j, 0, 0, 0);
+ jit_load_args_1(j, jit_operand_gpr (JIT_OPERAND_ABI_WORD, JIT_R0));
+
+ jit_ldxi_l(j, JIT_R0, JIT_R0, (uintptr_t)data);
+ jit_leave_jit_abi(j, 0, 0, align);
+ jit_retr(j, JIT_R0);
+
+ jit_uword_t (*f)(jit_uword_t) = jit_end(j, NULL);
+
+ ASSERT(f(0) == -1);
+ ASSERT(f(8) == 0);
+ ASSERT(f(16) == data[2]);
+#endif
+}
+
+int
+main (int argc, char *argv[])
+{
+ return main_helper(argc, argv, run_test);
+}
diff --git a/deps/lightening/tests/ldxi_s.c b/deps/lightening/tests/ldxi_s.c
new file mode 100644
index 0000000..c9376d0
--- /dev/null
+++ b/deps/lightening/tests/ldxi_s.c
@@ -0,0 +1,27 @@
+#include "test.h"
+
+static uint16_t data[] = { 0xffff, 0x0000, 0x4242 };
+
+static void
+run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size)
+{
+ jit_begin(j, arena_base, arena_size);
+ size_t align = jit_enter_jit_abi(j, 0, 0, 0);
+ jit_load_args_1(j, jit_operand_gpr (JIT_OPERAND_ABI_WORD, JIT_R0));
+
+ jit_ldxi_s(j, JIT_R0, JIT_R0, (uintptr_t)data);
+ jit_leave_jit_abi(j, 0, 0, align);
+ jit_retr(j, JIT_R0);
+
+ jit_uword_t (*f)(jit_uword_t) = jit_end(j, NULL);
+
+ ASSERT(f(0) == -1);
+ ASSERT(f(2) == 0);
+ ASSERT(f(4) == 0x4242);
+}
+
+int
+main (int argc, char *argv[])
+{
+ return main_helper(argc, argv, run_test);
+}
diff --git a/deps/lightening/tests/ldxi_uc.c b/deps/lightening/tests/ldxi_uc.c
new file mode 100644
index 0000000..31d7b73
--- /dev/null
+++ b/deps/lightening/tests/ldxi_uc.c
@@ -0,0 +1,27 @@
+#include "test.h"
+
+static uint8_t data[] = { 0xff, 0x00, 0x42 };
+
+static void
+run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size)
+{
+ jit_begin(j, arena_base, arena_size);
+ size_t align = jit_enter_jit_abi(j, 0, 0, 0);
+ jit_load_args_1(j, jit_operand_gpr (JIT_OPERAND_ABI_WORD, JIT_R0));
+
+ jit_ldxi_uc(j, JIT_R0, JIT_R0, (uintptr_t)data);
+ jit_leave_jit_abi(j, 0, 0, align);
+ jit_retr(j, JIT_R0);
+
+ jit_uword_t (*f)(jit_uword_t) = jit_end(j, NULL);
+
+ ASSERT(f(0) == 0xff);
+ ASSERT(f(1) == 0);
+ ASSERT(f(2) == 0x42);
+}
+
+int
+main (int argc, char *argv[])
+{
+ return main_helper(argc, argv, run_test);
+}
diff --git a/deps/lightening/tests/ldxi_ui.c b/deps/lightening/tests/ldxi_ui.c
new file mode 100644
index 0000000..4f7e304
--- /dev/null
+++ b/deps/lightening/tests/ldxi_ui.c
@@ -0,0 +1,29 @@
+#include "test.h"
+
+static void
+run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size)
+{
+#if __WORDSIZE > 32
+ static uint32_t data[] = { 0xffffffff, 0x00000000, 0x42424242 };
+
+ jit_begin(j, arena_base, arena_size);
+ size_t align = jit_enter_jit_abi(j, 0, 0, 0);
+ jit_load_args_1(j, jit_operand_gpr (JIT_OPERAND_ABI_WORD, JIT_R0));
+
+ jit_ldxi_ui(j, JIT_R0, JIT_R0, (uintptr_t)data);
+ jit_leave_jit_abi(j, 0, 0, align);
+ jit_retr(j, JIT_R0);
+
+ jit_uword_t (*f)(jit_uword_t) = jit_end(j, NULL);
+
+ ASSERT(f(0) == data[0]);
+ ASSERT(f(4) == data[1]);
+ ASSERT(f(8) == data[2]);
+#endif
+}
+
+int
+main (int argc, char *argv[])
+{
+ return main_helper(argc, argv, run_test);
+}
diff --git a/deps/lightening/tests/ldxi_us.c b/deps/lightening/tests/ldxi_us.c
new file mode 100644
index 0000000..81c984f
--- /dev/null
+++ b/deps/lightening/tests/ldxi_us.c
@@ -0,0 +1,27 @@
+#include "test.h"
+
+static uint16_t data[] = { 0xffff, 0x0000, 0x4242 };
+
+static void
+run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size)
+{
+ jit_begin(j, arena_base, arena_size);
+ size_t align = jit_enter_jit_abi(j, 0, 0, 0);
+ jit_load_args_1(j, jit_operand_gpr (JIT_OPERAND_ABI_WORD, JIT_R0));
+
+ jit_ldxi_us(j, JIT_R0, JIT_R0, (uintptr_t)data);
+ jit_leave_jit_abi(j, 0, 0, align);
+ jit_retr(j, JIT_R0);
+
+ jit_uword_t (*f)(jit_uword_t) = jit_end(j, NULL);
+
+ ASSERT(f(0) == data[0]);
+ ASSERT(f(2) == data[1]);
+ ASSERT(f(4) == data[2]);
+}
+
+int
+main (int argc, char *argv[])
+{
+ return main_helper(argc, argv, run_test);
+}
diff --git a/deps/lightening/tests/ldxr_c.c b/deps/lightening/tests/ldxr_c.c
new file mode 100644
index 0000000..366f5b2
--- /dev/null
+++ b/deps/lightening/tests/ldxr_c.c
@@ -0,0 +1,28 @@
+#include "test.h"
+
+static uint8_t data[] = { 0xff, 0x00, 0x42 };
+
+static void
+run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size)
+{
+ jit_begin(j, arena_base, arena_size);
+ size_t align = jit_enter_jit_abi(j, 0, 0, 0);
+ jit_load_args_2(j, jit_operand_gpr (JIT_OPERAND_ABI_POINTER, JIT_R0),
+ jit_operand_gpr (JIT_OPERAND_ABI_WORD, JIT_R1));
+
+ jit_ldxr_c(j, JIT_R0, JIT_R0, JIT_R1);
+ jit_leave_jit_abi(j, 0, 0, align);
+ jit_retr(j, JIT_R0);
+
+ jit_uword_t (*f)(void*, jit_uword_t) = jit_end(j, NULL);
+
+ ASSERT(f(data, 0) == -1);
+ ASSERT(f(data, 1) == 0);
+ ASSERT(f(data, 2) == 0x42);
+}
+
+int
+main (int argc, char *argv[])
+{
+ return main_helper(argc, argv, run_test);
+}
diff --git a/deps/lightening/tests/ldxr_d.c b/deps/lightening/tests/ldxr_d.c
new file mode 100644
index 0000000..38a12fd
--- /dev/null
+++ b/deps/lightening/tests/ldxr_d.c
@@ -0,0 +1,28 @@
+#include "test.h"
+
+static double data[] = { -1.0, 0.0, 0.5 };
+
+static void
+run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size)
+{
+ jit_begin(j, arena_base, arena_size);
+ size_t align = jit_enter_jit_abi(j, 0, 0, 0);
+ jit_load_args_2(j, jit_operand_gpr (JIT_OPERAND_ABI_POINTER, JIT_R0),
+ jit_operand_gpr (JIT_OPERAND_ABI_WORD, JIT_R1));
+
+ jit_ldxr_d(j, JIT_F0, JIT_R0, JIT_R1);
+ jit_leave_jit_abi(j, 0, 0, align);
+ jit_retr_d(j, JIT_F0);
+
+ double (*f)(void*, jit_uword_t) = jit_end(j, NULL);
+
+ ASSERT(f(data, 0) == data[0]);
+ ASSERT(f(data, 8) == data[1]);
+ ASSERT(f(data, 16) == data[2]);
+}
+
+int
+main (int argc, char *argv[])
+{
+ return main_helper(argc, argv, run_test);
+}
diff --git a/deps/lightening/tests/ldxr_f.c b/deps/lightening/tests/ldxr_f.c
new file mode 100644
index 0000000..c48b11f
--- /dev/null
+++ b/deps/lightening/tests/ldxr_f.c
@@ -0,0 +1,28 @@
+#include "test.h"
+
+static float data[] = { -1.0, 0.0, 0.5 };
+
+static void
+run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size)
+{
+ jit_begin(j, arena_base, arena_size);
+ size_t align = jit_enter_jit_abi(j, 0, 0, 0);
+ jit_load_args_2(j, jit_operand_gpr (JIT_OPERAND_ABI_POINTER, JIT_R0),
+ jit_operand_gpr (JIT_OPERAND_ABI_WORD, JIT_R1));
+
+ jit_ldxr_f(j, JIT_F0, JIT_R0, JIT_R1);
+ jit_leave_jit_abi(j, 0, 0, align);
+ jit_retr_f(j, JIT_F0);
+
+ float (*f)(void*, jit_uword_t) = jit_end(j, NULL);
+
+ ASSERT(f(data, 0) == data[0]);
+ ASSERT(f(data, 4) == data[1]);
+ ASSERT(f(data, 8) == data[2]);
+}
+
+int
+main (int argc, char *argv[])
+{
+ return main_helper(argc, argv, run_test);
+}
diff --git a/deps/lightening/tests/ldxr_i.c b/deps/lightening/tests/ldxr_i.c
new file mode 100644
index 0000000..e4149aa
--- /dev/null
+++ b/deps/lightening/tests/ldxr_i.c
@@ -0,0 +1,28 @@
+#include "test.h"
+
+static uint32_t data[] = { 0xffffffff, 0x00000000, 0x42424242 };
+
+static void
+run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size)
+{
+ jit_begin(j, arena_base, arena_size);
+ size_t align = jit_enter_jit_abi(j, 0, 0, 0);
+ jit_load_args_2(j, jit_operand_gpr (JIT_OPERAND_ABI_POINTER, JIT_R0),
+ jit_operand_gpr (JIT_OPERAND_ABI_WORD, JIT_R1));
+
+ jit_ldxr_i(j, JIT_R0, JIT_R0, JIT_R1);
+ jit_leave_jit_abi(j, 0, 0, align);
+ jit_retr(j, JIT_R0);
+
+ jit_uword_t (*f)(void*, jit_uword_t) = jit_end(j, NULL);
+
+ ASSERT(f(data, 0) == -1);
+ ASSERT(f(data, 4) == 0);
+ ASSERT(f(data, 8) == 0x42424242);
+}
+
+int
+main (int argc, char *argv[])
+{
+ return main_helper(argc, argv, run_test);
+}
diff --git a/deps/lightening/tests/ldxr_l.c b/deps/lightening/tests/ldxr_l.c
new file mode 100644
index 0000000..ee9f156
--- /dev/null
+++ b/deps/lightening/tests/ldxr_l.c
@@ -0,0 +1,30 @@
+#include "test.h"
+
+static void
+run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size)
+{
+#if __WORDSIZE > 32
+ static uint64_t data[] = { 0xffffffffffffffff, 0, 0x4242424212345678 };
+
+ jit_begin(j, arena_base, arena_size);
+ size_t align = jit_enter_jit_abi(j, 0, 0, 0);
+ jit_load_args_2(j, jit_operand_gpr (JIT_OPERAND_ABI_POINTER, JIT_R0),
+ jit_operand_gpr (JIT_OPERAND_ABI_WORD, JIT_R1));
+
+ jit_ldxr_l(j, JIT_R0, JIT_R0, JIT_R1);
+ jit_leave_jit_abi(j, 0, 0, align);
+ jit_retr(j, JIT_R0);
+
+ jit_uword_t (*f)(void*, jit_uword_t) = jit_end(j, NULL);
+
+ ASSERT(f(data, 0) == -1);
+ ASSERT(f(data, 8) == 0);
+ ASSERT(f(data, 16) == data[2]);
+#endif
+}
+
+int
+main (int argc, char *argv[])
+{
+ return main_helper(argc, argv, run_test);
+}
diff --git a/deps/lightening/tests/ldxr_s.c b/deps/lightening/tests/ldxr_s.c
new file mode 100644
index 0000000..fbb5c09
--- /dev/null
+++ b/deps/lightening/tests/ldxr_s.c
@@ -0,0 +1,28 @@
+#include "test.h"
+
+static uint16_t data[] = { 0xffff, 0x0000, 0x4242 };
+
+static void
+run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size)
+{
+ jit_begin(j, arena_base, arena_size);
+ size_t align = jit_enter_jit_abi(j, 0, 0, 0);
+ jit_load_args_2(j, jit_operand_gpr (JIT_OPERAND_ABI_POINTER, JIT_R0),
+ jit_operand_gpr (JIT_OPERAND_ABI_WORD, JIT_R1));
+
+ jit_ldxr_s(j, JIT_R0, JIT_R0, JIT_R1);
+ jit_leave_jit_abi(j, 0, 0, align);
+ jit_retr(j, JIT_R0);
+
+ jit_uword_t (*f)(void*, jit_uword_t) = jit_end(j, NULL);
+
+ ASSERT(f(data, 0) == -1);
+ ASSERT(f(data, 2) == 0);
+ ASSERT(f(data, 4) == 0x4242);
+}
+
+int
+main (int argc, char *argv[])
+{
+ return main_helper(argc, argv, run_test);
+}
diff --git a/deps/lightening/tests/ldxr_uc.c b/deps/lightening/tests/ldxr_uc.c
new file mode 100644
index 0000000..846c552
--- /dev/null
+++ b/deps/lightening/tests/ldxr_uc.c
@@ -0,0 +1,28 @@
+#include "test.h"
+
+static uint8_t data[] = { 0xff, 0x00, 0x42 };
+
+static void
+run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size)
+{
+ jit_begin(j, arena_base, arena_size);
+ size_t align = jit_enter_jit_abi(j, 0, 0, 0);
+ jit_load_args_2(j, jit_operand_gpr (JIT_OPERAND_ABI_POINTER, JIT_R0),
+ jit_operand_gpr (JIT_OPERAND_ABI_WORD, JIT_R1));
+
+ jit_ldxr_uc(j, JIT_R0, JIT_R0, JIT_R1);
+ jit_leave_jit_abi(j, 0, 0, align);
+ jit_retr(j, JIT_R0);
+
+ jit_uword_t (*f)(void*, jit_uword_t) = jit_end(j, NULL);
+
+ ASSERT(f(data, 0) == 0xff);
+ ASSERT(f(data, 1) == 0);
+ ASSERT(f(data, 2) == 0x42);
+}
+
+int
+main (int argc, char *argv[])
+{
+ return main_helper(argc, argv, run_test);
+}
diff --git a/deps/lightening/tests/ldxr_ui.c b/deps/lightening/tests/ldxr_ui.c
new file mode 100644
index 0000000..cd774d3
--- /dev/null
+++ b/deps/lightening/tests/ldxr_ui.c
@@ -0,0 +1,30 @@
+#include "test.h"
+
+static void
+run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size)
+{
+#if __WORDSIZE > 32
+ static uint32_t data[] = { 0xffffffff, 0x00000000, 0x42424242 };
+
+ jit_begin(j, arena_base, arena_size);
+ size_t align = jit_enter_jit_abi(j, 0, 0, 0);
+ jit_load_args_2(j, jit_operand_gpr (JIT_OPERAND_ABI_POINTER, JIT_R0),
+ jit_operand_gpr (JIT_OPERAND_ABI_WORD, JIT_R1));
+
+ jit_ldxr_ui(j, JIT_R0, JIT_R0, JIT_R1);
+ jit_leave_jit_abi(j, 0, 0, align);
+ jit_retr(j, JIT_R0);
+
+ jit_uword_t (*f)(void*, jit_uword_t) = jit_end(j, NULL);
+
+ ASSERT(f(data, 0) == data[0]);
+ ASSERT(f(data, 4) == data[1]);
+ ASSERT(f(data, 8) == data[2]);
+#endif
+}
+
+int
+main (int argc, char *argv[])
+{
+ return main_helper(argc, argv, run_test);
+}
diff --git a/deps/lightening/tests/ldxr_us.c b/deps/lightening/tests/ldxr_us.c
new file mode 100644
index 0000000..b7e408b
--- /dev/null
+++ b/deps/lightening/tests/ldxr_us.c
@@ -0,0 +1,28 @@
+#include "test.h"
+
+static uint16_t data[] = { 0xffff, 0x0000, 0x4242 };
+
+static void
+run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size)
+{
+ jit_begin(j, arena_base, arena_size);
+ size_t align = jit_enter_jit_abi(j, 0, 0, 0);
+ jit_load_args_2(j, jit_operand_gpr (JIT_OPERAND_ABI_POINTER, JIT_R0),
+ jit_operand_gpr (JIT_OPERAND_ABI_WORD, JIT_R1));
+
+ jit_ldxr_us(j, JIT_R0, JIT_R0, JIT_R1);
+ jit_leave_jit_abi(j, 0, 0, align);
+ jit_retr(j, JIT_R0);
+
+ jit_uword_t (*f)(void*, jit_uword_t) = jit_end(j, NULL);
+
+ ASSERT(f(data, 0) == data[0]);
+ ASSERT(f(data, 2) == data[1]);
+ ASSERT(f(data, 4) == data[2]);
+}
+
+int
+main (int argc, char *argv[])
+{
+ return main_helper(argc, argv, run_test);
+}
diff --git a/deps/lightening/tests/link-register.c b/deps/lightening/tests/link-register.c
new file mode 100644
index 0000000..96ee959
--- /dev/null
+++ b/deps/lightening/tests/link-register.c
@@ -0,0 +1,35 @@
+#include "test.h"
+
+static void
+run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size)
+{
+ jit_begin(j, arena_base, arena_size);
+ size_t align = jit_enter_jit_abi(j, 0, 0, 0);
+ jit_load_args_1(j, jit_operand_gpr (JIT_OPERAND_ABI_POINTER, JIT_R0));
+
+ jit_reloc_t call_tramp = jit_jmp (j);
+
+ void *tramp = jit_address (j);
+ jit_pop_link_register (j);
+ jit_movr (j, JIT_R0, JIT_LR);
+ jit_leave_jit_abi(j, 0, 0, align);
+ jit_retr (j, JIT_R0);
+
+ jit_patch_here (j, call_tramp);
+ jit_jmpi_with_link (j, tramp);
+
+ void *expected_link = jit_address_to_function_pointer (jit_address (j));
+
+ size_t size = 0;
+ void* ret = jit_end(j, &size);
+
+ void* (*f)(void) = ret;
+
+ ASSERT(f() == expected_link);
+}
+
+int
+main (int argc, char *argv[])
+{
+ return main_helper(argc, argv, run_test);
+}
diff --git a/deps/lightening/tests/lshi.c b/deps/lightening/tests/lshi.c
new file mode 100644
index 0000000..e721af5
--- /dev/null
+++ b/deps/lightening/tests/lshi.c
@@ -0,0 +1,27 @@
+#include "test.h"
+
+static void
+run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size)
+{
+ jit_begin(j, arena_base, arena_size);
+ size_t align = jit_enter_jit_abi(j, 0, 0, 0);
+ jit_load_args_1(j, jit_operand_gpr (JIT_OPERAND_ABI_WORD, JIT_R0));
+
+ jit_lshi(j, JIT_R0, JIT_R0, 31);
+ jit_leave_jit_abi(j, 0, 0, align);
+ jit_retr(j, JIT_R0);
+
+ jit_word_t (*f)(jit_word_t) = jit_end(j, NULL);
+
+#if __WORDSIZE == 32
+ ASSERT(f(-0x7f) == 0x80000000);
+#else
+ ASSERT(f(-0x7f) == 0xffffffc080000000);
+#endif
+}
+
+int
+main (int argc, char *argv[])
+{
+ return main_helper(argc, argv, run_test);
+}
diff --git a/deps/lightening/tests/lshr.c b/deps/lightening/tests/lshr.c
new file mode 100644
index 0000000..f81aa69
--- /dev/null
+++ b/deps/lightening/tests/lshr.c
@@ -0,0 +1,69 @@
+#include "test.h"
+
+static void
+run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size)
+{
+ jit_begin(j, arena_base, arena_size);
+ size_t align = jit_enter_jit_abi(j, 0, 0, 0);
+ jit_load_args_2(j, jit_operand_gpr (JIT_OPERAND_ABI_WORD, JIT_R0),
+ jit_operand_gpr (JIT_OPERAND_ABI_WORD, JIT_R1));
+
+ jit_lshr(j, JIT_R0, JIT_R0, JIT_R1);
+ jit_leave_jit_abi(j, 0, 0, align);
+ jit_retr(j, JIT_R0);
+
+ size_t size = 0;
+ void* ret = jit_end(j, &size);
+
+ jit_word_t (*f)(jit_word_t, jit_word_t) = ret;
+
+ ASSERT(f(0x7f, 1) == 0xfe);
+ ASSERT(f(0x7fff, 2) == 0x1fffc);
+ ASSERT(f(0x81, 16) == 0x810000);
+ ASSERT(f(0xff, 15) == 0x7f8000);
+ ASSERT(f(0x7fffffff, 0) == 0x7fffffff);
+#if __WORDSIZE == 32
+ ASSERT(f(0xffffffff, 8) == 0xffffff00);
+ ASSERT(f(0x7fffffff, 3) == 0xfffffff8);
+ ASSERT(f(-0x7f, 31) == 0x80000000);
+ ASSERT(f(-0x7fff, 30) == 0x40000000);
+ ASSERT(f(-0x7fffffff, 29) == 0x20000000);
+ ASSERT(f(0x80000001, 28) == 0x10000000);
+ ASSERT(f(0x8001, 17) == 0x20000);
+ ASSERT(f(0x80000001, 18) == 0x40000);
+ ASSERT(f(-0xffff, 24) == 0x1000000);
+#else
+ ASSERT(f(0xffffffff, 8) == 0xffffffff00);
+ ASSERT(f(0x7fffffff, 3) == 0x3fffffff8);
+ ASSERT(f(-0x7f, 31) == 0xffffffc080000000);
+ ASSERT(f(-0x7fff, 30) == 0xffffe00040000000);
+ ASSERT(f(-0x7fffffff, 29) == 0xf000000020000000);
+ ASSERT(f(0x80000001, 28) == 0x800000010000000);
+ ASSERT(f(0x8001, 17) == 0x100020000);
+ ASSERT(f(0x80000001, 18) == 0x2000000040000);
+ ASSERT(f(-0xffff, 24) == 0xffffff0001000000);
+ ASSERT(f(0x7f, 33) == 0xfe00000000);
+ ASSERT(f(0x7ffff, 34) == 0x1ffffc00000000);
+ ASSERT(f(0x7fffffff, 35) == 0xfffffff800000000);
+ ASSERT(f(-0x7f, 63) == 0x8000000000000000);
+ ASSERT(f(-0x7fff, 62) == 0x4000000000000000);
+ ASSERT(f(-0x7fffffff, 61) == 0x2000000000000000);
+ ASSERT(f(0x80000001, 60) == 0x1000000000000000);
+ ASSERT(f(0x81, 48) == 0x81000000000000);
+ ASSERT(f(0x8001, 49) == 0x2000000000000);
+ ASSERT(f(0x80000001, 40) == 0x10000000000);
+ ASSERT(f(0xff, 47) == 0x7f800000000000);
+ ASSERT(f(0xffff0001, 56) == 0x100000000000000);
+ ASSERT(f(0xffffffff, 40) == 0xffffff0000000000);
+ ASSERT(f(0x7fffffffff, 33) == 0xfffffffe00000000);
+ ASSERT(f(-0x7fffffffff, 63) == 0x8000000000000000);
+ ASSERT(f(0x8000000001, 48) == 0x1000000000000);
+ ASSERT(f(0xffffffffff, 47) == 0xffff800000000000);
+#endif
+}
+
+int
+main (int argc, char *argv[])
+{
+ return main_helper(argc, argv, run_test);
+}
diff --git a/deps/lightening/tests/mov_addr.c b/deps/lightening/tests/mov_addr.c
new file mode 100644
index 0000000..b4a9aaa
--- /dev/null
+++ b/deps/lightening/tests/mov_addr.c
@@ -0,0 +1,25 @@
+#include "test.h"
+
+static uint64_t thing = 0x123456789abcdef0;
+
+static void
+run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size)
+{
+ jit_begin(j, arena_base, arena_size);
+ size_t align = jit_enter_jit_abi(j, 0, 0, 0);
+
+ jit_patch_there(j, jit_mov_addr(j, JIT_R0), &thing);
+ jit_leave_jit_abi(j, 0, 0, align);
+ jit_retr(j, JIT_R0);
+
+ void* (*f)(void) = jit_end(j, NULL);
+
+ ASSERT(f() == &thing);
+ ASSERT(*(uint64_t*)f() == thing);
+}
+
+int
+main (int argc, char *argv[])
+{
+ return main_helper(argc, argv, run_test);
+}
diff --git a/deps/lightening/tests/movi.c b/deps/lightening/tests/movi.c
new file mode 100644
index 0000000..fcdd656
--- /dev/null
+++ b/deps/lightening/tests/movi.c
@@ -0,0 +1,22 @@
+#include "test.h"
+
+static void
+run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size)
+{
+ jit_begin(j, arena_base, arena_size);
+ size_t align = jit_enter_jit_abi(j, 0, 0, 0);
+
+ jit_movi(j, JIT_R0, 0xa500a500);
+ jit_leave_jit_abi(j, 0, 0, align);
+ jit_retr(j, JIT_R0);
+
+ jit_uword_t (*f)(void) = jit_end(j, NULL);
+
+ ASSERT(f() == 0xa500a500);
+}
+
+int
+main (int argc, char *argv[])
+{
+ return main_helper(argc, argv, run_test);
+}
diff --git a/deps/lightening/tests/movi_d.c b/deps/lightening/tests/movi_d.c
new file mode 100644
index 0000000..cb9e63d
--- /dev/null
+++ b/deps/lightening/tests/movi_d.c
@@ -0,0 +1,22 @@
+#include "test.h"
+
+static void
+run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size)
+{
+ jit_begin(j, arena_base, arena_size);
+ size_t align = jit_enter_jit_abi(j, 0, 0, 0);
+
+ jit_movi_d(j, JIT_F0, 3.14159);
+ jit_leave_jit_abi(j, 0, 0, align);
+ jit_retr_d(j, JIT_F0);
+
+ double (*f)(void) = jit_end(j, NULL);
+
+ ASSERT(f() == 3.14159);
+}
+
+int
+main (int argc, char *argv[])
+{
+ return main_helper(argc, argv, run_test);
+}
diff --git a/deps/lightening/tests/movi_f.c b/deps/lightening/tests/movi_f.c
new file mode 100644
index 0000000..944f615
--- /dev/null
+++ b/deps/lightening/tests/movi_f.c
@@ -0,0 +1,22 @@
+#include "test.h"
+
+static void
+run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size)
+{
+ jit_begin(j, arena_base, arena_size);
+ size_t align = jit_enter_jit_abi(j, 0, 0, 0);
+
+ jit_movi_f(j, JIT_F0, 3.14159f);
+ jit_leave_jit_abi(j, 0, 0, align);
+ jit_retr_f(j, JIT_F0);
+
+ float (*f)(void) = jit_end(j, NULL);
+
+ ASSERT(f() == 3.14159f);
+}
+
+int
+main (int argc, char *argv[])
+{
+ return main_helper(argc, argv, run_test);
+}
diff --git a/deps/lightening/tests/mulr.c b/deps/lightening/tests/mulr.c
new file mode 100644
index 0000000..452e35d
--- /dev/null
+++ b/deps/lightening/tests/mulr.c
@@ -0,0 +1,64 @@
+#include "test.h"
+
+static void
+run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size)
+{
+ jit_begin(j, arena_base, arena_size);
+ size_t align = jit_enter_jit_abi(j, 0, 0, 0);
+ jit_load_args_2(j, jit_operand_gpr (JIT_OPERAND_ABI_WORD, JIT_R0),
+ jit_operand_gpr (JIT_OPERAND_ABI_WORD, JIT_R1));
+
+ jit_mulr(j, JIT_R0, JIT_R0, JIT_R1);
+ jit_leave_jit_abi(j, 0, 0, align);
+ jit_retr(j, JIT_R0);
+
+ size_t size = 0;
+ void* ret = jit_end(j, &size);
+
+ jit_word_t (*f)(jit_word_t, jit_word_t) = ret;
+
+ ASSERT(f(0x7fffffff, 1) == 0x7fffffff);
+ ASSERT(f(1, 0x7fffffff) == 0x7fffffff);
+ ASSERT(f(0x80000000, 1) == 0x80000000);
+ ASSERT(f(1, 0x80000000) == 0x80000000);
+ ASSERT(f(0x7fffffff, 2) == 0xfffffffe);
+ ASSERT(f(2, 0x7fffffff) == 0xfffffffe);
+ ASSERT(f(0x7fffffff, 0) == 0);
+ ASSERT(f(0, 0x7fffffff) == 0);
+#if __WORDSIZE == 32
+ ASSERT(f(0x80000000, 2) == 0);
+ ASSERT(f(2, 0x80000000) == 0);
+ ASSERT(f(0x7fffffff, 0x80000000) == 0x80000000);
+ ASSERT(f(0x80000000, 0x7fffffff) == 0x80000000);
+ ASSERT(f(0x7fffffff, 0xffffffff) == 0x80000001);
+ ASSERT(f(0xffffffff, 0x7fffffff) == 0x80000001);
+ ASSERT(f(0xffffffff, 0xffffffff) == 1);
+#else
+ ASSERT(f(0x80000000, 2) == 0x100000000);
+ ASSERT(f(2, 0x80000000) == 0x100000000);
+ ASSERT(f(0x7fffffff, 0x80000000) == 0x3fffffff80000000);
+ ASSERT(f(0x80000000, 0x7fffffff) == 0x3fffffff80000000);
+ ASSERT(f(0x7fffffff, 0xffffffff) == 0x7ffffffe80000001);
+ ASSERT(f(0xffffffff, 0x7fffffff) == 0x7ffffffe80000001);
+ ASSERT(f(0xffffffff, 0xffffffff) == 0xfffffffe00000001);
+ ASSERT(f(0x7fffffffffffffff, 1) == 0x7fffffffffffffff);
+ ASSERT(f(1, 0x7fffffffffffffff) == 0x7fffffffffffffff);
+ ASSERT(f(0x8000000000000000, 1) == 0x8000000000000000);
+ ASSERT(f(1, 0x8000000000000000) == 0x8000000000000000);
+ ASSERT(f(0x7fffffffffffffff, 2) == 0xfffffffffffffffe);
+ ASSERT(f(2, 0x7fffffffffffffff) == 0xfffffffffffffffe);
+ ASSERT(f(0x8000000000000000, 2) == 0);
+ ASSERT(f(2, 0x8000000000000000) == 0);
+ ASSERT(f(0x7fffffffffffffff, 0x8000000000000000) == 0x8000000000000000);
+ ASSERT(f(0x8000000000000000, 0x7fffffffffffffff) == 0x8000000000000000);
+ ASSERT(f(0x7fffffffffffffff, 0xffffffffffffffff) == 0x8000000000000001);
+ ASSERT(f(0xffffffffffffffff, 0x7fffffffffffffff) == 0x8000000000000001);
+ ASSERT(f(0xffffffffffffffff, 0xffffffffffffffff) == 1);
+#endif
+}
+
+int
+main (int argc, char *argv[])
+{
+ return main_helper(argc, argv, run_test);
+}
diff --git a/deps/lightening/tests/mulr_d.c b/deps/lightening/tests/mulr_d.c
new file mode 100644
index 0000000..945f152
--- /dev/null
+++ b/deps/lightening/tests/mulr_d.c
@@ -0,0 +1,27 @@
+#include "test.h"
+
+static void
+run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size)
+{
+ jit_begin(j, arena_base, arena_size);
+ size_t align = jit_enter_jit_abi(j, 0, 0, 0);
+ jit_load_args_2(j, jit_operand_fpr (JIT_OPERAND_ABI_DOUBLE, JIT_F0),
+ jit_operand_fpr (JIT_OPERAND_ABI_DOUBLE, JIT_F1));
+
+ jit_mulr_d(j, JIT_F0, JIT_F0, JIT_F1);
+ jit_leave_jit_abi(j, 0, 0, align);
+ jit_retr_d(j, JIT_F0);
+
+ size_t size = 0;
+ void* ret = jit_end(j, &size);
+
+ double (*f)(double, double) = ret;
+ ASSERT(f(-0.5, 0.5) == -0.25);
+ ASSERT(f(0.25, 0.75) == 0.1875);
+}
+
+int
+main (int argc, char *argv[])
+{
+ return main_helper(argc, argv, run_test);
+}
diff --git a/deps/lightening/tests/mulr_f.c b/deps/lightening/tests/mulr_f.c
new file mode 100644
index 0000000..2d0dd4f
--- /dev/null
+++ b/deps/lightening/tests/mulr_f.c
@@ -0,0 +1,27 @@
+#include "test.h"
+
+static void
+run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size)
+{
+ jit_begin(j, arena_base, arena_size);
+ size_t align = jit_enter_jit_abi(j, 0, 0, 0);
+ jit_load_args_2(j, jit_operand_fpr (JIT_OPERAND_ABI_FLOAT, JIT_F0),
+ jit_operand_fpr (JIT_OPERAND_ABI_FLOAT, JIT_F1));
+
+ jit_mulr_f(j, JIT_F0, JIT_F0, JIT_F1);
+ jit_leave_jit_abi(j, 0, 0, align);
+ jit_retr_f(j, JIT_F0);
+
+ size_t size = 0;
+ void* ret = jit_end(j, &size);
+
+ float (*f)(float, float) = ret;
+ ASSERT(f(-0.5f, 0.5f) == -0.25f);
+ ASSERT(f(0.25f, 0.75f) == 0.1875f);
+}
+
+int
+main (int argc, char *argv[])
+{
+ return main_helper(argc, argv, run_test);
+}
diff --git a/deps/lightening/tests/negr.c b/deps/lightening/tests/negr.c
new file mode 100644
index 0000000..18e27cb
--- /dev/null
+++ b/deps/lightening/tests/negr.c
@@ -0,0 +1,39 @@
+#include "test.h"
+
+static void
+run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size)
+{ // Emits a word(word) function around jit_negr and checks two's-complement negation samples.
+ jit_begin(j, arena_base, arena_size);
+ size_t align = jit_enter_jit_abi(j, 0, 0, 0);
+ jit_load_args_1(j, jit_operand_gpr (JIT_OPERAND_ABI_WORD, JIT_R0));
+
+ jit_negr(j, JIT_R0, JIT_R0); // operation under test, applied in place on R0
+ jit_leave_jit_abi(j, 0, 0, align);
+ jit_retr(j, JIT_R0);
+
+ jit_word_t (*f)(jit_word_t) = jit_end(j, NULL);
+
+ ASSERT(f(0) == 0);
+#if __WORDSIZE == 32 // expected bit patterns depend on the word width
+ ASSERT(f(1) == 0xffffffff);
+ ASSERT(f(0xffffffff) == 1);
+ ASSERT(f(0x80000000) == 0x80000000); // the sign-bit-only pattern is expected to negate to itself
+ ASSERT(f(0x7fffffff) == 0x80000001);
+ ASSERT(f(0x80000001) == 0x7fffffff);
+#else
+ ASSERT(f(1) == 0xffffffffffffffff);
+ ASSERT(f(0xffffffff) == 0xffffffff00000001);
+ ASSERT(f(0x80000000) == 0xffffffff80000000);
+ ASSERT(f(0x7fffffff) == 0xffffffff80000001);
+ ASSERT(f(0x80000001) == 0xffffffff7fffffff);
+ ASSERT(f(0xffffffffffffffff) == 1);
+ ASSERT(f(0x8000000000000000) == 0x8000000000000000); // the sign-bit-only pattern is expected to negate to itself
+ ASSERT(f(0x7fffffffffffffff) == 0x8000000000000001);
+#endif
+}
+
+int
+main (int argc, char *argv[])
+{
+ return main_helper(argc, argv, run_test); // delegate to main_helper (not defined here; presumably from test.h) with run_test as the test body
+}
diff --git a/deps/lightening/tests/negr_d.c b/deps/lightening/tests/negr_d.c
new file mode 100644
index 0000000..d0e168b
--- /dev/null
+++ b/deps/lightening/tests/negr_d.c
@@ -0,0 +1,26 @@
+#include "test.h"
+
+static void
+run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size)
+{ // Emits a double(double) function around jit_negr_d and checks sign flips on sample values.
+ jit_begin(j, arena_base, arena_size);
+ size_t align = jit_enter_jit_abi(j, 0, 0, 0);
+ jit_load_args_1(j, jit_operand_fpr (JIT_OPERAND_ABI_DOUBLE, JIT_F0));
+
+ jit_negr_d(j, JIT_F0, JIT_F0); // operation under test, applied in place on F0
+ jit_leave_jit_abi(j, 0, 0, align);
+ jit_retr_d(j, JIT_F0);
+
+ double (*f)(double) = jit_end(j, NULL);
+
+ ASSERT(f(0.0) == -0.0); // NOTE(review): 0.0 == -0.0 compares true under IEEE 754, so this does not check the sign bit
+ ASSERT(f(0.5) == -0.5);
+ ASSERT(f(1.0 / 0.0) == -1.0 / 0.0); // +infinity in, -infinity expected (assuming IEEE 754 division by zero)
+ ASSERT(f(-1.25) == 1.25);
+}
+
+int
+main (int argc, char *argv[])
+{
+ return main_helper(argc, argv, run_test); // delegate to main_helper (not defined here; presumably from test.h) with run_test as the test body
+}
diff --git a/deps/lightening/tests/negr_f.c b/deps/lightening/tests/negr_f.c
new file mode 100644
index 0000000..26110d5
--- /dev/null
+++ b/deps/lightening/tests/negr_f.c
@@ -0,0 +1,26 @@
+#include "test.h"
+
+static void
+run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size)
+{ // Emits a float(float) function around jit_negr_f and checks sign flips on sample values.
+ jit_begin(j, arena_base, arena_size);
+ size_t align = jit_enter_jit_abi(j, 0, 0, 0);
+ jit_load_args_1(j, jit_operand_fpr (JIT_OPERAND_ABI_FLOAT, JIT_F0));
+
+ jit_negr_f(j, JIT_F0, JIT_F0); // operation under test, applied in place on F0
+ jit_leave_jit_abi(j, 0, 0, align);
+ jit_retr_f(j, JIT_F0);
+
+ float (*f)(float) = jit_end(j, NULL);
+
+ ASSERT(f(0.0f) == -0.0f); // NOTE(review): 0.0f == -0.0f compares true under IEEE 754, so this does not check the sign bit
+ ASSERT(f(0.5f) == -0.5f);
+ ASSERT(f(1.0f / 0.0f) == -1.0f / 0.0f); // +infinity in, -infinity expected (assuming IEEE 754 division by zero)
+ ASSERT(f(-1.25f) == 1.25f);
+}
+
+int
+main (int argc, char *argv[])
+{
+ return main_helper(argc, argv, run_test); // delegate to main_helper (not defined here; presumably from test.h) with run_test as the test body
+}
diff --git a/deps/lightening/tests/ori.c b/deps/lightening/tests/ori.c
new file mode 100644
index 0000000..6310185
--- /dev/null
+++ b/deps/lightening/tests/ori.c
@@ -0,0 +1,31 @@
+#include "test.h"
+
+static void
+run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size)
+{ // Emits a word(word) function around jit_ori with immediate 1 and checks that bit 0 ends up set.
+ jit_begin(j, arena_base, arena_size);
+ size_t align = jit_enter_jit_abi(j, 0, 0, 0);
+ jit_load_args_1(j, jit_operand_gpr (JIT_OPERAND_ABI_WORD, JIT_R0));
+
+ jit_ori(j, JIT_R0, JIT_R0, 1); // operation under test: OR R0 with the immediate 1
+ jit_leave_jit_abi(j, 0, 0, align);
+ jit_retr(j, JIT_R0);
+
+ size_t size = 0;
+ void* ret = jit_end(j, &size); // size is not read again in this test
+
+ jit_word_t (*f)(jit_word_t) = ret;
+
+ ASSERT(f(0x7fffffff) == 0x7fffffff); // bit 0 already set: value unchanged
+ ASSERT(f(0x80000000) == 0x80000001); // bit 0 clear: gets set
+#if __WORDSIZE == 64 // same two cases with 64-bit operands
+ ASSERT(f(0x7fffffffffffffff) == 0x7fffffffffffffff);
+ ASSERT(f(0x8000000000000000) == 0x8000000000000001);
+#endif
+}
+
+int
+main (int argc, char *argv[])
+{
+ return main_helper(argc, argv, run_test); // delegate to main_helper (not defined here; presumably from test.h) with run_test as the test body
+}
diff --git a/deps/lightening/tests/orr.c b/deps/lightening/tests/orr.c
new file mode 100644
index 0000000..5a9087a
--- /dev/null
+++ b/deps/lightening/tests/orr.c
@@ -0,0 +1,48 @@
+#include "test.h"
+
+static void
+run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size)
+{ // Emits a word(word, word) function around jit_orr and checks bitwise-OR results in both operand orders.
+ jit_begin(j, arena_base, arena_size);
+ size_t align = jit_enter_jit_abi(j, 0, 0, 0);
+ jit_load_args_2(j, jit_operand_gpr (JIT_OPERAND_ABI_WORD, JIT_R0),
+ jit_operand_gpr (JIT_OPERAND_ABI_WORD, JIT_R1));
+
+ jit_orr(j, JIT_R0, JIT_R0, JIT_R1); // operation under test; the asserts below expect R0 = R0 | R1
+ jit_leave_jit_abi(j, 0, 0, align);
+ jit_retr(j, JIT_R0);
+
+ size_t size = 0;
+ void* ret = jit_end(j, &size); // size is not read again in this test
+
+ jit_word_t (*f)(jit_word_t, jit_word_t) = ret;
+
+ ASSERT(f(0x7fffffff, 1) == 0x7fffffff);
+ ASSERT(f(1, 0x7fffffff) == 0x7fffffff);
+ ASSERT(f(0x80000000, 1) == 0x80000001);
+ ASSERT(f(1, 0x80000000) == 0x80000001);
+ ASSERT(f(0x7fffffff, 0x80000000) == 0xffffffff);
+ ASSERT(f(0x80000000, 0x7fffffff) == 0xffffffff);
+ ASSERT(f(0x7fffffff, 0xffffffff) == 0xffffffff);
+ ASSERT(f(0xffffffff, 0x7fffffff) == 0xffffffff);
+ ASSERT(f(0xffffffff, 0xffffffff) == 0xffffffff);
+ ASSERT(f(0x7fffffff, 0) == 0x7fffffff); // OR with 0 leaves the other operand unchanged
+ ASSERT(f(0, 0x7fffffff) == 0x7fffffff);
+#if __WORDSIZE == 64 // the same patterns widened to 64 bits
+ ASSERT(f(0x7fffffffffffffff, 1) == 0x7fffffffffffffff);
+ ASSERT(f(1, 0x7fffffffffffffff) == 0x7fffffffffffffff);
+ ASSERT(f(0x8000000000000000, 1) == 0x8000000000000001);
+ ASSERT(f(1, 0x8000000000000000) == 0x8000000000000001);
+ ASSERT(f(0x7fffffffffffffff, 0x8000000000000000) == 0xffffffffffffffff);
+ ASSERT(f(0x8000000000000000, 0x7fffffffffffffff) == 0xffffffffffffffff);
+ ASSERT(f(0x7fffffffffffffff, 0xffffffffffffffff) == 0xffffffffffffffff);
+ ASSERT(f(0xffffffffffffffff, 0x7fffffffffffffff) == 0xffffffffffffffff);
+ ASSERT(f(0xffffffffffffffff, 0xffffffffffffffff) == 0xffffffffffffffff);
+#endif
+}
+
+int
+main (int argc, char *argv[])
+{
+ return main_helper(argc, argv, run_test); // delegate to main_helper (not defined here; presumably from test.h) with run_test as the test body
+}
diff --git a/deps/lightening/tests/qdivr.c b/deps/lightening/tests/qdivr.c
new file mode 100644
index 0000000..665053c
--- /dev/null
+++ b/deps/lightening/tests/qdivr.c
@@ -0,0 +1,44 @@
+#include "test.h"
+
+static void
+run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size)
+{ // Emits a function around jit_qdivr that stores two results through pointer arguments, then checks signed quotient/remainder pairs.
+ jit_begin(j, arena_base, arena_size);
+ size_t align = jit_enter_jit_abi(j, 3, 0, 0); // presumably reserves the three JIT_V* registers used below — TODO confirm against lightening.h
+
+ jit_operand_t args[] =
+ { jit_operand_gpr (JIT_OPERAND_ABI_POINTER, JIT_R0),
+ jit_operand_gpr (JIT_OPERAND_ABI_POINTER, JIT_R1),
+ jit_operand_gpr (JIT_OPERAND_ABI_WORD, JIT_R2),
+ jit_operand_gpr (JIT_OPERAND_ABI_WORD, JIT_V0) };
+ jit_load_args(j, 4, args);
+
+ jit_qdivr(j, JIT_V1, JIT_V2, JIT_R2, JIT_V0); // operation under test: results in V1/V2, inputs from R2/V0
+ jit_str(j, JIT_R0, JIT_V1); // V1 goes to the first pointer argument
+ jit_str(j, JIT_R1, JIT_V2); // V2 goes to the second pointer argument
+
+ jit_leave_jit_abi(j, 3, 0, align); // same counts as jit_enter_jit_abi above
+
+ jit_ret(j);
+
+ size_t size = 0;
+ void* ret = jit_end(j, &size); // size is not read again in this test
+
+ void (*f)(jit_word_t*, jit_word_t*, jit_word_t, jit_word_t) = ret;
+
+#define QDIV(a, b, c, d) \
+ do { \
+ jit_word_t C = 0, D = 0; f(&C, &D, a, b); ASSERT(C == c); ASSERT(D == d); \
+ } while (0) // calls f(a, b) and expects quotient c in C and remainder d in D
+
+ QDIV(10, 3, 3, 1);
+ QDIV(-33, 9, -3, -6); // expected quotient truncates toward zero; remainder has the dividend's sign
+ QDIV(-41, -7, 5, -6);
+ QDIV(65536, 4096, 16, 0);
+}
+
+int
+main (int argc, char *argv[])
+{
+ return main_helper(argc, argv, run_test); // delegate to main_helper (not defined here; presumably from test.h) with run_test as the test body
+}
diff --git a/deps/lightening/tests/qdivr_u.c b/deps/lightening/tests/qdivr_u.c
new file mode 100644
index 0000000..e260193
--- /dev/null
+++ b/deps/lightening/tests/qdivr_u.c
@@ -0,0 +1,42 @@
+#include "test.h"
+
+static void
+run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size)
+{ // Emits a function around jit_qdivr_u that stores two results through pointer arguments, then checks unsigned quotient/remainder pairs.
+ jit_begin(j, arena_base, arena_size);
+
+ size_t align = jit_enter_jit_abi(j, 3, 0, 0); // presumably reserves the three JIT_V* registers used below — TODO confirm against lightening.h
+
+ jit_operand_t args[] =
+ { jit_operand_gpr (JIT_OPERAND_ABI_POINTER, JIT_R0),
+ jit_operand_gpr (JIT_OPERAND_ABI_POINTER, JIT_R1),
+ jit_operand_gpr (JIT_OPERAND_ABI_WORD, JIT_R2),
+ jit_operand_gpr (JIT_OPERAND_ABI_WORD, JIT_V0) };
+ jit_load_args(j, 4, args);
+
+ jit_qdivr_u(j, JIT_V1, JIT_V2, JIT_R2, JIT_V0); // operation under test: results in V1/V2, inputs from R2/V0
+ jit_str(j, JIT_R0, JIT_V1); // V1 goes to the first pointer argument
+ jit_str(j, JIT_R1, JIT_V2); // V2 goes to the second pointer argument
+
+ jit_leave_jit_abi(j, 3, 0, align); // same counts as jit_enter_jit_abi above
+
+ jit_ret(j);
+
+ size_t size = 0;
+ void* ret = jit_end(j, &size); // size is not read again in this test
+
+ void (*f)(jit_word_t*, jit_word_t*, jit_word_t, jit_word_t) = ret;
+#define QDIV(a, b, c, d) \
+ do { \
+ jit_word_t C = 0, D = 0; f(&C, &D, a, b); ASSERT(C == c); ASSERT(D == d); \
+ } while (0) // calls f(a, b) and expects quotient c in C and remainder d in D
+
+ QDIV(-1, -2, 1, 1); // expected values match treating the negative literals as large unsigned words
+ QDIV(-2, -5, 1, 3);
+}
+
+int
+main (int argc, char *argv[])
+{
+ return main_helper(argc, argv, run_test); // delegate to main_helper (not defined here; presumably from test.h) with run_test as the test body
+}
diff --git a/deps/lightening/tests/qmulr.c b/deps/lightening/tests/qmulr.c
new file mode 100644
index 0000000..1645f5a
--- /dev/null
+++ b/deps/lightening/tests/qmulr.c
@@ -0,0 +1,58 @@
+#include "test.h"
+
+static void
+run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size)
+{ // Emits a function around jit_qmulr that stores two result words through pointer arguments, then checks signed double-width products.
+ jit_begin(j, arena_base, arena_size);
+
+ size_t align = jit_enter_jit_abi(j, 3, 0, 0); // presumably reserves the three JIT_V* registers used below — TODO confirm against lightening.h
+
+ jit_operand_t args[] =
+ { jit_operand_gpr (JIT_OPERAND_ABI_POINTER, JIT_R0),
+ jit_operand_gpr (JIT_OPERAND_ABI_POINTER, JIT_R1),
+ jit_operand_gpr (JIT_OPERAND_ABI_WORD, JIT_R2),
+ jit_operand_gpr (JIT_OPERAND_ABI_WORD, JIT_V0) };
+ jit_load_args(j, 4, args);
+
+ jit_qmulr(j, JIT_V1, JIT_V2, JIT_R2, JIT_V0); // operation under test: results in V1/V2, inputs from R2/V0
+ jit_str(j, JIT_R0, JIT_V1); // V1 goes to the first pointer argument
+ jit_str(j, JIT_R1, JIT_V2); // V2 goes to the second pointer argument
+
+ jit_leave_jit_abi(j, 3, 0, align); // same counts as jit_enter_jit_abi above
+
+ jit_ret(j);
+
+ size_t size = 0;
+ void* ret = jit_end(j, &size); // size is not read again in this test
+
+ void (*f)(jit_word_t*, jit_word_t*, jit_word_t, jit_word_t) = ret;
+
+#define QMUL(a, b, c, d) \
+ do { \
+ jit_word_t C = 0, D = 0; f(&C, &D, a, b); ASSERT(C == c); ASSERT(D == d); \
+ } while (0) // calls f(a, b); the samples below use c as the low word and d as the high word of a * b
+
+ QMUL(-2, -1, 2, 0);
+ QMUL(0, -1, 0, 0);
+ QMUL(-1, 0, 0, 0);
+ QMUL(1, -1, -1, -1); // product -1: both words are all ones
+#if __WORDSIZE == 32 // operands chosen per word width so the product spills into the high word
+ QMUL(0x7ffff, 0x7ffff, 0xfff00001, 0x3f);
+ QMUL(0x80000000, -2, 0, 1);
+ QMUL(0x80000000, 2, 0, -1);
+ QMUL(0x80000001, 3, 0x80000003, -2);
+ QMUL(0x80000001, -3, 0x7ffffffd, 1);
+#else
+ QMUL(0x7ffffffff, 0x7ffffffff, 0xfffffff000000001, 0x3f);
+ QMUL(0x8000000000000000, -2, 0, 1);
+ QMUL(0x8000000000000000, 2, 0, -1);
+ QMUL(0x8000000000000001, 3, 0x8000000000000003, -2);
+ QMUL(0x8000000000000001, -3, 0x7ffffffffffffffd, 1);
+#endif
+}
+
+int
+main (int argc, char *argv[])
+{
+ return main_helper(argc, argv, run_test); // delegate to main_helper (not defined here; presumably from test.h) with run_test as the test body
+}
diff --git a/deps/lightening/tests/qmulr_u.c b/deps/lightening/tests/qmulr_u.c
new file mode 100644
index 0000000..bb1d50d
--- /dev/null
+++ b/deps/lightening/tests/qmulr_u.c
@@ -0,0 +1,46 @@
+#include "test.h"
+
+static void
+run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size)
+{ // Emits a function around jit_qmulr_u that stores two result words through pointer arguments, then checks one unsigned double-width product.
+ jit_begin(j, arena_base, arena_size);
+
+ size_t align = jit_enter_jit_abi(j, 3, 0, 0); // presumably reserves the three JIT_V* registers used below — TODO confirm against lightening.h
+
+ jit_operand_t args[] =
+ { jit_operand_gpr (JIT_OPERAND_ABI_POINTER, JIT_R0),
+ jit_operand_gpr (JIT_OPERAND_ABI_POINTER, JIT_R1),
+ jit_operand_gpr (JIT_OPERAND_ABI_WORD, JIT_R2),
+ jit_operand_gpr (JIT_OPERAND_ABI_WORD, JIT_V0) };
+ jit_load_args(j, 4, args);
+
+ jit_qmulr_u(j, JIT_V1, JIT_V2, JIT_R2, JIT_V0); // operation under test: results in V1/V2, inputs from R2/V0
+ jit_str(j, JIT_R0, JIT_V1); // V1 goes to the first pointer argument
+ jit_str(j, JIT_R1, JIT_V2); // V2 goes to the second pointer argument
+
+ jit_leave_jit_abi(j, 3, 0, align); // same counts as jit_enter_jit_abi above
+
+ jit_ret(j);
+
+ size_t size = 0;
+ void* ret = jit_end(j, &size); // size is not read again in this test
+
+ void (*f)(jit_word_t*, jit_word_t*, jit_word_t, jit_word_t) = ret;
+
+#define UQMUL(a, b, c, d) \
+ do { \
+ jit_word_t C = 0, D = 0; f(&C, &D, a, b); ASSERT(C == c); ASSERT(D == d); \
+ } while (0) // calls f(a, b); the sample below uses c as the low word and d as the high word of a * b
+
+#if __WORDSIZE == 32 // (2^24 - 1)^2 on 32-bit words, (2^40 - 1)^2 on 64-bit words
+ UQMUL(0xffffff, 0xffffff, 0xfe000001, 0xffff);
+#else
+ UQMUL(0xffffffffff, 0xffffffffff, 0xfffffe0000000001, 0xffff);
+#endif
+}
+
+int
+main (int argc, char *argv[])
+{
+ return main_helper(argc, argv, run_test); // delegate to main_helper (not defined here; presumably from test.h) with run_test as the test body
+}
diff --git a/deps/lightening/tests/remr.c b/deps/lightening/tests/remr.c
new file mode 100644
index 0000000..805d6fb
--- /dev/null
+++ b/deps/lightening/tests/remr.c
@@ -0,0 +1,60 @@
+#include "test.h"
+
+static void
+run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size)
+{ // Emits a word(word, word) function around jit_remr and checks signed-remainder samples.
+ jit_begin(j, arena_base, arena_size);
+ size_t align = jit_enter_jit_abi(j, 0, 0, 0);
+ jit_load_args_2(j, jit_operand_gpr (JIT_OPERAND_ABI_WORD, JIT_R0),
+ jit_operand_gpr (JIT_OPERAND_ABI_WORD, JIT_R1));
+
+ jit_remr(j, JIT_R0, JIT_R0, JIT_R1); // operation under test; the asserts below expect R0 = R0 % R1 (signed)
+ jit_leave_jit_abi(j, 0, 0, align);
+ jit_retr(j, JIT_R0);
+
+ size_t size = 0;
+ void* ret = jit_end(j, &size); // size is not read again in this test
+
+ jit_word_t (*f)(jit_word_t, jit_word_t) = ret;
+
+ ASSERT(f(0x7fffffff, 1) == 0);
+ ASSERT(f(1, 0x7fffffff) == 1);
+ ASSERT(f(0x80000000, 1) == 0);
+ ASSERT(f(1, 0x80000000) == 1);
+ ASSERT(f(0x7fffffff, 2) == 1);
+ ASSERT(f(2, 0x7fffffff) == 2);
+ ASSERT(f(0x80000000, 2) == 0);
+ ASSERT(f(2, 0x80000000) == 2);
+ ASSERT(f(0x7fffffff, 0x80000000) == 0x7fffffff);
+ ASSERT(f(0, 0x7fffffff) == 0);
+ ASSERT(f(0xffffffff, 0xffffffff) == 0);
+
+#if __WORDSIZE == 32 // these patterns have the sign bit set on 32-bit words but are positive on 64-bit words, so expectations differ
+ ASSERT(f(0x80000000, 0x7fffffff) == 0xffffffff);
+ ASSERT(f(0x7fffffff, 0xffffffff) == 0);
+ ASSERT(f(0xffffffff, 0x7fffffff) == 0xffffffff);
+#else
+ ASSERT(f(0x80000000, 0x7fffffff) == 1);
+ ASSERT(f(0x7fffffff, 0xffffffff) == 0x7fffffff);
+ ASSERT(f(0xffffffff, 0x7fffffff) == 1);
+ ASSERT(f(0x7fffffffffffffff, 1) == 0);
+ ASSERT(f(1, 0x7fffffffffffffff) == 1);
+ ASSERT(f(0x8000000000000000, 1) == 0);
+ ASSERT(f(1, 0x8000000000000000) == 1);
+ ASSERT(f(0x7fffffffffffffff, 2) == 1);
+ ASSERT(f(2, 0x7fffffffffffffff) == 2);
+ ASSERT(f(0x8000000000000000, 2) == 0);
+ ASSERT(f(2, 0x8000000000000000) == 2);
+ ASSERT(f(0x7fffffffffffffff, 0x8000000000000000) == 0x7fffffffffffffff);
+ ASSERT(f(0x8000000000000000, 0x7fffffffffffffff) == 0xffffffffffffffff); // expected remainder is -1
+ ASSERT(f(0x7fffffffffffffff, 0xffffffffffffffff) == 0);
+ ASSERT(f(0xffffffffffffffff, 0x7fffffffffffffff) == 0xffffffffffffffff);
+ ASSERT(f(0xffffffffffffffff, 0xffffffffffffffff) == 0);
+#endif
+}
+
+int
+main (int argc, char *argv[])
+{
+ return main_helper(argc, argv, run_test); // delegate to main_helper (not defined here; presumably from test.h) with run_test as the test body
+}
diff --git a/deps/lightening/tests/remr_u.c b/deps/lightening/tests/remr_u.c
new file mode 100644
index 0000000..a9a0178
--- /dev/null
+++ b/deps/lightening/tests/remr_u.c
@@ -0,0 +1,56 @@
+#include "test.h"
+
+static void
+run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size)
+{ // Emits a word(word, word) function around jit_remr_u and checks unsigned-remainder samples.
+ jit_begin(j, arena_base, arena_size);
+ size_t align = jit_enter_jit_abi(j, 0, 0, 0);
+ jit_load_args_2(j, jit_operand_gpr (JIT_OPERAND_ABI_WORD, JIT_R0),
+ jit_operand_gpr (JIT_OPERAND_ABI_WORD, JIT_R1));
+
+ jit_remr_u(j, JIT_R0, JIT_R0, JIT_R1); // operation under test; the asserts below expect R0 = R0 % R1 (unsigned)
+ jit_leave_jit_abi(j, 0, 0, align);
+ jit_retr(j, JIT_R0);
+
+ size_t size = 0;
+ void* ret = jit_end(j, &size); // size is not read again in this test
+
+ jit_word_t (*f)(jit_word_t, jit_word_t) = ret;
+
+ ASSERT(f(0x7fffffff, 1) == 0);
+ ASSERT(f(1, 0x7fffffff) == 1);
+ ASSERT(f(0x80000000, 1) == 0);
+ ASSERT(f(1, 0x80000000) == 1);
+ ASSERT(f(0x7fffffff, 2) == 1);
+ ASSERT(f(2, 0x7fffffff) == 2);
+ ASSERT(f(0x80000000, 2) == 0);
+ ASSERT(f(2, 0x80000000) == 2);
+ ASSERT(f(0x7fffffff, 0x80000000) == 0x7fffffff);
+ ASSERT(f(0x80000000, 0x7fffffff) == 1);
+ ASSERT(f(0, 0x7fffffff) == 0);
+ ASSERT(f(0x7fffffff, 0xffffffff) == 0x7fffffff);
+ ASSERT(f(0xffffffff, 0x7fffffff) == 1);
+ ASSERT(f(0xffffffff, 0xffffffff) == 0);
+
+#if __WORDSIZE != 32 // 64-bit operands only make sense on wider words
+ ASSERT(f(0x7fffffffffffffff, 1) == 0);
+ ASSERT(f(1, 0x7fffffffffffffff) == 1);
+ ASSERT(f(0x8000000000000000, 1) == 0);
+ ASSERT(f(1, 0x8000000000000000) == 1);
+ ASSERT(f(0x7fffffffffffffff, 2) == 1);
+ ASSERT(f(2, 0x7fffffffffffffff) == 2);
+ ASSERT(f(0x8000000000000000, 2) == 0);
+ ASSERT(f(2, 0x8000000000000000) == 2);
+ ASSERT(f(0x7fffffffffffffff, 0x8000000000000000) == 0x7fffffffffffffff);
+ ASSERT(f(0x8000000000000000, 0x7fffffffffffffff) == 1);
+ ASSERT(f(0x7fffffffffffffff, 0xffffffffffffffff) == 0x7fffffffffffffff);
+ ASSERT(f(0xffffffffffffffff, 0x7fffffffffffffff) == 1);
+ ASSERT(f(0xffffffffffffffff, 0xffffffffffffffff) == 0);
+#endif
+}
+
+int
+main (int argc, char *argv[])
+{
+ return main_helper(argc, argv, run_test); // delegate to main_helper (not defined here; presumably from test.h) with run_test as the test body
+}
diff --git a/deps/lightening/tests/rshi.c b/deps/lightening/tests/rshi.c
new file mode 100644
index 0000000..c536055
--- /dev/null
+++ b/deps/lightening/tests/rshi.c
@@ -0,0 +1,28 @@
+#include "test.h"
+
+static void
+run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size)
+{ // Emits a word(word) function around jit_rshi with shift count 31 and checks sign-filling results.
+ jit_begin(j, arena_base, arena_size);
+ size_t align = jit_enter_jit_abi(j, 0, 0, 0);
+ jit_load_args_1(j, jit_operand_gpr (JIT_OPERAND_ABI_WORD, JIT_R0));
+
+ jit_rshi(j, JIT_R0, JIT_R0, 31); // operation under test: right shift of R0 by the immediate 31
+ jit_leave_jit_abi(j, 0, 0, align);
+ jit_retr(j, JIT_R0);
+
+ jit_word_t (*f)(jit_word_t) = jit_end(j, NULL);
+
+#if __WORDSIZE == 32
+ ASSERT(f(0x80000000) == -1); // sign bit set on a 32-bit word: expected result is all ones
+#else
+ ASSERT(f(0x80000000) == 1); // positive on a 64-bit word, so no sign fill
+ ASSERT(f(0x8000000000000000) == 0xffffffff00000000); // upper bits are expected to be filled with the sign
+#endif
+}
+
+int
+main (int argc, char *argv[])
+{
+ return main_helper(argc, argv, run_test); // delegate to main_helper (not defined here; presumably from test.h) with run_test as the test body
+}
diff --git a/deps/lightening/tests/rshi_u.c b/deps/lightening/tests/rshi_u.c
new file mode 100644
index 0000000..8f6dbd4
--- /dev/null
+++ b/deps/lightening/tests/rshi_u.c
@@ -0,0 +1,28 @@
+#include "test.h"
+
+static void
+run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size)
+{ // Emits a word(word) function around jit_rshi_u with shift count 31 and checks zero-filling results.
+ jit_begin(j, arena_base, arena_size);
+ size_t align = jit_enter_jit_abi(j, 0, 0, 0);
+ jit_load_args_1(j, jit_operand_gpr (JIT_OPERAND_ABI_WORD, JIT_R0));
+
+ jit_rshi_u(j, JIT_R0, JIT_R0, 31); // operation under test: right shift of R0 by the immediate 31
+ jit_leave_jit_abi(j, 0, 0, align);
+ jit_retr(j, JIT_R0);
+
+ jit_word_t (*f)(jit_word_t) = jit_end(j, NULL);
+
+#if __WORDSIZE == 32
+ ASSERT(f(0x80000000) == 1); // top bit moves to bit 0 with no sign fill
+#else
+ ASSERT(f(0x80000000) == 1);
+ ASSERT(f(0x8000000000000000) == 0x100000000); // bit 63 is expected to land on bit 32, upper bits zero
+#endif
+}
+
+int
+main (int argc, char *argv[])
+{
+ return main_helper(argc, argv, run_test); // delegate to main_helper (not defined here; presumably from test.h) with run_test as the test body
+}
diff --git a/deps/lightening/tests/rshr.c b/deps/lightening/tests/rshr.c
new file mode 100644
index 0000000..b4b5689
--- /dev/null
+++ b/deps/lightening/tests/rshr.c
@@ -0,0 +1,63 @@
+#include "test.h"
+
+static void
+run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size)
+{ // Emits a word(word, word) function around jit_rshr and checks right shifts that fill with the sign bit.
+ jit_begin(j, arena_base, arena_size);
+ size_t align = jit_enter_jit_abi(j, 0, 0, 0);
+ jit_load_args_2(j, jit_operand_gpr (JIT_OPERAND_ABI_WORD, JIT_R0),
+ jit_operand_gpr (JIT_OPERAND_ABI_WORD, JIT_R1));
+
+ jit_rshr(j, JIT_R0, JIT_R0, JIT_R1); // operation under test: shift R0 right by the count in R1
+ jit_leave_jit_abi(j, 0, 0, align);
+ jit_retr(j, JIT_R0);
+
+ size_t size = 0;
+ void* ret = jit_end(j, &size); // size is not read again in this test
+
+ jit_word_t (*f)(jit_word_t, jit_word_t) = ret;
+
+ ASSERT(f(0xfe, 1) == 0x7f);
+ ASSERT(f(0x1fffc, 2) == 0x7fff);
+ ASSERT(f(0x40000000, 30) == 1);
+ ASSERT(f(0x20000000, 29) == 1);
+ ASSERT(f(0x10000000, 28) == 1);
+ ASSERT(f(0x810000, 16) == 0x81);
+ ASSERT(f(0x20000, 17) == 1);
+ ASSERT(f(0x40000, 18) == 1);
+ ASSERT(f(0x7f8000, 15) == 0xff);
+ ASSERT(f(0x1000000, 24) == 1);
+ ASSERT(f(0x7fffffff, 0) == 0x7fffffff); // shift count 0 leaves the value unchanged
+#if __WORDSIZE == 32 // inputs with the 32-bit sign bit set: results are expected to be sign-filled
+ ASSERT(f(0xfffffff8, 3) == 0xffffffff);
+ ASSERT(f(0x80000000, 31) == 0xffffffff);
+ ASSERT(f(0xffffff00, 8) == 0xffffffff);
+#else
+ ASSERT(f(0x3fffffff8, 3) == 0x7fffffff);
+ ASSERT(f(0xffffffc080000000, 31) == 0xffffffffffffff81);
+ ASSERT(f(0xffffff00, 8) == 0xffffff); // positive on a 64-bit word, so no sign fill
+ ASSERT(f(0xfe00000000, 33) == 0x7f);
+ ASSERT(f(0x1ffffc00000000, 34) == 0x7ffff);
+ ASSERT(f(0xfffffff800000000, 29) == 0xffffffffffffffc0);
+ ASSERT(f(0x8000000000000000, 63) == 0xffffffffffffffff);
+ ASSERT(f(0x4000000000000000, 62) == 1);
+ ASSERT(f(0x2000000000000000, 61) == 1);
+ ASSERT(f(0x1000000000000000, 60) == 1);
+ ASSERT(f(0x81000000000000, 48) == 0x81);
+ ASSERT(f(0x2000000000000, 49) == 1);
+ ASSERT(f(0x10000000000, 40) == 1);
+ ASSERT(f(0x7f800000000000, 47) == 0xff);
+ ASSERT(f(0x100000000000000, 56) == 1);
+ ASSERT(f(0xffffff0000000000, 40) == 0xffffffffffffffff);
+ ASSERT(f(0xfffffffe00000000, 33) == 0xffffffffffffffff);
+ ASSERT(f(0x8000000000000001, 63) == 0xffffffffffffffff);
+ ASSERT(f(0x1000000000000, 48) == 1);
+ ASSERT(f(0xffff800000000000, 47) == 0xffffffffffffffff);
+#endif
+}
+
+int
+main (int argc, char *argv[])
+{
+ return main_helper(argc, argv, run_test); // delegate to main_helper (not defined here; presumably from test.h) with run_test as the test body
+}
diff --git a/deps/lightening/tests/rshr_u.c b/deps/lightening/tests/rshr_u.c
new file mode 100644
index 0000000..64c59fd
--- /dev/null
+++ b/deps/lightening/tests/rshr_u.c
@@ -0,0 +1,62 @@
+#include "test.h"
+
+static void
+run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size)
+{ // Emits a word(word, word) function around jit_rshr_u and checks right shifts that fill with zeros.
+ jit_begin(j, arena_base, arena_size);
+ size_t align = jit_enter_jit_abi(j, 0, 0, 0);
+ jit_load_args_2(j, jit_operand_gpr (JIT_OPERAND_ABI_WORD, JIT_R0),
+ jit_operand_gpr (JIT_OPERAND_ABI_WORD, JIT_R1));
+
+ jit_rshr_u(j, JIT_R0, JIT_R0, JIT_R1); // operation under test: shift R0 right by the count in R1
+ jit_leave_jit_abi(j, 0, 0, align);
+ jit_retr(j, JIT_R0);
+
+ size_t size = 0;
+ void* ret = jit_end(j, &size); // size is not read again in this test
+
+ jit_word_t (*f)(jit_word_t, jit_word_t) = ret;
+
+ ASSERT(f(0xfe, 1) == 0x7f);
+ ASSERT(f(0x1fffc, 2) == 0x7fff);
+ ASSERT(f(0x80000000, 31) == 1); // same expectation on both word widths because the fill is zero
+ ASSERT(f(0x40000000, 30) == 1);
+ ASSERT(f(0x20000000, 29) == 1);
+ ASSERT(f(0x10000000, 28) == 1);
+ ASSERT(f(0x810000, 16) == 0x81);
+ ASSERT(f(0x20000, 17) == 1);
+ ASSERT(f(0x40000, 18) == 1);
+ ASSERT(f(0x7f8000, 15) == 0xff);
+ ASSERT(f(0x1000000, 24) == 1);
+ ASSERT(f(0xffffff00, 8) == 0xffffff);
+ ASSERT(f(0x7fffffff, 0) == 0x7fffffff); // shift count 0 leaves the value unchanged
+#if __WORDSIZE == 32
+ ASSERT(f(0xfffffff8, 3) == 0x1fffffff);
+#else
+ ASSERT(f(0x3fffffff8, 3) == 0x7fffffff);
+ ASSERT(f(0xffffffc080000000, 31) == 0x1ffffff81);
+ ASSERT(f(0xfe00000000, 33) == 0x7f);
+ ASSERT(f(0x1ffffc00000000, 34) == 0x7ffff);
+ ASSERT(f(0xfffffff800000000, 29) == 0x7ffffffc0);
+ ASSERT(f(0x8000000000000000, 63) == 1);
+ ASSERT(f(0x4000000000000000, 62) == 1);
+ ASSERT(f(0x2000000000000000, 61) == 1);
+ ASSERT(f(0x1000000000000000, 60) == 1);
+ ASSERT(f(0x81000000000000, 48) == 0x81);
+ ASSERT(f(0x2000000000000, 49) == 1);
+ ASSERT(f(0x10000000000, 40) == 1);
+ ASSERT(f(0x7f800000000000, 47) == 0xff);
+ ASSERT(f(0x100000000000000, 56) == 1);
+ ASSERT(f(0xffffff0000000000, 40) == 0xffffff);
+ ASSERT(f(0xfffffffe00000000, 33) == 0x7fffffff);
+ ASSERT(f(0x8000000000000001, 63) == 1);
+ ASSERT(f(0x1000000000000, 48) == 1);
+ ASSERT(f(0xffff800000000000, 47) == 0x1ffff);
+#endif
+}
+
+int
+main (int argc, char *argv[])
+{
+ return main_helper(argc, argv, run_test); // delegate to main_helper (not defined here; presumably from test.h) with run_test as the test body
+}
diff --git a/deps/lightening/tests/sqrtr_d.c b/deps/lightening/tests/sqrtr_d.c
new file mode 100644
index 0000000..873deb9
--- /dev/null
+++ b/deps/lightening/tests/sqrtr_d.c
@@ -0,0 +1,25 @@
+#include "test.h"
+
+static void
+run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size)
+{ // Emits a double(double) function around jit_sqrtr_d and checks zero, a perfect square and a negative input.
+ jit_begin(j, arena_base, arena_size);
+ size_t align = jit_enter_jit_abi(j, 0, 0, 0);
+ jit_load_args_1(j, jit_operand_fpr (JIT_OPERAND_ABI_DOUBLE, JIT_F0));
+
+ jit_sqrtr_d(j, JIT_F0, JIT_F0); // operation under test, applied in place on F0
+ jit_leave_jit_abi(j, 0, 0, align);
+ jit_retr_d(j, JIT_F0);
+
+ double (*f)(double) = jit_end(j, NULL);
+
+ ASSERT(f(0.0) == 0.0);
+ ASSERT(f(4.0) == 2.0);
+ ASSERT(f(-4.0) != f(-4.0)); // expects NaN: a NaN compares unequal to itself
+}
+
+int
+main (int argc, char *argv[])
+{
+ return main_helper(argc, argv, run_test); // delegate to main_helper (not defined here; presumably from test.h) with run_test as the test body
+}
diff --git a/deps/lightening/tests/sqrtr_f.c b/deps/lightening/tests/sqrtr_f.c
new file mode 100644
index 0000000..66db831
--- /dev/null
+++ b/deps/lightening/tests/sqrtr_f.c
@@ -0,0 +1,25 @@
+#include "test.h"
+
+static void
+run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size)
+{ // Emits a float(float) function around jit_sqrtr_f and checks zero, a perfect square and a negative input.
+ jit_begin(j, arena_base, arena_size);
+ size_t align = jit_enter_jit_abi(j, 0, 0, 0);
+ jit_load_args_1(j, jit_operand_fpr (JIT_OPERAND_ABI_FLOAT, JIT_F0));
+
+ jit_sqrtr_f(j, JIT_F0, JIT_F0); // operation under test, applied in place on F0
+ jit_leave_jit_abi(j, 0, 0, align);
+ jit_retr_f(j, JIT_F0);
+
+ float (*f)(float) = jit_end(j, NULL);
+
+ ASSERT(f(0.0) == 0.0); // double literals: converted to float for the call, result compared as double
+ ASSERT(f(4.0) == 2.0);
+ ASSERT(f(-4.0) != f(-4.0)); // expects NaN: a NaN compares unequal to itself
+}
+
+int
+main (int argc, char *argv[])
+{
+ return main_helper(argc, argv, run_test); // delegate to main_helper (not defined here; presumably from test.h) with run_test as the test body
+}
diff --git a/deps/lightening/tests/sti_c.c b/deps/lightening/tests/sti_c.c
new file mode 100644
index 0000000..ff6e6d5
--- /dev/null
+++ b/deps/lightening/tests/sti_c.c
@@ -0,0 +1,31 @@
+#include "test.h"
+
+static uint8_t data[] = { 0x12, 0x00, 0x34 }; // data[1] is the store target; data[0] and data[2] are checked to stay unchanged
+
+static void
+run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size)
+{ // Emits a void(int8_t) function around jit_sti_c and checks that only data[1] changes.
+ jit_begin(j, arena_base, arena_size);
+ size_t align = jit_enter_jit_abi(j, 0, 0, 0);
+ jit_load_args_1(j, jit_operand_gpr (JIT_OPERAND_ABI_INT8, JIT_R1));
+
+ jit_sti_c(j, &data[1], JIT_R1); // operation under test: destination address is supplied at emit time, f takes only the value
+ jit_leave_jit_abi(j, 0, 0, align);
+ jit_ret(j);
+
+ void (*f)(int8_t) = jit_end(j, NULL);
+
+ ASSERT(data[0] == 0x12); // state before the call
+ ASSERT(data[1] == 0x00);
+ ASSERT(data[2] == 0x34);
+ f(-1);
+ ASSERT(data[0] == 0x12);
+ ASSERT(data[1] == 0xff); // -1 stored as one byte
+ ASSERT(data[2] == 0x34);
+}
+
+int
+main (int argc, char *argv[])
+{
+ return main_helper(argc, argv, run_test); // delegate to main_helper (not defined here; presumably from test.h) with run_test as the test body
+}
diff --git a/deps/lightening/tests/sti_d.c b/deps/lightening/tests/sti_d.c
new file mode 100644
index 0000000..8a703e6
--- /dev/null
+++ b/deps/lightening/tests/sti_d.c
@@ -0,0 +1,31 @@
+#include "test.h"
+
+static double data[] = { -1.0, 0.0, 0.5 }; // data[1] is the store target; data[0] and data[2] are checked to stay unchanged
+
+static void
+run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size)
+{ // Emits a void(double) function around jit_sti_d and checks that only data[1] changes.
+ jit_begin(j, arena_base, arena_size);
+ size_t align = jit_enter_jit_abi(j, 0, 0, 0);
+ jit_load_args_1(j, jit_operand_fpr (JIT_OPERAND_ABI_DOUBLE, JIT_F0));
+
+ jit_sti_d(j, &data[1], JIT_F0); // operation under test: destination address is supplied at emit time, f takes only the value
+ jit_leave_jit_abi(j, 0, 0, align);
+ jit_ret(j);
+
+ void (*f)(double) = jit_end(j, NULL);
+
+ ASSERT(data[0] == -1.0); // state before the call
+ ASSERT(data[1] == 0.0);
+ ASSERT(data[2] == 0.5);
+ f(42.5);
+ ASSERT(data[0] == -1.0);
+ ASSERT(data[1] == 42.5); // 42.5 is exactly representable, so == is reliable
+ ASSERT(data[2] == 0.5);
+}
+
+int
+main (int argc, char *argv[])
+{
+ return main_helper(argc, argv, run_test); // delegate to main_helper (not defined here; presumably from test.h) with run_test as the test body
+}
diff --git a/deps/lightening/tests/sti_f.c b/deps/lightening/tests/sti_f.c
new file mode 100644
index 0000000..e027192
--- /dev/null
+++ b/deps/lightening/tests/sti_f.c
@@ -0,0 +1,31 @@
+#include "test.h"
+
+static float data[] = { -1.0, 0.0, 0.5 }; // data[1] is the store target; data[0] and data[2] are checked to stay unchanged
+
+static void
+run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size)
+{ // Emits a void(float) function around jit_sti_f and checks that only data[1] changes.
+ jit_begin(j, arena_base, arena_size);
+ size_t align = jit_enter_jit_abi(j, 0, 0, 0);
+ jit_load_args_1(j, jit_operand_fpr (JIT_OPERAND_ABI_FLOAT, JIT_F0));
+
+ jit_sti_f(j, &data[1], JIT_F0); // operation under test: destination address is supplied at emit time, f takes only the value
+ jit_leave_jit_abi(j, 0, 0, align);
+ jit_ret(j);
+
+ void (*f)(float) = jit_end(j, NULL);
+
+ ASSERT(data[0] == -1.0f); // state before the call
+ ASSERT(data[1] == 0.0f);
+ ASSERT(data[2] == 0.5f);
+ f(42.5f);
+ ASSERT(data[0] == -1.0f);
+ ASSERT(data[1] == 42.5f); // 42.5f is exactly representable, so == is reliable
+ ASSERT(data[2] == 0.5f);
+}
+
+int
+main (int argc, char *argv[])
+{
+ return main_helper(argc, argv, run_test); // delegate to main_helper (not defined here; presumably from test.h) with run_test as the test body
+}
diff --git a/deps/lightening/tests/sti_i.c b/deps/lightening/tests/sti_i.c
new file mode 100644
index 0000000..4a233c6
--- /dev/null
+++ b/deps/lightening/tests/sti_i.c
@@ -0,0 +1,31 @@
+#include "test.h"
+
+static uint32_t data[] = { 0x12121212, 0x00000000, 0x34343434 }; // data[1] is the store target; data[0] and data[2] are checked to stay unchanged
+
+static void
+run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size)
+{ // Emits a void(int32_t) function around jit_sti_i and checks that only data[1] changes.
+ jit_begin(j, arena_base, arena_size);
+ size_t align = jit_enter_jit_abi(j, 0, 0, 0);
+ jit_load_args_1(j, jit_operand_gpr (JIT_OPERAND_ABI_INT32, JIT_R1));
+
+ jit_sti_i(j, &data[1], JIT_R1); // operation under test: destination address is supplied at emit time, f takes only the value
+ jit_leave_jit_abi(j, 0, 0, align);
+ jit_ret(j);
+
+ void (*f)(int32_t) = jit_end(j, NULL);
+
+ ASSERT(data[0] == 0x12121212); // state before the call
+ ASSERT(data[1] == 0x00);
+ ASSERT(data[2] == 0x34343434);
+ f(-1);
+ ASSERT(data[0] == 0x12121212);
+ ASSERT(data[1] == 0xffffffff); // -1 stored as four bytes
+ ASSERT(data[2] == 0x34343434);
+}
+
+int
+main (int argc, char *argv[])
+{
+ return main_helper(argc, argv, run_test); // delegate to main_helper (not defined here; presumably from test.h) with run_test as the test body
+}
diff --git a/deps/lightening/tests/sti_l.c b/deps/lightening/tests/sti_l.c
new file mode 100644
index 0000000..fce9180
--- /dev/null
+++ b/deps/lightening/tests/sti_l.c
@@ -0,0 +1,33 @@
+#include "test.h"
+
+static void
+run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size)
+{ // Emits a void(int64_t) function around jit_sti_l and checks that only data[1] changes; does nothing on 32-bit targets.
+#if __WORDSIZE > 32 // the whole body is compiled out when the word size is 32 bits or less
+ static uint64_t data[] = { 0x1212121212121212, 0, 0x3434343434343434 }; // data[1] is the store target, neighbours act as guards
+
+ jit_begin(j, arena_base, arena_size);
+ size_t align = jit_enter_jit_abi(j, 0, 0, 0);
+ jit_load_args_1(j, jit_operand_gpr (JIT_OPERAND_ABI_INT64, JIT_R1));
+
+ jit_sti_l(j, &data[1], JIT_R1); // operation under test: destination address is supplied at emit time, f takes only the value
+ jit_leave_jit_abi(j, 0, 0, align);
+ jit_ret(j);
+
+ void (*f)(int64_t) = jit_end(j, NULL);
+
+ ASSERT(data[0] == 0x1212121212121212); // state before the call
+ ASSERT(data[1] == 0x00);
+ ASSERT(data[2] == 0x3434343434343434);
+ f(-1);
+ ASSERT(data[0] == 0x1212121212121212);
+ ASSERT(data[1] == 0xffffffffffffffff); // -1 stored as eight bytes
+ ASSERT(data[2] == 0x3434343434343434);
+#endif
+}
+
+int
+main (int argc, char *argv[])
+{
+ return main_helper(argc, argv, run_test); // delegate to main_helper (not defined here; presumably from test.h) with run_test as the test body
+}
diff --git a/deps/lightening/tests/sti_s.c b/deps/lightening/tests/sti_s.c
new file mode 100644
index 0000000..daab0bd
--- /dev/null
+++ b/deps/lightening/tests/sti_s.c
@@ -0,0 +1,31 @@
+#include "test.h"
+
+static uint16_t data[] = { 0x1212, 0x0000, 0x3434 }; // data[1] is the store target; data[0] and data[2] are checked to stay unchanged
+
+static void
+run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size)
+{ // Emits a void(int16_t) function around jit_sti_s and checks that only data[1] changes.
+ jit_begin(j, arena_base, arena_size);
+ size_t align = jit_enter_jit_abi(j, 0, 0, 0);
+ jit_load_args_1(j, jit_operand_gpr (JIT_OPERAND_ABI_INT16, JIT_R1));
+
+ jit_sti_s(j, &data[1], JIT_R1); // operation under test: destination address is supplied at emit time, f takes only the value
+ jit_leave_jit_abi(j, 0, 0, align);
+ jit_ret(j);
+
+ void (*f)(int16_t) = jit_end(j, NULL);
+
+ ASSERT(data[0] == 0x1212); // state before the call
+ ASSERT(data[1] == 0);
+ ASSERT(data[2] == 0x3434);
+ f(-1);
+ ASSERT(data[0] == 0x1212);
+ ASSERT(data[1] == 0xffff); // -1 stored as two bytes
+ ASSERT(data[2] == 0x3434);
+}
+
+int
+main (int argc, char *argv[])
+{
+ return main_helper(argc, argv, run_test); // delegate to main_helper (not defined here; presumably from test.h) with run_test as the test body
+}
diff --git a/deps/lightening/tests/str_atomic.c b/deps/lightening/tests/str_atomic.c
new file mode 100644
index 0000000..9098c2a
--- /dev/null
+++ b/deps/lightening/tests/str_atomic.c
@@ -0,0 +1,32 @@
+#include "test.h"
+
+static long data[] = { 0x12121212, 0x00000000, 0x34343434 }; // data[1] is the store target; data[0] and data[2] are checked to stay unchanged
+
+static void
+run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size)
+{ // Emits a void(void*, int32_t) function around jit_str_atomic and checks that only the pointed-to element changes.
+ jit_begin(j, arena_base, arena_size);
+ size_t align = jit_enter_jit_abi(j, 0, 0, 0);
+ jit_load_args_2(j, jit_operand_gpr (JIT_OPERAND_ABI_POINTER, JIT_R0),
+ jit_operand_gpr (JIT_OPERAND_ABI_INT32, JIT_R1)); // NOTE(review): value is declared INT32 while data elements are long — confirm the store width jit_str_atomic uses
+
+ jit_str_atomic(j, JIT_R0, JIT_R1); // operation under test: store R1 through the pointer in R0
+ jit_leave_jit_abi(j, 0, 0, align);
+ jit_ret(j);
+
+ void (*f)(void*, int32_t) = jit_end(j, NULL);
+
+ ASSERT(data[0] == 0x12121212); // state before the call
+ ASSERT(data[1] == 0x00);
+ ASSERT(data[2] == 0x34343434);
+ f(&data[1], 0x0f0f0f0f);
+ ASSERT(data[0] == 0x12121212);
+ ASSERT(data[1] == 0x0f0f0f0f); // the whole long element must equal the stored value
+ ASSERT(data[2] == 0x34343434);
+}
+
+int
+main (int argc, char *argv[])
+{
+ return main_helper(argc, argv, run_test); // delegate to main_helper (not defined here; presumably from test.h) with run_test as the test body
+}
diff --git a/deps/lightening/tests/str_c.c b/deps/lightening/tests/str_c.c
new file mode 100644
index 0000000..b894b82
--- /dev/null
+++ b/deps/lightening/tests/str_c.c
@@ -0,0 +1,32 @@
+#include "test.h"
+
+static uint8_t data[] = { 0x12, 0x00, 0x34 }; // data[1] is the store target; data[0] and data[2] are checked to stay unchanged
+
+static void
+run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size)
+{ // Emits a void(void*, int8_t) function around jit_str_c and checks that only the pointed-to byte changes.
+ jit_begin(j, arena_base, arena_size);
+ size_t align = jit_enter_jit_abi(j, 0, 0, 0);
+ jit_load_args_2(j, jit_operand_gpr (JIT_OPERAND_ABI_POINTER, JIT_R0),
+ jit_operand_gpr (JIT_OPERAND_ABI_INT8, JIT_R1));
+
+ jit_str_c(j, JIT_R0, JIT_R1); // operation under test: store R1 through the pointer in R0
+ jit_leave_jit_abi(j, 0, 0, align);
+ jit_ret(j);
+
+ void (*f)(void*, int8_t) = jit_end(j, NULL);
+
+ ASSERT(data[0] == 0x12); // state before the call
+ ASSERT(data[1] == 0x00);
+ ASSERT(data[2] == 0x34);
+ f(&data[1], -1);
+ ASSERT(data[0] == 0x12);
+ ASSERT(data[1] == 0xff); // -1 stored as one byte
+ ASSERT(data[2] == 0x34);
+}
+
+int
+main (int argc, char *argv[])
+{
+ return main_helper(argc, argv, run_test); // delegate to main_helper (not defined here; presumably from test.h) with run_test as the test body
+}
diff --git a/deps/lightening/tests/str_d.c b/deps/lightening/tests/str_d.c
new file mode 100644
index 0000000..2f992a6
--- /dev/null
+++ b/deps/lightening/tests/str_d.c
@@ -0,0 +1,32 @@
+#include "test.h"
+
+static double data[] = { -1.0, 0.0, 0.5 }; // data[1] is the store target; data[0] and data[2] are checked to stay unchanged
+
+static void
+run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size)
+{ // Emits a void(void*, double) function around jit_str_d and checks that only the pointed-to element changes.
+ jit_begin(j, arena_base, arena_size);
+ size_t align = jit_enter_jit_abi(j, 0, 0, 0);
+ jit_load_args_2(j, jit_operand_gpr (JIT_OPERAND_ABI_POINTER, JIT_R0),
+ jit_operand_fpr (JIT_OPERAND_ABI_DOUBLE, JIT_F0));
+
+ jit_str_d(j, JIT_R0, JIT_F0); // operation under test: store F0 through the pointer in R0
+ jit_leave_jit_abi(j, 0, 0, align);
+ jit_ret(j);
+
+ void (*f)(void*, double) = jit_end(j, NULL);
+
+ ASSERT(data[0] == -1.0); // state before the call
+ ASSERT(data[1] == 0.0);
+ ASSERT(data[2] == 0.5);
+ f(&data[1], 42.5);
+ ASSERT(data[0] == -1.0);
+ ASSERT(data[1] == 42.5); // 42.5 is exactly representable, so == is reliable
+ ASSERT(data[2] == 0.5);
+}
+
+int
+main (int argc, char *argv[])
+{
+ return main_helper(argc, argv, run_test); // delegate to main_helper (not defined here; presumably from test.h) with run_test as the test body
+}
diff --git a/deps/lightening/tests/str_f.c b/deps/lightening/tests/str_f.c
new file mode 100644
index 0000000..fdad3c2
--- /dev/null
+++ b/deps/lightening/tests/str_f.c
@@ -0,0 +1,32 @@
+#include "test.h"
+
+static float data[] = { -1.0, 0.0, 0.5 };
+
+static void
+run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size)
+{ /* Test for jit_str_f: f(&data[1], 42.5f) must set data[1] to 42.5f and change nothing else. */
+ jit_begin(j, arena_base, arena_size);
+ size_t align = jit_enter_jit_abi(j, 0, 0, 0);
+ jit_load_args_2(j, jit_operand_gpr (JIT_OPERAND_ABI_POINTER, JIT_R0),
+ jit_operand_fpr (JIT_OPERAND_ABI_FLOAT, JIT_F0));
+ /* Operation under test, applied to the pointer (R0) and float (F0) arguments. */
+ jit_str_f(j, JIT_R0, JIT_F0);
+ jit_leave_jit_abi(j, 0, 0, align);
+ jit_ret(j);
+
+ void (*f)(void*, float) = jit_end(j, NULL);
+ /* Verify the initial contents, call the generated code, then re-check all three floats. */
+ ASSERT(data[0] == -1.0f);
+ ASSERT(data[1] == 0.0f);
+ ASSERT(data[2] == 0.5f);
+ f(&data[1], 42.5f);
+ ASSERT(data[0] == -1.0f);
+ ASSERT(data[1] == 42.5f);
+ ASSERT(data[2] == 0.5f);
+}
+
+int
+main (int argc, char *argv[])
+{ /* Entry point: main_helper drives run_test and its result is the exit status. */
+ return main_helper(argc, argv, run_test);
+}
diff --git a/deps/lightening/tests/str_i.c b/deps/lightening/tests/str_i.c
new file mode 100644
index 0000000..968f0ce
--- /dev/null
+++ b/deps/lightening/tests/str_i.c
@@ -0,0 +1,32 @@
+#include "test.h"
+
+static uint32_t data[] = { 0x12121212, 0x00000000, 0x34343434 };
+
+static void
+run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size)
+{ /* Test for jit_str_i: f(&data[1], -1) must set data[1] to 0xffffffff and change nothing else. */
+ jit_begin(j, arena_base, arena_size);
+ size_t align = jit_enter_jit_abi(j, 0, 0, 0);
+ jit_load_args_2(j, jit_operand_gpr (JIT_OPERAND_ABI_POINTER, JIT_R0),
+ jit_operand_gpr (JIT_OPERAND_ABI_INT32, JIT_R1));
+ /* Operation under test, applied to the pointer (R0) and int32 (R1) arguments. */
+ jit_str_i(j, JIT_R0, JIT_R1);
+ jit_leave_jit_abi(j, 0, 0, align);
+ jit_ret(j);
+
+ void (*f)(void*, int32_t) = jit_end(j, NULL);
+ /* Verify the initial contents, call the generated code, then re-check all three words. */
+ ASSERT(data[0] == 0x12121212);
+ ASSERT(data[1] == 0x00);
+ ASSERT(data[2] == 0x34343434);
+ f(&data[1], -1);
+ ASSERT(data[0] == 0x12121212);
+ ASSERT(data[1] == 0xffffffff);
+ ASSERT(data[2] == 0x34343434);
+}
+
+int
+main (int argc, char *argv[])
+{ /* Entry point: main_helper drives run_test and its result is the exit status. */
+ return main_helper(argc, argv, run_test);
+}
diff --git a/deps/lightening/tests/str_l.c b/deps/lightening/tests/str_l.c
new file mode 100644
index 0000000..450885b
--- /dev/null
+++ b/deps/lightening/tests/str_l.c
@@ -0,0 +1,34 @@
+#include "test.h"
+
+static void
+run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size)
+{ /* Test for jit_str_l; the whole body is compiled only when __WORDSIZE > 32, else this is a no-op. */
+#if __WORDSIZE > 32
+ static uint64_t data[] = { 0x1212121212121212, 0, 0x3434343434343434 };
+
+ jit_begin(j, arena_base, arena_size);
+ size_t align = jit_enter_jit_abi(j, 0, 0, 0);
+ jit_load_args_2(j, jit_operand_gpr (JIT_OPERAND_ABI_POINTER, JIT_R0),
+ jit_operand_gpr (JIT_OPERAND_ABI_INT64, JIT_R1));
+ /* Operation under test, applied to the pointer (R0) and int64 (R1) arguments. */
+ jit_str_l(j, JIT_R0, JIT_R1);
+ jit_leave_jit_abi(j, 0, 0, align);
+ jit_ret(j);
+
+ void (*f)(void*, int64_t) = jit_end(j, NULL);
+ /* f(&data[1], -1) must turn data[1] into all ones and leave its neighbours alone. */
+ ASSERT(data[0] == 0x1212121212121212);
+ ASSERT(data[1] == 0x00);
+ ASSERT(data[2] == 0x3434343434343434);
+ f(&data[1], -1);
+ ASSERT(data[0] == 0x1212121212121212);
+ ASSERT(data[1] == 0xffffffffffffffff);
+ ASSERT(data[2] == 0x3434343434343434);
+#endif
+}
+
+int
+main (int argc, char *argv[])
+{ /* Entry point: main_helper drives run_test and its result is the exit status. */
+ return main_helper(argc, argv, run_test);
+}
diff --git a/deps/lightening/tests/str_s.c b/deps/lightening/tests/str_s.c
new file mode 100644
index 0000000..3e228ed
--- /dev/null
+++ b/deps/lightening/tests/str_s.c
@@ -0,0 +1,32 @@
+#include "test.h"
+
+static uint16_t data[] = { 0x1212, 0x0000, 0x3434 };
+
+static void
+run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size)
+{ /* Test for jit_str_s: f(&data[1], -1) must set data[1] to 0xffff and change nothing else. */
+ jit_begin(j, arena_base, arena_size);
+ size_t align = jit_enter_jit_abi(j, 0, 0, 0);
+ jit_load_args_2(j, jit_operand_gpr (JIT_OPERAND_ABI_POINTER, JIT_R0),
+ jit_operand_gpr (JIT_OPERAND_ABI_INT16, JIT_R1));
+ /* Operation under test, applied to the pointer (R0) and int16 (R1) arguments. */
+ jit_str_s(j, JIT_R0, JIT_R1);
+ jit_leave_jit_abi(j, 0, 0, align);
+ jit_ret(j);
+
+ void (*f)(void*, int16_t) = jit_end(j, NULL);
+ /* Verify the initial contents, call the generated code, then re-check all three halfwords. */
+ ASSERT(data[0] == 0x1212);
+ ASSERT(data[1] == 0);
+ ASSERT(data[2] == 0x3434);
+ f(&data[1], -1);
+ ASSERT(data[0] == 0x1212);
+ ASSERT(data[1] == 0xffff);
+ ASSERT(data[2] == 0x3434);
+}
+
+int
+main (int argc, char *argv[])
+{ /* Entry point: main_helper drives run_test and its result is the exit status. */
+ return main_helper(argc, argv, run_test);
+}
diff --git a/deps/lightening/tests/stxi_c.c b/deps/lightening/tests/stxi_c.c
new file mode 100644
index 0000000..d76d814
--- /dev/null
+++ b/deps/lightening/tests/stxi_c.c
@@ -0,0 +1,32 @@
+#include "test.h"
+
+static uint8_t data[] = { 0x12, 0x00, 0x34 };
+
+static void
+run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size)
+{ /* Test for jit_stxi_c: f(1, -1) must set data[1] to 0xff and change nothing else. */
+ jit_begin(j, arena_base, arena_size);
+ size_t align = jit_enter_jit_abi(j, 0, 0, 0);
+ jit_load_args_2(j, jit_operand_gpr (JIT_OPERAND_ABI_WORD, JIT_R2),
+ jit_operand_gpr (JIT_OPERAND_ABI_INT8, JIT_R1));
+ /* The address of data is the immediate; R2 (first argument) supplies the byte offset into it. */
+ jit_stxi_c(j, (uintptr_t)data, JIT_R2, JIT_R1);
+ jit_leave_jit_abi(j, 0, 0, align);
+ jit_ret(j);
+
+ void (*f)(jit_word_t, int8_t) = jit_end(j, NULL);
+ /* Verify the initial contents, call the generated code, then re-check all three bytes. */
+ ASSERT(data[0] == 0x12);
+ ASSERT(data[1] == 0x00);
+ ASSERT(data[2] == 0x34);
+ f(1, -1);
+ ASSERT(data[0] == 0x12);
+ ASSERT(data[1] == 0xff);
+ ASSERT(data[2] == 0x34);
+}
+
+int
+main (int argc, char *argv[])
+{ /* Entry point: main_helper drives run_test and its result is the exit status. */
+ return main_helper(argc, argv, run_test);
+}
diff --git a/deps/lightening/tests/stxi_d.c b/deps/lightening/tests/stxi_d.c
new file mode 100644
index 0000000..3933c56
--- /dev/null
+++ b/deps/lightening/tests/stxi_d.c
@@ -0,0 +1,32 @@
+#include "test.h"
+
+static double data[] = { -1.0, 0.0, 0.5 };
+
+static void
+run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size)
+{ /* Test for jit_stxi_d: f(8, 42.5) must set data[1] to 42.5 and change nothing else. */
+ jit_begin(j, arena_base, arena_size);
+ size_t align = jit_enter_jit_abi(j, 0, 0, 0);
+ jit_load_args_2(j, jit_operand_gpr (JIT_OPERAND_ABI_WORD, JIT_R2),
+ jit_operand_fpr (JIT_OPERAND_ABI_DOUBLE, JIT_F0));
+ /* The address of data is the immediate; R2 (first argument) supplies the byte offset into it. */
+ jit_stxi_d(j, (uintptr_t)data, JIT_R2, JIT_F0);
+ jit_leave_jit_abi(j, 0, 0, align);
+ jit_ret(j);
+
+ void (*f)(jit_word_t, double) = jit_end(j, NULL);
+ /* Offset 8 is passed to reach data[1], the second double. */
+ ASSERT(data[0] == -1.0);
+ ASSERT(data[1] == 0.0);
+ ASSERT(data[2] == 0.5);
+ f(8, 42.5);
+ ASSERT(data[0] == -1.0);
+ ASSERT(data[1] == 42.5);
+ ASSERT(data[2] == 0.5);
+}
+
+int
+main (int argc, char *argv[])
+{ /* Entry point: main_helper drives run_test and its result is the exit status. */
+ return main_helper(argc, argv, run_test);
+}
diff --git a/deps/lightening/tests/stxi_f.c b/deps/lightening/tests/stxi_f.c
new file mode 100644
index 0000000..aea6756
--- /dev/null
+++ b/deps/lightening/tests/stxi_f.c
@@ -0,0 +1,32 @@
+#include "test.h"
+
+static float data[] = { -1.0, 0.0, 0.5 };
+
+static void
+run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size)
+{ /* Test for jit_stxi_f: f(4, 42.5f) must set data[1] to 42.5f and change nothing else. */
+ jit_begin(j, arena_base, arena_size);
+ size_t align = jit_enter_jit_abi(j, 0, 0, 0);
+ jit_load_args_2(j, jit_operand_gpr (JIT_OPERAND_ABI_WORD, JIT_R2),
+ jit_operand_fpr (JIT_OPERAND_ABI_FLOAT, JIT_F0));
+ /* The address of data is the immediate; R2 (first argument) supplies the byte offset into it. */
+ jit_stxi_f(j, (uintptr_t)data, JIT_R2, JIT_F0);
+ jit_leave_jit_abi(j, 0, 0, align);
+ jit_ret(j);
+
+ void (*f)(jit_word_t, float) = jit_end(j, NULL);
+ /* Offset 4 is passed to reach data[1], the second float. */
+ ASSERT(data[0] == -1.0f);
+ ASSERT(data[1] == 0.0f);
+ ASSERT(data[2] == 0.5f);
+ f(4, 42.5f);
+ ASSERT(data[0] == -1.0f);
+ ASSERT(data[1] == 42.5f);
+ ASSERT(data[2] == 0.5f);
+}
+
+int
+main (int argc, char *argv[])
+{ /* Entry point: main_helper drives run_test and its result is the exit status. */
+ return main_helper(argc, argv, run_test);
+}
diff --git a/deps/lightening/tests/stxi_i.c b/deps/lightening/tests/stxi_i.c
new file mode 100644
index 0000000..79dab03
--- /dev/null
+++ b/deps/lightening/tests/stxi_i.c
@@ -0,0 +1,32 @@
+#include "test.h"
+
+static uint32_t data[] = { 0x12121212, 0x00000000, 0x34343434 };
+
+static void
+run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size)
+{ /* Test for jit_stxi_i: f(4, -1) must set data[1] to 0xffffffff and change nothing else. */
+ jit_begin(j, arena_base, arena_size);
+ size_t align = jit_enter_jit_abi(j, 0, 0, 0);
+ jit_load_args_2(j, jit_operand_gpr (JIT_OPERAND_ABI_WORD, JIT_R2),
+ jit_operand_gpr (JIT_OPERAND_ABI_INT32, JIT_R1));
+ /* The address of data is the immediate; R2 (first argument) supplies the byte offset into it. */
+ jit_stxi_i(j, (uintptr_t)data, JIT_R2, JIT_R1);
+ jit_leave_jit_abi(j, 0, 0, align);
+ jit_ret(j);
+
+ void (*f)(jit_word_t, int32_t) = jit_end(j, NULL);
+ /* Offset 4 is passed to reach data[1], the second 32-bit word. */
+ ASSERT(data[0] == 0x12121212);
+ ASSERT(data[1] == 0x00);
+ ASSERT(data[2] == 0x34343434);
+ f(4, -1);
+ ASSERT(data[0] == 0x12121212);
+ ASSERT(data[1] == 0xffffffff);
+ ASSERT(data[2] == 0x34343434);
+}
+
+int
+main (int argc, char *argv[])
+{ /* Entry point: main_helper drives run_test and its result is the exit status. */
+ return main_helper(argc, argv, run_test);
+}
diff --git a/deps/lightening/tests/stxi_l.c b/deps/lightening/tests/stxi_l.c
new file mode 100644
index 0000000..8a68241
--- /dev/null
+++ b/deps/lightening/tests/stxi_l.c
@@ -0,0 +1,34 @@
+#include "test.h"
+
+static void
+run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size)
+{ /* Test for jit_stxi_l; the whole body is compiled only when __WORDSIZE > 32, else this is a no-op. */
+#if __WORDSIZE > 32
+ static uint64_t data[] = { 0x1212121212121212, 0, 0x3434343434343434 };
+
+ jit_begin(j, arena_base, arena_size);
+ size_t align = jit_enter_jit_abi(j, 0, 0, 0);
+ jit_load_args_2(j, jit_operand_gpr (JIT_OPERAND_ABI_WORD, JIT_R2),
+ jit_operand_gpr (JIT_OPERAND_ABI_INT64, JIT_R1));
+ /* The address of data is the immediate; R2 (first argument) supplies the byte offset into it. */
+ jit_stxi_l(j, (uintptr_t)data, JIT_R2, JIT_R1);
+ jit_leave_jit_abi(j, 0, 0, align);
+ jit_ret(j);
+
+ void (*f)(jit_word_t, int64_t) = jit_end(j, NULL);
+ /* f(8, -1) must turn data[1] into all ones and leave its neighbours alone. */
+ ASSERT(data[0] == 0x1212121212121212);
+ ASSERT(data[1] == 0x00);
+ ASSERT(data[2] == 0x3434343434343434);
+ f(8, -1);
+ ASSERT(data[0] == 0x1212121212121212);
+ ASSERT(data[1] == 0xffffffffffffffff);
+ ASSERT(data[2] == 0x3434343434343434);
+#endif
+}
+
+int
+main (int argc, char *argv[])
+{ /* Entry point: main_helper drives run_test and its result is the exit status. */
+ return main_helper(argc, argv, run_test);
+}
diff --git a/deps/lightening/tests/stxi_s.c b/deps/lightening/tests/stxi_s.c
new file mode 100644
index 0000000..64bda5d
--- /dev/null
+++ b/deps/lightening/tests/stxi_s.c
@@ -0,0 +1,32 @@
+#include "test.h"
+
+static uint16_t data[] = { 0x1212, 0x0000, 0x3434 };
+
+static void
+run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size)
+{ /* Test for jit_stxi_s: f(2, -1) must set data[1] to 0xffff and change nothing else. */
+ jit_begin(j, arena_base, arena_size);
+ size_t align = jit_enter_jit_abi(j, 0, 0, 0);
+ jit_load_args_2(j, jit_operand_gpr (JIT_OPERAND_ABI_WORD, JIT_R2),
+ jit_operand_gpr (JIT_OPERAND_ABI_INT16, JIT_R1));
+ /* The address of data is the immediate; R2 (first argument) supplies the byte offset into it. */
+ jit_stxi_s(j, (uintptr_t)data, JIT_R2, JIT_R1);
+ jit_leave_jit_abi(j, 0, 0, align);
+ jit_ret(j);
+
+ void (*f)(jit_word_t, int16_t) = jit_end(j, NULL);
+ /* Offset 2 is passed to reach data[1], the second 16-bit element. */
+ ASSERT(data[0] == 0x1212);
+ ASSERT(data[1] == 0);
+ ASSERT(data[2] == 0x3434);
+ f(2, -1);
+ ASSERT(data[0] == 0x1212);
+ ASSERT(data[1] == 0xffff);
+ ASSERT(data[2] == 0x3434);
+}
+
+int
+main (int argc, char *argv[])
+{ /* Entry point: main_helper drives run_test and its result is the exit status. */
+ return main_helper(argc, argv, run_test);
+}
diff --git a/deps/lightening/tests/stxr_c.c b/deps/lightening/tests/stxr_c.c
new file mode 100644
index 0000000..8876855
--- /dev/null
+++ b/deps/lightening/tests/stxr_c.c
@@ -0,0 +1,33 @@
+#include "test.h"
+
+static uint8_t data[] = { 0x12, 0x00, 0x34 };
+
+static void
+run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size)
+{ /* Test for jit_stxr_c: f(data, 1, -1) must set data[1] to 0xff and change nothing else. */
+ jit_begin(j, arena_base, arena_size);
+ size_t align = jit_enter_jit_abi(j, 0, 0, 0);
+ jit_load_args_3(j, jit_operand_gpr (JIT_OPERAND_ABI_POINTER, JIT_R0),
+ jit_operand_gpr (JIT_OPERAND_ABI_WORD, JIT_R2),
+ jit_operand_gpr (JIT_OPERAND_ABI_INT8, JIT_R1));
+ /* Operands: base pointer in R0, byte offset in R2, value in R1. */
+ jit_stxr_c(j, JIT_R0, JIT_R2, JIT_R1);
+ jit_leave_jit_abi(j, 0, 0, align);
+ jit_ret(j);
+
+ void (*f)(void*, jit_word_t, int8_t) = jit_end(j, NULL);
+ /* Verify the initial contents, call the generated code, then re-check all three bytes. */
+ ASSERT(data[0] == 0x12);
+ ASSERT(data[1] == 0x00);
+ ASSERT(data[2] == 0x34);
+ f(data, 1, -1);
+ ASSERT(data[0] == 0x12);
+ ASSERT(data[1] == 0xff);
+ ASSERT(data[2] == 0x34);
+}
+
+int
+main (int argc, char *argv[])
+{ /* Entry point: main_helper drives run_test and its result is the exit status. */
+ return main_helper(argc, argv, run_test);
+}
diff --git a/deps/lightening/tests/stxr_d.c b/deps/lightening/tests/stxr_d.c
new file mode 100644
index 0000000..e87688a
--- /dev/null
+++ b/deps/lightening/tests/stxr_d.c
@@ -0,0 +1,33 @@
+#include "test.h"
+
+static double data[] = { -1.0, 0.0, 0.5 };
+
+static void
+run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size)
+{ /* Test for jit_stxr_d: f(data, 8, 42.5) must set data[1] to 42.5 and change nothing else. */
+ jit_begin(j, arena_base, arena_size);
+ size_t align = jit_enter_jit_abi(j, 0, 0, 0);
+ jit_load_args_3(j, jit_operand_gpr (JIT_OPERAND_ABI_POINTER, JIT_R0),
+ jit_operand_gpr (JIT_OPERAND_ABI_WORD, JIT_R2),
+ jit_operand_fpr (JIT_OPERAND_ABI_DOUBLE, JIT_F0));
+ /* Operands: base pointer in R0, byte offset in R2, value in F0. */
+ jit_stxr_d(j, JIT_R0, JIT_R2, JIT_F0);
+ jit_leave_jit_abi(j, 0, 0, align);
+ jit_ret(j);
+
+ void (*f)(void*, jit_word_t, double) = jit_end(j, NULL);
+ /* Offset 8 is passed to reach data[1], the second double. */
+ ASSERT(data[0] == -1.0);
+ ASSERT(data[1] == 0.0);
+ ASSERT(data[2] == 0.5);
+ f(data, 8, 42.5);
+ ASSERT(data[0] == -1.0);
+ ASSERT(data[1] == 42.5);
+ ASSERT(data[2] == 0.5);
+}
+
+int
+main (int argc, char *argv[])
+{ /* Entry point: main_helper drives run_test and its result is the exit status. */
+ return main_helper(argc, argv, run_test);
+}
diff --git a/deps/lightening/tests/stxr_f.c b/deps/lightening/tests/stxr_f.c
new file mode 100644
index 0000000..bf0c476
--- /dev/null
+++ b/deps/lightening/tests/stxr_f.c
@@ -0,0 +1,33 @@
+#include "test.h"
+
+static float data[] = { -1.0, 0.0, 0.5 };
+
+static void
+run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size)
+{ /* Test for jit_stxr_f: f(data, 4, 42.5f) must set data[1] to 42.5f and change nothing else. */
+ jit_begin(j, arena_base, arena_size);
+ size_t align = jit_enter_jit_abi(j, 0, 0, 0);
+ jit_load_args_3(j, jit_operand_gpr (JIT_OPERAND_ABI_POINTER, JIT_R0),
+ jit_operand_gpr (JIT_OPERAND_ABI_WORD, JIT_R2),
+ jit_operand_fpr (JIT_OPERAND_ABI_FLOAT, JIT_F0));
+ /* Operands: base pointer in R0, byte offset in R2, value in F0. */
+ jit_stxr_f(j, JIT_R0, JIT_R2, JIT_F0);
+ jit_leave_jit_abi(j, 0, 0, align);
+ jit_ret(j);
+
+ void (*f)(void*, jit_word_t, float) = jit_end(j, NULL);
+ /* Offset 4 is passed to reach data[1], the second float. */
+ ASSERT(data[0] == -1.0f);
+ ASSERT(data[1] == 0.0f);
+ ASSERT(data[2] == 0.5f);
+ f(data, 4, 42.5f);
+ ASSERT(data[0] == -1.0f);
+ ASSERT(data[1] == 42.5f);
+ ASSERT(data[2] == 0.5f);
+}
+
+int
+main (int argc, char *argv[])
+{ /* Entry point: main_helper drives run_test and its result is the exit status. */
+ return main_helper(argc, argv, run_test);
+}
diff --git a/deps/lightening/tests/stxr_i.c b/deps/lightening/tests/stxr_i.c
new file mode 100644
index 0000000..8260462
--- /dev/null
+++ b/deps/lightening/tests/stxr_i.c
@@ -0,0 +1,33 @@
+#include "test.h"
+
+static uint32_t data[] = { 0x12121212, 0x00000000, 0x34343434 };
+
+static void
+run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size)
+{ /* Test for jit_stxr_i: f(data, 4, -1) must set data[1] to 0xffffffff and change nothing else. */
+ jit_begin(j, arena_base, arena_size);
+ size_t align = jit_enter_jit_abi(j, 0, 0, 0);
+ jit_load_args_3(j, jit_operand_gpr (JIT_OPERAND_ABI_POINTER, JIT_R0),
+ jit_operand_gpr (JIT_OPERAND_ABI_WORD, JIT_R2),
+ jit_operand_gpr (JIT_OPERAND_ABI_INT32, JIT_R1));
+ /* Operands: base pointer in R0, byte offset in R2, value in R1. */
+ jit_stxr_i(j, JIT_R0, JIT_R2, JIT_R1);
+ jit_leave_jit_abi(j, 0, 0, align);
+ jit_ret(j);
+
+ void (*f)(void*, jit_word_t, int32_t) = jit_end(j, NULL);
+ /* Offset 4 is passed to reach data[1], the second 32-bit word. */
+ ASSERT(data[0] == 0x12121212);
+ ASSERT(data[1] == 0x00);
+ ASSERT(data[2] == 0x34343434);
+ f(data, 4, -1);
+ ASSERT(data[0] == 0x12121212);
+ ASSERT(data[1] == 0xffffffff);
+ ASSERT(data[2] == 0x34343434);
+}
+
+int
+main (int argc, char *argv[])
+{ /* Entry point: main_helper drives run_test and its result is the exit status. */
+ return main_helper(argc, argv, run_test);
+}
diff --git a/deps/lightening/tests/stxr_l.c b/deps/lightening/tests/stxr_l.c
new file mode 100644
index 0000000..fa6bb1f
--- /dev/null
+++ b/deps/lightening/tests/stxr_l.c
@@ -0,0 +1,35 @@
+#include "test.h"
+
+static void
+run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size)
+{ /* Test for jit_stxr_l; the whole body is compiled only when __WORDSIZE > 32, else this is a no-op. */
+#if __WORDSIZE > 32
+ static uint64_t data[] = { 0x1212121212121212, 0, 0x3434343434343434 };
+
+ jit_begin(j, arena_base, arena_size);
+ size_t align = jit_enter_jit_abi(j, 0, 0, 0);
+ jit_load_args_3(j, jit_operand_gpr (JIT_OPERAND_ABI_POINTER, JIT_R0),
+ jit_operand_gpr (JIT_OPERAND_ABI_WORD, JIT_R2),
+ jit_operand_gpr (JIT_OPERAND_ABI_INT64, JIT_R1));
+ /* Operands: base pointer in R0, byte offset in R2, value in R1. */
+ jit_stxr_l(j, JIT_R0, JIT_R2, JIT_R1);
+ jit_leave_jit_abi(j, 0, 0, align);
+ jit_ret(j);
+
+ void (*f)(void*, jit_word_t, int64_t) = jit_end(j, NULL);
+ /* f(data, 8, -1) must turn data[1] into all ones and leave its neighbours alone. */
+ ASSERT(data[0] == 0x1212121212121212);
+ ASSERT(data[1] == 0x00);
+ ASSERT(data[2] == 0x3434343434343434);
+ f(data, 8, -1);
+ ASSERT(data[0] == 0x1212121212121212);
+ ASSERT(data[1] == 0xffffffffffffffff);
+ ASSERT(data[2] == 0x3434343434343434);
+#endif
+}
+
+int
+main (int argc, char *argv[])
+{ /* Entry point: main_helper drives run_test and its result is the exit status. */
+ return main_helper(argc, argv, run_test);
+}
diff --git a/deps/lightening/tests/stxr_s.c b/deps/lightening/tests/stxr_s.c
new file mode 100644
index 0000000..a93ccd9
--- /dev/null
+++ b/deps/lightening/tests/stxr_s.c
@@ -0,0 +1,33 @@
+#include "test.h"
+
+static uint16_t data[] = { 0x1212, 0x0000, 0x3434 };
+
+static void
+run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size)
+{ /* Test for jit_stxr_s: f(data, 2, -1) must set data[1] to 0xffff and change nothing else. */
+ jit_begin(j, arena_base, arena_size);
+ size_t align = jit_enter_jit_abi(j, 0, 0, 0);
+ jit_load_args_3(j, jit_operand_gpr (JIT_OPERAND_ABI_POINTER, JIT_R0),
+ jit_operand_gpr (JIT_OPERAND_ABI_WORD, JIT_R2),
+ jit_operand_gpr (JIT_OPERAND_ABI_INT16, JIT_R1));
+ /* Operands: base pointer in R0, byte offset in R2, value in R1. */
+ jit_stxr_s(j, JIT_R0, JIT_R2, JIT_R1);
+ jit_leave_jit_abi(j, 0, 0, align);
+ jit_ret(j);
+
+ void (*f)(void*, jit_word_t, int16_t) = jit_end(j, NULL);
+ /* Offset 2 is passed to reach data[1], the second 16-bit element. */
+ ASSERT(data[0] == 0x1212);
+ ASSERT(data[1] == 0);
+ ASSERT(data[2] == 0x3434);
+ f(data, 2, -1);
+ ASSERT(data[0] == 0x1212);
+ ASSERT(data[1] == 0xffff);
+ ASSERT(data[2] == 0x3434);
+}
+
+int
+main (int argc, char *argv[])
+{ /* Entry point: main_helper drives run_test and its result is the exit status. */
+ return main_helper(argc, argv, run_test);
+}
diff --git a/deps/lightening/tests/subr.c b/deps/lightening/tests/subr.c
new file mode 100644
index 0000000..57cf950
--- /dev/null
+++ b/deps/lightening/tests/subr.c
@@ -0,0 +1,26 @@
+#include "test.h"
+
+static void
+run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size)
+{ /* Test for jit_subr: the generated f(a, b) is expected to return a - b. */
+ jit_begin(j, arena_base, arena_size);
+ size_t align = jit_enter_jit_abi(j, 0, 0, 0);
+ jit_load_args_2(j, jit_operand_gpr (JIT_OPERAND_ABI_WORD, JIT_R0),
+ jit_operand_gpr (JIT_OPERAND_ABI_WORD, JIT_R1));
+ /* R0 is both the destination and the first source; it is also the returned register. */
+ jit_subr(j, JIT_R0, JIT_R0, JIT_R1);
+ jit_leave_jit_abi(j, 0, 0, align);
+ jit_retr(j, JIT_R0);
+
+ size_t size = 0;
+ void* ret = jit_end(j, &size);
+ /* NOTE(review): arguments are loaded as ABI_WORD but f is declared with int parameters. */
+ int (*f)(int, int) = ret;
+ ASSERT(f(42, 69) == -27);
+}
+
+int
+main (int argc, char *argv[])
+{ /* Entry point: main_helper drives run_test and its result is the exit status. */
+ return main_helper(argc, argv, run_test);
+}
diff --git a/deps/lightening/tests/subr_d.c b/deps/lightening/tests/subr_d.c
new file mode 100644
index 0000000..bc611c5
--- /dev/null
+++ b/deps/lightening/tests/subr_d.c
@@ -0,0 +1,27 @@
+#include "test.h"
+
+static void
+run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size)
+{ /* Test for jit_subr_d: the generated f(a, b) is expected to return a - b for doubles. */
+ jit_begin(j, arena_base, arena_size);
+ size_t align = jit_enter_jit_abi(j, 0, 0, 0);
+ jit_load_args_2(j, jit_operand_fpr (JIT_OPERAND_ABI_DOUBLE, JIT_F0),
+ jit_operand_fpr (JIT_OPERAND_ABI_DOUBLE, JIT_F1));
+ /* F0 is both the destination and the first source; it is also the returned register. */
+ jit_subr_d(j, JIT_F0, JIT_F0, JIT_F1);
+ jit_leave_jit_abi(j, 0, 0, align);
+ jit_retr_d(j, JIT_F0);
+
+ size_t size = 0;
+ void* ret = jit_end(j, &size);
+ /* The operands and expected results used here are exactly representable, so == is safe. */
+ double (*f)(double, double) = ret;
+ ASSERT(f(42., 69.) == -27.);
+ ASSERT(f(42., 69.5) == -27.5);
+}
+
+int
+main (int argc, char *argv[])
+{ /* Entry point: main_helper drives run_test and its result is the exit status. */
+ return main_helper(argc, argv, run_test);
+}
diff --git a/deps/lightening/tests/subr_f.c b/deps/lightening/tests/subr_f.c
new file mode 100644
index 0000000..a7befec
--- /dev/null
+++ b/deps/lightening/tests/subr_f.c
@@ -0,0 +1,27 @@
+#include "test.h"
+
+static void
+run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size)
+{ /* Test for jit_subr_f: the generated f(a, b) is expected to return a - b for floats. */
+ jit_begin(j, arena_base, arena_size);
+ size_t align = jit_enter_jit_abi(j, 0, 0, 0);
+ jit_load_args_2(j, jit_operand_fpr (JIT_OPERAND_ABI_FLOAT, JIT_F0),
+ jit_operand_fpr (JIT_OPERAND_ABI_FLOAT, JIT_F1));
+ /* F0 is both the destination and the first source; it is also the returned register. */
+ jit_subr_f(j, JIT_F0, JIT_F0, JIT_F1);
+ jit_leave_jit_abi(j, 0, 0, align);
+ jit_retr_f(j, JIT_F0);
+
+ size_t size = 0;
+ void* ret = jit_end(j, &size);
+ /* The operands and expected results used here are exactly representable, so == is safe. */
+ float (*f)(float, float) = ret;
+ ASSERT(f(42.f, 69.f) == -27.f);
+ ASSERT(f(42.0f, 69.5f) == -27.5f);
+}
+
+int
+main (int argc, char *argv[])
+{ /* Entry point: main_helper drives run_test and its result is the exit status. */
+ return main_helper(argc, argv, run_test);
+}
diff --git a/deps/lightening/tests/subx.c b/deps/lightening/tests/subx.c
new file mode 100644
index 0000000..b88bcbd
--- /dev/null
+++ b/deps/lightening/tests/subx.c
@@ -0,0 +1,63 @@
+#include "test.h"
+
+static void
+run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size)
+{ /* Test for jit_subcr + jit_subxi: f(a, b) is expected to be 0, or all ones when a - b borrows. */
+ jit_begin(j, arena_base, arena_size);
+ size_t align = jit_enter_jit_abi(j, 0, 0, 0);
+ jit_load_args_2(j, jit_operand_gpr (JIT_OPERAND_ABI_WORD, JIT_R0),
+ jit_operand_gpr (JIT_OPERAND_ABI_WORD, JIT_R1));
+ /* R2 starts at 0; after the subtraction, R2 - 0 is computed by subxi and R2 is what gets returned. */
+ jit_movi(j, JIT_R2, 0);
+ jit_subcr(j, JIT_R0, JIT_R0, JIT_R1);
+ jit_subxi(j, JIT_R2, JIT_R2, 0);
+ jit_leave_jit_abi(j, 0, 0, align);
+ jit_retr(j, JIT_R2);
+
+ size_t size = 0;
+ void* ret = jit_end(j, &size);
+
+ jit_word_t (*f)(jit_word_t, jit_word_t) = ret;
+
+ ASSERT(f(0, 0) == 0);
+ /* The cases below are split by word size; each is labelled with the condition it exercises. */
+#if __WORDSIZE == 32
+ /* carry */
+ ASSERT(f(0x7fffffff, 0xffffffff) == 0xffffffff);
+ /* overflow */
+ ASSERT(f(0x80000000, 1) == 0);
+ /* carry */
+ ASSERT(f(0x7fffffff, 0x80000000) == 0xffffffff);
+ /* overflow */
+ ASSERT(f(0x80000000, 0x7fffffff) == 0);
+ /* carry+overflow */
+ ASSERT(f(1, 0x80000000) == 0xffffffff);
+#else
+ /* carry */
+ ASSERT(f(0x7fffffff, 0xffffffff) == -1);
+ /* nothing */
+ ASSERT(f(0x80000000, 1) == 0);
+ /* carry */
+ ASSERT(f(0x7fffffff, 0x80000000) == -1);
+ /* nothing */
+ ASSERT(f(0x80000000, 0x7fffffff) == 0);
+ /* carry */
+ ASSERT(f(1, 0x80000000) == -1);
+ /* carry */
+ ASSERT(f(0x7fffffffffffffff, 0xffffffffffffffff) == -1);
+ /* overflow */
+ ASSERT(f(0x8000000000000000, 1) == 0);
+ /* carry */
+ ASSERT(f(0x7fffffffffffffff, 0x8000000000000000) == -1);
+ /* overflow */
+ ASSERT(f(0x8000000000000000, 0x7fffffffffffffff) == 0);
+ /* carry+overflow */
+ ASSERT(f(1, 0x8000000000000000) == -1);
+#endif
+}
+
+int
+main (int argc, char *argv[])
+{ /* Entry point: main_helper drives run_test and its result is the exit status. */
+ return main_helper(argc, argv, run_test);
+}
diff --git a/deps/lightening/tests/swap_atomic.c b/deps/lightening/tests/swap_atomic.c
new file mode 100644
index 0000000..fffa05e
--- /dev/null
+++ b/deps/lightening/tests/swap_atomic.c
@@ -0,0 +1,32 @@
+#include "test.h"
+
+static long data[] = { 0x12121212, 0x00000000, 0x34343434 };
+
+static void
+run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size)
+{ /* Test for jit_swap_atomic: f(&data[1], 0x0f0f0f0f) must leave that value in data[1] only. */
+ jit_begin(j, arena_base, arena_size);
+ size_t align = jit_enter_jit_abi(j, 0, 0, 0);
+ jit_load_args_2(j, jit_operand_gpr (JIT_OPERAND_ABI_POINTER, JIT_R0),
+ jit_operand_gpr (JIT_OPERAND_ABI_INT32, JIT_R1));
+ /* R0 is passed as both the first and second register operand; the function returns no value. */
+ jit_swap_atomic(j, JIT_R0, JIT_R0, JIT_R1);
+ jit_leave_jit_abi(j, 0, 0, align);
+ jit_ret(j);
+
+ void (*f)(void*, int32_t) = jit_end(j, NULL);
+ /* NOTE(review): data is an array of long while the value argument is declared int32_t. */
+ ASSERT(data[0] == 0x12121212);
+ ASSERT(data[1] == 0x00);
+ ASSERT(data[2] == 0x34343434);
+ f(&data[1], 0x0f0f0f0f);
+ ASSERT(data[0] == 0x12121212);
+ ASSERT(data[1] == 0x0f0f0f0f);
+ ASSERT(data[2] == 0x34343434);
+}
+
+int
+main (int argc, char *argv[])
+{ /* Entry point: main_helper drives run_test and its result is the exit status. */
+ return main_helper(argc, argv, run_test);
+}
diff --git a/deps/lightening/tests/test.h b/deps/lightening/tests/test.h
new file mode 100644
index 0000000..c4eff2b
--- /dev/null
+++ b/deps/lightening/tests/test.h
@@ -0,0 +1,79 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/mman.h>
+
+#include <lightening.h>
+
+#define ASSERT(x) /* when x is false: print file, line and the expression to stderr, then abort() */ \
+ do { /* do/while (0) lets the macro be used as a single statement */ \
+ if (!(x)) { \
+ fprintf(stderr, "%s:%d: assertion failed: " #x "\n", /* #x is the expression's source text */ \
+ __FILE__, __LINE__); \
+ abort(); \
+ } \
+ } while (0)
+
+/* Shared driver: create a JIT state and a 4096-byte RWX arena, call run_test once; 0 on success, 1 if mmap fails. */
+static inline int main_helper (int argc, char *argv[],
+ void (*run_test)(jit_state_t*, uint8_t*, size_t))
+{
+ ASSERT(init_jit());
+ jit_state_t *j = jit_new_state (NULL, NULL);
+ ASSERT(j);
+ /* Anonymous private mapping that is readable, writable and executable. */
+ const size_t arena_len = 4096;
+ const int arena_prot = PROT_EXEC | PROT_READ | PROT_WRITE;
+ const int arena_flags = MAP_PRIVATE | MAP_ANONYMOUS;
+ void *arena = mmap (NULL, arena_len, arena_prot, arena_flags, -1, 0);
+
+ if (arena == MAP_FAILED) {
+ perror ("allocating JIT code buffer failed");
+ return 1;
+ }
+ /* The test receives the JIT state plus the arena's base and length. */
+ run_test(j, (uint8_t*)arena, arena_len);
+
+ /* Release the JIT state first, then the arena. */
+ jit_destroy_state(j);
+ munmap(arena, arena_len);
+
+ /* Failed checks abort() inside ASSERT, so reaching this point means success. */
+ return 0;
+}
+
+/* Runs run_test in an RWX arena, remapping at the size it returns until it returns 0; 0 on success, 1 on mmap/munmap failure. */
+static inline int main_compiler (int argc, char *argv[],
+ size_t (*run_test)(jit_state_t*, uint8_t*, size_t))
+{
+ ASSERT(init_jit());
+ jit_state_t *j = jit_new_state (NULL, NULL);
+ ASSERT(j);
+ /* prev_arena_size always holds the length of the mapping that is currently live. */
+ size_t arena_size = 4096, prev_arena_size = arena_size;
+ uint8_t *arena_base = NULL;
+ do {
+ /* On a retry, drop the previous arena before mapping one of the requested size. */
+ if (arena_base && munmap(arena_base, prev_arena_size) == -1) {
+ perror("unmapping arena failed");
+ return 1;
+ }
+
+ prev_arena_size = arena_size;
+
+ arena_base = mmap (NULL, arena_size,
+ PROT_EXEC | PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+
+ if (arena_base == MAP_FAILED) {
+ perror ("allocating JIT code buffer failed");
+ return 1;
+ }
+ /* A nonzero return value becomes the arena size for the next pass; 0 ends the loop. */
+ } while ((arena_size = run_test(j, arena_base, arena_size)) != 0);
+
+ jit_destroy_state(j);
+ /* arena_size is 0 here (the loop's exit condition), so the live mapping's
+  * length is prev_arena_size; a zero-length munmap would fail and unmap nothing. */
+ munmap(arena_base, prev_arena_size);
+ return 0;
+}
diff --git a/deps/lightening/tests/truncr_d_i.c b/deps/lightening/tests/truncr_d_i.c
new file mode 100644
index 0000000..b21280f
--- /dev/null
+++ b/deps/lightening/tests/truncr_d_i.c
@@ -0,0 +1,30 @@
+#include "test.h"
+
+static void
+run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size)
+{ /* Test for jit_truncr_d_i: f(x) is expected to drop the fraction of a double (1.5 -> 1, -1.5 -> -1). */
+ jit_begin(j, arena_base, arena_size);
+ size_t align = jit_enter_jit_abi(j, 0, 0, 0);
+ jit_load_args_1(j, jit_operand_fpr (JIT_OPERAND_ABI_DOUBLE, JIT_F0));
+ /* Source is F0 (the argument); destination R0 is the register that gets returned. */
+ jit_truncr_d_i(j, JIT_R0, JIT_F0);
+ jit_leave_jit_abi(j, 0, 0, align);
+ jit_retr(j, JIT_R0);
+
+ int (*f)(double) = jit_end(j, NULL);
+ /* Covers both zeros and positive/negative values on either side of an integer. */
+ ASSERT(f(0.0) == 0);
+ ASSERT(f(-0.0) == 0);
+ ASSERT(f(0.5) == 0);
+ ASSERT(f(-0.5) == 0);
+ ASSERT(f(1.5) == 1);
+ ASSERT(f(-1.5) == -1);
+ ASSERT(f(2.5) == 2);
+ ASSERT(f(-2.5) == -2);
+}
+
+int
+main (int argc, char *argv[])
+{ /* Entry point: main_helper drives run_test and its result is the exit status. */
+ return main_helper(argc, argv, run_test);
+}
diff --git a/deps/lightening/tests/truncr_d_l.c b/deps/lightening/tests/truncr_d_l.c
new file mode 100644
index 0000000..189617a
--- /dev/null
+++ b/deps/lightening/tests/truncr_d_l.c
@@ -0,0 +1,32 @@
+#include "test.h"
+
+static void
+run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size)
+{ /* Test for jit_truncr_d_l (double -> int64_t); compiled only when __WORDSIZE > 32, else a no-op. */
+#if __WORDSIZE > 32
+ jit_begin(j, arena_base, arena_size);
+ size_t align = jit_enter_jit_abi(j, 0, 0, 0);
+ jit_load_args_1(j, jit_operand_fpr (JIT_OPERAND_ABI_DOUBLE, JIT_F0));
+ /* Source is F0 (the argument); destination R0 is the register that gets returned. */
+ jit_truncr_d_l(j, JIT_R0, JIT_F0);
+ jit_leave_jit_abi(j, 0, 0, align);
+ jit_retr(j, JIT_R0);
+
+ int64_t (*f)(double) = jit_end(j, NULL);
+ /* The fraction is expected to be dropped: 1.5 -> 1, -1.5 -> -1. */
+ ASSERT(f(0.0) == 0);
+ ASSERT(f(-0.0) == 0);
+ ASSERT(f(0.5) == 0);
+ ASSERT(f(-0.5) == 0);
+ ASSERT(f(1.5) == 1);
+ ASSERT(f(-1.5) == -1);
+ ASSERT(f(2.5) == 2);
+ ASSERT(f(-2.5) == -2);
+#endif
+}
+
+int
+main (int argc, char *argv[])
+{ /* Entry point: main_helper drives run_test and its result is the exit status. */
+ return main_helper(argc, argv, run_test);
+}
diff --git a/deps/lightening/tests/truncr_f_i.c b/deps/lightening/tests/truncr_f_i.c
new file mode 100644
index 0000000..3dbf630
--- /dev/null
+++ b/deps/lightening/tests/truncr_f_i.c
@@ -0,0 +1,30 @@
+#include "test.h"
+
+static void
+run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size)
+{ /* Test for jit_truncr_f_i: f(x) is expected to drop the fraction of a float (1.5 -> 1, -1.5 -> -1). */
+ jit_begin(j, arena_base, arena_size);
+ size_t align = jit_enter_jit_abi(j, 0, 0, 0);
+ jit_load_args_1(j, jit_operand_fpr (JIT_OPERAND_ABI_FLOAT, JIT_F0));
+ /* Source is F0 (the argument); destination R0 is the register that gets returned. */
+ jit_truncr_f_i(j, JIT_R0, JIT_F0);
+ jit_leave_jit_abi(j, 0, 0, align);
+ jit_retr(j, JIT_R0);
+
+ int (*f)(float) = jit_end(j, NULL);
+ /* The double literals below are converted to float by f's prototype. */
+ ASSERT(f(0.0) == 0);
+ ASSERT(f(-0.0) == 0);
+ ASSERT(f(0.5) == 0);
+ ASSERT(f(-0.5) == 0);
+ ASSERT(f(1.5) == 1);
+ ASSERT(f(-1.5) == -1);
+ ASSERT(f(2.5) == 2);
+ ASSERT(f(-2.5) == -2);
+}
+
+int
+main (int argc, char *argv[])
+{ /* Entry point: main_helper drives run_test and its result is the exit status. */
+ return main_helper(argc, argv, run_test);
+}
diff --git a/deps/lightening/tests/truncr_f_l.c b/deps/lightening/tests/truncr_f_l.c
new file mode 100644
index 0000000..7369ae3
--- /dev/null
+++ b/deps/lightening/tests/truncr_f_l.c
@@ -0,0 +1,32 @@
+#include "test.h"
+
+static void
+run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size)
+{ /* Test for jit_truncr_f_l (float -> int64_t); compiled only when __WORDSIZE > 32, else a no-op. */
+#if __WORDSIZE > 32
+ jit_begin(j, arena_base, arena_size);
+ size_t align = jit_enter_jit_abi(j, 0, 0, 0);
+ jit_load_args_1(j, jit_operand_fpr (JIT_OPERAND_ABI_FLOAT, JIT_F0));
+ /* Source is F0 (the argument); destination R0 is the register that gets returned. */
+ jit_truncr_f_l(j, JIT_R0, JIT_F0);
+ jit_leave_jit_abi(j, 0, 0, align);
+ jit_retr(j, JIT_R0);
+
+ int64_t (*f)(float) = jit_end(j, NULL);
+ /* The fraction is expected to be dropped: 1.5 -> 1, -1.5 -> -1. */
+ ASSERT(f(0.0) == 0);
+ ASSERT(f(-0.0) == 0);
+ ASSERT(f(0.5) == 0);
+ ASSERT(f(-0.5) == 0);
+ ASSERT(f(1.5) == 1);
+ ASSERT(f(-1.5) == -1);
+ ASSERT(f(2.5) == 2);
+ ASSERT(f(-2.5) == -2);
+#endif
+}
+
+int
+main (int argc, char *argv[])
+{ /* Entry point: main_helper drives run_test and its result is the exit status. */
+ return main_helper(argc, argv, run_test);
+}
diff --git a/deps/lightening/tests/xori.c b/deps/lightening/tests/xori.c
new file mode 100644
index 0000000..4bb2ad1
--- /dev/null
+++ b/deps/lightening/tests/xori.c
@@ -0,0 +1,31 @@
+#include "test.h"
+
+static void
+run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size)
+{ /* Test for jit_xori: the generated f(x) is expected to return x ^ 1. */
+ jit_begin(j, arena_base, arena_size);
+ size_t align = jit_enter_jit_abi(j, 0, 0, 0);
+ jit_load_args_1(j, jit_operand_gpr (JIT_OPERAND_ABI_WORD, JIT_R0));
+ /* R0 is both source and destination; the immediate operand is 1. */
+ jit_xori(j, JIT_R0, JIT_R0, 1);
+ jit_leave_jit_abi(j, 0, 0, align);
+ jit_retr(j, JIT_R0);
+
+ size_t size = 0;
+ void* ret = jit_end(j, &size);
+
+ jit_word_t (*f)(jit_word_t) = ret;
+ /* Inputs sit on either side of the 32-bit sign boundary; 64-bit builds repeat this at bit 63. */
+ ASSERT(f(0x7fffffff) == 0x7ffffffe);
+ ASSERT(f(0x80000000) == 0x80000001);
+#if __WORDSIZE == 64
+ ASSERT(f(0x7fffffffffffffff) == 0x7ffffffffffffffe);
+ ASSERT(f(0x8000000000000000) == 0x8000000000000001);
+#endif
+}
+
+int
+main (int argc, char *argv[])
+{ /* Entry point: main_helper drives run_test and its result is the exit status. */
+ return main_helper(argc, argv, run_test);
+}
diff --git a/deps/lightening/tests/xorr.c b/deps/lightening/tests/xorr.c
new file mode 100644
index 0000000..dd5a390
--- /dev/null
+++ b/deps/lightening/tests/xorr.c
@@ -0,0 +1,48 @@
+#include "test.h"
+
+static void
+run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size)
+{ /* Test for jit_xorr: the generated f(a, b) is expected to return a ^ b. */
+ jit_begin(j, arena_base, arena_size);
+ size_t align = jit_enter_jit_abi(j, 0, 0, 0);
+ jit_load_args_2(j, jit_operand_gpr (JIT_OPERAND_ABI_WORD, JIT_R0),
+ jit_operand_gpr (JIT_OPERAND_ABI_WORD, JIT_R1));
+ /* R0 is both the destination and the first source; it is also the returned register. */
+ jit_xorr(j, JIT_R0, JIT_R0, JIT_R1);
+ jit_leave_jit_abi(j, 0, 0, align);
+ jit_retr(j, JIT_R0);
+
+ size_t size = 0;
+ void* ret = jit_end(j, &size);
+
+ jit_word_t (*f)(jit_word_t, jit_word_t) = ret;
+ /* Most cases are checked with the operands in both orders. */
+ ASSERT(f(0x7fffffff, 1) == 0x7ffffffe);
+ ASSERT(f(1, 0x7fffffff) == 0x7ffffffe);
+ ASSERT(f(0x80000000, 1) == 0x80000001);
+ ASSERT(f(1, 0x80000000) == 0x80000001);
+ ASSERT(f(0x7fffffff, 0x80000000) == 0xffffffff);
+ ASSERT(f(0x80000000, 0x7fffffff) == 0xffffffff);
+ ASSERT(f(0x7fffffff, 0xffffffff) == 0x80000000);
+ ASSERT(f(0xffffffff, 0x7fffffff) == 0x80000000);
+ ASSERT(f(0xffffffff, 0xffffffff) == 0);
+ ASSERT(f(0x7fffffff, 0) == 0x7fffffff);
+ ASSERT(f(0, 0x7fffffff) == 0x7fffffff);
+#if __WORDSIZE == 64
+ ASSERT(f(0x7fffffffffffffff, 1) == 0x7ffffffffffffffe);
+ ASSERT(f(1, 0x7fffffffffffffff) == 0x7ffffffffffffffe);
+ ASSERT(f(0x8000000000000000, 1) == 0x8000000000000001);
+ ASSERT(f(1, 0x8000000000000000) == 0x8000000000000001);
+ ASSERT(f(0x7fffffffffffffff, 0x8000000000000000) == 0xffffffffffffffff);
+ ASSERT(f(0x8000000000000000, 0x7fffffffffffffff) == 0xffffffffffffffff);
+ ASSERT(f(0x7fffffffffffffff, 0xffffffffffffffff) == 0x8000000000000000);
+ ASSERT(f(0xffffffffffffffff, 0x7fffffffffffffff) == 0x8000000000000000);
+ ASSERT(f(0xffffffffffffffff, 0xffffffffffffffff) == 0);
+#endif
+}
+
+int
+main (int argc, char *argv[])
+{ /* Entry point: main_helper drives run_test and its result is the exit status. */
+ return main_helper(argc, argv, run_test);
+}
diff --git a/deps/lightening/tests/z_atomic.c b/deps/lightening/tests/z_atomic.c
new file mode 100644
index 0000000..8612d15
--- /dev/null
+++ b/deps/lightening/tests/z_atomic.c
@@ -0,0 +1,88 @@
+#include "test.h"
+
+#include <threads.h>
+
+/* note non-atomic counter! */
+size_t num = 0; /* incremented by loop() between the spin_lock and spin_unlock calls */
+long lock_var = 0; /* lock word: the generated code exchanges 1 into it to lock and stores 0 to unlock */
+void (*spin_lock)(void); /* set by run_test to the address of the generated lock routine */
+void (*spin_unlock)(void); /* set by run_test to the address of the generated unlock routine */
+
+/* arbitrary number, as long as it's large enough to likely allow other threads
+ * to spawn. */
+#define THREAD_INCREMENTS 1000000
+/* Thread body: increment num THREAD_INCREMENTS times, each time between spin_lock and spin_unlock; returns 0. */
+static int loop(void *arg)
+{
+ size_t remaining = THREAD_INCREMENTS;
+ while (remaining-- > 0) {
+ spin_lock();
+ num += 1; /* plain, non-atomic update; the surrounding lock calls are what serialize it */
+ spin_unlock();
+ }
+ return 0;
+}
+
+#define NUM_THREADS 10
+/* Starts NUM_THREADS threads running loop(), joins them all, then checks that no increment of num was lost. */
+static void run_loops(void)
+{
+ thrd_t threads[NUM_THREADS];
+ for (size_t i = 0; i < NUM_THREADS; ++i)
+ ASSERT(thrd_create(&threads[i], loop, NULL) == thrd_success);
+ /* Join every worker before reading the shared counter. */
+ for (size_t i = 0; i < NUM_THREADS; ++i)
+ ASSERT(thrd_join(threads[i], NULL) == thrd_success);
+ /* Each thread adds THREAD_INCREMENTS; any other total means an update was lost. */
+ ASSERT(num == NUM_THREADS * THREAD_INCREMENTS);
+}
+
+static size_t
+run_test(jit_state_t *_jit, uint8_t *arena_base, size_t arena_size)
+{ /* Emits spin_lock and spin_unlock, then runs the threaded counter test; returns 0, or size if jit_end gave NULL. */
+ jit_begin(_jit, arena_base, arena_size);
+
+ /* based on https://rigtorp.se/spinlock/ */
+ spin_lock = jit_address(_jit);
+ {
+ size_t frame = jit_enter_jit_abi(_jit, 0, 0, 0);
+ void *do_exchange = jit_address(_jit);
+ void *while_load = jit_address(_jit);
+ /* do { */
+ /* while (atomic_load(lock_var)); */
+ jit_movi(_jit, JIT_R1, (jit_imm_t)&lock_var);
+ jit_ldr_atomic(_jit, JIT_R0, JIT_R1);
+ jit_patch_there(_jit, jit_bnei(_jit, JIT_R0, 0), while_load);
+ /* } while (atomic_exchange(lock_var, 1)); */
+ jit_movi(_jit, JIT_R0, 1);
+ jit_swap_atomic(_jit, JIT_R0, JIT_R1, JIT_R0);
+ jit_patch_there(_jit, jit_bnei(_jit, JIT_R0, 0), do_exchange);
+ jit_leave_jit_abi(_jit, 0, 0, frame);
+ jit_ret(_jit);
+ }
+ /* Unlock routine: store 0 into lock_var with jit_str_atomic. */
+ spin_unlock = jit_address(_jit);
+ {
+ size_t frame = jit_enter_jit_abi(_jit, 0, 0, 0);
+ jit_movi(_jit, JIT_R0, 0);
+ jit_movi(_jit, JIT_R1, (jit_imm_t)&lock_var);
+ jit_str_atomic(_jit, JIT_R1, JIT_R0);
+ jit_leave_jit_abi(_jit, 0, 0, frame);
+ jit_ret(_jit);
+ }
+ /* NOTE(review): size has no initializer; the else branch relies on jit_end having written it. */
+ size_t size;
+ void *p = jit_end(_jit, &size);
+ /* The threads run only when jit_end returned non-NULL; otherwise size is handed back to the caller. */
+ if (p)
+ run_loops();
+ else
+ return size;
+
+ return 0;
+}
+
+int main(int argc, char *argv[])
+{ /* Entry point: main_compiler drives run_test and its result is the exit status. */
+ return main_compiler(argc, argv, run_test);
+}
diff --git a/deps/lightening/tests/z_bp.c b/deps/lightening/tests/z_bp.c
new file mode 100644
index 0000000..57f7bfa
--- /dev/null
+++ b/deps/lightening/tests/z_bp.c
@@ -0,0 +1,61 @@
+#include "test.h"
+
+/* Emits a JIT function that calls itself recursively and checks its results
+ * against the Fibonacci numbers listed in the assertions below.
+ *
+ * _jit:                   JIT state to emit into.
+ * arena_base, arena_size: code buffer handed to jit_begin().
+ *
+ * Returns 0 after the assertions have run; returns the size reported through
+ * jit_end() when jit_end() yields NULL. */
+static size_t
+run_test(jit_state_t *_jit, uint8_t *arena_base, size_t arena_size)
+{
+ int32_t (*function)(int32_t);
+
+ jit_begin(_jit, arena_base, arena_size);
+
+ /* entry is the address the emitted code calls back into */
+ void *entry = jit_address(_jit);
+ size_t frame = jit_enter_jit_abi(_jit, 3, 0, 0);
+
+ jit_load_args_1(_jit, jit_operand_gpr(JIT_OPERAND_ABI_INT32, JIT_R0));
+
+ /* first exit: argument compared against 0, R0 still holds the argument */
+ jit_reloc_t out1 = jit_beqi(_jit, JIT_R0, 0);
+ jit_movr(_jit, JIT_V0, JIT_R0);
+ jit_movi(_jit, JIT_R0, 1);
+
+ /* second exit: argument compared against 2, R0 holds 1 */
+ jit_reloc_t out2 = jit_blei_u(_jit, JIT_V0, 2);
+ jit_subi(_jit, JIT_V1, JIT_V0, 1);
+ jit_subi(_jit, JIT_V2, JIT_V0, 2);
+
+ /* recurse on (argument - 1); keep the result in V1 */
+ jit_calli_1(_jit, entry, jit_operand_gpr(JIT_OPERAND_ABI_INT32, JIT_V1));
+ jit_retval(_jit, JIT_V1);
+
+ /* recurse on (argument - 2); result goes to R0 */
+ jit_calli_1(_jit, entry, jit_operand_gpr(JIT_OPERAND_ABI_INT32, JIT_V2));
+ jit_retval(_jit, JIT_R0);
+
+ jit_addr(_jit, JIT_R0, JIT_R0, JIT_V1);
+
+ /* both early exits land on the shared epilogue */
+ jit_patch_here(_jit, out1);
+ jit_patch_here(_jit, out2);
+ jit_leave_jit_abi(_jit, 3, 0, frame);
+ jit_retr(_jit, JIT_R0);
+
+ size_t size = 0;
+ function = jit_end(_jit, &size);
+
+ if (function) {
+ ASSERT((*function)(1) == 1);
+ ASSERT((*function)(2) == 1);
+ ASSERT((*function)(3) == 2);
+ ASSERT((*function)(4) == 3);
+ ASSERT((*function)(5) == 5);
+ ASSERT((*function)(6) == 8);
+ ASSERT((*function)(7) == 13);
+ ASSERT((*function)(8) == 21);
+ ASSERT((*function)(9) == 34);
+
+ ASSERT((*function)(32) == 2178309);
+ } else {
+ return size;
+ }
+
+ return 0;
+}
+
+/* Program entry point: passes the command line and run_test to main_compiler
+ * and returns its result. */
+int main(int argc, char *argv[])
+{
+ return main_compiler(argc, argv, run_test);
+}
diff --git a/deps/lightening/tests/z_branch.c b/deps/lightening/tests/z_branch.c
new file mode 100644
index 0000000..30f0ea5
--- /dev/null
+++ b/deps/lightening/tests/z_branch.c
@@ -0,0 +1,584 @@
+#include "test.h"
+
+/* Word-sized bit patterns, chosen by __WORDSIZE (64-bit values when it is 64,
+ * 32-bit values otherwise):
+ *   I7f - every bit set except the top one
+ *   I80 - only the top bit set
+ *   I81 - top bit and bit 0 set
+ *   Iff - every bit set */
+#if __WORDSIZE == 64
+# define I7f 0x7fffffffffffffff
+# define I80 0x8000000000000000
+# define I81 0x8000000000000001
+# define Iff 0xffffffffffffffff
+#else
+# define I7f 0x7fffffff
+# define I80 0x80000000
+# define I81 0x80000001
+# define Iff 0xffffffff
+#endif
+
+/* Not-a-number operand for the floating-point branch checks, written as the
+ * constant expression 0.0 / 0.0. */
+#define NaN (0.0 / 0.0)
+
+/* dump_args(comp, r0, r1): tracing aid. When DEBUG is defined it emits a JIT
+ * call to puts() whose argument is the three macro arguments stringified and
+ * joined with single spaces; otherwise it expands to nothing. */
+#if defined(DEBUG)
+#define dump_args(comp, r0, r1)\
+ jit_calli_1(_jit, puts,\
+ jit_operand_imm(JIT_OPERAND_ABI_POINTER,\
+ (jit_imm_t)#comp " " #r0 " " #r1));
+#else
+#define dump_args(comp, r0, r1)
+#endif
+
+/* BOP(N, Ls, Rs, Lu, Ru, R0, R1): emits four checks for branch condition N
+ * using registers R0 and R1:
+ *   1. jit_b<N>r   with R0 = Ls, R1 = Rs
+ *   2. jit_b<N>i   with R0 = Ls and immediate Rs
+ *   3. jit_b<N>r_u with R0 = Lu, R1 = Ru
+ *   4. jit_b<N>i_u with R0 = Lu and immediate Ru
+ * Each branch is patched to land just past a call to abort(), so the emitted
+ * code aborts whenever a branch is not taken. */
+#define BOP(N, Ls, Rs, Lu, Ru, R0, R1) \
+{ \
+ dump_args(N##r, Ls, Rs); \
+ jit_movi(_jit, R0, Ls); \
+ jit_movi(_jit, R1, Rs); \
+ jit_reloc_t r = jit_b##N##r(_jit, R0, R1); \
+ jit_calli_0(_jit, abort); \
+ jit_patch_here(_jit, r); \
+} \
+{ \
+ dump_args(N##i, Ls, Rs); \
+ jit_movi(_jit, R0, Ls); \
+ jit_reloc_t r = jit_b##N##i(_jit, R0, Rs); \
+ jit_calli_0(_jit, abort); \
+ jit_patch_here(_jit, r); \
+} \
+{ \
+ dump_args(N##r_u, Lu, Ru); \
+ jit_movi(_jit, R0, Lu); \
+ jit_movi(_jit, R1, Ru); \
+ jit_reloc_t r = jit_b##N##r_u(_jit, R0, R1); \
+ jit_calli_0(_jit, abort); \
+ jit_patch_here(_jit, r); \
+} \
+{ \
+ dump_args(N##i_u, Lu, Ru); \
+ jit_movi(_jit, R0, Lu); \
+ jit_reloc_t r = jit_b##N##i_u(_jit, R0, Ru); \
+ jit_calli_0(_jit, abort); \
+ jit_patch_here(_jit, r); \
+}
+
+/* EB(N, L, R, R0, R1): emits two checks for branch condition N:
+ *   1. jit_b<N>r with R0 = L, R1 = R
+ *   2. jit_b<N>i with R0 = L and immediate R
+ * Each branch is patched to land just past a call to abort(), so the emitted
+ * code aborts whenever a branch is not taken. The backslash after the final
+ * brace only splices in the blank line that follows. */
+#define EB(N, L, R, R0, R1) \
+{ \
+ dump_args(N##r, L, R); \
+ jit_movi(_jit, R0, L); \
+ jit_movi(_jit, R1, R); \
+ jit_reloc_t r = jit_b##N##r(_jit, R0, R1); \
+ jit_calli_0(_jit, abort); \
+ jit_patch_here(_jit, r); \
+} \
+{ \
+ dump_args(N##i, L, R); \
+ jit_movi(_jit, R0, L); \
+ jit_reloc_t r = jit_b##N##i(_jit, R0, R); \
+ jit_calli_0(_jit, abort); \
+ jit_patch_here(_jit, r); \
+} \
+
+/* XEB(N, L, R, R0, R1): emits the same two checks as EB -- jit_b<N>r with
+ * R0 = L, R1 = R, then jit_b<N>i with R0 = L and immediate R, each aborting
+ * when the branch is not taken. It used to be a token-for-token copy of EB;
+ * the separate name is kept so existing users of XEB are unaffected. */
+#define XEB(N, L, R, R0, R1) EB(N, L, R, R0, R1)
+
+/* XBOP(N, Ls, Rs, Lu, Ru, R0, R1): emits the same four checks as BOP -- the
+ * register and immediate forms of jit_b<N> with operands Ls, Rs, then the _u
+ * forms with operands Lu, Ru, each aborting when the branch is not taken. It
+ * used to be a token-for-token copy of BOP; the separate name is kept so
+ * existing users of XBOP are unaffected. */
+#define XBOP(N, Ls, Rs, Lu, Ru, R0, R1) BOP(N, Ls, Rs, Lu, Ru, R0, R1)
+
+/* FOR_EACH_GPR_PAIR(CHECK, ...): expands CHECK(..., Ra, Rb) once for every
+ * ordered pair of distinct registers drawn from JIT_V0, JIT_V1, JIT_V2,
+ * JIT_R0, JIT_R1, JIT_R2 (30 expansions). The variadic arguments are passed
+ * through unchanged in front of the two registers. */
+#define FOR_EACH_GPR_PAIR(CHECK, ...) \
+    CHECK(__VA_ARGS__, JIT_V0, JIT_V1) \
+    CHECK(__VA_ARGS__, JIT_V0, JIT_V2) \
+    CHECK(__VA_ARGS__, JIT_V0, JIT_R0) \
+    CHECK(__VA_ARGS__, JIT_V0, JIT_R1) \
+    CHECK(__VA_ARGS__, JIT_V0, JIT_R2) \
+    CHECK(__VA_ARGS__, JIT_V1, JIT_V0) \
+    CHECK(__VA_ARGS__, JIT_V1, JIT_V2) \
+    CHECK(__VA_ARGS__, JIT_V1, JIT_R0) \
+    CHECK(__VA_ARGS__, JIT_V1, JIT_R1) \
+    CHECK(__VA_ARGS__, JIT_V1, JIT_R2) \
+    CHECK(__VA_ARGS__, JIT_V2, JIT_V0) \
+    CHECK(__VA_ARGS__, JIT_V2, JIT_V1) \
+    CHECK(__VA_ARGS__, JIT_V2, JIT_R0) \
+    CHECK(__VA_ARGS__, JIT_V2, JIT_R1) \
+    CHECK(__VA_ARGS__, JIT_V2, JIT_R2) \
+    CHECK(__VA_ARGS__, JIT_R0, JIT_V0) \
+    CHECK(__VA_ARGS__, JIT_R0, JIT_V1) \
+    CHECK(__VA_ARGS__, JIT_R0, JIT_V2) \
+    CHECK(__VA_ARGS__, JIT_R0, JIT_R1) \
+    CHECK(__VA_ARGS__, JIT_R0, JIT_R2) \
+    CHECK(__VA_ARGS__, JIT_R1, JIT_V0) \
+    CHECK(__VA_ARGS__, JIT_R1, JIT_V1) \
+    CHECK(__VA_ARGS__, JIT_R1, JIT_V2) \
+    CHECK(__VA_ARGS__, JIT_R1, JIT_R0) \
+    CHECK(__VA_ARGS__, JIT_R1, JIT_R2) \
+    CHECK(__VA_ARGS__, JIT_R2, JIT_V0) \
+    CHECK(__VA_ARGS__, JIT_R2, JIT_V1) \
+    CHECK(__VA_ARGS__, JIT_R2, JIT_V2) \
+    CHECK(__VA_ARGS__, JIT_R2, JIT_R0) \
+    CHECK(__VA_ARGS__, JIT_R2, JIT_R1)
+
+/* BOPI: the BOP checks for condition N, repeated for every register pair. */
+#define BOPI(N, Ls, Rs, Lu, Ru) FOR_EACH_GPR_PAIR(BOP, N, Ls, Rs, Lu, Ru)
+
+/* EBI: the EB checks for condition N, repeated for every register pair. */
+#define EBI(N, L, R) FOR_EACH_GPR_PAIR(EB, N, L, R)
+
+/* XEBI: the XEB checks for condition N, repeated for every register pair. */
+#define XEBI(N, L, R) FOR_EACH_GPR_PAIR(XEB, N, L, R)
+
+/* XBOPI: the XBOP checks for condition N, repeated for every register pair. */
+#define XBOPI(N, Ls, Rs, Lu, Ru) FOR_EACH_GPR_PAIR(XBOP, N, Ls, Rs, Lu, Ru)
+
+/* TBOPF(N, T, L, R): emits four checks of the floating-point branch
+ * jit_b<N>r_<T> on JIT_F0 and JIT_F1 (T selects the jit_movi_<T> loader):
+ *   1-2. operands L and R: the emitted code aborts unless the branch is taken
+ *   3-4. operands L and NaN: the emitted code aborts if the branch IS taken
+ *        (the branch target is the abort() call; an unconditional jump skips
+ *        it otherwise)
+ * NOTE(review): checks 2 and 4 are labelled N##i##_##T for dump_args but emit
+ * the same register-form branch as checks 1 and 3. */
+#define TBOPF(N, T, L, R) \
+{ \
+ dump_args(N##r##_##T, L, R); \
+ jit_movi_##T(_jit, JIT_F0, L); \
+ jit_movi_##T(_jit, JIT_F1, R); \
+ jit_reloc_t r = \
+ jit_b##N##r##_##T(_jit, JIT_F0, JIT_F1); \
+ jit_calli_0(_jit, abort); \
+ jit_patch_here(_jit, r); \
+} \
+{ \
+ dump_args(N##i##_##T, L, R); \
+ jit_movi_##T(_jit, JIT_F0, L); \
+ jit_movi_##T(_jit, JIT_F1, R); \
+ jit_reloc_t r = \
+ jit_b##N##r##_##T(_jit, JIT_F0, JIT_F1); \
+ jit_calli_0(_jit, abort); \
+ jit_patch_here(_jit, r); \
+} \
+{ \
+ dump_args(N##r##_##T, L, NaN); \
+ jit_movi_##T(_jit, JIT_F0, L); \
+ jit_movi_##T(_jit, JIT_F1, NaN); \
+ jit_reloc_t err = \
+ jit_b##N##r##_##T(_jit, JIT_F0, JIT_F1); \
+ jit_reloc_t ok = jit_jmp(_jit); \
+ jit_patch_here(_jit, err); \
+ jit_calli_0(_jit, abort); \
+ jit_patch_here(_jit, ok); \
+} \
+{ \
+ dump_args(N##i##_##T, L, NaN); \
+ jit_movi_##T(_jit, JIT_F0, L); \
+ jit_movi_##T(_jit, JIT_F1, NaN); \
+ jit_reloc_t err = \
+ jit_b##N##r##_##T(_jit, JIT_F0, JIT_F1); \
+ jit_reloc_t ok = jit_jmp(_jit); \
+ jit_patch_here(_jit, err); \
+ jit_calli_0(_jit, abort); \
+ jit_patch_here(_jit, ok); \
+}
+
+/* BOPF(N, L, R): runs the TBOPF checks twice, once with type suffix f and
+ * once with type suffix d. */
+#define BOPF(N, L, R) \
+ TBOPF(N, f, L, R) \
+ TBOPF(N, d, L, R)
+
+/* TUBOPF(N, T, L, R): emits four checks of the floating-point branch
+ * jit_b<N>r_<T> on JIT_F0 and JIT_F1; in every one the emitted code aborts
+ * unless the branch is taken:
+ *   1-2. operands L and R
+ *   3-4. operands L and NaN
+ * Unlike TBOPF, the NaN checks also require the branch to be taken.
+ * NOTE(review): checks 2 and 4 are labelled N##i##_##T for dump_args but emit
+ * the same register-form branch as checks 1 and 3. */
+#define TUBOPF(N, T, L, R) \
+{ \
+ dump_args(N##r##_##T, L, R); \
+ jit_movi_##T(_jit, JIT_F0, L); \
+ jit_movi_##T(_jit, JIT_F1, R); \
+ jit_reloc_t r = \
+ jit_b##N##r##_##T(_jit, JIT_F0, JIT_F1); \
+ jit_calli_0(_jit, abort); \
+ jit_patch_here(_jit, r); \
+} \
+{ \
+ dump_args(N##i##_##T, L, R); \
+ jit_movi_##T(_jit, JIT_F0, L); \
+ jit_movi_##T(_jit, JIT_F1, R); \
+ jit_reloc_t r = \
+ jit_b##N##r##_##T(_jit, JIT_F0, JIT_F1); \
+ jit_calli_0(_jit, abort); \
+ jit_patch_here(_jit, r); \
+} \
+{ \
+ dump_args(N##r##_##T, L, NaN); \
+ jit_movi_##T(_jit, JIT_F0, L); \
+ jit_movi_##T(_jit, JIT_F1, NaN); \
+ jit_reloc_t r = \
+ jit_b##N##r##_##T(_jit, JIT_F0, JIT_F1); \
+ jit_calli_0(_jit, abort); \
+ jit_patch_here(_jit, r); \
+} \
+{ \
+ dump_args(N##i##_##T, L, NaN); \
+ jit_movi_##T(_jit, JIT_F0, L); \
+ jit_movi_##T(_jit, JIT_F1, NaN); \
+ jit_reloc_t r = \
+ jit_b##N##r##_##T(_jit, JIT_F0, JIT_F1); \
+ jit_calli_0(_jit, abort); \
+ jit_patch_here(_jit, r); \
+}
+
+/* UBOPF(N, L, R): runs the TUBOPF checks twice, once with type suffix f and
+ * once with type suffix d. */
+#define UBOPF(N, L, R) \
+ TUBOPF(N, f, L, R) \
+ TUBOPF(N, d, L, R)
+
+/* ARGB(N, L, R): emits four checks for branch condition N on JIT_R0/JIT_R1:
+ *   1. jit_b<N>r   with R0 = L, R1 = R
+ *   2. jit_b<N>i   with R0 = L and immediate R
+ *   3. jit_b<N>r_u with the operands swapped: R0 = R, R1 = L
+ *   4. jit_b<N>i_u with the operands swapped: R0 = R and immediate L
+ * The emitted code aborts whenever a branch is not taken. */
+#define ARGB(N, L, R) \
+{ \
+ dump_args(N##r, L, R); \
+ jit_movi(_jit, JIT_R0, L); \
+ jit_movi(_jit, JIT_R1, R); \
+ jit_reloc_t r = \
+ jit_b##N##r(_jit, JIT_R0, JIT_R1); \
+ jit_calli_0(_jit, abort); \
+ jit_patch_here(_jit, r); \
+} \
+{ \
+ dump_args(N##i, L, R); \
+ jit_movi(_jit, JIT_R0, L); \
+ jit_reloc_t r = jit_b##N##i(_jit, JIT_R0, R); \
+ jit_calli_0(_jit, abort); \
+ jit_patch_here(_jit, r); \
+} \
+{ \
+ dump_args(N##r_u, R, L); \
+ jit_movi(_jit, JIT_R0, R); \
+ jit_movi(_jit, JIT_R1, L); \
+ jit_reloc_t r = \
+ jit_b##N##r_u(_jit, JIT_R0, JIT_R1);\
+ jit_calli_0(_jit, abort); \
+ jit_patch_here(_jit, r); \
+} \
+{ \
+ dump_args(N##i_u, R, L); \
+ jit_movi(_jit, JIT_R0, R); \
+ jit_reloc_t r = jit_b##N##i_u(_jit, JIT_R0, L); \
+ jit_calli_0(_jit, abort); \
+ jit_patch_here(_jit, r); \
+}
+
+/* ARGBS(): the ARGB checks for the lt, le, ge and gt conditions. The
+ * backslash after the last entry only splices in the blank line that
+ * follows. */
+#define ARGBS() \
+ ARGB(lt, -1, 1) \
+ ARGB(le, -1, -1) \
+ ARGB(ge, -1, -1) \
+ ARGB(gt, 1, -1) \
+
+/* OVFGB(N, L, R, U, D): emits four checks for branch condition N in which the
+ * second operand is always 1 and the first operand is loaded into JIT_R0:
+ *   1. jit_b<N>r   with first operand L (second operand 1 in JIT_R1)
+ *   2. jit_b<N>r_u with first operand R (second operand 1 in JIT_R1)
+ *   3. jit_b<N>i   with first operand U and immediate 1
+ *   4. jit_b<N>i_u with first operand D and immediate 1
+ * The emitted code aborts whenever a branch is not taken. dump_args is given
+ * an empty third argument. */
+#define OVFGB(N, L, R, U, D) \
+{ \
+ dump_args(N##r, L, ); \
+ jit_movi(_jit, JIT_R0, L); \
+ jit_movi(_jit, JIT_R1, 1); \
+ jit_reloc_t r = \
+ jit_b##N##r(_jit, JIT_R0, JIT_R1); \
+ jit_calli_0(_jit, abort); \
+ jit_patch_here(_jit, r); \
+} \
+{ \
+ dump_args(N##r_u, R, ); \
+ jit_movi(_jit, JIT_R0, R); \
+ jit_movi(_jit, JIT_R1, 1); \
+ jit_reloc_t r = \
+ jit_b##N##r_u(_jit, JIT_R0, JIT_R1); \
+ jit_calli_0(_jit, abort); \
+ jit_patch_here(_jit, r); \
+} \
+{ \
+ dump_args(N##i, U, ); \
+ jit_movi(_jit, JIT_R0, U); \
+ jit_reloc_t r = \
+ jit_b##N##i(_jit, JIT_R0, 1); \
+ jit_calli_0(_jit, abort); \
+ jit_patch_here(_jit, r); \
+} \
+{ \
+ dump_args(N##i_u, D, ); \
+ jit_movi(_jit, JIT_R0, D); \
+ jit_reloc_t r = \
+ jit_b##N##i_u(_jit, JIT_R0, 1); \
+ jit_calli_0(_jit, abort); \
+ jit_patch_here(_jit, r); \
+}
+
+/* OVFGBS(): the OVFGB checks for the oadd, xadd, osub and xsub conditions,
+ * with first operands taken from the I7f/I80/I81/Iff patterns and 0. */
+#define OVFGBS() \
+ OVFGB(oadd, I7f, Iff, I7f, Iff) \
+ OVFGB(xadd, I80, I7f, I80, I7f) \
+ OVFGB(osub, I80, 0x0, I80, 0x0) \
+ OVFGB(xsub, I81, I80, I81, I80)
+
+/* MGB(N, L, R): emits two checks for branch condition N:
+ *   1. jit_b<N>r with JIT_R0 = L, JIT_R1 = R
+ *   2. jit_b<N>i with JIT_R0 = L and immediate R
+ * The emitted code aborts whenever a branch is not taken. */
+#define MGB(N, L, R) \
+{ \
+ dump_args(N##r, L, R); \
+ jit_movi(_jit, JIT_R0, L); \
+ jit_movi(_jit, JIT_R1, R); \
+ jit_reloc_t r = \
+ jit_b##N##r(_jit, JIT_R0, JIT_R1); \
+ jit_calli_0(_jit, abort); \
+ jit_patch_here(_jit, r); \
+} \
+{ \
+ dump_args(N##i, L, R); \
+ jit_movi(_jit, JIT_R0, L); \
+ jit_reloc_t r = jit_b##N##i(_jit, JIT_R0, R); \
+ jit_calli_0(_jit, abort); \
+ jit_patch_here(_jit, r); \
+}
+
+/* MBGS(): the MGB checks for the ms, mc, ne and eq conditions. */
+#define MBGS() \
+ MGB(ms, 1, 3) \
+ MGB(mc, 1, 2) \
+ MGB(ne, -3, 3) \
+ MGB(eq, 3, 3)
+
+/* ARFGB(N, L, R): emits the float branch jit_b<N>r_f on JIT_F0 = L and
+ * JIT_F1 = R twice; the emitted code aborts whenever the branch is not taken.
+ * NOTE(review): the second check is labelled N##i_f for dump_args but emits
+ * the same register-form branch as the first. */
+#define ARFGB(N, L, R) \
+{ \
+ dump_args(N##r_f, L, R); \
+ jit_movi_f(_jit, JIT_F0, L); \
+ jit_movi_f(_jit, JIT_F1, R); \
+ jit_reloc_t r = \
+ jit_b##N##r_f(_jit, JIT_F0, JIT_F1); \
+ jit_calli_0(_jit, abort); \
+ jit_patch_here(_jit, r); \
+} \
+{ \
+ dump_args(N##i_f, L, R); \
+ jit_movi_f(_jit, JIT_F0, L); \
+ jit_movi_f(_jit, JIT_F1, R); \
+ jit_reloc_t r = \
+ jit_b##N##r_f(_jit, JIT_F0, JIT_F1); \
+ jit_calli_0(_jit, abort); \
+ jit_patch_here(_jit, r); \
+}
+
+/* ARFGBS(): the ARFGB checks for the lt, le, ge, gt, eq and ne conditions. */
+#define ARFGBS() \
+ ARFGB(lt, 1, 2) \
+ ARFGB(le, -1, -1) \
+ ARFGB(ge, -3, -3) \
+ ARFGB(gt, 2, 1) \
+ ARFGB(eq, -2, -2) \
+ ARFGB(ne, 0, 2)
+
+/* Emits one JIT function made of every branch check defined by the macros
+ * above, followed by hand-written ordered/unordered float checks, and runs
+ * it. Each check calls abort() from the emitted code when a branch behaves
+ * unexpectedly, so returning normally means every check passed.
+ *
+ * _jit:                   JIT state to emit into.
+ * arena_base, arena_size: code buffer handed to jit_begin().
+ *
+ * Returns 0 after the emitted function has run; returns the size reported
+ * through jit_end() when jit_end() yields NULL. */
+static size_t
+run_test(jit_state_t *_jit, uint8_t *arena_base, size_t arena_size)
+{
+    void (*function)(void);
+    jit_begin(_jit, arena_base, arena_size);
+    size_t frame = jit_enter_jit_abi(_jit, 3, 0, 0);
+
+    ARGBS();
+    MBGS();
+    OVFGBS();
+    ARFGBS();
+
+    BOPI(lt, -1, 1, 1, -1)
+    BOPI(le, -1, -1, 1, 1)
+    EBI(eq, 32, 32)
+    BOPI(ge, -2, -2, 2, 2)
+    BOPI(gt, 2, -2, -2, 2)
+    EBI(ne, 3, -3)
+    XEBI(ms, 1, 3)
+    XEBI(mc, 1, 2)
+    XBOPI(oadd, I7f, 1, Iff, 1)
+    XBOPI(xadd, I80, 1, I7f, 1)
+    XBOPI(osub, I80, 1, 0, 1)
+    XBOPI(xsub, I81, 1, I80, 1)
+    BOPF(lt, 1, 2)
+    BOPF(le, 2, 2)
+    BOPF(eq, 3, 3)
+    BOPF(ge, 3, 3)
+    BOPF(gt, 4, 3)
+    UBOPF(ne, 4, 3)
+    UBOPF(unlt, 1, 2)
+    UBOPF(unle, 2, 2)
+    UBOPF(uneq, 3, 3)
+    UBOPF(unge, 3, 3)
+    UBOPF(ungt, 4, 3)
+    BOPF(ltgt, 4, 3)
+
+    /* bordr_f on two ordinary numbers: abort unless the branch is taken */
+    {
+        dump_args(ordr, 5, 5);
+        jit_movi_f(_jit, JIT_F0, 5);
+        jit_movi_f(_jit, JIT_F1, 5);
+        jit_reloc_t r = jit_bordr_f(_jit, JIT_F0, JIT_F1);
+        jit_calli_0(_jit, abort);
+        jit_patch_here(_jit, r);
+    }
+    {
+        dump_args(ordr, 5, 1);
+        jit_movi_f(_jit, JIT_F0, 5);
+        jit_movi_f(_jit, JIT_F1, 1);
+        jit_reloc_t r = jit_bordr_f(_jit, JIT_F0, JIT_F1);
+        jit_calli_0(_jit, abort);
+        jit_patch_here(_jit, r);
+    }
+    /* bordr_f with a NaN operand: abort if the branch is taken */
+    {
+        dump_args(ordr, 5, NaN);
+        jit_movi_f(_jit, JIT_F0, 5);
+        jit_movi_f(_jit, JIT_F1, NaN);
+        jit_reloc_t err = jit_bordr_f(_jit, JIT_F0, JIT_F1);
+        jit_reloc_t ok = jit_jmp(_jit);
+        jit_patch_here(_jit, err);
+        jit_calli_0(_jit, abort);
+        jit_patch_here(_jit, ok);
+    }
+    /* bunordr_f on two ordinary numbers: abort if the branch is taken */
+    {
+        dump_args(unordr, 5, 5);
+        jit_movi_f(_jit, JIT_F0, 5);
+        jit_movi_f(_jit, JIT_F1, 5);
+        jit_reloc_t err = jit_bunordr_f(_jit, JIT_F0, JIT_F1);
+        jit_reloc_t ok = jit_jmp(_jit);
+        jit_patch_here(_jit, err);
+        jit_calli_0(_jit, abort);
+        jit_patch_here(_jit, ok);
+    }
+    {
+        dump_args(unordr, 5, 1);
+        jit_movi_f(_jit, JIT_F0, 5);
+        jit_movi_f(_jit, JIT_F1, 1);
+        jit_reloc_t err = jit_bunordr_f(_jit, JIT_F0, JIT_F1);
+        jit_reloc_t ok = jit_jmp(_jit);
+        jit_patch_here(_jit, err);
+        jit_calli_0(_jit, abort);
+        jit_patch_here(_jit, ok);
+    }
+    /* bunordr_f with a NaN operand: abort unless the branch is taken */
+    {
+        dump_args(unordr, 5, NaN);
+        jit_movi_f(_jit, JIT_F0, 5);
+        jit_movi_f(_jit, JIT_F1, NaN);
+        jit_reloc_t r = jit_bunordr_f(_jit, JIT_F0, JIT_F1);
+        jit_calli_0(_jit, abort);
+        jit_patch_here(_jit, r);
+    }
+
+    jit_leave_jit_abi(_jit, 3, 0, frame);
+    jit_ret(_jit);
+
+    /* start from 0 so the failure path never returns an indeterminate value,
+     * even if jit_end() leaves size untouched when it fails */
+    size_t size = 0;
+    function = jit_end(_jit, &size);
+
+    if (function)
+        (*function)();
+    else
+        return size;
+
+    return 0;
+}
+
+/* Program entry point: passes the command line and run_test to main_compiler
+ * and returns its result. */
+int main(int argc, char *argv[])
+{
+ return main_compiler(argc, argv, run_test);
+}
diff --git a/deps/lightening/tests/z_call.c b/deps/lightening/tests/z_call.c
new file mode 100644
index 0000000..be1c072
--- /dev/null
+++ b/deps/lightening/tests/z_call.c
@@ -0,0 +1,307 @@
+#include "test.h"
+
+/* Short aliases for the JIT_OPERAND_ABI_* constants. The suffix (_c, _s, _i,
+ * _uc, _us, _ui, _l, _f, _d) is what the def_* and _call_* macros paste onto
+ * "operand" to select the ABI type. */
+#define operand_c JIT_OPERAND_ABI_INT8
+#define operand_s JIT_OPERAND_ABI_INT16
+#define operand_i JIT_OPERAND_ABI_INT32
+#define operand_uc JIT_OPERAND_ABI_UINT8
+#define operand_us JIT_OPERAND_ABI_UINT16
+#define operand_ui JIT_OPERAND_ABI_UINT32
+#define operand_l JIT_OPERAND_ABI_INT64
+#define operand_f JIT_OPERAND_ABI_FLOAT
+#define operand_d JIT_OPERAND_ABI_DOUBLE
+
+/* def_wi(i): declares the local `void *_w<i>` holding the address of a newly
+ * emitted routine that loads one integer argument of ABI type operand<i> into
+ * JIT_R0 and returns JIT_R0. */
+#define def_wi(i) \
+ void *_w##i = jit_address(_jit); \
+{ \
+ size_t frame = jit_enter_jit_abi(_jit, 0, 0, 0); \
+ jit_load_args_1(_jit, jit_operand_gpr(operand##i, JIT_R0)); \
+ jit_leave_jit_abi(_jit, 0, 0, frame); \
+ jit_retr(_jit, JIT_R0); \
+}
+
+/* def_wf(f): declares the local `void *_w<f>` holding the address of a newly
+ * emitted routine that loads one floating-point argument of ABI type
+ * operand<f> into JIT_F0, converts it into JIT_R0 with jit_truncr<f> and
+ * returns JIT_R0. */
+#define def_wf(f) \
+ void *_w##f = jit_address(_jit); \
+{ \
+ size_t frame = jit_enter_jit_abi(_jit, 0, 0, 0); \
+ jit_load_args_1(_jit, jit_operand_fpr(operand##f, JIT_F0)); \
+ jit_truncr##f(_jit, JIT_R0, JIT_F0); \
+ jit_leave_jit_abi(_jit, 0, 0, frame); \
+ jit_retr(_jit, JIT_R0); \
+}
+
+/* def_fi(f, i): declares the local `void *<f><i>` holding the address of a
+ * newly emitted routine that loads one integer argument of ABI type
+ * operand<i> into JIT_R0, converts it into JIT_F0 with jit_extr<f> and
+ * returns JIT_F0 through jit_retr<f>. */
+#define def_fi(f, i) \
+ void *f##i = jit_address(_jit); \
+{ \
+ size_t frame = jit_enter_jit_abi(_jit, 0, 0, 0); \
+ jit_load_args_1(_jit, jit_operand_gpr(operand##i, JIT_R0)); \
+ jit_extr##f(_jit, JIT_F0, JIT_R0); \
+ jit_leave_jit_abi(_jit, 0, 0, frame); \
+ jit_retr##f(_jit, JIT_F0); \
+}
+
+/* def_f(f): declares the local `void *<f><f>` holding the address of a newly
+ * emitted routine that loads one floating-point argument of ABI type
+ * operand<f> into JIT_F0 and returns it unchanged through jit_retr<f>. */
+#define def_f(f) \
+ void *f##f = jit_address(_jit); \
+{ \
+ size_t frame = jit_enter_jit_abi(_jit, 0, 0, 0); \
+ jit_load_args_1(_jit, jit_operand_fpr(operand##f, JIT_F0)); \
+ jit_leave_jit_abi(_jit, 0, 0, frame); \
+ jit_retr##f(_jit, JIT_F0); \
+}
+
+/* def_ff(f, g): declares the local `void *<f><g>` holding the address of a
+ * newly emitted routine that loads one floating-point argument of ABI type
+ * operand<f> into JIT_F0, converts it in place with jit_extr<f><g> and
+ * returns it through jit_retr<g>. */
+#define def_ff(f, g) \
+ void *f##g = jit_address(_jit); \
+{ \
+ size_t frame = jit_enter_jit_abi(_jit, 0, 0, 0); \
+ jit_load_args_1(_jit, jit_operand_fpr(operand##f, JIT_F0)); \
+ jit_extr##f##g(_jit, JIT_F0, JIT_F0); \
+ jit_leave_jit_abi(_jit, 0, 0, frame); \
+ jit_retr##g(_jit, JIT_F0); \
+}
+
+/* dump_args(n, f, i, a, r): tracing aid. When DEBUG is defined it emits a JIT
+ * call to puts() whose argument is the five macro arguments stringified and
+ * joined with single spaces; otherwise it expands to nothing. */
+#if defined(DEBUG)
+#define dump_args(n, f, i, a, r) \
+ jit_calli_1(_jit, puts, \
+ jit_operand_imm(JIT_OPERAND_ABI_POINTER, \
+ (jit_imm_t)#n " " #f " " #i " " #a " " #r))
+#else
+#define dump_args(n, f, i, a, r)
+#endif
+
+/* _call_w(n, i, a, r): emits a call to the routine at _w<i> with immediate
+ * argument a of ABI type operand<i>, fetches the result into JIT_R0, applies
+ * jit_extr<i> to it and aborts unless it equals r. n is only used for
+ * tracing. call_w forwards its arguments to _call_w unchanged. */
+#define _call_w(n, i, a, r) \
+{ \
+ dump_args(n, , i, a, r); \
+ jit_calli_1(_jit, _w##i, jit_operand_imm(operand##i, a)); \
+ jit_retval(_jit, JIT_R0); \
+ jit_extr##i(_jit, JIT_R0, JIT_R0); \
+ jit_reloc_t ok = jit_beqi(_jit, JIT_R0, r); \
+ jit_calli_0(_jit, abort); \
+ jit_patch_here(_jit, ok); \
+}
+#define call_w(n, i, a, r) _call_w(n, i, a, r)
+
+/* _call_wf(n, f, a, r): loads a (cast to long long) into JIT_F0 with
+ * jit_movi<f>, emits a call to the routine at _w<f> with that register as a
+ * floating-point argument, fetches the integer result into JIT_R0, converts
+ * it back into JIT_F0 with jit_extr<f> and aborts unless it compares equal to
+ * r. n is only used for tracing. call_wf forwards its arguments to _call_wf
+ * unchanged. */
+#define _call_wf(n, f, a, r) \
+{ \
+ dump_args(n, f, , a, r); \
+ jit_movi##f(_jit, JIT_F0, (long long)a); \
+ jit_calli_1(_jit, _w##f, jit_operand_fpr(operand##f, JIT_F0)); \
+ jit_retval(_jit, JIT_R0); \
+ jit_extr##f(_jit, JIT_F0, JIT_R0); \
+ jit_movi##f(_jit, JIT_F1, r); \
+ jit_reloc_t ok = jit_beqr##f(_jit, JIT_F0, JIT_F1); \
+ jit_calli_0(_jit, abort); \
+ jit_patch_here(_jit, ok); \
+}
+#define call_wf(n, f, a, r) _call_wf(n, f, a, r)
+
+/* _call_fi(n, f, i, a, r): emits a call to the routine at <f><i> with
+ * immediate argument a of ABI type operand<i>, fetches the floating-point
+ * result into JIT_F0 with jit_retval<f> and aborts unless it compares equal
+ * to r. n is only used for tracing. call_fi forwards its arguments to
+ * _call_fi unchanged. */
+#define _call_fi(n, f, i, a, r) \
+{ \
+ dump_args(n, f, i, a, r); \
+ jit_calli_1(_jit, f##i, jit_operand_imm(operand##i, a)); \
+ jit_retval##f(_jit, JIT_F0); \
+ jit_movi##f(_jit, JIT_F1, r); \
+ jit_reloc_t ok = jit_beqr##f(_jit, JIT_F0, JIT_F1); \
+ jit_calli_0(_jit, abort); \
+ jit_patch_here(_jit, ok); \
+}
+#define call_fi(n, f, i, a, r) _call_fi(n, f, i, a, r)
+
+/* _call_f(n, f, a, r): loads a into JIT_F0 with jit_movi<f>, emits a call to
+ * the routine at <f><f> with that register as a floating-point argument,
+ * fetches the result into JIT_F0 with jit_retval<f> and aborts unless it
+ * compares equal to r. n is only used for tracing. call_f forwards its
+ * arguments to _call_f unchanged. */
+#define _call_f(n, f, a, r) \
+{ \
+ dump_args(n, f, , a, r); \
+ jit_movi##f(_jit, JIT_F0, a); \
+ jit_calli_1(_jit, f##f, jit_operand_fpr(operand##f, JIT_F0)); \
+ jit_retval##f(_jit, JIT_F0); \
+ jit_movi##f(_jit, JIT_F1, r); \
+ jit_reloc_t ok = jit_beqr##f(_jit, JIT_F0, JIT_F1); \
+ jit_calli_0(_jit, abort); \
+ jit_patch_here(_jit, ok); \
+}
+#define call_f(n, f, a, r) _call_f(n, f, a, r)
+
+/* _call_ff(n, f, g, a, r): loads a into JIT_F0 with jit_movi<f>, emits a call
+ * to the routine at <f><g> with that register as a floating-point argument of
+ * type operand<f>, fetches the result into JIT_F0 with jit_retval<g> and
+ * aborts unless it compares equal to r under jit_beqr<g>. n is only used for
+ * tracing. call_ff forwards its arguments to _call_ff unchanged. */
+#define _call_ff(n, f, g, a, r) \
+{ \
+ dump_args(n, f, g, a, r); \
+ jit_movi##f(_jit, JIT_F0, a); \
+ jit_calli_1(_jit, f##g, jit_operand_fpr(operand##f, JIT_F0)); \
+ jit_retval##g(_jit, JIT_F0); \
+ jit_movi##g(_jit, JIT_F1, r); \
+ jit_reloc_t ok = jit_beqr##g(_jit, JIT_F0, JIT_F1); \
+ jit_calli_0(_jit, abort); \
+ jit_patch_here(_jit, ok); \
+}
+#define call_ff(n, f, g, a, r) _call_ff(n, f, g, a, r)
+
+/* Boundary bit patterns cast to each signed integer width (c = int8_t,
+ * s = int16_t, i = int32_t, l = int64_t):
+ *   7f - every bit set except the top one
+ *   80 - only the top bit set
+ *   81 - top bit and bit 0 set
+ *   ff - every bit set */
+#define c7f (int8_t)0x7f
+#define c80 (int8_t)0x80
+#define c81 (int8_t)0x81
+#define cff (int8_t)0xff
+#define s7f (int16_t)0x7fff
+#define s80 (int16_t)0x8000
+#define s81 (int16_t)0x8001
+#define i7f (int32_t)0x7fffffff
+#define i80 (int32_t)0x80000000
+#define i81 (int32_t)0x80000001
+#define iff (int32_t)0xffffffff
+#define l7f (int64_t)0x7fffffffffffffff
+#define l80 (int64_t)0x8000000000000000
+#define l81 (int64_t)0x8000000000000001
+
+/* The same bit patterns cast to the unsigned type of each width. */
+#define uc7f (uint8_t)0x7f
+#define uc80 (uint8_t)0x80
+#define uc81 (uint8_t)0x81
+#define ucff (uint8_t)0xff
+#define us7f (uint16_t)0x7fff
+#define us80 (uint16_t)0x8000
+#define us81 (uint16_t)0x8001
+#define ui7f (uint32_t)0x7fffffff
+#define ui80 (uint32_t)0x80000000
+#define ui81 (uint32_t)0x80000001
+#define uiff (uint32_t)0xffffffff
+#define ul7f (uint64_t)0x7fffffffffffffff
+#define ul80 (uint64_t)0x8000000000000000
+#define ul81 (uint64_t)0x8000000000000001
+
+/* Floating-point counterparts of the 8-bit values: 0x7f, 0x80 and 0x81 read
+ * as signed (f7f, f80, f81) and 0x80 read as unsigned (uf80); uf81 is 127.0. */
+#define f7f 127.0
+#define f80 -128.0
+#define f81 -127.0
+#define uf80 128.0
+#define uf81 127.0
+
+/* Word-sized values whose low 8 (wc), 16 (ws) or 32 (wi) bits hold the 80/81
+ * pattern and whose remaining upper bits are all set, i.e. the sign-extended
+ * forms. The wi values are only defined when __WORDSIZE is not 32. */
+#if __WORDSIZE == 32
+# define wc80 (long)0xffffff80
+# define wc81 (long)0xffffff81
+# define ws80 (long)0xffff8000
+# define ws81 (long)0xffff8001
+#else
+# define wc80 (long)0xffffffffffffff80
+# define wc81 (long)0xffffffffffffff81
+# define ws80 (long)0xffffffffffff8000
+# define ws81 (long)0xffffffffffff8001
+# define wi80 (long)0xffffffff80000000
+# define wi81 (long)0xffffffff80000001
+#endif
+
+/* Emits a set of small JIT routines (one per def_* invocation) followed by a
+ * driver that calls each of them with boundary values and aborts from the
+ * emitted code when a result differs from the expected one; then runs the
+ * driver.
+ *
+ * _jit:                   JIT state to emit into.
+ * arena_base, arena_size: code buffer handed to jit_begin().
+ *
+ * Returns 0 after the emitted driver has run; returns the size reported
+ * through jit_end() when jit_end() yields NULL. */
+static size_t
+run_test(jit_state_t *_jit, uint8_t *arena_base, size_t arena_size)
+{
+ void (*function)();
+ jit_begin(_jit, arena_base, arena_size);
+ /* the code starts with a jump over the helper routines; it is patched to
+ * the driver below (this local shadows the C function main) */
+ jit_reloc_t main = jit_jmp(_jit);
+
+ /* helper routines: each def_* declares a local holding its address */
+ def_wi(_c)
+ def_wi(_uc)
+ def_wi(_s)
+ def_wi(_us)
+#if __WORDSIZE == 64
+ def_wi(_i)
+ def_wi(_ui)
+#endif
+ def_wf(_f)
+ def_wf(_d)
+ def_fi(_f, _c)
+ def_fi(_f, _uc)
+ def_fi(_f, _s)
+ def_fi(_f, _us)
+ def_fi(_f, _i)
+#if __WORDSIZE == 64
+ def_fi(_f, _ui)
+ def_fi(_f, _l)
+#endif
+ def_fi(_d, _c)
+ def_fi(_d, _uc)
+ def_fi(_d, _s)
+ def_fi(_d, _us)
+ def_fi(_d, _i)
+#if __WORDSIZE == 64
+ def_fi(_d, _ui)
+ def_fi(_d, _l)
+#endif
+ def_f(_f)
+ def_f(_d)
+ def_ff(_f, _d)
+ def_ff(_d, _f)
+
+ /* driver starts here */
+ jit_patch_here(_jit, main);
+ size_t frame = jit_enter_jit_abi(_jit, 0, 0, 0);
+
+ /* integer in, integer out */
+ call_w(__LINE__, _c, c7f, c7f)
+ call_w(__LINE__, _c, c80, wc80)
+ call_w(__LINE__, _c, c81, wc81)
+ call_w(__LINE__, _uc, uc7f, c7f)
+ call_w(__LINE__, _uc, uc80, uc80)
+ call_w(__LINE__, _uc, uc81, uc81)
+ call_w(__LINE__, _s, s7f, s7f)
+ call_w(__LINE__, _s, s80, ws80)
+ call_w(__LINE__, _s, s81, ws81)
+ call_w(__LINE__, _us, us7f, us7f)
+ call_w(__LINE__, _us, us80, us80)
+ call_w(__LINE__, _us, us81, us81)
+#if __WORDSIZE == 64
+ call_w(__LINE__, _i, i7f, i7f)
+ call_w(__LINE__, _i, i80, wi80)
+ call_w(__LINE__, _i, i81, wi81)
+ call_w(__LINE__, _ui, ui7f, ui7f)
+ call_w(__LINE__, _ui, ui80, ui80)
+ call_w(__LINE__, _ui, ui81, ui81)
+#endif
+ /* float/double in, integer out */
+ call_wf(__LINE__, _f, c7f, f7f)
+ call_wf(__LINE__, _f, wc80, f80)
+ call_wf(__LINE__, _f, wc81, f81)
+ call_wf(__LINE__, _d, c7f, f7f)
+ call_wf(__LINE__, _d, wc80, f80)
+ call_wf(__LINE__, _d, wc81, f81)
+ /* integer in, float out */
+ call_fi(__LINE__, _f, _c, c7f, f7f)
+ call_fi(__LINE__, _f, _c, c80, f80)
+ call_fi(__LINE__, _f, _uc, uc7f, f7f)
+ call_fi(__LINE__, _f, _uc, uc80, uf80)
+ call_fi(__LINE__, _f, _s, c7f, f7f)
+ call_fi(__LINE__, _f, _s, uc80, uf80)
+ call_fi(__LINE__, _f, _us, uc7f, f7f)
+ call_fi(__LINE__, _f, _us, uc80, uf80)
+ call_fi(__LINE__, _f, _i, c7f, f7f)
+ call_fi(__LINE__, _f, _i, uc80, uf80)
+#if __WORDSIZE == 64
+ call_fi(__LINE__, _f, _ui, uc7f, f7f)
+ call_fi(__LINE__, _f, _ui, uc80, uf80)
+ call_fi(__LINE__, _f, _l, c7f, f7f)
+ call_fi(__LINE__, _f, _l, uc80, uf80)
+#endif
+ /* integer in, double out */
+ call_fi(__LINE__, _d, _c, c7f, f7f)
+ call_fi(__LINE__, _d, _c, c80, f80)
+ call_fi(__LINE__, _d, _uc, uc7f, f7f)
+ call_fi(__LINE__, _d, _uc, uc80, uf80)
+ call_fi(__LINE__, _d, _s, c7f, f7f)
+ call_fi(__LINE__, _d, _s, uc80, uf80)
+ call_fi(__LINE__, _d, _us, uc7f, f7f)
+ call_fi(__LINE__, _d, _us, uc80, uf80)
+ call_fi(__LINE__, _d, _i, c7f, f7f)
+ call_fi(__LINE__, _d, _i, uc80, uf80)
+#if __WORDSIZE == 64
+ call_fi(__LINE__, _d, _ui, uc7f, f7f)
+ call_fi(__LINE__, _d, _ui, uc80, uf80)
+ call_fi(__LINE__, _d, _l, c7f, f7f)
+ call_fi(__LINE__, _d, _l, uc80, uf80)
+#endif
+ /* floating point in, floating point out (same type, then converting) */
+ call_f(__LINE__, _f, f7f, f7f)
+ call_f(__LINE__, _d, f7f, f7f)
+ call_ff(__LINE__, _f, _d, f80, f80)
+ call_ff(__LINE__, _d, _f, f81, f81)
+
+ jit_leave_jit_abi(_jit, 0, 0, frame);
+ jit_ret(_jit);
+
+ size_t size = 0;
+ function = jit_end(_jit, &size);
+
+ if (function)
+ (*function)();
+ else
+ return size;
+
+ return 0;
+}
+
+/* Program entry point: passes the command line and run_test to main_compiler
+ * and returns its result. */
+int main(int argc, char *argv[])
+{
+ return main_compiler(argc, argv, run_test);
+}
diff --git a/deps/lightening/tests/z_ccall.c b/deps/lightening/tests/z_ccall.c
new file mode 100644
index 0000000..ef2b17e
--- /dev/null
+++ b/deps/lightening/tests/z_ccall.c
@@ -0,0 +1,1000 @@
+#include "test.h"
+
+/* QUOTE(x) turns x into a string literal after macro-expanding it; _QUOTE is
+ * the inner step that applies the # operator. */
+#define _QUOTE(x) #x
+#define QUOTE(x) _QUOTE(x)
+
+/* dump_args(x): tracing aid. Calls puts(x) when DEBUG is defined and expands
+ * to nothing otherwise. */
+#if defined(DEBUG)
+#define dump_args(x) puts(x)
+#else
+#define dump_args(x)
+#endif
+
+/* Reference values, one per index 0..15. _w0 is 0, _w1 is 1 and every later
+ * entry is the previous one minus its own index: _wN = _w(N-1) - N. */
+#define _w0 0
+#define _w1 1
+#define _w2 (_w1-2)
+#define _w3 (_w2-3)
+#define _w4 (_w3-4)
+#define _w5 (_w4-5)
+#define _w6 (_w5-6)
+#define _w7 (_w6-7)
+#define _w8 (_w7-8)
+#define _w9 (_w8-9)
+#define _w10 (_w9-10)
+#define _w11 (_w10-11)
+#define _w12 (_w11-12)
+#define _w13 (_w12-13)
+#define _w14 (_w13-14)
+#define _w15 (_w14-15)
+/* _cN: same value as _wN */
+#define _c0 _w0
+#define _c1 _w1
+#define _c2 _w2
+#define _c3 _w3
+#define _c4 _w4
+#define _c5 _w5
+#define _c6 _w6
+#define _c7 _w7
+#define _c8 _w8
+#define _c9 _w9
+#define _c10 _w10
+#define _c11 _w11
+#define _c12 _w12
+#define _c13 _w13
+#define _c14 _w14
+#define _c15 _w15
+/* _ucN: _wN masked to its low 8 bits */
+#define _uc0 (_w0&0xff)
+#define _uc1 (_w1&0xff)
+#define _uc2 (_w2&0xff)
+#define _uc3 (_w3&0xff)
+#define _uc4 (_w4&0xff)
+#define _uc5 (_w5&0xff)
+#define _uc6 (_w6&0xff)
+#define _uc7 (_w7&0xff)
+#define _uc8 (_w8&0xff)
+#define _uc9 (_w9&0xff)
+#define _uc10 (_w10&0xff)
+#define _uc11 (_w11&0xff)
+#define _uc12 (_w12&0xff)
+#define _uc13 (_w13&0xff)
+#define _uc14 (_w14&0xff)
+#define _uc15 (_w15&0xff)
+/* _sN: same value as _wN */
+#define _s0 _w0
+#define _s1 _w1
+#define _s2 _w2
+#define _s3 _w3
+#define _s4 _w4
+#define _s5 _w5
+#define _s6 _w6
+#define _s7 _w7
+#define _s8 _w8
+#define _s9 _w9
+#define _s10 _w10
+#define _s11 _w11
+#define _s12 _w12
+#define _s13 _w13
+#define _s14 _w14
+#define _s15 _w15
+/* _usN: _wN masked to its low 16 bits */
+#define _us0 (_w0&0xffff)
+#define _us1 (_w1&0xffff)
+#define _us2 (_w2&0xffff)
+#define _us3 (_w3&0xffff)
+#define _us4 (_w4&0xffff)
+#define _us5 (_w5&0xffff)
+#define _us6 (_w6&0xffff)
+#define _us7 (_w7&0xffff)
+#define _us8 (_w8&0xffff)
+#define _us9 (_w9&0xffff)
+#define _us10 (_w10&0xffff)
+#define _us11 (_w11&0xffff)
+#define _us12 (_w12&0xffff)
+#define _us13 (_w13&0xffff)
+#define _us14 (_w14&0xffff)
+#define _us15 (_w15&0xffff)
+/* _iN: same value as _wN */
+#define _i0 _w0
+#define _i1 _w1
+#define _i2 _w2
+#define _i3 _w3
+#define _i4 _w4
+#define _i5 _w5
+#define _i6 _w6
+#define _i7 _w7
+#define _i8 _w8
+#define _i9 _w9
+#define _i10 _w10
+#define _i11 _w11
+#define _i12 _w12
+#define _i13 _w13
+#define _i14 _w14
+#define _i15 _w15
+/* only when __WORDSIZE is 64: _uiN is _wN masked to its low 32 bits and _lN
+ * is the same value as _wN */
+#if __WORDSIZE == 64
+# define _ui0 (_w0&0xffffffff)
+# define _ui1 (_w1&0xffffffff)
+# define _ui2 (_w2&0xffffffff)
+# define _ui3 (_w3&0xffffffff)
+# define _ui4 (_w4&0xffffffff)
+# define _ui5 (_w5&0xffffffff)
+# define _ui6 (_w6&0xffffffff)
+# define _ui7 (_w7&0xffffffff)
+# define _ui8 (_w8&0xffffffff)
+# define _ui9 (_w9&0xffffffff)
+# define _ui10 (_w10&0xffffffff)
+# define _ui11 (_w11&0xffffffff)
+# define _ui12 (_w12&0xffffffff)
+# define _ui13 (_w13&0xffffffff)
+# define _ui14 (_w14&0xffffffff)
+# define _ui15 (_w15&0xffffffff)
+# define _l0 _w0
+# define _l1 _w1
+# define _l2 _w2
+# define _l3 _w3
+# define _l4 _w4
+# define _l5 _w5
+# define _l6 _w6
+# define _l7 _w7
+# define _l8 _w8
+# define _l9 _w9
+# define _l10 _w10
+# define _l11 _w11
+# define _l12 _w12
+# define _l13 _w13
+# define _l14 _w14
+# define _l15 _w15
+#endif
+
+/*
+ * Types
+ */
+/* Short type names used as the T argument of the prt/dat/dcl macro families;
+ * the same spelling is pasted into the generated function and variable
+ * names. _ui and _l exist only when __WORDSIZE is 64. */
+typedef signed char _c;
+typedef unsigned char _uc;
+typedef signed short _s;
+typedef unsigned short _us;
+typedef signed int _i;
+#if __WORDSIZE == 64
+typedef unsigned int _ui;
+typedef jit_word_t _l;
+#endif
+typedef float _f;
+typedef double _d;
+
+/* Prototypes of the C-side functions. prtK(T) declares C<T>0 .. C<T>K, where
+ * C<T>n takes n parameters of type T and returns T (C<T>0 takes void); each
+ * prtK first expands prt(K-1). prt(T) is prt15(T). The family is instantiated
+ * for every typedef above and then undefined again. */
+#define prt0(T) T C##T##0(void);
+#define prt1(T) prt0(T) \
+ T C##T##1(T);
+#define prt2(T) prt1(T) \
+ T C##T##2(T,T);
+#define prt3(T) prt2(T) \
+ T C##T##3(T,T,T);
+#define prt4(T) prt3(T) \
+ T C##T##4(T,T,T,T);
+#define prt5(T) prt4(T) \
+ T C##T##5(T,T,T,T,T);
+#define prt6(T) prt5(T) \
+ T C##T##6(T,T,T,T,T,T);
+#define prt7(T) prt6(T) \
+ T C##T##7(T,T,T,T,T,T,T);
+#define prt8(T) prt7(T) \
+ T C##T##8(T,T,T,T,T,T,T,T);
+#define prt9(T) prt8(T) \
+ T C##T##9(T,T,T,T,T,T,T,T,T);
+#define prt10(T) prt9(T) \
+ T C##T##10(T,T,T,T,T,T,T,T,T,T);
+#define prt11(T) prt10(T) \
+ T C##T##11(T,T,T,T,T,T,T,T,T,T,T);
+#define prt12(T) prt11(T) \
+ T C##T##12(T,T,T,T,T,T,T,T,T,T,T,T);
+#define prt13(T) prt12(T) \
+ T C##T##13(T,T,T,T,T,T,T,T,T,T,T,T,T);
+#define prt14(T) prt13(T) \
+ T C##T##14(T,T,T,T,T,T,T,T,T,T,T,T,T,T);
+#define prt15(T) prt14(T) \
+ T C##T##15(T,T,T,T,T,T,T,T,T,T,T,T,T,T,T);
+#define prt(T) prt15(T)
+prt(_c)
+prt(_uc)
+prt(_s)
+prt(_us)
+prt(_i)
+#if __WORDSIZE == 64
+prt(_ui)
+prt(_l)
+#endif
+prt(_f)
+prt(_d)
+#undef prt
+#undef prt15
+#undef prt14
+#undef prt13
+#undef prt12
+#undef prt11
+#undef prt10
+#undef prt9
+#undef prt8
+#undef prt7
+#undef prt6
+#undef prt5
+#undef prt4
+#undef prt3
+#undef prt2
+#undef prt1
+#undef prt0
+
+/* Second prototype family: prtK(T) expands prtn(0,T) .. prtn(K,T), and prtn
+ * declares a function taking void and returning T. The family is instantiated
+ * for every typedef above and then undefined again.
+ * NOTE(review): prtn never uses its N parameter -- the trailing `n` in
+ * J##T##n is a literal letter, so all sixteen expansions for a type declare
+ * the same function J<T>n. Confirm whether J##T##N was intended before
+ * changing it. */
+#define prtn(N,T) T J##T##n(void);
+#define prt0(T) prtn(0,T)
+#define prt1(T) prt0(T) prtn(1,T)
+#define prt2(T) prt1(T) prtn(2,T)
+#define prt3(T) prt2(T) prtn(3,T)
+#define prt4(T) prt3(T) prtn(4,T)
+#define prt5(T) prt4(T) prtn(5,T)
+#define prt6(T) prt5(T) prtn(6,T)
+#define prt7(T) prt6(T) prtn(7,T)
+#define prt8(T) prt7(T) prtn(8,T)
+#define prt9(T) prt8(T) prtn(9,T)
+#define prt10(T) prt9(T) prtn(10,T)
+#define prt11(T) prt10(T) prtn(11,T)
+#define prt12(T) prt11(T) prtn(12,T)
+#define prt13(T) prt12(T) prtn(13,T)
+#define prt14(T) prt13(T) prtn(14,T)
+#define prt15(T) prt14(T) prtn(15,T)
+#define prt(T) prt15(T)
+prt(_c)
+prt(_uc)
+prt(_s)
+prt(_us)
+prt(_i)
+#if __WORDSIZE == 64
+prt(_ui)
+prt(_l)
+#endif
+prt(_f)
+prt(_d)
+#undef prt
+#undef prt15
+#undef prt14
+#undef prt13
+#undef prt12
+#undef prt11
+#undef prt10
+#undef prt9
+#undef prt8
+#undef prt7
+#undef prt6
+#undef prt5
+#undef prt4
+#undef prt3
+#undef prt2
+#undef prt1
+#undef prt0
+#undef prtn
+
+/*
+ * Initialization
+ */
+
+/* File-scope function-pointer variables. datK(T) declares j<T>0 .. j<T>K,
+ * where j<T>n points to a function taking n parameters of type T and
+ * returning T (j<T>0 takes void); each datK first expands dat(K-1). dat(T) is
+ * dat15(T). The family is instantiated for every typedef above and then
+ * undefined again. */
+#define dat0(T) T (*j##T##0)(void);
+
+#define dat1(T) dat0(T) \
+ T (*j##T##1)(T);
+
+#define dat2(T) dat1(T) \
+ T (*j##T##2)(T,T);
+
+#define dat3(T) dat2(T) \
+ T (*j##T##3)(T,T,T);
+
+#define dat4(T) dat3(T) \
+ T (*j##T##4)(T,T,T,T);
+
+#define dat5(T) dat4(T) \
+ T (*j##T##5)(T,T,T,T,T);
+
+#define dat6(T) dat5(T) \
+ T (*j##T##6)(T,T,T,T,T,T);
+
+#define dat7(T) dat6(T) \
+ T (*j##T##7)(T,T,T,T,T,T,T);
+
+#define dat8(T) dat7(T) \
+ T (*j##T##8)(T,T,T,T,T,T,T,T);
+
+#define dat9(T) dat8(T) \
+ T (*j##T##9)(T,T,T,T,T,T,T,T,T);
+
+#define dat10(T) dat9(T) \
+ T (*j##T##10)(T,T,T,T,T,T,T,T,T,T);
+
+#define dat11(T) dat10(T) \
+ T (*j##T##11)(T,T,T,T,T,T,T,T,T,T,T);
+
+#define dat12(T) dat11(T) \
+ T (*j##T##12)(T,T,T,T,T,T,T,T,T,T,T,T);
+
+#define dat13(T) dat12(T) \
+ T (*j##T##13)(T,T,T,T,T,T,T,T,T,T,T,T,T);
+
+#define dat14(T) dat13(T) \
+ T (*j##T##14)(T,T,T,T,T,T,T,T,T,T,T,T,T,T);
+
+#define dat15(T) dat14(T) \
+ T (*j##T##15)(T,T,T,T,T,T,T,T,T,T,T,T,T,T,T);
+
+#define dat(T) dat15(T)
+dat(_c)
+dat(_uc)
+dat(_s)
+dat(_us)
+dat(_i)
+#if __WORDSIZE == 64
+dat(_ui)
+dat(_l)
+#endif
+dat(_f)
+dat(_d)
+#undef dat
+#undef dat15
+#undef dat14
+#undef dat13
+#undef dat12
+#undef dat11
+#undef dat10
+#undef dat9
+#undef dat8
+#undef dat7
+#undef dat6
+#undef dat5
+#undef dat4
+#undef dat3
+#undef dat2
+#undef dat1
+#undef dat0
+
+/*
+ * Implementation
+ *
+ * dcl<N>(T) defines the native C callees C<T>0 .. C<T><N>.  C<T>0 returns 0,
+ * C<T>1 returns its argument, and C<T><k> for k >= 2 returns the first
+ * argument minus every following argument.  Each callee first invokes
+ * dump_args() on QUOTE() of its own name; both are defined outside this
+ * section.
+ */
+#define dcl0(T) \
+T C##T##0(void) \
+{ \
+ dump_args(QUOTE(C##T##0));\
+ return (0); \
+}
+#define dcl1(T) \
+dcl0(T) \
+T C##T##1(T A) \
+{ \
+ dump_args(QUOTE(C##T##1));\
+ return (A); \
+}
+#define dcl2(T) \
+dcl1(T) \
+T C##T##2(T A,T B) \
+{ \
+ dump_args(QUOTE(C##T##2));\
+ return (A-B); \
+}
+#define dcl3(T) \
+dcl2(T) \
+T C##T##3(T A,T B,T C) \
+{ \
+ dump_args(QUOTE(C##T##3));\
+ return (A-B-C); \
+}
+#define dcl4(T) \
+dcl3(T) \
+T C##T##4(T A,T B,T C,T D) \
+{ \
+ dump_args(QUOTE(C##T##4));\
+ return (A-B-C-D); \
+}
+#define dcl5(T) \
+dcl4(T) \
+T C##T##5(T A,T B,T C,T D,T E) \
+{ \
+ dump_args(QUOTE(C##T##5));\
+ return (A-B-C-D-E); \
+}
+#define dcl6(T) \
+dcl5(T) \
+T C##T##6(T A,T B,T C,T D,T E,T F) \
+{ \
+ dump_args(QUOTE(C##T##6));\
+ return (A-B-C-D-E-F); \
+}
+#define dcl7(T) \
+dcl6(T) \
+T C##T##7(T A,T B,T C,T D,T E,T F,T G) \
+{ \
+ dump_args(QUOTE(C##T##7));\
+ return (A-B-C-D-E-F-G); \
+}
+#define dcl8(T) \
+dcl7(T) \
+T C##T##8(T A,T B,T C,T D,T E,T F,T G,T H) \
+{ \
+ dump_args(QUOTE(C##T##8));\
+ return (A-B-C-D-E-F-G-H); \
+}
+#define dcl9(T) \
+dcl8(T) \
+T C##T##9(T A,T B,T C,T D,T E,T F,T G,T H,T I) \
+{ \
+ dump_args(QUOTE(C##T##9));\
+ return (A-B-C-D-E-F-G-H-I); \
+}
+#define dcl10(T) \
+dcl9(T) \
+T C##T##10(T A,T B,T C,T D,T E,T F,T G,T H,T I,T J) \
+{ \
+ dump_args(QUOTE(C##T##10));\
+ return (A-B-C-D-E-F-G-H-I-J); \
+}
+#define dcl11(T) \
+dcl10(T) \
+T C##T##11(T A,T B,T C,T D,T E,T F,T G,T H,T I,T J,T K) \
+{ \
+ dump_args(QUOTE(C##T##11));\
+ return (A-B-C-D-E-F-G-H-I-J-K); \
+}
+#define dcl12(T) \
+dcl11(T) \
+T C##T##12(T A,T B,T C,T D,T E,T F,T G,T H,T I,T J,T K,T L) \
+{ \
+ dump_args(QUOTE(C##T##12));\
+ return (A-B-C-D-E-F-G-H-I-J-K-L); \
+}
+#define dcl13(T) \
+dcl12(T) \
+T C##T##13(T A,T B,T C,T D,T E,T F,T G,T H,T I,T J,T K,T L,T M) \
+{ \
+ dump_args(QUOTE(C##T##13));\
+ return (A-B-C-D-E-F-G-H-I-J-K-L-M); \
+}
+#define dcl14(T) \
+dcl13(T) \
+T C##T##14(T A,T B,T C,T D,T E,T F,T G,T H,T I,T J,T K,T L,T M,T N) \
+{ \
+ dump_args(QUOTE(C##T##14));\
+ return (A-B-C-D-E-F-G-H-I-J-K-L-M-N); \
+}
+#define dcl15(T) \
+dcl14(T) \
+T C##T##15(T A,T B,T C,T D,T E,T F,T G,T H,T I,T J,T K,T L,T M,T N,T O) \
+{ \
+ dump_args(QUOTE(C##T##15));\
+ return (A-B-C-D-E-F-G-H-I-J-K-L-M-N-O); \
+}
+#define dcl(T) dcl15(T)
+dcl(_c)
+dcl(_uc)
+dcl(_s)
+dcl(_us)
+dcl(_i)
+#if __WORDSIZE == 64
+dcl(_ui)
+dcl(_l)
+#endif
+dcl(_f)
+dcl(_d)
+#undef dcl
+#undef dcl15
+#undef dcl14
+#undef dcl13
+#undef dcl12
+#undef dcl11
+#undef dcl10
+#undef dcl9
+#undef dcl8
+#undef dcl7
+#undef dcl6
+#undef dcl5
+#undef dcl4
+#undef dcl3
+#undef dcl2
+#undef dcl1
+#undef dcl0
+/* dcl<N>(T) is redefined here to generate CJ<T>0 .. CJ<T><N>: C wrappers
+ * declared with no parameters that call the JIT-generated function through
+ * the j<T><k> pointer with the constants 1 .. k and return its result.
+ * Like the C<T><k> callees, each first invokes dump_args() on its name. */
+#define dcl0(T) \
+T CJ##T##0(void) \
+{ \
+ dump_args(QUOTE(CJ##T##0));\
+ return ((*j##T##0)()); \
+}
+#define dcl1(T) \
+dcl0(T) \
+T CJ##T##1(void) \
+{ \
+ dump_args(QUOTE(CJ##T##1));\
+ return ((*j##T##1)(1)); \
+}
+#define dcl2(T) \
+dcl1(T) \
+T CJ##T##2(void) \
+{ \
+ dump_args(QUOTE(CJ##T##2));\
+ return ((*j##T##2)(1,2)); \
+}
+#define dcl3(T) \
+dcl2(T) \
+T CJ##T##3(void) \
+{ \
+ dump_args(QUOTE(CJ##T##3));\
+ return ((*j##T##3)(1,2,3)); \
+}
+#define dcl4(T) \
+dcl3(T) \
+T CJ##T##4(void) \
+{ \
+ dump_args(QUOTE(CJ##T##4));\
+ return ((*j##T##4)(1,2,3,4)); \
+}
+#define dcl5(T) \
+dcl4(T) \
+T CJ##T##5(void) \
+{ \
+ dump_args(QUOTE(CJ##T##5));\
+ return ((*j##T##5)(1,2,3,4,5)); \
+}
+#define dcl6(T) \
+dcl5(T) \
+T CJ##T##6(void) \
+{ \
+ dump_args(QUOTE(CJ##T##6));\
+ return ((*j##T##6)(1,2,3,4,5,6)); \
+}
+#define dcl7(T) \
+dcl6(T) \
+T CJ##T##7(void) \
+{ \
+ dump_args(QUOTE(CJ##T##7));\
+ return ((*j##T##7)(1,2,3,4,5,6,7)); \
+}
+#define dcl8(T) \
+dcl7(T) \
+T CJ##T##8(void) \
+{ \
+ dump_args(QUOTE(CJ##T##8));\
+ return ((*j##T##8)(1,2,3,4,5,6,7,8)); \
+}
+#define dcl9(T) \
+dcl8(T) \
+T CJ##T##9(void) \
+{ \
+ dump_args(QUOTE(CJ##T##9));\
+ return ((*j##T##9)(1,2,3,4,5,6,7,8,9)); \
+}
+#define dcl10(T) \
+dcl9(T) \
+T CJ##T##10(void) \
+{ \
+ dump_args(QUOTE(CJ##T##10));\
+ return ((*j##T##10)(1,2,3,4,5,6,7,8,9,10)); \
+}
+#define dcl11(T) \
+dcl10(T) \
+T CJ##T##11(void) \
+{ \
+ dump_args(QUOTE(CJ##T##11));\
+ return ((*j##T##11)(1,2,3,4,5,6,7,8,9,10,11)); \
+}
+#define dcl12(T) \
+dcl11(T) \
+T CJ##T##12(void) \
+{ \
+ dump_args(QUOTE(CJ##T##12));\
+ return ((*j##T##12)(1,2,3,4,5,6,7,8,9,10,11,12)); \
+}
+#define dcl13(T) \
+dcl12(T) \
+T CJ##T##13(void) \
+{ \
+ dump_args(QUOTE(CJ##T##13));\
+ return ((*j##T##13)(1,2,3,4,5,6,7,8,9,10,11,12,13)); \
+}
+#define dcl14(T) \
+dcl13(T) \
+T CJ##T##14(void) \
+{ \
+ dump_args(QUOTE(CJ##T##14));\
+ return ((*j##T##14)(1,2,3,4,5,6,7,8,9,10,11,12,13,14)); \
+}
+#define dcl15(T) \
+dcl14(T) \
+T CJ##T##15(void) \
+{ \
+ dump_args(QUOTE(CJ##T##15));\
+ return ((*j##T##15)(1,2,3,4,5,6,7,8,9,10,11,12,13,14,15)); \
+}
+#define dcl(t) dcl15(t)
+dcl(_c)
+dcl(_uc)
+dcl(_s)
+dcl(_us)
+dcl(_i)
+#if __WORDSIZE == 64
+dcl(_ui)
+dcl(_l)
+#endif
+dcl(_f)
+dcl(_d)
+#undef dcl
+#undef dcl15
+#undef dcl14
+#undef dcl13
+#undef dcl12
+#undef dcl11
+#undef dcl10
+#undef dcl9
+#undef dcl8
+#undef dcl7
+#undef dcl6
+#undef dcl5
+#undef dcl4
+#undef dcl3
+#undef dcl2
+#undef dcl1
+#undef dcl0
+/* Emit and run the call test.
+ *
+ * Generates, into the buffer code_base/code_size, one JIT function per type
+ * suffix and arity (its address is stored in the j<T><N> pointer), followed
+ * by a main routine that calls every C<T><N> and every CJ<T><N> and calls
+ * abort() when a result differs from the expected constant.
+ *
+ * Returns 0 after running the generated code, or the size written by
+ * jit_end() when jit_end() returns no function. */
+size_t
+run_test(jit_state_t *_jit, uint8_t *code_base, size_t code_size)
+{
+ jit_reloc_t jmpi_main;
+ void (*function)(void);
+ jit_reloc_t jmp;
+
+ jit_begin(_jit, code_base, code_size);
+
+ jmpi_main = jit_jmp(_jit); /* patched below, after the j<T><N> bodies */
+ /* calc(): copy the running result from R0 to R1, load stack slot O into
+ * R0, then emit jit_subr(R0, R1, R0) (presumably R0 = R1 - R0, mirroring
+ * the A-B-... expression of the C callees).
+ * get<N>(): load slot 0 (or the constant 0 when N == 0) and fold slots
+ * 1 .. N-1 in with calc(). */
+#define calc(B,T,R,O)\
+ jit_movr##B(_jit, R##1, R##0);\
+ jit_ldxi##T(_jit, R##0, JIT_SP, 8 * O);\
+ jit_subr##B(_jit, R##0, R##1, R##0);
+
+#define get0(B,T,R) jit_movi##B(_jit, R##0, 0);
+#define get1(B,T,R) jit_ldxi##T(_jit, R##0, JIT_SP, 8 * 0);
+#define get2(B,T,R) \
+ get1(B,T,R); \
+ calc(B,T,R,1);
+#define get3(B,T,R) \
+ get2(B,T,R); \
+ calc(B,T,R,2);
+#define get4(B,T,R) \
+ get3(B,T,R); \
+ calc(B,T,R,3);
+#define get5(B,T,R) \
+ get4(B,T,R); \
+ calc(B,T,R,4);
+#define get6(B,T,R) \
+ get5(B,T,R); \
+ calc(B,T,R,5);
+#define get7(B,T,R) \
+ get6(B,T,R); \
+ calc(B,T,R,6);
+#define get8(B,T,R) \
+ get7(B,T,R); \
+ calc(B,T,R,7);
+#define get9(B,T,R) \
+ get8(B,T,R); \
+ calc(B,T,R,8);
+#define get10(B,T,R) \
+ get9(B,T,R); \
+ calc(B,T,R,9);
+#define get11(B,T,R) \
+ get10(B,T,R); \
+ calc(B,T,R,10);
+#define get12(B,T,R) \
+ get11(B,T,R); \
+ calc(B,T,R,11);
+#define get13(B,T,R) \
+ get12(B,T,R); \
+ calc(B,T,R,12);
+#define get14(B,T,R) \
+ get13(B,T,R); \
+ calc(B,T,R,13);
+#define get15(B,T,R) \
+ get14(B,T,R); \
+ calc(B,T,R,14);
+ /* The extension used by defi() is defined away for one type per word
+ * size: jit_extr_i on 32-bit, jit_extr_l on 64-bit. */
+#if __WORDSIZE == 32
+# define jit_extr_i(_jit, u, v) /**/
+#else
+# define jit_extr_l(_jit, u, v) /**/
+#endif
+ /* Unsigned stores are mapped onto the signed store of the same width. */
+#if __WORDSIZE == 64
+#define jit_stxi_ui(_jit, u, r0, r1) jit_stxi_i(_jit, u, r0, r1)
+#endif
+#define jit_stxi_us(_jit, u, r0, r1) jit_stxi_s(_jit, u, r0, r1)
+#define jit_stxi_uc(_jit, u, r0, r1) jit_stxi_c(_jit, u, r0, r1)
+ /* abi<T>: operand ABI tag for the type suffix T. */
+#define abi_uc JIT_OPERAND_ABI_UINT8
+#define abi_c JIT_OPERAND_ABI_INT8
+#define abi_us JIT_OPERAND_ABI_UINT16
+#define abi_s JIT_OPERAND_ABI_INT16
+#define abi_ui JIT_OPERAND_ABI_UINT32
+#define abi_i JIT_OPERAND_ABI_INT32
+#define abi_ul JIT_OPERAND_ABI_UINT64
+#define abi_l JIT_OPERAND_ABI_INT64
+#define abi_f JIT_OPERAND_ABI_FLOAT
+#define abi_d JIT_OPERAND_ABI_DOUBLE
+ /* store<N>(T): comma-separated list of N memory operands, one per 8-byte
+ * stack slot starting at JIT_SP.  store0 is a single dummy operand that
+ * keeps the args[] initializer non-empty; the count passed with it is 0. */
+#define store0(T) jit_operand_mem(JIT_OPERAND_ABI_UINT8, JIT_SP, 0)
+#define store1(T) jit_operand_mem(abi##T, JIT_SP, 0 * 8)
+#define store2(T) store1(T), jit_operand_mem(abi##T, JIT_SP, 1 * 8)
+#define store3(T) store2(T), jit_operand_mem(abi##T, JIT_SP, 2 * 8)
+#define store4(T) store3(T), jit_operand_mem(abi##T, JIT_SP, 3 * 8)
+#define store5(T) store4(T), jit_operand_mem(abi##T, JIT_SP, 4 * 8)
+#define store6(T) store5(T), jit_operand_mem(abi##T, JIT_SP, 5 * 8)
+#define store7(T) store6(T), jit_operand_mem(abi##T, JIT_SP, 6 * 8)
+#define store8(T) store7(T), jit_operand_mem(abi##T, JIT_SP, 7 * 8)
+#define store9(T) store8(T), jit_operand_mem(abi##T, JIT_SP, 8 * 8)
+#define store10(T) store9(T), jit_operand_mem(abi##T, JIT_SP, 9 * 8)
+#define store11(T) store10(T), jit_operand_mem(abi##T, JIT_SP, 10 * 8)
+#define store12(T) store11(T), jit_operand_mem(abi##T, JIT_SP, 11 * 8)
+#define store13(T) store12(T), jit_operand_mem(abi##T, JIT_SP, 12 * 8)
+#define store14(T) store13(T), jit_operand_mem(abi##T, JIT_SP, 13 * 8)
+#define store15(T) store14(T), jit_operand_mem(abi##T, JIT_SP, 14 * 8)
+
+// Placeholder, won't actually be used.
+#define load0(T) jit_operand_mem(JIT_OPERAND_ABI_INT8, JIT_SP, 0)
+#define load1(T) jit_operand_mem(abi##T, JIT_SP, 0 * 8)
+#define load2(T) load1(T), jit_operand_mem(abi##T, JIT_SP, 1 * 8)
+#define load3(T) load2(T), jit_operand_mem(abi##T, JIT_SP, 2 * 8)
+#define load4(T) load3(T), jit_operand_mem(abi##T, JIT_SP, 3 * 8)
+#define load5(T) load4(T), jit_operand_mem(abi##T, JIT_SP, 4 * 8)
+#define load6(T) load5(T), jit_operand_mem(abi##T, JIT_SP, 5 * 8)
+#define load7(T) load6(T), jit_operand_mem(abi##T, JIT_SP, 6 * 8)
+#define load8(T) load7(T), jit_operand_mem(abi##T, JIT_SP, 7 * 8)
+#define load9(T) load8(T), jit_operand_mem(abi##T, JIT_SP, 8 * 8)
+#define load10(T) load9(T), jit_operand_mem(abi##T, JIT_SP, 9 * 8)
+#define load11(T) load10(T), jit_operand_mem(abi##T, JIT_SP, 10 * 8)
+#define load12(T) load11(T), jit_operand_mem(abi##T, JIT_SP, 11 * 8)
+#define load13(T) load12(T), jit_operand_mem(abi##T, JIT_SP, 12 * 8)
+#define load14(T) load13(T), jit_operand_mem(abi##T, JIT_SP, 13 * 8)
+#define load15(T) load14(T), jit_operand_mem(abi##T, JIT_SP, 14 * 8)
+ /* defi(T, N) / deff(T, N): emit the JIT function j<T><N>.  The current
+ * code address is recorded in the pointer, a frame is entered, N * 8
+ * bytes of stack are reserved, jit_load_args() is given the N stack-slot
+ * operands (presumably moving the incoming arguments into them), the
+ * result is computed with get<N>() and returned.  defi works in JIT_R0
+ * and applies jit_extr<T>; deff works in JIT_F0. */
+#define defi(T, N) \
+ { \
+ j##T##N = jit_address(_jit); \
+ size_t frame = jit_enter_jit_abi(_jit, 0, 0, 0); \
+ size_t stack = jit_align_stack(_jit, N * 8); \
+ jit_operand_t args[] = {store##N(T)}; \
+ jit_load_args(_jit, N, args); \
+ get##N(,T,JIT_R) \
+ jit_extr##T(_jit, JIT_R0, JIT_R0); \
+ jit_shrink_stack(_jit, stack); \
+ jit_leave_jit_abi(_jit, 0, 0, frame); \
+ jit_retr(_jit, JIT_R0); \
+ }
+
+#define deff(T, N) \
+ { \
+ j##T##N = jit_address(_jit); \
+ size_t frame = jit_enter_jit_abi(_jit, 0, 0, 0); \
+ size_t stack = jit_align_stack(_jit, N * 8); \
+ jit_operand_t args[] = {store##N(T)}; \
+ jit_load_args(_jit, N, args); \
+ get##N(T,T,JIT_F); \
+ jit_shrink_stack(_jit, stack); \
+ jit_leave_jit_abi(_jit, 0, 0, frame); \
+ jit_retr##T(_jit, JIT_F0); \
+ }
+ /* def<N>(X, T): emit def<X>(T, k) for every arity k = 0 .. N. */
+#define def0(X, T) def##X(T, 0)
+#define def1(X, T) def0(X, T) def##X(T, 1)
+#define def2(X, T) def1(X, T) def##X(T, 2)
+#define def3(X, T) def2(X, T) def##X(T, 3)
+#define def4(X, T) def3(X, T) def##X(T, 4)
+#define def5(X, T) def4(X, T) def##X(T, 5)
+#define def6(X, T) def5(X, T) def##X(T, 6)
+#define def7(X, T) def6(X, T) def##X(T, 7)
+#define def8(X, T) def7(X, T) def##X(T, 8)
+#define def9(X, T) def8(X, T) def##X(T, 9)
+#define def10(X, T) def9(X, T) def##X(T, 10)
+#define def11(X, T) def10(X, T) def##X(T, 11)
+#define def12(X, T) def11(X, T) def##X(T, 12)
+#define def13(X, T) def12(X, T) def##X(T, 13)
+#define def14(X, T) def13(X, T) def##X(T, 14)
+#define def15(X, T) def14(X, T) def##X(T, 15)
+#define def(T) def15(i, T)
+ def(_c)
+ def(_uc)
+ def(_s)
+ def(_us)
+ def(_i)
+#if __WORDSIZE == 64
+ def(_ui)
+ def(_l)
+#endif
+#undef def
+#define def(T) def15(f, T)
+ def(_f)
+ def(_d)
+#undef def
+ /* Main routine: the jump emitted at the top is patched to land here. */
+ jit_patch_here(_jit, jmpi_main);
+ size_t frame = jit_enter_jit_abi(_jit, 0, 0, 0);
+ size_t stack = jit_align_stack(_jit, 15 * 8);
+ /* push<N>(): store the constants 1 .. N into stack slots 0 .. N-1,
+ * going through register R##0. */
+#define push0(B,T,R) /**/
+#define push1(B,T,R)\
+ jit_movi##B(_jit, R##0, 1);\
+ jit_stxi##T(_jit, 0 * 8, JIT_SP, R##0);
+
+#define push2(B,T,R)\
+ push1(B,T,R)\
+ jit_movi##B(_jit, R##0, 2);\
+ jit_stxi##T(_jit, 1 * 8, JIT_SP, R##0);
+
+#define push3(B,T,R)\
+ push2(B,T,R)\
+ jit_movi##B(_jit, R##0, 3);\
+ jit_stxi##T(_jit, 2 * 8, JIT_SP, R##0);
+
+#define push4(B,T,R)\
+ push3(B,T,R)\
+ jit_movi##B(_jit, R##0, 4);\
+ jit_stxi##T(_jit, 3 * 8, JIT_SP, R##0);
+
+#define push5(B,T,R)\
+ push4(B,T,R)\
+ jit_movi##B(_jit, R##0, 5);\
+ jit_stxi##T(_jit, 4 * 8, JIT_SP, R##0);
+
+#define push6(B,T,R)\
+ push5(B,T,R)\
+ jit_movi##B(_jit, R##0, 6);\
+ jit_stxi##T(_jit, 5 * 8, JIT_SP, R##0);
+
+#define push7(B,T,R)\
+ push6(B,T,R)\
+ jit_movi##B(_jit, R##0, 7);\
+ jit_stxi##T(_jit, 6 * 8, JIT_SP, R##0);
+
+#define push8(B,T,R)\
+ push7(B,T,R)\
+ jit_movi##B(_jit, R##0, 8);\
+ jit_stxi##T(_jit, 7 * 8, JIT_SP, R##0);
+
+#define push9(B,T,R)\
+ push8(B,T,R)\
+ jit_movi##B(_jit, R##0, 9);\
+ jit_stxi##T(_jit, 8 * 8, JIT_SP, R##0);
+
+#define push10(B,T,R)\
+ push9(B,T,R)\
+ jit_movi##B(_jit, R##0, 10);\
+ jit_stxi##T(_jit, 9 * 8, JIT_SP, R##0);
+
+#define push11(B,T,R)\
+ push10(B,T,R)\
+ jit_movi##B(_jit, R##0, 11);\
+ jit_stxi##T(_jit, 10 * 8, JIT_SP, R##0);
+
+#define push12(B,T,R)\
+ push11(B,T,R)\
+ jit_movi##B(_jit, R##0, 12);\
+ jit_stxi##T(_jit, 11 * 8, JIT_SP, R##0);
+
+#define push13(B,T,R)\
+ push12(B,T,R)\
+ jit_movi##B(_jit, R##0, 13);\
+ jit_stxi##T(_jit, 12 * 8, JIT_SP, R##0);
+
+#define push14(B,T,R)\
+ push13(B,T,R)\
+ jit_movi##B(_jit, R##0, 14);\
+ jit_stxi##T(_jit, 13 * 8, JIT_SP, R##0);
+
+#define push15(B,T,R)\
+ push14(B,T,R)\
+ jit_movi##B(_jit, R##0, 15);\
+ jit_stxi##T(_jit, 14 * 8, JIT_SP, R##0);
+ /* calin(T, N) / calfn(T, N): push the constants, call the C callee
+ * C<T><N> with the N stack slots as operands, fetch the return value and
+ * compare it with the expected constant (<T><N> for integers, _w<N> for
+ * floating point; both defined elsewhere in the file).  On equality the
+ * branch skips the abort() call. */
+#define calin(T,N) \
+ { \
+ push##N(, T, JIT_R) \
+ jit_operand_t args[] = {load##N(T)}; \
+ jit_calli(_jit, C##T##N, N, args); \
+ jit_retval##T(_jit, JIT_R0); \
+ jit_movi(_jit, JIT_R1, T##N); \
+ jmp = jit_beqr(_jit, JIT_R0, JIT_R1); \
+ jit_calli_0(_jit, abort); \
+ jit_patch_here(_jit, jmp); \
+ }
+
+#define calfn(T,N) \
+ { \
+ push##N(T, T, JIT_F) \
+ jit_operand_t args[] = {load##N(T)}; \
+ jit_calli(_jit, C##T##N, N, args); \
+ jit_retval##T(_jit, JIT_F0); \
+ jit_movi##T(_jit, JIT_F1, _w##N); \
+ jmp = jit_beqr##T(_jit, JIT_F0, JIT_F1); \
+ jit_calli_0(_jit, abort); \
+ jit_patch_here(_jit, jmp); \
+ }
+#define calx0(X,T) cal##X##n(T,0)
+#define calx1(X,T) calx0(X,T) cal##X##n(T,1)
+#define calx2(X,T) calx1(X,T) cal##X##n(T,2)
+#define calx3(X,T) calx2(X,T) cal##X##n(T,3)
+#define calx4(X,T) calx3(X,T) cal##X##n(T,4)
+#define calx5(X,T) calx4(X,T) cal##X##n(T,5)
+#define calx6(X,T) calx5(X,T) cal##X##n(T,6)
+#define calx7(X,T) calx6(X,T) cal##X##n(T,7)
+#define calx8(X,T) calx7(X,T) cal##X##n(T,8)
+#define calx9(X,T) calx8(X,T) cal##X##n(T,9)
+#define calx10(X,T) calx9(X,T) cal##X##n(T,10)
+#define calx11(X,T) calx10(X,T) cal##X##n(T,11)
+#define calx12(X,T) calx11(X,T) cal##X##n(T,12)
+#define calx13(X,T) calx12(X,T) cal##X##n(T,13)
+#define calx14(X,T) calx13(X,T) cal##X##n(T,14)
+#define calx15(X,T) calx14(X,T) cal##X##n(T,15)
+#define cali(T) calx15(i,T)
+#define calf(T) calx15(f,T)
+
+ cali(_c)
+ cali(_uc)
+ cali(_s)
+ cali(_us)
+ cali(_i)
+#if __WORDSIZE == 64
+ cali(_ui)
+ cali(_l)
+#endif
+ calf(_f)
+ calf(_d)
+ /* Second pass: same checks, but through the CJ<T><N> wrappers.
+ * NOTE(review): the CJ wrappers are declared (void) and supply their own
+ * constants, yet N operands are still passed here - confirm this is
+ * intentional. */
+#undef calin
+#undef calfn
+#define calin(T,N) \
+ { \
+ push##N(, T, JIT_R) \
+ jit_operand_t args[] = {load##N(T)}; \
+ jit_calli(_jit, CJ##T##N, N, args); \
+ jit_retval##T(_jit, JIT_R0); \
+ jit_movi(_jit, JIT_R1, T##N); \
+ jmp = jit_beqr(_jit, JIT_R0, JIT_R1); \
+ jit_calli_0(_jit, abort); \
+ jit_patch_here(_jit, jmp); \
+ }
+
+#define calfn(T,N) \
+ { \
+ push##N(T, T, JIT_F) \
+ jit_operand_t args[] = {load##N(T)}; \
+ jit_calli(_jit, CJ##T##N, N, args); \
+ jit_retval##T(_jit, JIT_F0); \
+ jit_movi##T(_jit, JIT_F1, _w##N); \
+ jmp = jit_beqr##T(_jit, JIT_F0, JIT_F1); \
+ jit_calli_0(_jit, abort); \
+ jit_patch_here(_jit, jmp); \
+ }
+
+ cali(_c)
+ cali(_uc)
+ cali(_s)
+ cali(_us)
+ cali(_i)
+#if __WORDSIZE == 64
+ cali(_ui)
+ cali(_l)
+#endif
+ calf(_f)
+ calf(_d)
+
+ jit_shrink_stack(_jit, stack);
+ jit_leave_jit_abi(_jit, 0, 0, frame);
+ jit_ret(_jit);
+
+ size_t size = 0;
+ function = jit_end(_jit, &size);
+
+ if (function)
+ (*function)(); /* run the generated main routine */
+ else
+ return size; /* no code produced: report the size from jit_end() */
+
+ return 0;
+}
+/* Entry point: hands run_test to main_compiler() and returns its result. */
+int main(int argc, char *argv[])
+{
+ return main_compiler(argc, argv, run_test);
+}
diff --git a/deps/lightening/tests/z_clobber.c b/deps/lightening/tests/z_clobber.c
new file mode 100644
index 0000000..7503de7
--- /dev/null
+++ b/deps/lightening/tests/z_clobber.c
@@ -0,0 +1,1145 @@
+#include "test.h"
+
+/* Do not bother about the result of operations; only ensure that the
+ * arguments are valid and that registers not modified by the operation
+ * are not clobbered. */
+/* IV<n> / FV<n>: sentinel integer and floating-point values.
+ * IR<n> / FR<n>: the register paired with sentinel n (IR0-2 are JIT_R0-2,
+ * IR3-5 are JIT_V0-2, FR0-5 are JIT_F0-5). */
+#define IV0 0x10000
+#define IV1 0x10001
+#define IV2 0x10002
+#define IV3 0x10003
+#define IV4 0x10004
+#define IV5 0x10005
+#define FV0 100.0
+#define FV1 101.0
+#define FV2 102.0
+#define FV3 103.0
+#define FV4 104.0
+#define FV5 105.0
+#define IR0 JIT_R0
+#define IR1 JIT_R1
+#define IR2 JIT_R2
+#define IR3 JIT_V0
+#define IR4 JIT_V1
+#define IR5 JIT_V2
+#define FR0 JIT_F0
+#define FR1 JIT_F1
+#define FR2 JIT_F2
+#define FR3 JIT_F3
+#define FR4 JIT_F4
+#define FR5 JIT_F5
+/* setup() emits code loading the six integer registers with IV0..IV5;
+ * setup_f() and setup_d() do the same for JIT_F0..JIT_F5 with FV0..FV5,
+ * using the float and double move respectively. */
+#define setup() \
+ jit_movi(_jit, JIT_R0, IV0); \
+ jit_movi(_jit, JIT_R1, IV1); \
+ jit_movi(_jit, JIT_R2, IV2); \
+ jit_movi(_jit, JIT_V0, IV3); \
+ jit_movi(_jit, JIT_V1, IV4); \
+ jit_movi(_jit, JIT_V2, IV5);
+
+#define setup_f() \
+ jit_movi_f(_jit, JIT_F0, FV0); \
+ jit_movi_f(_jit, JIT_F1, FV1); \
+ jit_movi_f(_jit, JIT_F2, FV2); \
+ jit_movi_f(_jit, JIT_F3, FV3); \
+ jit_movi_f(_jit, JIT_F4, FV4); \
+ jit_movi_f(_jit, JIT_F5, FV5);
+
+#define setup_d() \
+ jit_movi_d(_jit, JIT_F0, FV0); \
+ jit_movi_d(_jit, JIT_F1, FV1); \
+ jit_movi_d(_jit, JIT_F2, FV2); \
+ jit_movi_d(_jit, JIT_F3, FV3); \
+ jit_movi_d(_jit, JIT_F4, FV4); \
+ jit_movi_d(_jit, JIT_F5, FV5);
+
+/* check(label, rn): emit a branch that skips an abort() call when integer
+ * register IR<rn> still equals its sentinel IV<rn>.  The label argument is
+ * not used by the expansion; it is only a unique name built by the callers.
+ * check<N>() applies check() to N registers, appending _0 .. _<N-1> to a
+ * label pasted from all of its arguments. */
+#define check(label, rn) \
+{ \
+ jit_reloc_t r = jit_beqi(_jit, IR##rn, IV##rn); \
+ jit_calli_0(_jit, abort); \
+ jit_patch_here(_jit, r); \
+}
+
+#define check1(k, l, i0) \
+ check(k##l##i0##_0, i0)
+
+#define check2(k, l, i0, i1) \
+ check(k##l##i0##i1##_0, i0) \
+ check(k##l##i0##i1##_1, i1)
+
+#define check3(k, l, i0, i1, i2) \
+ check(k##l##i0##i1##i2##_0, i0) \
+ check(k##l##i0##i1##i2##_1, i1) \
+ check(k##l##i0##i1##i2##_2, i2)
+
+#define check4(k, l, i0, i1, i2, i3) \
+ check(k##l##i0##i1##i2##i3##_0, i0) \
+ check(k##l##i0##i1##i2##i3##_1, i1) \
+ check(k##l##i0##i1##i2##i3##_2, i2) \
+ check(k##l##i0##i1##i2##i3##_3, i3)
+
+#define check5(k, l, i0, i1, i2, i3, i4) \
+ check(k##l##i0##i1##i2##i3##i4##_0, i0) \
+ check(k##l##i0##i1##i2##i3##i4##_1, i1) \
+ check(k##l##i0##i1##i2##i3##i4##_2, i2) \
+ check(k##l##i0##i1##i2##i3##i4##_3, i3) \
+ check(k##l##i0##i1##i2##i3##i4##_4, i4)
+
+#define check6(k, l, i0, i1, i2, i3, i4, i5) \
+ check(k##l##i0##i1##i2##i3##i4##i5##_0, i0) \
+ check(k##l##i0##i1##i2##i3##i4##i5##_1, i1) \
+ check(k##l##i0##i1##i2##i3##i4##i5##_2, i2) \
+ check(k##l##i0##i1##i2##i3##i4##i5##_3, i3) \
+ check(k##l##i0##i1##i2##i3##i4##i5##_4, i4) \
+ check(k##l##i0##i1##i2##i3##i4##i5##_5, i5)
+
+/* Slightly hacky: the original lightning test only uses JIT_F0-F5, and since
+ * all lightening platforms (at least at the moment) support JIT_F6, we can
+ * use it as a temporary register to hold the value to compare against in
+ * jit_beqr_f / jit_beqr_d.
+ *
+ * checkf(f, label, rn): load FV<rn> into JIT_F6 and emit a branch that
+ * skips an abort() call when FR<rn> equals it.  f is the type suffix (_f or
+ * _d); label is not used by the expansion.  checkf<N>() applies checkf() to
+ * N registers.
+ */
+#define checkf(f, label, rn) \
+{ \
+ jit_movi##f(_jit, JIT_F6, FV##rn); \
+ jit_reloc_t r = jit_beqr##f(_jit, FR##rn, JIT_F6); \
+ jit_calli_0(_jit, abort); \
+ jit_patch_here(_jit, r); \
+}
+
+#define checkf1(f, k, l, i0) \
+ checkf(f, f##k##l##i0##_0, i0)
+
+#define checkf2(f, k, l, i0, i1) \
+ checkf(f, f##k##l##i0##i1##_0, i0) \
+ checkf(f, f##k##l##i0##i1##_1, i1)
+
+#define checkf3(f, k, l, i0, i1, i2) \
+ checkf(f, f##k##l##i0##i1##i2##_0, i0) \
+ checkf(f, f##k##l##i0##i1##i2##_1, i1) \
+ checkf(f, f##k##l##i0##i1##i2##_2, i2)
+
+#define checkf4(f, k, l, i0, i1, i2, i3) \
+ checkf(f, f##k##l##i0##i1##i2##i3##_0, i0) \
+ checkf(f, f##k##l##i0##i1##i2##i3##_1, i1) \
+ checkf(f, f##k##l##i0##i1##i2##i3##_2, i2) \
+ checkf(f, f##k##l##i0##i1##i2##i3##_3, i3)
+
+#define checkf5(f, k, l, i0, i1, i2, i3, i4) \
+ checkf(f, f##k##l##i0##i1##i2##i3##i4##_0, i0) \
+ checkf(f, f##k##l##i0##i1##i2##i3##i4##_1, i1) \
+ checkf(f, f##k##l##i0##i1##i2##i3##i4##_2, i2) \
+ checkf(f, f##k##l##i0##i1##i2##i3##i4##_3, i3) \
+ checkf(f, f##k##l##i0##i1##i2##i3##i4##_4, i4)
+
+#define checkf6(f, k, l, i0, i1, i2, i3, i4, i5) \
+ checkf(f, f##k##l##i0##i1##i2##i3##i4##i5##_0, i0) \
+ checkf(f, f##k##l##i0##i1##i2##i3##i4##i5##_1, i1) \
+ checkf(f, f##k##l##i0##i1##i2##i3##i4##i5##_2, i2) \
+ checkf(f, f##k##l##i0##i1##i2##i3##i4##i5##_3, i3) \
+ checkf(f, f##k##l##i0##i1##i2##i3##i4##i5##_4, i4) \
+ checkf(f, f##k##l##i0##i1##i2##i3##i4##i5##_5, i5)
+/* Integer ALU clobber tests.  Every variant reloads the sentinels, sets the
+ * operand register(s) to 1, emits jit_<op>i or jit_<op>r and then checks the
+ * registers that were not passed to the instruction:
+ *   alui    two distinct registers plus an immediate
+ *   aluic   the same register twice plus an immediate
+ *   alur    three distinct registers
+ *   alurc0  first and second register operand identical
+ *   alurc1  first and third register operand identical
+ *   alurc2  all three register operands identical
+ * alu(l, op) runs all variants for each of the six rotations of the
+ * register assignment (only the first one when __ia64__ is non-zero). */
+#define alui(l, op, i0, i1, i2, i3, i4, i5) \
+ setup() \
+ jit_movi(_jit, IR##i0, 1); \
+ jit_##op##i(_jit, IR##i1, IR##i0, 1); \
+ check4(i, l, i2, i3, i4, i5)
+
+#define aluic(l, op, i0, i1, i2, i3, i4, i5) \
+ setup() \
+ jit_movi(_jit, IR##i0, 1); \
+ jit_##op##i(_jit, IR##i0, IR##i0, 1); \
+ check5(ic, l, i1, i2, i3, i4, i5)
+
+#define alur(l, op, i0, i1, i2, i3, i4, i5) \
+ setup() \
+ jit_movi(_jit, IR##i0, 1); \
+ jit_movi(_jit, IR##i1, 1); \
+ jit_##op##r(_jit, IR##i2, IR##i0, IR##i1); \
+ check3(r, l, i3, i4, i5)
+
+#define alurc0(l, op, i0, i1, i2, i3, i4, i5) \
+ setup() \
+ jit_movi(_jit, IR##i0, 1); \
+ jit_movi(_jit, IR##i1, 1); \
+ jit_##op##r(_jit, IR##i0, IR##i0, IR##i1); \
+ check4(r0, l, i2, i3, i4, i5)
+
+#define alurc1(l, op, i0, i1, i2, i3, i4, i5) \
+ setup() \
+ jit_movi(_jit, IR##i0, 1); \
+ jit_movi(_jit, IR##i1, 1); \
+ jit_##op##r(_jit, IR##i1, IR##i0, IR##i1); \
+ check4(r1, l, i2, i3, i4, i5)
+
+#define alurc2(l, op, i0, i1, i2, i3, i4, i5) \
+ setup() \
+ jit_movi(_jit, IR##i0, 1); \
+ jit_##op##r(_jit, IR##i0, IR##i0, IR##i0); \
+ check5(r2, l, i1, i2, i3, i4, i5)
+
+#define xalu(l, op, i0, i1, i2, i3, i4, i5) \
+ alui(l, op, i0, i1, i2, i3, i4, i5) \
+ aluic(l, op, i0, i1, i2, i3, i4, i5) \
+ alur(l, op, i0, i1, i2, i3, i4, i5) \
+ alurc0(l, op, i0, i1, i2, i3, i4, i5) \
+ alurc1(l, op, i0, i1, i2, i3, i4, i5) \
+ alurc2(l, op, i0, i1, i2, i3, i4, i5)
+
+#if __ia64__
+# define alu(l, op) \
+ xalu(l, op, 0, 1, 2, 3, 4, 5)
+#else
+# define alu(l, op) \
+ xalu(l, op, 0, 1, 2, 3, 4, 5) \
+ xalu(l, op, 1, 2, 3, 4, 5, 0) \
+ xalu(l, op, 2, 3, 4, 5, 0, 1) \
+ xalu(l, op, 3, 4, 5, 0, 1, 2) \
+ xalu(l, op, 4, 5, 0, 1, 2, 3) \
+ xalu(l, op, 5, 0, 1, 2, 3, 4)
+#endif
+/* Floating-point binary-operation clobber tests, the counterpart of the
+ * integer alu() family.  f is the type suffix (_f or _d).  fopi and fopic
+ * load 1.0 into JIT_F6 and use the register form jit_<op>r<f> with JIT_F6 as
+ * the last operand; fopr, foprc0, foprc1 and foprc2 use FR registers only,
+ * with the same operand-aliasing patterns as alur/alurc0/alurc1/alurc2.
+ * fop(l, op) runs every variant for _f and _d over the six register
+ * rotations (only _f and the first rotation when __ia64__ is non-zero). */
+#define fopi(f, l, op, f0, f1, f2, f3, f4, f5) \
+ setup##f() \
+ jit_movi##f(_jit, FR##f0, 1.0); \
+ jit_movi##f(_jit, JIT_F6, 1.0); \
+ jit_##op##r##f(_jit, FR##f1, FR##f0, JIT_F6); \
+ checkf4(f, i, l, f2, f3, f4, f5)
+
+#define fopic(f, l, op, f0, f1, f2, f3, f4, f5) \
+ setup##f() \
+ jit_movi##f(_jit, FR##f0, 1.0); \
+ jit_movi##f(_jit, JIT_F6, 1.0); \
+ jit_##op##r##f(_jit, FR##f0, FR##f0, JIT_F6); \
+ checkf5(f, ic, l, f1, f2, f3, f4, f5)
+
+#define fopr(f, l, op, f0, f1, f2, f3, f4, f5) \
+ setup##f() \
+ jit_movi##f(_jit, FR##f0, 1.0); \
+ jit_movi##f(_jit, FR##f1, 1.0); \
+ jit_##op##r##f(_jit, FR##f2, FR##f0, FR##f1); \
+ checkf3(f, r, l, f3, f4, f5)
+
+#define foprc0(f, l, op, f0, f1, f2, f3, f4, f5) \
+ setup##f() \
+ jit_movi##f(_jit, FR##f0, 1.0); \
+ jit_movi##f(_jit, FR##f1, 1.0); \
+ jit_##op##r##f(_jit, FR##f0, FR##f0, FR##f1); \
+ checkf4(f, r0, l, f2, f3, f4, f5)
+
+#define foprc1(f, l, op, f0, f1, f2, f3, f4, f5) \
+ setup##f() \
+ jit_movi##f(_jit, FR##f0, 1.0); \
+ jit_movi##f(_jit, FR##f1, 1.0); \
+ jit_##op##r##f(_jit, FR##f1, FR##f0, FR##f1); \
+ checkf4(f, r1, l, f2, f3, f4, f5)
+
+#define foprc2(f, l, op, f0, f1, f2, f3, f4, f5) \
+ setup##f() \
+ jit_movi##f(_jit, FR##f0, 1.0); \
+ jit_##op##r##f(_jit, FR##f0, FR##f0, FR##f0); \
+ checkf5(f, r2, l, f1, f2, f3, f4, f5)
+
+#define xfop(f, l, op, f0, f1, f2, f3, f4, f5) \
+ fopi(f, l, op, f0, f1, f2, f3, f4, f5) \
+ fopic(f, l, op, f0, f1, f2, f3, f4, f5) \
+ fopr(f, l, op, f0, f1, f2, f3, f4, f5) \
+ foprc0(f, l, op, f0, f1, f2, f3, f4, f5) \
+ foprc1(f, l, op, f0, f1, f2, f3, f4, f5) \
+ foprc2(f, l, op, f0, f1, f2, f3, f4, f5)
+#if __ia64__
+# define xxfop(l, op, f, f0, f1, f2, f3, f4, f5) \
+ xfop(_f, l, op, f0, f1, f2, f3, f4, f5)
+#else
+# define xxfop(l, op, f, f0, f1, f2, f3, f4, f5) \
+ xfop(_f, l, op, f0, f1, f2, f3, f4, f5) \
+ xfop(_d, l, op, f0, f1, f2, f3, f4, f5)
+#endif
+#if __ia64__
+# define fop(l, op) \
+ xxfop(l, op, f, 0, 1, 2, 3, 4, 5)
+#else
+# define fop(l, op) \
+ xxfop(l, op, f, 0, 1, 2, 3, 4, 5) \
+ xxfop(l, op, f, 1, 2, 3, 4, 5, 0) \
+ xxfop(l, op, f, 2, 3, 4, 5, 0, 1) \
+ xxfop(l, op, f, 3, 4, 5, 0, 1, 2) \
+ xxfop(l, op, f, 4, 5, 0, 1, 2, 3) \
+ xxfop(l, op, f, 5, 0, 1, 2, 3, 4)
+#endif
+/* Clobber tests for paired instructions: each variant emits a jit_<op>c*
+ * instruction immediately followed by a jit_<op>x* instruction and checks
+ * registers i3, i4 and i5.  The two-letter suffix names the operand form of
+ * the pair: aluxii = ci then xi, aluxir = ci then xr, aluxri = cr then xi,
+ * aluxrr = cr then xr.  alux(l, op) runs all four over the six register
+ * rotations (only the first one when __ia64__ is non-zero). */
+#define aluxii(l, op, i0, i1, i2, i3, i4, i5) \
+ setup() \
+ jit_movi(_jit, IR##i0, 1); \
+ jit_##op##ci(_jit, IR##i1, IR##i0, 1); \
+ jit_##op##xi(_jit, IR##i2, IR##i0, 1); \
+ check3(ii, l, i3, i4, i5)
+
+#define aluxir(l, op, i0, i1, i2, i3, i4, i5) \
+ setup() \
+ jit_movi(_jit, IR##i0, 1); \
+ jit_##op##ci(_jit, IR##i1, IR##i0, 1); \
+ jit_##op##xr(_jit, IR##i2, IR##i0, IR##i1); \
+ check3(ir, l, i3, i4, i5)
+
+#define aluxri(l, op, i0, i1, i2, i3, i4, i5) \
+ setup() \
+ jit_movi(_jit, IR##i0, 1); \
+ jit_movi(_jit, IR##i1, 1); \
+ jit_##op##cr(_jit, IR##i2, IR##i0, IR##i1); \
+ jit_##op##xi(_jit, IR##i0, IR##i1, 1); \
+ check3(ri, l, i3, i4, i5)
+
+#define aluxrr(l, op, i0, i1, i2, i3, i4, i5) \
+ setup() \
+ jit_movi(_jit, IR##i0, 1); \
+ jit_movi(_jit, IR##i1, 1); \
+ jit_##op##cr(_jit, IR##i2, IR##i0, IR##i1); \
+ jit_##op##xr(_jit, IR##i2, IR##i0, IR##i1); \
+ check3(rr, l, i3, i4, i5)
+
+#define xalux(l, op, i0, i1, i2, i3, i4, i5) \
+ aluxii(l, op, i0, i1, i2, i3, i4, i5) \
+ aluxir(l, op, i0, i1, i2, i3, i4, i5) \
+ aluxri(l, op, i0, i1, i2, i3, i4, i5) \
+ aluxrr(l, op, i0, i1, i2, i3, i4, i5)
+#if __ia64__
+# define alux(l, op) \
+ xalux(l, op, 0, 1, 2, 3, 4, 5)
+#else
+# define alux(l, op) \
+ xalux(l, op, 0, 1, 2, 3, 4, 5) \
+ xalux(l, op, 1, 2, 3, 4, 5, 0) \
+ xalux(l, op, 2, 3, 4, 5, 0, 1) \
+ xalux(l, op, 3, 4, 5, 0, 1, 2) \
+ xalux(l, op, 4, 5, 0, 1, 2, 3) \
+ xalux(l, op, 5, 0, 1, 2, 3, 4)
+#endif
+/* Same six operand patterns as the alu() family, but emitting the
+ * instructions whose names carry a _u suffix (jit_<op>i_u / jit_<op>r_u).
+ * alu_u(l, op) runs them over the six register rotations (only the first
+ * one when __ia64__ is non-zero). */
+#define alui_u(l, op, i0, i1, i2, i3, i4, i5) \
+ setup() \
+ jit_movi(_jit, IR##i0, 1); \
+ jit_##op##i_u(_jit, IR##i1, IR##i0, 1); \
+ check4(i_u, l, i2, i3, i4, i5)
+
+#define aluic_u(l, op, i0, i1, i2, i3, i4, i5) \
+ setup() \
+ jit_movi(_jit, IR##i0, 1); \
+ jit_##op##i_u(_jit, IR##i0, IR##i0, 1); \
+ check5(ic_u, l, i1, i2, i3, i4, i5)
+
+#define alur_u(l, op, i0, i1, i2, i3, i4, i5) \
+ setup() \
+ jit_movi(_jit, IR##i0, 1); \
+ jit_movi(_jit, IR##i1, 1); \
+ jit_##op##r_u(_jit, IR##i2, IR##i0, IR##i1); \
+ check3(r_u, l, i3, i4, i5)
+
+#define alurc0_u(l, op, i0, i1, i2, i3, i4, i5) \
+ setup() \
+ jit_movi(_jit, IR##i0, 1); \
+ jit_movi(_jit, IR##i1, 1); \
+ jit_##op##r_u(_jit, IR##i0, IR##i0, IR##i1); \
+ check4(r0_u, l, i2, i3, i4, i5)
+
+#define alurc1_u(l, op, i0, i1, i2, i3, i4, i5) \
+ setup() \
+ jit_movi(_jit, IR##i0, 1); \
+ jit_movi(_jit, IR##i1, 1); \
+ jit_##op##r_u(_jit, IR##i1, IR##i0, IR##i1); \
+ check4(r1_u, l, i2, i3, i4, i5)
+
+#define alurc2_u(l, op, i0, i1, i2, i3, i4, i5) \
+ setup() \
+ jit_movi(_jit, IR##i0, 1); \
+ jit_##op##r_u(_jit, IR##i0, IR##i0, IR##i0); \
+ check5(r2_u, l, i1, i2, i3, i4, i5)
+
+#define xalu_u(l, op, i0, i1, i2, i3, i4, i5) \
+ alui_u(l, op, i0, i1, i2, i3, i4, i5) \
+ aluic_u(l, op, i0, i1, i2, i3, i4, i5) \
+ alur_u(l, op, i0, i1, i2, i3, i4, i5) \
+ alurc0_u(l, op, i0, i1, i2, i3, i4, i5) \
+ alurc1_u(l, op, i0, i1, i2, i3, i4, i5) \
+ alurc2_u(l, op, i0, i1, i2, i3, i4, i5)
+#if __ia64__
+# define alu_u(l, op) \
+ xalu_u(l, op, 0, 1, 2, 3, 4, 5)
+#else
+# define alu_u(l, op) \
+ xalu_u(l, op, 0, 1, 2, 3, 4, 5) \
+ xalu_u(l, op, 1, 2, 3, 4, 5, 0) \
+ xalu_u(l, op, 2, 3, 4, 5, 0, 1) \
+ xalu_u(l, op, 3, 4, 5, 0, 1, 2) \
+ xalu_u(l, op, 4, 5, 0, 1, 2, 3) \
+ xalu_u(l, op, 5, 0, 1, 2, 3, 4)
+#endif
+/* Clobber tests for two-register integer instructions jit_<op>(a, b):
+ * unir passes two distinct registers and checks the other four, unirc
+ * passes the same register twice and checks the other five.  uni(l, op)
+ * runs both over the six register rotations (only the first one when
+ * __ia64__ is non-zero). */
+#define unir(l, op, i0, i1, i2, i3, i4, i5) \
+ setup() \
+ jit_movi(_jit, IR##i0, 1); \
+ jit_##op(_jit, IR##i1, IR##i0); \
+ check4(rr, l, i2, i3, i4, i5)
+
+#define unirc(l, op, i0, i1, i2, i3, i4, i5) \
+ setup() \
+ jit_movi(_jit, IR##i0, 1); \
+ jit_##op(_jit, IR##i0, IR##i0); \
+ check5(rc, l, i1, i2, i3, i4, i5)
+
+#define xuni(l, op, i0, i1, i2, i3, i4, i5) \
+ unir(l, op, i0, i1, i2, i3, i4, i5) \
+ unirc(l, op, i0, i1, i2, i3, i4, i5)
+#if __ia64__
+# define uni(l, op) \
+ xuni(l, op, 0, 1, 2, 3, 4, 5)
+#else
+# define uni(l, op) \
+ xuni(l, op, 0, 1, 2, 3, 4, 5) \
+ xuni(l, op, 1, 2, 3, 4, 5, 0) \
+ xuni(l, op, 2, 3, 4, 5, 0, 1) \
+ xuni(l, op, 3, 4, 5, 0, 1, 2) \
+ xuni(l, op, 4, 5, 0, 1, 2, 3) \
+ xuni(l, op, 5, 0, 1, 2, 3, 4)
+#endif
+/* Floating-point counterpart of uni(): unfr passes two distinct FR
+ * registers to jit_<op><f>, unfrc the same register twice.  xxunf runs both
+ * for _f and _d; unf(l, op) repeats that over the six register rotations
+ * (only the first one when __ia64__ is non-zero). */
+#define unfr(f, l, op, f0, f1, f2, f3, f4, f5) \
+ setup##f() \
+ jit_movi##f(_jit, FR##f0, 1); \
+ jit_##op##f(_jit, FR##f1, FR##f0); \
+ checkf4(f, rr, l, f2, f3, f4, f5)
+
+#define unfrc(f, l, op, f0, f1, f2, f3, f4, f5) \
+ setup##f() \
+ jit_movi##f(_jit, FR##f0, 1); \
+ jit_##op##f(_jit, FR##f0, FR##f0); \
+ checkf5(f, rc, l, f1, f2, f3, f4, f5)
+
+#define xunf(f, l, op, f0, f1, f2, f3, f4, f5) \
+ unfr(f, l, op, f0, f1, f2, f3, f4, f5) \
+ unfrc(f, l, op, f0, f1, f2, f3, f4, f5)
+#define xxunf(l, op, f0, f1, f2, f3, f4, f5) \
+ xunf(_f, l, op, f0, f1, f2, f3, f4, f5) \
+ xunf(_d, l, op, f0, f1, f2, f3, f4, f5)
+#if __ia64__
+# define unf(l, op) \
+ xxunf(l, op, 0, 1, 2, 3, 4, 5)
+#else
+# define unf(l, op) \
+ xxunf(l, op, 0, 1, 2, 3, 4, 5) \
+ xxunf(l, op, 1, 2, 3, 4, 5, 0) \
+ xxunf(l, op, 2, 3, 4, 5, 0, 1) \
+ xxunf(l, op, 3, 4, 5, 0, 1, 2) \
+ xxunf(l, op, 4, 5, 0, 1, 2, 3) \
+ xxunf(l, op, 5, 0, 1, 2, 3, 4)
+#endif
+/* Clobber tests for instructions jit_<op>r<f>(IR, FR, FR) that take one
+ * integer register and two floating-point registers.  Each variant loads
+ * both the integer and the floating-point sentinels, emits the instruction
+ * and checks the five other integer registers plus the FR registers that
+ * were not operands:
+ *   fcpi   second float operand is JIT_F6 loaded with 1.0
+ *   fcpr   two distinct FR registers
+ *   fcprc  the same FR register twice
+ * ifcp rotates the integer register assignment, xfcp the floating-point
+ * one, and fcmp(l, op) runs the result for _f and _d.  When __ia64__ is
+ * non-zero only fcpi, the first rotations and _f are used. */
+#define fcpi(f, l, op, r0,r1,r2,r3,r4,r5, f0,f1,f2,f3,f4,f5) \
+ setup() \
+ setup##f() \
+ jit_movi##f(_jit, FR##f0, 1.0); \
+ jit_movi##f(_jit, JIT_F6, 1.0); \
+ jit_##op##r##f(_jit, IR##r0, FR##f0, JIT_F6); \
+ check5(i##f##f0, l, r1, r2, r3, r4, r5) \
+ checkf5(f, i##r0, l, f1, f2, f3, f4, f5)
+
+#define fcpr(f, l, op, r0,r1,r2,r3,r4,r5, f0,f1,f2,f3,f4,f5) \
+ setup() \
+ setup##f() \
+ jit_movi##f(_jit, FR##f0, 1.0); \
+ jit_movi##f(_jit, FR##f1, 1.0); \
+ jit_##op##r##f(_jit, IR##r0, FR##f0, FR##f1); \
+ check5(r##f##f0, l, r1, r2, r3, r4, r5) \
+ checkf4(f, r##r0, l, f2, f3, f4, f5)
+
+#define fcprc(f, l, op, r0,r1,r2,r3,r4,r5, f0,f1,f2,f3,f4,f5) \
+ setup() \
+ setup##f() \
+ jit_movi##f(_jit, FR##f0, 1.0); \
+ jit_##op##r##f(_jit, IR##r0, FR##f0, FR##f0); \
+ check5(rc##f##f0, l, r1, r2, r3, r4, r5) \
+ checkf5(f, rc##r0, l, f1, f2, f3, f4, f5)
+
+#if __ia64__
+# define ifcp(f, l, op, r0,r1,r2,r3,r4,r5, f0,f1,f2,f3,f4,f5) \
+ fcpi(f, l, op, r0,r1,r2,r3,r4,r5, f0,f1,f2,f3,f4,f5)
+#else
+# define ifcp(f, l, op, r0,r1,r2,r3,r4,r5, f0,f1,f2,f3,f4,f5) \
+ fcpi(f, l, op, r0,r1,r2,r3,r4,r5, f0,f1,f2,f3,f4,f5) \
+ fcpr(f, l, op, r0,r1,r2,r3,r4,r5, f0,f1,f2,f3,f4,f5) \
+ fcprc(f, l, op, r0,r1,r2,r3,r4,r5, f0,f1,f2,f3,f4,f5) \
+ fcpi(f, l, op, r1,r2,r3,r4,r5,r0, f0,f1,f2,f3,f4,f5) \
+ fcpr(f, l, op, r1,r2,r3,r4,r5,r0, f0,f1,f2,f3,f4,f5) \
+ fcprc(f, l, op, r1,r2,r3,r4,r5,r0, f0,f1,f2,f3,f4,f5) \
+ fcpi(f, l, op, r2,r3,r4,r5,r0,r1, f0,f1,f2,f3,f4,f5) \
+ fcpr(f, l, op, r2,r3,r4,r5,r0,r1, f0,f1,f2,f3,f4,f5) \
+ fcprc(f, l, op, r2,r3,r4,r5,r0,r1, f0,f1,f2,f3,f4,f5) \
+ fcpi(f, l, op, r3,r4,r5,r0,r1,r2, f0,f1,f2,f3,f4,f5) \
+ fcpr(f, l, op, r3,r4,r5,r0,r1,r2, f0,f1,f2,f3,f4,f5) \
+ fcprc(f, l, op, r3,r4,r5,r0,r1,r2, f0,f1,f2,f3,f4,f5) \
+ fcpi(f, l, op, r4,r5,r0,r1,r2,r3, f0,f1,f2,f3,f4,f5) \
+ fcpr(f, l, op, r4,r5,r0,r1,r2,r3, f0,f1,f2,f3,f4,f5) \
+ fcprc(f, l, op, r4,r5,r0,r1,r2,r3, f0,f1,f2,f3,f4,f5) \
+ fcpi(f, l, op, r5,r0,r1,r2,r3,r4, f0,f1,f2,f3,f4,f5) \
+ fcpr(f, l, op, r5,r0,r1,r2,r3,r4, f0,f1,f2,f3,f4,f5) \
+ fcprc(f, l, op, r5,r0,r1,r2,r3,r4, f0,f1,f2,f3,f4,f5)
+#endif
+#if __ia64__
+# define xfcp(f, l, op, r0,r1,r2,r3,r4,r5, f0,f1,f2,f3,f4,f5) \
+ ifcp(f, l, op, r0,r1,r2,r3,r4,r5, f0,f1,f2,f3,f4,f5)
+#else
+# define xfcp(f, l, op, r0,r1,r2,r3,r4,r5, f0,f1,f2,f3,f4,f5) \
+ ifcp(f, l, op, r0,r1,r2,r3,r4,r5, f0,f1,f2,f3,f4,f5) \
+ ifcp(f, l, op, r0,r1,r2,r3,r4,r5, f1,f2,f3,f4,f5,f0) \
+ ifcp(f, l, op, r0,r1,r2,r3,r4,r5, f2,f3,f4,f5,f0,f1) \
+ ifcp(f, l, op, r0,r1,r2,r3,r4,r5, f3,f4,f5,f0,f1,f2) \
+ ifcp(f, l, op, r0,r1,r2,r3,r4,r5, f4,f5,f0,f1,f2,f3) \
+ ifcp(f, l, op, r0,r1,r2,r3,r4,r5, f5,f0,f1,f2,f3,f4)
+#endif
+#if __ia64__
+# define fcmp(l, op) \
+ xfcp(_f, l, op, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5)
+#else
+# define fcmp(l, op) \
+ xfcp(_f, l, op, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5) \
+ xfcp(_d, l, op, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5)
+#endif
+
+#define imvi(l, i0, i1, i2, i3, i4, i5) \
+ setup() \
+ jit_movi(_jit, IR##i0, 1); \
+ check5(i, l, i1, i2, i3, i4, i5)
+
+#define imvr(l, i0, i1, i2, i3, i4, i5) \
+ setup() \
+ jit_movi(_jit, IR##i1, 1); \
+ jit_movr(_jit, IR##i0, IR##i1); \
+ check4(r, l, i2, i3, i4, i5)
+
+#define xmvi(l, i0, i1, i2, i3, i4, i5) \
+ imvi(l, i0, i1, i2, i3, i4, i5) \
+ imvr(l, i0, i1, i2, i3, i4, i5)
+#if __ia64__
+# define mvi(l) \
+ xmvi(l, 0, 1, 2, 3, 4, 5)
+#else
+# define mvi(l) \
+ xmvi(l, 0, 1, 2, 3, 4, 5) \
+ xmvi(l, 1, 2, 3, 4, 5, 0) \
+ xmvi(l, 2, 3, 4, 5, 0, 1) \
+ xmvi(l, 3, 4, 5, 0, 1, 2) \
+ xmvi(l, 4, 5, 0, 1, 2, 3) \
+ xmvi(l, 5, 0, 1, 2, 3, 4)
+#endif
+
+#define fmvi(f, l, f0, f1, f2, f3, f4, f5) \
+ setup##f() \
+ jit_movi##f(_jit, FR##f0, 1); \
+ checkf5(f, i, l, f1, f2, f3, f4, f5)
+
+#define fmvr(f, l, f0, f1, f2, f3, f4, f5) \
+ setup##f() \
+ jit_movi##f(_jit, FR##f1, 1); \
+ jit_movr##f(_jit, FR##f0, FR##f1); \
+ checkf4(f, r, l, f2, f3, f4, f5)
+
+#define xmvf(f, l, f0, f1, f2, f3, f4, f5) \
+ fmvi(f, l, f0, f1, f2, f3, f4, f5) \
+ fmvr(f, l, f0, f1, f2, f3, f4, f5)
+#if __ia64__
+# define xxmvf(f, l) \
+ xmvf(f, l, 0, 1, 2, 3, 4, 5)
+#else
+# define xxmvf(f, l) \
+ xmvf(f, l, 0, 1, 2, 3, 4, 5) \
+ xmvf(f, l, 1, 2, 3, 4, 5, 0) \
+ xmvf(f, l, 2, 3, 4, 5, 0, 1) \
+ xmvf(f, l, 3, 4, 5, 0, 1, 2) \
+ xmvf(f, l, 4, 5, 0, 1, 2, 3) \
+ xmvf(f, l, 5, 0, 1, 2, 3, 4)
+#endif
+#define mvf(l) \
+ xxmvf(_f, l) \
+ xxmvf(_d, l)
+
+#define f2fr(f, l, op, f0, f1, f2, f3, f4, f5) \
+ setup##f() \
+ jit_movi##f(_jit, FR##f0, 1); \
+ jit_##op(_jit, FR##f1, FR##f0); \
+ checkf4(f, rr, l, f2, f3, f4, f5)
+
+#define f2frc(f, l, op, f0, f1, f2, f3, f4, f5) \
+ setup##f() \
+ jit_movi##f(_jit, FR##f0, 1); \
+ jit_##op(_jit, FR##f0, FR##f0); \
+ checkf5(f, rc, l, f1, f2, f3, f4, f5)
+
+#define xf2f(f, l, op, f0, f1, f2, f3, f4, f5) \
+ f2fr(f, l, op, f0, f1, f2, f3, f4, f5) \
+ f2frc(f, l, op, f0, f1, f2, f3, f4, f5)
+#if __ia64__
+# define f2f(l, f, op) \
+ xf2f(f, l, op, 0, 1, 2, 3, 4, 5)
+#else
+# define f2f(l, f, op) \
+ xf2f(f, l, op, 0, 1, 2, 3, 4, 5) \
+ xf2f(f, l, op, 1, 2, 3, 4, 5, 0) \
+ xf2f(f, l, op, 2, 3, 4, 5, 0, 1) \
+ xf2f(f, l, op, 3, 4, 5, 0, 1, 2) \
+ xf2f(f, l, op, 4, 5, 0, 1, 2, 3) \
+ xf2f(f, l, op, 5, 0, 1, 2, 3, 4)
+#endif
+
+#define f2ir(f, l, op, r0,r1,r2,r3,r4,r5, f0,f1,f2,f3,f4,f5) \
+ setup() \
+ setup##f() \
+ jit_movi##f(_jit, FR##f0, 1); \
+ jit_##op##f(_jit, IR##r0, FR##f0); \
+ check5(r##f##f0, l, r1, r2, r3, r4, r5) \
+ checkf5(f, i##r0, l, f1, f2, f3, f4, f5)
+
+#if __ia64__
+# define if2i(f, l, op, r0,r1,r2,r3,r4,r5, f0,f1,f2,f3,f4,f5) \
+ f2ir(f, l, op, r0,r1,r2,r3,r4,r5, f0,f1,f2,f3,f4,f5)
+# define xf2i(f, l, op, r0,r1,r2,r3,r4,r5, f0,f1,f2,f3,f4,f5) \
+ if2i(f, l, op, r0,r1,r2,r3,r4,r5, f0,f1,f2,f3,f4,f5)
+#else
+# define if2i(f, l, op, r0,r1,r2,r3,r4,r5, f0,f1,f2,f3,f4,f5) \
+ f2ir(f, l, op, r0,r1,r2,r3,r4,r5, f0,f1,f2,f3,f4,f5) \
+ f2ir(f, l, op, r1,r2,r3,r4,r5,r0, f0,f1,f2,f3,f4,f5) \
+ f2ir(f, l, op, r2,r3,r4,r5,r0,r1, f0,f1,f2,f3,f4,f5) \
+ f2ir(f, l, op, r3,r4,r5,r0,r1,r2, f0,f1,f2,f3,f4,f5) \
+ f2ir(f, l, op, r4,r5,r0,r1,r2,r3, f0,f1,f2,f3,f4,f5) \
+ f2ir(f, l, op, r5,r0,r1,r2,r3,r4, f0,f1,f2,f3,f4,f5)
+# define xf2i(f, l, op, r0,r1,r2,r3,r4,r5, f0,f1,f2,f3,f4,f5) \
+ if2i(f, l, op, r0,r1,r2,r3,r4,r5, f0,f1,f2,f3,f4,f5) \
+ if2i(f, l, op, r0,r1,r2,r3,r4,r5, f1,f2,f3,f4,f5,f0) \
+ if2i(f, l, op, r0,r1,r2,r3,r4,r5, f2,f3,f4,f5,f0,f1) \
+ if2i(f, l, op, r0,r1,r2,r3,r4,r5, f3,f4,f5,f0,f1,f2) \
+ if2i(f, l, op, r0,r1,r2,r3,r4,r5, f4,f5,f0,f1,f2,f3) \
+ if2i(f, l, op, r0,r1,r2,r3,r4,r5, f5,f0,f1,f2,f3,f4)
+#endif
+#define f2i(l, op) \
+ xf2i(_f, l, op, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5) \
+ xf2i(_d, l, op, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5)
+
+#define i2fr(f, l, op, r0,r1,r2,r3,r4,r5, f0,f1,f2,f3,f4,f5) \
+ setup() \
+ setup##f() \
+ jit_movi(_jit, IR##r0, 1); \
+ jit_##op##f(_jit, FR##f0, IR##r0); \
+ check5(r##f##f0, l, r1, r2, r3, r4, r5) \
+ checkf5(f, i##r0, l, f1, f2, f3, f4, f5)
+#if __ia64__
+# define ii2f(f, l, op, r0,r1,r2,r3,r4,r5, f0,f1,f2,f3,f4,f5) \
+ i2fr(f, l, op, r0,r1,r2,r3,r4,r5, f0,f1,f2,f3,f4,f5)
+# define xi2f(f, l, op, r0,r1,r2,r3,r4,r5, f0,f1,f2,f3,f4,f5) \
+ ii2f(f, l, op, r0,r1,r2,r3,r4,r5, f0,f1,f2,f3,f4,f5)
+#else
+# define ii2f(f, l, op, r0,r1,r2,r3,r4,r5, f0,f1,f2,f3,f4,f5) \
+ i2fr(f, l, op, r0,r1,r2,r3,r4,r5, f0,f1,f2,f3,f4,f5) \
+ i2fr(f, l, op, r1,r2,r3,r4,r5,r0, f0,f1,f2,f3,f4,f5) \
+ i2fr(f, l, op, r2,r3,r4,r5,r0,r1, f0,f1,f2,f3,f4,f5) \
+ i2fr(f, l, op, r3,r4,r5,r0,r1,r2, f0,f1,f2,f3,f4,f5) \
+ i2fr(f, l, op, r4,r5,r0,r1,r2,r3, f0,f1,f2,f3,f4,f5) \
+ i2fr(f, l, op, r5,r0,r1,r2,r3,r4, f0,f1,f2,f3,f4,f5)
+# define xi2f(f, l, op, r0,r1,r2,r3,r4,r5, f0,f1,f2,f3,f4,f5) \
+ ii2f(f, l, op, r0,r1,r2,r3,r4,r5, f0,f1,f2,f3,f4,f5) \
+ ii2f(f, l, op, r0,r1,r2,r3,r4,r5, f1,f2,f3,f4,f5,f0) \
+ ii2f(f, l, op, r0,r1,r2,r3,r4,r5, f2,f3,f4,f5,f0,f1) \
+ ii2f(f, l, op, r0,r1,r2,r3,r4,r5, f3,f4,f5,f0,f1,f2) \
+ ii2f(f, l, op, r0,r1,r2,r3,r4,r5, f4,f5,f0,f1,f2,f3) \
+ ii2f(f, l, op, r0,r1,r2,r3,r4,r5, f5,f0,f1,f2,f3,f4)
+#endif
+#define i2f(l, op) \
+ xi2f(_f, l, op, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5) \
+ xi2f(_d, l, op, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5)
+
+/* Displacements handed to the indexed load/store probes below.  The probe
+ * macros paste their access suffix onto `off' (off##i, off##f), so there is
+ * one name per suffix; the unsigned suffixes alias the signed ones.  Each
+ * value equals the access width in bytes for that suffix. */
+#define off_c 1
+#define off_uc off_c
+#define off_s 2
+#define off_us off_s
+#define off_i 4
+#define off_ui off_i
+#define off_l 8
+#define off_f 4
+#define off_d 8
+
+#define ildi(i, l, r0, r1, r2, r3, r4, r5) \
+ setup() \
+ jit_##ldi##i(_jit, IR##r0, buff); \
+ check5(ldi##i, l, r1, r2, r3, r4, r5)
+
+#define ildr(i, l, r0, r1, r2, r3, r4, r5) \
+ setup() \
+ jit_movi(_jit, IR##r1, (jit_imm_t)buff); \
+ jit_##ldr##i(_jit, IR##r0, IR##r1); \
+ check4(ldr##i, l, r2, r3, r4, r5)
+
+#define ildr0(i, l, r0, r1, r2, r3, r4, r5) \
+ setup() \
+ jit_movi(_jit, IR##r0, (jit_imm_t)buff); \
+ jit_##ldr##i(_jit, IR##r0, IR##r0); \
+ check5(ldr##i, l, r1, r2, r3, r4, r5)
+
+#define ildxi(i, l, r0, r1, r2, r3, r4, r5) \
+ setup() \
+ jit_movi(_jit, IR##r1, (jit_imm_t)buff); \
+ jit_ldxi##i(_jit, IR##r0, IR##r1, off##i); \
+ check4(ldxi##i, l, r2, r3, r4, r5)
+
+#define ildxr(i, l, r0, r1, r2, r3, r4, r5) \
+ setup() \
+ jit_movi(_jit, IR##r1, (jit_imm_t)buff); \
+ jit_movi(_jit, IR##r2, off##i); \
+ jit_ldxr##i(_jit, IR##r0, IR##r1, IR##r2); \
+ check3(ldxr##i, l, r3, r4, r5)
+
+#define ildxr0(i, l, r0, r1, r2, r3, r4, r5) \
+ setup() \
+ jit_movi(_jit, IR##r1, (jit_imm_t)buff); \
+ jit_movi(_jit, IR##r0, off##i); \
+ jit_ldxr##i(_jit, IR##r0, IR##r1, IR##r0); \
+ check4(ldxr0##i, l, r2, r3, r4, r5)
+
+#define ildxr1(i, l, r0, r1, r2, r3, r4, r5) \
+ setup() \
+ jit_movi(_jit, IR##r0, (jit_imm_t)buff); \
+ jit_movi(_jit, IR##r1, off##i); \
+ jit_ldxr##i(_jit, IR##r0, IR##r0, IR##r1); \
+ check4(ldxr1##i, l, r2, r3, r4, r5)
+
+#define xxldi(i, l, r0, r1, r2, r3, r4, r5) \
+ ildi(i, l, r0, r1, r2, r3, r4, r5) \
+ ildr(i, l, r0, r1, r2, r3, r4, r5) \
+ ildr0(i, l, r0, r1, r2, r3, r4, r5) \
+ ildxi(i, l, r0, r1, r2, r3, r4, r5) \
+ ildxr(i, l, r0, r1, r2, r3, r4, r5) \
+ ildxr0(i, l, r0, r1, r2, r3, r4, r5) \
+ ildxr1(i, l, r0, r1, r2, r3, r4, r5)
+#if __WORDSIZE == 32
+#define xxxldi(l, r0, r1, r2, r3, r4, r5)
+#else
+#define xxxldi(l, r0, r1, r2, r3, r4, r5) \
+ xxldi(_ui, l, r0, r1, r2, r3, r4, r5) \
+ xxldi( _l, l, r0, r1, r2, r3, r4, r5)
+#endif
+#define xldi(l, r0, r1, r2, r3, r4, r5) \
+ xxldi( _c, l, r0, r1, r2, r3, r4, r5) \
+ xxldi(_uc, l, r0, r1, r2, r3, r4, r5) \
+ xxldi( _s, l, r0, r1, r2, r3, r4, r5) \
+ xxldi(_us, l, r0, r1, r2, r3, r4, r5) \
+ xxldi( _i, l, r0, r1, r2, r3, r4, r5) \
+ xxxldi(l, r0, r1, r2, r3, r4, r5)
+#if __ia64__
+# define ldi(l) \
+ xldi(l, 0, 1, 2, 3, 4, 5)
+#else
+# define ldi(l) \
+ xldi(l, 0, 1, 2, 3, 4, 5) \
+ xldi(l, 1, 2, 3, 4, 5, 0) \
+ xldi(l, 2, 3, 4, 5, 0, 1) \
+ xldi(l, 3, 4, 5, 0, 1, 2) \
+ xldi(l, 4, 5, 0, 1, 2, 3) \
+ xldi(l, 5, 0, 1, 2, 3, 4)
+#endif
+
+#define fldi(f, l, r0,r1,r2,r3,r4,r5, f0,f1,f2,f3,f4,f5) \
+ setup() \
+ setup##f() \
+ jit_ldi##f(_jit, FR##f0, buff); \
+ check6(ldi##f##r0##f0, l, r0, r1, r2, r3, r4, r5) \
+ checkf5(f, ldi##r0##f0, l, f1, f2, f3, f4, f5)
+
+#define fldr(f, l, r0,r1,r2,r3,r4,r5, f0,f1,f2,f3,f4,f5) \
+ setup() \
+ setup##f() \
+ jit_movi(_jit, IR##r0, (jit_imm_t)buff); \
+ jit_ldr##f(_jit, FR##f0, IR##r0); \
+ check5(ldr##f##r0##f0, l, r1, r2, r3, r4, r5) \
+ checkf5(f, ldr##r0##f0, l, f1, f2, f3, f4, f5)
+
+#define fldxi(f, l, r0,r1,r2,r3,r4,r5, f0,f1,f2,f3,f4,f5) \
+ setup() \
+ setup##f() \
+ jit_movi(_jit, IR##r0, (jit_imm_t)buff); \
+ jit_ldxi##f(_jit, FR##f0, IR##r0, off##f); \
+ check5(ldxi##f##r0##f0, l, r1, r2, r3, r4, r5) \
+ checkf5(f, ldxi##r0##f0, l, f1, f2, f3, f4, f5)
+
+#define fldxr(f, l, r0,r1,r2,r3,r4,r5, f0,f1,f2,f3,f4,f5) \
+ setup() \
+ setup##f() \
+ jit_movi(_jit, IR##r0, (jit_imm_t)buff); \
+ jit_movi(_jit, IR##r1, off##f); \
+ jit_ldxr##f(_jit, FR##f0, IR##r0, IR##r1); \
+ check4(ldxr##f##r0##f0, l, r2, r3, r4, r5) \
+ checkf5(f, ldxr##r0##f0, l, f1, f2, f3, f4, f5)
+
+#define xldf(f, l, r0,r1,r2,r3,r4,r5, f0,f1,f2,f3,f4,f5) \
+ fldi(f, l, r0,r1,r2,r3,r4,r5, f0,f1,f2,f3,f4,f5) \
+ fldr(f, l, r0,r1,r2,r3,r4,r5, f0,f1,f2,f3,f4,f5) \
+ fldxi(f, l, r0,r1,r2,r3,r4,r5, f0,f1,f2,f3,f4,f5) \
+ fldxr(f, l, r0,r1,r2,r3,r4,r5, f0,f1,f2,f3,f4,f5)
+
+#define xxldf(l, r0,r1,r2,r3,r4,r5, f0,f1,f2,f3,f4,f5) \
+ xldf(_f, l, r0,r1,r2,r3,r4,r5, f0,f1,f2,f3,f4,f5) \
+ xldf(_d, l, r0,r1,r2,r3,r4,r5, f0,f1,f2,f3,f4,f5)
+#if __ia64__
+# define ixldf(l, r0,r1,r2,r3,r4,r5, f0,f1,f2,f3,f4,f5) \
+ xxldf(l, r0,r1,r2,r3,r4,r5, f0,f1,f2,f3,f4,f5)
+#else
+# define fxldf(l, r0,r1,r2,r3,r4,r5, f0,f1,f2,f3,f4,f5) \
+ xxldf(l, r0,r1,r2,r3,r4,r5, f0,f1,f2,f3,f4,f5) \
+ xxldf(l, r0,r1,r2,r3,r4,r5, f1,f2,f3,f4,f5,f0) \
+ xxldf(l, r0,r1,r2,r3,r4,r5, f2,f3,f4,f5,f0,f1) \
+ xxldf(l, r0,r1,r2,r3,r4,r5, f3,f4,f5,f0,f1,f2) \
+ xxldf(l, r0,r1,r2,r3,r4,r5, f4,f5,f0,f1,f2,f3) \
+ xxldf(l, r0,r1,r2,r3,r4,r5, f5,f0,f1,f2,f3,f4)
+# define ixldf(l, r0,r1,r2,r3,r4,r5, f0,f1,f2,f3,f4,f5) \
+ fxldf(l, r0,r1,r2,r3,r4,r5, f0,f1,f2,f3,f4,f5) \
+ fxldf(l, r1,r2,r3,r4,r5,r0, f0,f1,f2,f3,f4,f5) \
+ fxldf(l, r2,r3,r4,r5,r0,r1, f0,f1,f2,f3,f4,f5) \
+ fxldf(l, r3,r4,r5,r0,r1,r2, f0,f1,f2,f3,f4,f5) \
+ fxldf(l, r4,r5,r0,r1,r2,r3, f0,f1,f2,f3,f4,f5) \
+ fxldf(l, r5,r0,r1,r2,r3,r4, f0,f1,f2,f3,f4,f5)
+#endif
+#define ldf(l) \
+ ixldf(l, 0,1,2,3,4,5, 0,1,2,3,4,5)
+
+#define isti(i, l, r0, r1, r2, r3, r4, r5) \
+ setup() \
+ jit_sti##i(_jit, buff, IR##r0); \
+ check5(sti##i, l, r1, r2, r3, r4, r5)
+
+#define istr(i, l, r0, r1, r2, r3, r4, r5) \
+ setup() \
+ jit_movi(_jit, IR##r1, (jit_imm_t)buff); \
+ jit_str##i(_jit, IR##r1, IR##r0); \
+ check4(str##i, l, r2, r3, r4, r5)
+
+#define istr0(i, l, r0, r1, r2, r3, r4, r5) \
+ setup() \
+ jit_movi(_jit, IR##r1, (jit_imm_t)buff); \
+ jit_str##i(_jit, IR##r1, IR##r0); \
+ check4(str0##i, l, r2, r3, r4, r5)
+
+#define istxi(i, l, r0, r1, r2, r3, r4, r5) \
+ setup() \
+ jit_movi(_jit, IR##r1, (jit_imm_t)buff); \
+ jit_stxi##i(_jit, off##i, IR##r1, IR##r0); \
+ check4(stxi##i, l, r2, r3, r4, r5)
+
+#define istxr(i, l, r0, r1, r2, r3, r4, r5) \
+ setup() \
+ jit_movi(_jit, IR##r1, (jit_imm_t)buff); \
+ jit_movi(_jit, IR##r2, off##i); \
+ jit_stxr##i(_jit, IR##r2, IR##r1, IR##r0); \
+ check3(stxr##i, l, r3, r4, r5)
+
+#define istxr0(i, l, r0, r1, r2, r3, r4, r5) \
+ setup() \
+ jit_movi(_jit, IR##r1, (jit_imm_t)buff); \
+ jit_movi(_jit, IR##r0, off##i); \
+ jit_stxr##i(_jit, IR##r0, IR##r1, IR##r0); \
+ check4(stxr0##i, l, r2, r3, r4, r5)
+
+#define istxr1(i, l, r0, r1, r2, r3, r4, r5) \
+ setup() \
+ jit_movi(_jit, IR##r0, (jit_imm_t)buff); \
+ jit_movi(_jit, IR##r1, off##i); \
+ jit_stxr##i(_jit, IR##r1, IR##r0, IR##r0); \
+ check4(stxr1##i, l, r2, r3, r4, r5)
+
+#define xxsti(i, l, r0, r1, r2, r3, r4, r5) \
+ isti(i, l, r0, r1, r2, r3, r4, r5) \
+ istr(i, l, r0, r1, r2, r3, r4, r5) \
+ istr0(i, l, r0, r1, r2, r3, r4, r5) \
+ istxi(i, l, r0, r1, r2, r3, r4, r5) \
+ istxr(i, l, r0, r1, r2, r3, r4, r5) \
+ istxr0(i, l, r0, r1, r2, r3, r4, r5) \
+ istxr1(i, l, r0, r1, r2, r3, r4, r5)
+#if __WORDSIZE == 32
+#define xxxsti(l, r0, r1, r2, r3, r4, r5)
+#else
+#define xxxsti(l, r0, r1, r2, r3, r4, r5) \
+ xxsti( _l, l, r0, r1, r2, r3, r4, r5)
+#endif
+#define xsti(l, r0, r1, r2, r3, r4, r5) \
+ xxsti( _c, l, r0, r1, r2, r3, r4, r5) \
+ xxsti( _s, l, r0, r1, r2, r3, r4, r5) \
+ xxsti( _i, l, r0, r1, r2, r3, r4, r5) \
+ xxxsti(l, r0, r1, r2, r3, r4, r5)
+#if __ia64__
+# define sti(l) \
+ xsti(l, 0, 1, 2, 3, 4, 5)
+#else
+# define sti(l) \
+ xsti(l, 0, 1, 2, 3, 4, 5) \
+ xsti(l, 1, 2, 3, 4, 5, 0) \
+ xsti(l, 2, 3, 4, 5, 0, 1) \
+ xsti(l, 3, 4, 5, 0, 1, 2) \
+ xsti(l, 4, 5, 0, 1, 2, 3) \
+ xsti(l, 5, 0, 1, 2, 3, 4)
+#endif
+
+#define fsti(f, l, r0,r1,r2,r3,r4,r5, f0,f1,f2,f3,f4,f5) \
+ setup() \
+ setup##f() \
+ jit_sti##f(_jit, buff, FR##f0); \
+ check6(sti##f##r0##f0, l, r0, r1, r2, r3, r4, r5) \
+ checkf5(f, sti##r0##f0, l, f1, f2, f3, f4, f5)
+
+#define fstr(f, l, r0,r1,r2,r3,r4,r5, f0,f1,f2,f3,f4,f5) \
+ setup() \
+ setup##f() \
+ jit_movi(_jit, IR##r0, (jit_imm_t)buff); \
+ jit_str##f(_jit, IR##r0, FR##f0); \
+ check5(str##f##r0##f0, l, r1, r2, r3, r4, r5) \
+ checkf5(f, str##r0##f0, l, f1, f2, f3, f4, f5)
+
+#define fstxi(f, l, r0,r1,r2,r3,r4,r5, f0,f1,f2,f3,f4,f5) \
+ setup() \
+ setup##f() \
+ jit_movi(_jit, IR##r0, (jit_imm_t)buff); \
+ jit_stxi##f(_jit, off##f, IR##r0, FR##f0); \
+ check5(stxi##f##r0##f0, l, r1, r2, r3, r4, r5) \
+ checkf5(f, stxi##r0##f0, l, f1, f2, f3, f4, f5)
+
+#define fstxr(f, l, r0,r1,r2,r3,r4,r5, f0,f1,f2,f3,f4,f5) \
+ setup() \
+ setup##f() \
+ jit_movi(_jit, IR##r0, (jit_imm_t)buff); \
+ jit_movi(_jit, IR##r1, off##f); \
+ jit_stxr##f(_jit, IR##r1, IR##r0, FR##f0); \
+ check4(stxr##f##r0##f0, l, r2, r3, r4, r5) \
+ checkf5(f, stxr##r0##f0, l, f1, f2, f3, f4, f5)
+
+#define xstf(f, l, r0,r1,r2,r3,r4,r5, f0,f1,f2,f3,f4,f5) \
+ fsti(f, l, r0,r1,r2,r3,r4,r5, f0,f1,f2,f3,f4,f5) \
+ fstr(f, l, r0,r1,r2,r3,r4,r5, f0,f1,f2,f3,f4,f5) \
+ fstxi(f, l, r0,r1,r2,r3,r4,r5, f0,f1,f2,f3,f4,f5) \
+ fstxr(f, l, r0,r1,r2,r3,r4,r5, f0,f1,f2,f3,f4,f5)
+#define xxstf(l, r0,r1,r2,r3,r4,r5, f0,f1,f2,f3,f4,f5) \
+ xstf(_f, l, r0,r1,r2,r3,r4,r5, f0,f1,f2,f3,f4,f5) \
+ xstf(_d, l, r0,r1,r2,r3,r4,r5, f0,f1,f2,f3,f4,f5)
+#if __ia64__
+# define ixstf(l, r0,r1,r2,r3,r4,r5, f0,f1,f2,f3,f4,f5) \
+ xxstf(l, r0,r1,r2,r3,r4,r5, f0,f1,f2,f3,f4,f5)
+#else
+# define fxstf(l, r0,r1,r2,r3,r4,r5, f0,f1,f2,f3,f4,f5) \
+ xxstf(l, r0,r1,r2,r3,r4,r5, f0,f1,f2,f3,f4,f5) \
+ xxstf(l, r0,r1,r2,r3,r4,r5, f1,f2,f3,f4,f5,f0) \
+ xxstf(l, r0,r1,r2,r3,r4,r5, f2,f3,f4,f5,f0,f1) \
+ xxstf(l, r0,r1,r2,r3,r4,r5, f3,f4,f5,f0,f1,f2) \
+ xxstf(l, r0,r1,r2,r3,r4,r5, f4,f5,f0,f1,f2,f3) \
+ xxstf(l, r0,r1,r2,r3,r4,r5, f5,f0,f1,f2,f3,f4)
+# define ixstf(l, r0,r1,r2,r3,r4,r5, f0,f1,f2,f3,f4,f5) \
+ fxstf(l, r0,r1,r2,r3,r4,r5, f0,f1,f2,f3,f4,f5) \
+ fxstf(l, r1,r2,r3,r4,r5,r0, f0,f1,f2,f3,f4,f5) \
+ fxstf(l, r2,r3,r4,r5,r0,r1, f0,f1,f2,f3,f4,f5) \
+ fxstf(l, r3,r4,r5,r0,r1,r2, f0,f1,f2,f3,f4,f5) \
+ fxstf(l, r4,r5,r0,r1,r2,r3, f0,f1,f2,f3,f4,f5) \
+ fxstf(l, r5,r0,r1,r2,r3,r4, f0,f1,f2,f3,f4,f5)
+#endif
+#define stf(l) \
+ ixstf(l, 0,1,2,3,4,5, 0,1,2,3,4,5)
+
+/* Integer branch probe, register vs. immediate.  Loads `il' into IR##r0,
+ * emits jit_b<op>i<u> comparing it with the immediate `ir', then a call to
+ * abort() followed by jit_patch_here() on the branch, so the abort call is
+ * the path taken when the branch is not.  setup() and check5() are defined
+ * earlier in the file (outside this excerpt); presumably they seed and
+ * re-verify the five registers not used here -- confirm there.
+ * The expansion is a braced block so each use gets its own `r'. */
+#define bri(l, op, u, il, ir, r0, r1, r2, r3, r4, r5) \
+{ \
+ setup() \
+ jit_movi(_jit, IR##r0, il); \
+ jit_reloc_t r = jit_b##op##i##u(_jit, IR##r0, ir); \
+ jit_calli_0(_jit, abort); \
+ jit_patch_here(_jit, r); \
+ check5(i, l, r1, r2, r3, r4, r5) \
+}
+
+/* Integer branch probe, register vs. register.  Loads `il' into IR##r0 and
+ * `ir' into IR##r1, emits jit_b<op>r<u> on the pair, then a call to abort()
+ * followed by jit_patch_here() on the branch, so the abort call is the path
+ * taken when the branch is not.  Two registers are consumed, hence check4()
+ * on the remaining four (check4/setup are defined outside this excerpt). */
+#define brr(l, op, u, il, ir, r0, r1, r2, r3, r4, r5) \
+{ \
+ setup() \
+ jit_movi(_jit, IR##r0, il); \
+ jit_movi(_jit, IR##r1, ir); \
+ jit_reloc_t r = jit_b##op##r##u(_jit, IR##r0, IR##r1); \
+ jit_calli_0(_jit, abort); \
+ jit_patch_here(_jit, r); \
+ check4(r, l, r2, r3, r4, r5) \
+}
+
+#define xjmpi(l, op, u, il, ir, r0, r1, r2, r3, r4, r5) \
+ bri(l, op, u, il, ir, r0, r1, r2, r3, r4, r5) \
+ brr(l, op, u, il, ir, r0, r1, r2, r3, r4, r5)
+#if __ia64__
+# define jmpi(l, op, u, il, ir) \
+ xjmpi(l, op, u, il, ir, 0, 1, 2, 3, 4, 5)
+#else
+# define jmpi(l, op, u, il, ir) \
+ xjmpi(l, op, u, il, ir, 0, 1, 2, 3, 4, 5) \
+ xjmpi(l, op, u, il, ir, 1, 2, 3, 4, 5, 0) \
+ xjmpi(l, op, u, il, ir, 2, 3, 4, 5, 0, 1) \
+ xjmpi(l, op, u, il, ir, 3, 4, 5, 0, 1, 2) \
+ xjmpi(l, op, u, il, ir, 4, 5, 0, 1, 2, 3) \
+ xjmpi(l, op, u, il, ir, 5, 0, 1, 2, 3, 4)
+#endif
+
+/* Floating-point branch probe with the right operand in a fixed register.
+ * `f' is the type suffix (_f or _d).  Loads `il' into FR##f0 and `ir' into
+ * JIT_F6, emits jit_b<op>r<f> on the pair, then a call to abort() followed
+ * by jit_patch_here() on the branch, so the abort call is the path taken
+ * when the branch is not.  checkf5() (defined outside this excerpt) is
+ * applied to the other five rotated float registers. */
+#define bfi(f, l, op, il, ir, f0, f1, f2, f3, f4, f5) \
+{ \
+ setup##f() \
+ jit_movi##f(_jit, FR##f0, il); \
+ jit_movi##f(_jit, JIT_F6, ir); \
+ jit_reloc_t r = jit_b##op##r##f(_jit, FR##f0, JIT_F6); \
+ jit_calli_0(_jit, abort); \
+ jit_patch_here(_jit, r); \
+ checkf5(f, i, l, f1, f2, f3, f4, f5) \
+}
+
+/* Floating-point branch probe with both operands in rotated registers.
+ * `f' is the type suffix (_f or _d).  Loads `il' into FR##f0 and `ir' into
+ * FR##f1, emits jit_b<op>r<f> on the pair, then a call to abort() followed
+ * by jit_patch_here() on the branch, so the abort call is the path taken
+ * when the branch is not.  checkf4() (defined outside this excerpt) is
+ * applied to the four float registers left over. */
+#define bff(f, l, op, il, ir, f0, f1, f2, f3, f4, f5) \
+{ \
+ setup##f() \
+ jit_movi##f(_jit, FR##f0, il); \
+ jit_movi##f(_jit, FR##f1, ir); \
+ jit_reloc_t r = jit_b##op##r##f(_jit, FR##f0, FR##f1); \
+ jit_calli_0(_jit, abort); \
+ jit_patch_here(_jit, r); \
+ checkf4(f, r, l, f2, f3, f4, f5) \
+}
+
+#define xjmpf(f, l, op, il, ir, f0, f1, f2, f3, f4, f5) \
+ bfi(f, l, op, il, ir, f0, f1, f2, f3, f4, f5) \
+ bff(f, l, op, il, ir, f0, f1, f2, f3, f4, f5)
+#define xxjmpf(l, op, il, ir, f0, f1, f2, f3, f4, f5) \
+ xjmpf(_f, l, op, il, ir, f0, f1, f2, f3, f4, f5) \
+ xjmpf(_d, l, op, il, ir, f0, f1, f2, f3, f4, f5)
+#if __ia64__
+# define jmpf(l, op, il, ir) \
+ xxjmpf(l, op, il, ir, 0, 1, 2, 3, 4, 5)
+#else
+# define jmpf(l, op, il, ir) \
+ xxjmpf(l, op, il, ir, 0, 1, 2, 3, 4, 5) \
+ xxjmpf(l, op, il, ir, 1, 2, 3, 4, 5, 0) \
+ xxjmpf(l, op, il, ir, 2, 3, 4, 5, 0, 1) \
+ xxjmpf(l, op, il, ir, 3, 4, 5, 0, 1, 2) \
+ xxjmpf(l, op, il, ir, 4, 5, 0, 1, 2, 3) \
+ xxjmpf(l, op, il, ir, 5, 0, 1, 2, 3, 4)
+#endif
+
+/*
+ * Emit every probe of this test into the arena, then run the result.
+ *
+ * Returns 0 once the generated function has been called, or the size
+ * reported by jit_end() when it hands back no function pointer.
+ */
+static size_t
+run_test(jit_state_t *_jit, uint8_t *arena_base, size_t arena_size)
+{
+ jit_begin(_jit, arena_base, arena_size);
+ size_t frame = jit_enter_jit_abi(_jit, 3, 0, 0);
+
+ /* Scratch memory addressed by the load/store probes. */
+ char *buff = malloc(16);
+ ASSERT(buff);
+
+ /* Arithmetic and logic. */
+ alu(__LINE__, add)
+ alux(__LINE__, add)
+ fop(__LINE__, add)
+ alu(__LINE__, sub)
+ alux(__LINE__, sub)
+ fop(__LINE__, sub)
+ alu(__LINE__, mul)
+ fop(__LINE__, mul)
+ alu(__LINE__, div)
+ alu_u(__LINE__, div)
+ fop(__LINE__, div)
+ alu(__LINE__, rem)
+ alu_u(__LINE__, rem)
+ alu(__LINE__, and)
+ alu(__LINE__, or)
+ alu(__LINE__, xor)
+ alu(__LINE__, lsh)
+ alu(__LINE__, rsh)
+ alu_u(__LINE__, rsh)
+
+ /* Unary operations and moves. */
+ uni(__LINE__, negr)
+ unf(__LINE__, negr)
+ uni(__LINE__, comr)
+ unf(__LINE__, absr)
+ unf(__LINE__, sqrtr)
+ mvi(__LINE__)
+ mvf(__LINE__)
+
+ /* Extensions and byte swaps. */
+ uni(__LINE__, extr_c)
+ uni(__LINE__, extr_uc)
+ uni(__LINE__, extr_s)
+ uni(__LINE__, extr_us)
+#if __WORDSIZE == 64
+ uni(__LINE__, extr_ui)
+#endif
+ uni(__LINE__, bswapr_us)
+ uni(__LINE__, bswapr_ui)
+#if __WORDSIZE == 64
+ uni(__LINE__, bswapr_ul)
+#endif
+
+ /* Conversions. */
+ f2f(__LINE__, _f, extr_d_f)
+ f2f(__LINE__, _d, extr_f_d)
+ f2i(__LINE__, truncr)
+ i2f(__LINE__, extr)
+
+ /* Loads and stores through buff. */
+ ldi(__LINE__)
+ ldf(__LINE__)
+ sti(__LINE__)
+ stf(__LINE__)
+
+ /* Compare-and-branch. */
+ jmpi(__LINE__, lt, , 0, 1)
+ jmpi(__LINE__, lt, _u, 0, 1)
+ jmpf(__LINE__, lt, 0, 1)
+ jmpi(__LINE__, le, , 1, 1)
+ jmpi(__LINE__, le, _u, 1, 1)
+ jmpf(__LINE__, le, 1, 1)
+ jmpi(__LINE__, eq, , -1, -1)
+ jmpf(__LINE__, eq, -1, -1)
+ jmpi(__LINE__, ge, , 2, 2)
+ jmpi(__LINE__, ge, _u, 2, 2)
+ jmpf(__LINE__, ge, 2, 2)
+ jmpi(__LINE__, gt, , 2, 1)
+ jmpi(__LINE__, gt, _u, 2, 1)
+ jmpf(__LINE__, gt, 2, 1)
+ jmpi(__LINE__, ne, , 3, 2)
+ jmpf(__LINE__, ne, 3, 2)
+ jmpi(__LINE__, ms, , 1, 1)
+ jmpi(__LINE__, mc, , 1, 2)
+
+ /* Word-sized boundary constants for the overflow branches. */
+#if __WORDSIZE == 32
+# define ix7f 0x7fffffff
+# define ix80 0x80000000
+# define ixff 0xffffffff
+#else
+# define ix7f 0x7fffffffffffffff
+# define ix80 0x8000000000000000
+# define ixff 0xffffffffffffffff
+#endif
+ jmpi(__LINE__, oadd, , ix7f, 1)
+ jmpi(__LINE__, oadd, _u, ixff, 1)
+ jmpi(__LINE__, xadd, , ix80, 1)
+ jmpi(__LINE__, xadd, _u, ix7f, 1)
+ jmpi(__LINE__, osub, , ix80, 1)
+ jmpi(__LINE__, osub, _u, 0, 1)
+ jmpi(__LINE__, xsub, , ix7f, 1)
+ jmpi(__LINE__, xsub, _u, ix80, 1)
+
+ /* Unordered-aware float branches; the last one compares against NaN. */
+ jmpf(__LINE__, unlt, 0, 1)
+ jmpf(__LINE__, unle, 1, 1)
+ jmpf(__LINE__, uneq, 2, 2)
+ jmpf(__LINE__, unge, 3, 3)
+ jmpf(__LINE__, ungt, 4, 3)
+ jmpf(__LINE__, ltgt, 5, 4)
+ jmpf(__LINE__, ord, 0, 0)
+ jmpf(__LINE__, unord, 0, (0.0 / 0.0))
+
+ jit_leave_jit_abi(_jit, 3, 0, frame);
+ jit_ret(_jit);
+
+ size_t size = 0;
+ void (*function)(void) = jit_end(_jit, &size);
+
+ /* Nothing to run: report the size jit_end() filled in. */
+ if (!function) {
+  free(buff);
+  return size;
+ }
+
+ (*function)();
+ free(buff);
+ return 0;
+}
+
+int main(int argc, char *argv[])
+{
+ return main_compiler(argc, argv, run_test);
+}
diff --git a/deps/lightening/tests/z_range.c b/deps/lightening/tests/z_range.c
new file mode 100644
index 0000000..a8b82f4
--- /dev/null
+++ b/deps/lightening/tests/z_range.c
@@ -0,0 +1,577 @@
+#include "test.h"
+
+#define M64 67108864
+
+#define aB1 (1<<1)
+#define aB2 (1<<2)
+#define aB3 (1<<3)
+#define aB4 (1<<4)
+#define aB5 (1<<5)
+#define aB6 (1<<6)
+#define aB7 (1<<7)
+#define aB8 (1<<8)
+#define aB9 (1<<9)
+#define aB10 (1<<10)
+#define aB11 (1<<11)
+#define aB12 (1<<12)
+#define aB13 (1<<13)
+#define aB14 (1<<14)
+#define aB15 (1<<15)
+#define aB16 (1<<16)
+#define aB17 (1<<17)
+#define aB18 (1<<18)
+#define aB19 (1<<19)
+#define aB20 (1<<20)
+#define aB21 (1<<21)
+#define aB22 (1<<22)
+#define aB23 (1<<23)
+#define aB24 (1<<24)
+#define aB25 (1<<25)
+#define aB26 (1<<26)
+#define bB1 (-aB1)
+#define bB2 (-aB2)
+#define bB3 (-aB3)
+#define bB4 (-aB4)
+#define bB5 (-aB5)
+#define bB6 (-aB6)
+#define bB7 (-aB7)
+#define bB8 (-aB8)
+#define bB9 (-aB9)
+#define bB10 (-aB10)
+#define bB11 (-aB11)
+#define bB12 (-aB12)
+#define bB13 (-aB13)
+#define bB14 (-aB14)
+#define bB15 (-aB15)
+#define bB16 (-aB16)
+#define bB17 (-aB17)
+#define bB18 (-aB18)
+#define bB19 (-aB19)
+#define bB20 (-aB20)
+#define bB21 (-aB21)
+#define bB22 (-aB22)
+#define bB23 (-aB23)
+#define bB24 (-aB24)
+#define bB25 (-aB25)
+#define bB26 (-aB26)
+#define cB1 (aB1-1)
+#define cB2 (aB2-1)
+#define cB3 (aB3-1)
+#define cB4 (aB4-1)
+#define cB5 (aB5-1)
+#define cB6 (aB6-1)
+#define cB7 (aB7-1)
+#define cB8 (aB8-1)
+#define cB9 (aB9-1)
+#define cB10 (aB10-1)
+#define cB11 (aB11-1)
+#define cB12 (aB12-1)
+#define cB13 (aB13-1)
+#define cB14 (aB14-1)
+#define cB15 (aB15-1)
+#define cB16 (aB16-1)
+#define cB17 (aB17-1)
+#define cB18 (aB18-1)
+#define cB19 (aB19-1)
+#define cB20 (aB20-1)
+#define cB21 (aB21-1)
+#define cB22 (aB22-1)
+#define cB23 (aB23-1)
+#define cB24 (aB24-1)
+#define cB25 (aB25-1)
+#define cB26 (aB26-1)
+#define dB1 (-aB1+1)
+#define dB2 (-aB2+1)
+#define dB3 (-aB3+1)
+#define dB4 (-aB4+1)
+#define dB5 (-aB5+1)
+#define dB6 (-aB6+1)
+#define dB7 (-aB7+1)
+#define dB8 (-aB8+1)
+#define dB9 (-aB9+1)
+#define dB10 (-aB10+1)
+#define dB11 (-aB11+1)
+#define dB12 (-aB12+1)
+#define dB13 (-aB13+1)
+#define dB14 (-aB14+1)
+#define dB15 (-aB15+1)
+#define dB16 (-aB16+1)
+#define dB17 (-aB17+1)
+#define dB18 (-aB18+1)
+#define dB19 (-aB19+1)
+#define dB20 (-aB20+1)
+#define dB21 (-aB21+1)
+#define dB22 (-aB22+1)
+#define dB23 (-aB23+1)
+#define dB24 (-aB24+1)
+#define dB25 (-aB25+1)
+#define dB26 (-aB26+1)
+
+#define add(a, b) (a + b)
+#define sub(a, b) (a - b)
+#define mul(a, b) (a * b)
+#define div(a, b) (a / b)
+#define rem(a, b) (a % b)
+#define and(a, b) (a & b)
+#define or(a, b) (a | b)
+#define xor(a, b) (a ^ b)
+
+#if defined(DEBUG)
+#define dump_args(N, X, L, R, V)\
+ jit_calli_1(_jit, puts,\
+ jit_operand_imm(JIT_OPERAND_ABI_POINTER,\
+ (jit_imm_t)#N " " #X " " #L " " #R " " #V))
+#else
+#define dump_args(N, X, L, R, V)
+#endif
+
+/* alu2 doesn't really work for jit_rshi_u, so define a shim */
+#define jit_rsh_ui jit_rshi_u
+
+/* One immediate-operand ALU check.  Loads L into JIT_R1, emits
+ * jit_<N>i(JIT_R0, JIT_R1, R), then branches over a call to abort() when
+ * JIT_R0 equals V.  V is the expected result, computed by the host compiler
+ * at the expansion site.  X is only a label for dump_args(), which expands
+ * to nothing unless DEBUG is defined. */
+#define alu2(N, X, L, R, V) \
+{ \
+ dump_args(N, X, L, R, V); \
+ jit_movi(_jit, JIT_R1, L); \
+ jit_##N##i(_jit, JIT_R0, JIT_R1, R); \
+ jit_reloc_t r = jit_beqi(_jit, JIT_R0, V); \
+ jit_calli_0(_jit, abort); \
+ jit_patch_here(_jit, r); \
+}
+
+#define alu1(N, M) \
+ alu2(N, N##M##1, 3, (M##1), N(3, M##1)) \
+ alu2(N, N##M##2, 3, (M##2), N(3, M##2)) \
+ alu2(N, N##M##3, 3, (M##3), N(3, M##3)) \
+ alu2(N, N##M##4, 3, (M##4), N(3, M##4)) \
+ alu2(N, N##M##5, 3, (M##5), N(3, M##5)) \
+ alu2(N, N##M##6, 3, (M##6), N(3, M##6)) \
+ alu2(N, N##M##7, 3, (M##7), N(3, M##7)) \
+ alu2(N, N##M##8, 3, (M##8), N(3, M##8)) \
+ alu2(N, N##M##9, 3, (M##9), N(3, M##9)) \
+ alu2(N, N##M##10, 3, (M##10), N(3, M##10)) \
+ alu2(N, N##M##11, 3, (M##11), N(3, M##11)) \
+ alu2(N, N##M##12, 3, (M##12), N(3, M##12)) \
+ alu2(N, N##M##13, 3, (M##13), N(3, M##13)) \
+ alu2(N, N##M##14, 3, (M##14), N(3, M##14)) \
+ alu2(N, N##M##15, 3, (M##15), N(3, M##15)) \
+ alu2(N, N##M##16, 3, (M##16), N(3, M##16)) \
+ alu2(N, N##M##17, 3, (M##17), N(3, M##17)) \
+ alu2(N, N##M##18, 3, (M##18), N(3, M##18)) \
+ alu2(N, N##M##19, 3, (M##19), N(3, M##19)) \
+ alu2(N, N##M##20, 3, (M##20), N(3, M##20)) \
+ alu2(N, N##M##21, 3, (M##21), N(3, M##21)) \
+ alu2(N, N##M##22, 3, (M##22), N(3, M##22)) \
+ alu2(N, N##M##23, 3, (M##23), N(3, M##23)) \
+ alu2(N, N##M##24, 3, (M##24), N(3, M##24)) \
+ alu2(N, N##M##25, 3, (M##25), N(3, M##25)) \
+ alu2(N, N##M##26, 3, (M##26), N(3, M##26))
+
+#define alu(N) \
+ alu1(N, aB) \
+ alu1(N, bB) \
+ alu1(N, cB) \
+ alu1(N, dB)
+
+/*
+ * Shift probes.  Each expands to one alu2() check of an immediate shift
+ * by N bits:
+ *   _lsh(N)   1 shifted left by N
+ *   _rsh(N)   the most negative word shifted right by N, signed
+ *   _rush(N)  the word with only its top bit set shifted right by N,
+ *             unsigned
+ *
+ * Operands and expected values are built from <stdint.h> types chosen by
+ * __WORDSIZE.  Writing them as 1L<<63 / 1L<<31 shifts a signed 1 into the
+ * sign bit, which is undefined behaviour in C, and also assumes `long' is
+ * word sized, which does not hold on LLP64 targets.
+ *
+ * The expected value for _rsh() still relies on the host compiler shifting
+ * negative values arithmetically (implementation-defined in C).
+ */
+#define _lsh(N) \
+ alu2(lsh, L##N, 1, N, ((intptr_t)((uintptr_t)1 << N)))
+
+#if __WORDSIZE == 64
+#define _rsh(N) \
+ alu2(rsh, R##N, INT64_MIN, N, (INT64_MIN >> N))
+
+#define _rush(N) \
+ alu2(rsh_u, R##N, (UINT64_C(1) << 63), N, ((UINT64_C(1) << 63) >> N))
+#else
+#define _rsh(N) \
+ alu2(rsh, R##N, INT32_MIN, N, (INT32_MIN >> N))
+
+#define _rush(N) \
+ alu2(rsh_u, R##N, (UINT32_C(1) << 31), N, ((UINT32_C(1) << 31) >> N))
+#endif
+
+#if __WORDSIZE == 32
+# define xsh64(X) /**/
+#else
+# define xsh64(X) \
+ _##X##sh(32) \
+ _##X##sh(33) \
+ _##X##sh(34) \
+ _##X##sh(35) \
+ _##X##sh(36) \
+ _##X##sh(37) \
+ _##X##sh(38) \
+ _##X##sh(39) \
+ _##X##sh(40) \
+ _##X##sh(41) \
+ _##X##sh(42) \
+ _##X##sh(43) \
+ _##X##sh(44) \
+ _##X##sh(45) \
+ _##X##sh(46) \
+ _##X##sh(47) \
+ _##X##sh(48) \
+ _##X##sh(49) \
+ _##X##sh(50) \
+ _##X##sh(51) \
+ _##X##sh(52) \
+ _##X##sh(53) \
+ _##X##sh(54) \
+ _##X##sh(55) \
+ _##X##sh(56) \
+ _##X##sh(57) \
+ _##X##sh(58) \
+ _##X##sh(59) \
+ _##X##sh(60) \
+ _##X##sh(61) \
+ _##X##sh(62) \
+ _##X##sh(63)
+#endif
+
+#define xsh(X) \
+ _##X##sh(0) \
+ _##X##sh(1) \
+ _##X##sh(2) \
+ _##X##sh(3) \
+ _##X##sh(4) \
+ _##X##sh(5) \
+ _##X##sh(6) \
+ _##X##sh(7) \
+ _##X##sh(8) \
+ _##X##sh(9) \
+ _##X##sh(10) \
+ _##X##sh(11) \
+ _##X##sh(12) \
+ _##X##sh(13) \
+ _##X##sh(14) \
+ _##X##sh(15) \
+ _##X##sh(16) \
+ _##X##sh(17) \
+ _##X##sh(18) \
+ _##X##sh(19) \
+ _##X##sh(20) \
+ _##X##sh(21) \
+ _##X##sh(22) \
+ _##X##sh(23) \
+ _##X##sh(24) \
+ _##X##sh(25) \
+ _##X##sh(26) \
+ _##X##sh(27) \
+ _##X##sh(28) \
+ _##X##sh(29) \
+ _##X##sh(30) \
+ _##X##sh(31) \
+ xsh64(X)
+
+#define lsh() \
+ xsh(l)
+
+#define rsh() \
+ xsh(r)
+
+#define rsh_u() \
+ xsh(ru)
+
+/* Emits a call to memset(buf, V, M64 + 8) into the generated code, so the
+ * fill happens when the JIT-compiled function runs, not while it is being
+ * assembled.  `buf' is taken from the scope where the macro is expanded. */
+#define reset(V) \
+ jit_calli_3(_jit, memset, \
+ jit_operand_imm(JIT_OPERAND_ABI_POINTER, (jit_imm_t)buf),\
+ jit_operand_imm(JIT_OPERAND_ABI_INT32, V), \
+ jit_operand_imm(JIT_OPERAND_ABI_UINT32, M64 + 8));
+
+#define stx(T, N, O, V) \
+ jit_movi(_jit, JIT_R0, V); \
+ jit_stxi##T(_jit, O, JIT_V0, JIT_R0);
+
+#define stx8(T, M, V) \
+ stx(T, 3, (M##B3), V) \
+ stx(T, 4, (M##B4), V) \
+ stx(T, 5, (M##B5), V) \
+ stx(T, 6, (M##B6), V) \
+ stx(T, 7, (M##B7), V) \
+ stx(T, 8, (M##B8), V) \
+ stx(T, 9, (M##B9), V) \
+ stx(T, 10, (M##B10), V) \
+ stx(T, 11, (M##B11), V) \
+ stx(T, 12, (M##B12), V) \
+ stx(T, 13, (M##B13), V) \
+ stx(T, 14, (M##B14), V) \
+ stx(T, 15, (M##B15), V) \
+ stx(T, 16, (M##B16), V) \
+ stx(T, 17, (M##B17), V) \
+ stx(T, 18, (M##B18), V) \
+ stx(T, 19, (M##B19), V) \
+ stx(T, 20, (M##B20), V) \
+ stx(T, 21, (M##B21), V) \
+ stx(T, 22, (M##B22), V) \
+ stx(T, 23, (M##B23), V) \
+ stx(T, 24, (M##B24), V) \
+ stx(T, 25, (M##B25), V) \
+ stx(T, 26, (M##B26), V)
+
+#define stx4(T, M, V) \
+ stx(T, 2, (M##B2), V) \
+ stx8(T, M, V)
+
+#define stx2(T, M, V) \
+ stx(T, 1, (M##B1), V) \
+ stx4(T, M, V)
+
+/* One displaced integer load check.  Clears JIT_R0, loads with
+ * jit_ldxi<T> from JIT_V0 + O into JIT_R0, then branches over a call to
+ * abort() when the loaded value equals V.  N and M are only forwarded to
+ * dump_args() for DEBUG output. */
+#define ldx(T, N, M, O, V) \
+{ \
+ dump_args(T, N, M, O, V); \
+ jit_movi(_jit, JIT_R0, 0); \
+ jit_ldxi##T(_jit, JIT_R0, JIT_V0, O); \
+ jit_reloc_t r = jit_beqi(_jit, JIT_R0, V); \
+ jit_calli_0(_jit, abort); \
+ jit_patch_here(_jit, r); \
+}
+
+#define ldx8(T, M, V) \
+ ldx(T, 3, M, (M##B3), V) \
+ ldx(T, 4, M, (M##B4), V) \
+ ldx(T, 5, M, (M##B5), V) \
+ ldx(T, 6, M, (M##B6), V) \
+ ldx(T, 7, M, (M##B7), V) \
+ ldx(T, 8, M, (M##B8), V) \
+ ldx(T, 9, M, (M##B9), V) \
+ ldx(T, 10, M, (M##B10), V) \
+ ldx(T, 11, M, (M##B11), V) \
+ ldx(T, 12, M, (M##B12), V) \
+ ldx(T, 13, M, (M##B13), V) \
+ ldx(T, 14, M, (M##B14), V) \
+ ldx(T, 15, M, (M##B15), V) \
+ ldx(T, 16, M, (M##B16), V) \
+ ldx(T, 17, M, (M##B17), V) \
+ ldx(T, 18, M, (M##B18), V) \
+ ldx(T, 19, M, (M##B19), V) \
+ ldx(T, 20, M, (M##B20), V) \
+ ldx(T, 21, M, (M##B21), V) \
+ ldx(T, 22, M, (M##B22), V) \
+ ldx(T, 23, M, (M##B23), V) \
+ ldx(T, 24, M, (M##B24), V) \
+ ldx(T, 25, M, (M##B25), V) \
+ ldx(T, 26, M, (M##B26), V)
+#define ldx4(T, M, V) \
+ ldx(T, 2, M, (M##B2), V) \
+ ldx8(T, M, V)
+#define ldx2(T, M, V) \
+ ldx(T, 1, M, (M##B1), V) \
+ ldx4(T, M, V)
+
+#define stf(T, N, O, V) \
+ jit_movi##T(_jit, JIT_F0, V); \
+ jit_stxi##T(_jit, O, JIT_V0, JIT_F0);
+
+#define stf8(T, M, V) \
+ stf(T, 3, (M##B3), V) \
+ stf(T, 4, (M##B4), V) \
+ stf(T, 5, (M##B5), V) \
+ stf(T, 6, (M##B6), V) \
+ stf(T, 7, (M##B7), V) \
+ stf(T, 8, (M##B8), V) \
+ stf(T, 9, (M##B9), V) \
+ stf(T, 10, (M##B10), V) \
+ stf(T, 11, (M##B11), V) \
+ stf(T, 12, (M##B12), V) \
+ stf(T, 13, (M##B13), V) \
+ stf(T, 14, (M##B14), V) \
+ stf(T, 15, (M##B15), V) \
+ stf(T, 16, (M##B16), V) \
+ stf(T, 17, (M##B17), V) \
+ stf(T, 18, (M##B18), V) \
+ stf(T, 19, (M##B19), V) \
+ stf(T, 20, (M##B20), V) \
+ stf(T, 21, (M##B21), V) \
+ stf(T, 22, (M##B22), V) \
+ stf(T, 23, (M##B23), V) \
+ stf(T, 24, (M##B24), V) \
+ stf(T, 25, (M##B25), V) \
+ stf(T, 26, (M##B26), V)
+
+#define stf4(T, M, V) \
+ stf(T, 2, (M##B2), V) \
+ stf8(T, M, V)
+
+/* One displaced floating-point load check.  Clears JIT_F0, loads with
+ * jit_ldxi<T> from JIT_V0 + O into JIT_F0, materialises V in JIT_F1 and
+ * branches over a call to abort() when the two registers compare equal.
+ * N and M are only forwarded to dump_args() for DEBUG output. */
+#define ldf(T, N, M, O, V) \
+{ \
+ dump_args(T, N, M, O, V); \
+ jit_movi##T(_jit, JIT_F0, 0); \
+ jit_ldxi##T(_jit, JIT_F0, JIT_V0, O); \
+ jit_movi##T(_jit, JIT_F1, V); \
+ jit_reloc_t r = jit_beqr##T(_jit, JIT_F0, JIT_F1); \
+ jit_calli_0(_jit, abort); \
+ jit_patch_here(_jit, r); \
+}
+
+#define ldf8(T, M, V) \
+ ldf(T, 3, M, (M##B3), V) \
+ ldf(T, 4, M, (M##B4), V) \
+ ldf(T, 5, M, (M##B5), V) \
+ ldf(T, 6, M, (M##B6), V) \
+ ldf(T, 7, M, (M##B7), V) \
+ ldf(T, 8, M, (M##B8), V) \
+ ldf(T, 9, M, (M##B9), V) \
+ ldf(T, 10, M, (M##B10), V) \
+ ldf(T, 11, M, (M##B11), V) \
+ ldf(T, 12, M, (M##B12), V) \
+ ldf(T, 13, M, (M##B13), V) \
+ ldf(T, 14, M, (M##B14), V) \
+ ldf(T, 15, M, (M##B15), V) \
+ ldf(T, 16, M, (M##B16), V) \
+ ldf(T, 17, M, (M##B17), V) \
+ ldf(T, 18, M, (M##B18), V) \
+ ldf(T, 19, M, (M##B19), V) \
+ ldf(T, 20, M, (M##B20), V) \
+ ldf(T, 21, M, (M##B21), V) \
+ ldf(T, 22, M, (M##B22), V) \
+ ldf(T, 23, M, (M##B23), V) \
+ ldf(T, 24, M, (M##B24), V) \
+ ldf(T, 25, M, (M##B25), V) \
+ ldf(T, 26, M, (M##B26), V)
+#define ldf4(T, M, V) \
+ ldf(T, 2, M, (M##B2), V) \
+ ldf8(T, M, V)
+
+#define ldst_c() \
+ reset(0xa5) \
+ jit_movi(_jit, JIT_V0, (jit_imm_t)buf); \
+ stx2(_c, a, 0x5a) \
+ ldx2(_c, a, 0x5a) \
+ reset(0xa5) \
+ jit_movi(_jit, JIT_V0, (jit_imm_t)(buf + M64)); \
+ stx2(_c, b, 0x5a) \
+ ldx2(_c, b, 0x5a)
+
+/* Byte store / unsigned-byte load round trip over the displacement range:
+ * first with the positive a* offsets from buf, then with the negative b*
+ * offsets from buf + M64.  The buffer is refilled with 0xa5 before each
+ * half, as in the other ldst_* macros.  Without the second refill, bytes
+ * left at 0x5a by the first half would let the second half pass even if
+ * a store there did nothing (buf + aB25 and buf + M64 + bB25 are the same
+ * address). */
+#define ldst_uc() \
+ reset(0xa5) \
+ jit_movi(_jit, JIT_V0, (jit_imm_t)buf); \
+ stx2(_c, a, 0x5a) \
+ ldx2(_uc, a, 0x5a) \
+ reset(0xa5) \
+ jit_movi(_jit, JIT_V0, (jit_imm_t)(buf + M64)); \
+ stx2(_c, b, 0x5a) \
+ ldx2(_uc, b, 0x5a)
+
+#define ldst_s() \
+ reset(0xa5) \
+ jit_movi(_jit, JIT_V0, (jit_imm_t)buf); \
+ stx2(_s, a, 0x5a5a) \
+ ldx2(_s, a, 0x5a5a) \
+ reset(0xa5) \
+ jit_movi(_jit, JIT_V0, (jit_imm_t)(buf + M64)); \
+ stx2(_s, b, 0x5a5a) \
+ ldx2(_s, b, 0x5a5a)
+
+#define ldst_us() \
+ reset(0xa5) \
+ jit_movi(_jit, JIT_V0, (jit_imm_t)buf); \
+ stx2(_s, a, 0x5a5a) \
+ ldx2(_us, a, 0x5a5a) \
+ reset(0xa5) \
+ jit_movi(_jit, JIT_V0, (jit_imm_t)(buf + M64)); \
+ stx2(_s, b, 0x5a5a) \
+ ldx2(_us, b, 0x5a5a)
+
+#define ldst_i() \
+ reset(0xa5) \
+ jit_movi(_jit, JIT_V0, (jit_imm_t)buf); \
+ stx4(_i, a, 0x5a5a5a5a) \
+ ldx4(_i, a, 0x5a5a5a5a) \
+ reset(0xa5) \
+ jit_movi(_jit, JIT_V0, (jit_imm_t)(buf + M64)); \
+ stx4(_i, b, 0x5a5a5a5a) \
+ ldx4(_i, b, 0x5a5a5a5a)
+
+#define ldst_ui() \
+ reset(0xa5) \
+ jit_movi(_jit, JIT_V0, (jit_imm_t)buf); \
+ stx4(_i, a, 0x5a5a5a5a) \
+ ldx4(_ui, a, 0x5a5a5a5a) \
+ reset(0xa5) \
+ jit_movi(_jit, JIT_V0, (jit_imm_t)(buf + M64)); \
+ stx4(_i, b, 0x5a5a5a5a) \
+ ldx4(_ui, b, 0x5a5a5a5a)
+
+#define ldst_l() \
+ reset(0xa5) \
+ jit_movi(_jit, JIT_V0, (jit_imm_t)buf); \
+ stx8(_l, a, 0x5a5a5a5a5a5a5a5a) \
+ ldx8(_l, a, 0x5a5a5a5a5a5a5a5a) \
+ reset(0xa5) \
+ jit_movi(_jit, JIT_V0, (jit_imm_t)(buf + M64)); \
+ stx8(_l, b, 0x5a5a5a5a5a5a5a5a) \
+ ldx8(_l, b, 0x5a5a5a5a5a5a5a5a)
+
+#define ldst_f() \
+ reset(0xa5) \
+ jit_movi(_jit, JIT_V0, (jit_imm_t)buf); \
+ stf4(_f, a, 0.5) \
+ ldf4(_f, a, 0.5) \
+ reset(0xa5) \
+ jit_movi(_jit, JIT_V0, (jit_imm_t)(buf + M64)); \
+ stf4(_f, b, 0.5) \
+ ldf4(_f, b, 0.5)
+
+#define ldst_d() \
+ reset(0xa5) \
+ jit_movi(_jit, JIT_V0, (jit_imm_t)buf); \
+ stf8(_d, a, 0.5) \
+ ldf8(_d, a, 0.5) \
+ reset(0xa5) \
+ jit_movi(_jit, JIT_V0, (jit_imm_t)(buf + M64)); \
+ stf8(_d, b, 0.5) \
+ ldf8(_d, b, 0.5)
+
+/*
+ * Emit the immediate-range ALU, shift and displaced load/store checks into
+ * the arena, then run the result.
+ *
+ * Returns 0 once the generated function has been called, or the size
+ * reported by jit_end() when it hands back no function pointer.
+ */
+static size_t
+run_test(jit_state_t *_jit, uint8_t *arena_base, size_t arena_size)
+{
+ jit_begin(_jit, arena_base, arena_size);
+ size_t frame = jit_enter_jit_abi(_jit, 3, 0, 0);
+
+ /* Backing store for the ldst_*() checks, which address it from both ends. */
+ char *buf = malloc(M64 + 8);
+ ASSERT(buf);
+
+ /* Immediate operands across the aB/bB/cB/dB constant tables. */
+ alu(add)
+ alu(sub)
+ alu(mul)
+ alu(div)
+ alu(rem)
+ lsh()
+ rsh()
+ rsh_u()
+ alu(and)
+ alu(or)
+ alu(xor)
+
+ /* Displaced stores and loads for each access width. */
+ ldst_c()
+ ldst_uc()
+ ldst_s()
+ ldst_us()
+ ldst_i()
+#if __WORDSIZE == 64
+ ldst_ui()
+ ldst_l()
+#endif
+ ldst_f()
+ ldst_d()
+
+ jit_leave_jit_abi(_jit, 3, 0, frame);
+ jit_ret(_jit);
+
+ size_t size = 0;
+ void (*function)(void) = jit_end(_jit, &size);
+
+ /* Nothing to run: report the size jit_end() filled in. */
+ if (!function) {
+  free(buf);
+  return size;
+ }
+
+ (*function)();
+ free(buf);
+ return 0;
+}
+
+int main(int argc, char *argv[])
+{
+ return main_compiler(argc, argv, run_test);
+}
diff --git a/deps/lightening/tests/z_ranger.c b/deps/lightening/tests/z_ranger.c
new file mode 100644
index 0000000..aa9eadd
--- /dev/null
+++ b/deps/lightening/tests/z_ranger.c
@@ -0,0 +1,580 @@
+#include "test.h"
+
+/* 64 MiB (2^26, the same value as aB26); run_test allocates M64 + 8 bytes */
+#define M64 67108864
+
+/* aBn = 2^n for n = 1..26: positive power-of-two operands and offsets */
+#define aB1 (1<<1)
+#define aB2 (1<<2)
+#define aB3 (1<<3)
+#define aB4 (1<<4)
+#define aB5 (1<<5)
+#define aB6 (1<<6)
+#define aB7 (1<<7)
+#define aB8 (1<<8)
+#define aB9 (1<<9)
+#define aB10 (1<<10)
+#define aB11 (1<<11)
+#define aB12 (1<<12)
+#define aB13 (1<<13)
+#define aB14 (1<<14)
+#define aB15 (1<<15)
+#define aB16 (1<<16)
+#define aB17 (1<<17)
+#define aB18 (1<<18)
+#define aB19 (1<<19)
+#define aB20 (1<<20)
+#define aB21 (1<<21)
+#define aB22 (1<<22)
+#define aB23 (1<<23)
+#define aB24 (1<<24)
+#define aB25 (1<<25)
+#define aB26 (1<<26)
+/* bBn = -(2^n): the negated counterpart of aBn */
+#define bB1 (-aB1)
+#define bB2 (-aB2)
+#define bB3 (-aB3)
+#define bB4 (-aB4)
+#define bB5 (-aB5)
+#define bB6 (-aB6)
+#define bB7 (-aB7)
+#define bB8 (-aB8)
+#define bB9 (-aB9)
+#define bB10 (-aB10)
+#define bB11 (-aB11)
+#define bB12 (-aB12)
+#define bB13 (-aB13)
+#define bB14 (-aB14)
+#define bB15 (-aB15)
+#define bB16 (-aB16)
+#define bB17 (-aB17)
+#define bB18 (-aB18)
+#define bB19 (-aB19)
+#define bB20 (-aB20)
+#define bB21 (-aB21)
+#define bB22 (-aB22)
+#define bB23 (-aB23)
+#define bB24 (-aB24)
+#define bB25 (-aB25)
+#define bB26 (-aB26)
+/* cBn = 2^n - 1: the low n bits set */
+#define cB1 (aB1-1)
+#define cB2 (aB2-1)
+#define cB3 (aB3-1)
+#define cB4 (aB4-1)
+#define cB5 (aB5-1)
+#define cB6 (aB6-1)
+#define cB7 (aB7-1)
+#define cB8 (aB8-1)
+#define cB9 (aB9-1)
+#define cB10 (aB10-1)
+#define cB11 (aB11-1)
+#define cB12 (aB12-1)
+#define cB13 (aB13-1)
+#define cB14 (aB14-1)
+#define cB15 (aB15-1)
+#define cB16 (aB16-1)
+#define cB17 (aB17-1)
+#define cB18 (aB18-1)
+#define cB19 (aB19-1)
+#define cB20 (aB20-1)
+#define cB21 (aB21-1)
+#define cB22 (aB22-1)
+#define cB23 (aB23-1)
+#define cB24 (aB24-1)
+#define cB25 (aB25-1)
+#define cB26 (aB26-1)
+/* dBn = -(2^n) + 1, which equals -cBn */
+#define dB1 (-aB1+1)
+#define dB2 (-aB2+1)
+#define dB3 (-aB3+1)
+#define dB4 (-aB4+1)
+#define dB5 (-aB5+1)
+#define dB6 (-aB6+1)
+#define dB7 (-aB7+1)
+#define dB8 (-aB8+1)
+#define dB9 (-aB9+1)
+#define dB10 (-aB10+1)
+#define dB11 (-aB11+1)
+#define dB12 (-aB12+1)
+#define dB13 (-aB13+1)
+#define dB14 (-aB14+1)
+#define dB15 (-aB15+1)
+#define dB16 (-aB16+1)
+#define dB17 (-aB17+1)
+#define dB18 (-aB18+1)
+#define dB19 (-aB19+1)
+#define dB20 (-aB20+1)
+#define dB21 (-aB21+1)
+#define dB22 (-aB22+1)
+#define dB23 (-aB23+1)
+#define dB24 (-aB24+1)
+#define dB25 (-aB25+1)
+#define dB26 (-aB26+1)
+
+/*
+ * Host-side reference operators.  alu1 pastes the operation name N into
+ * N(3, M##k), so the C compiler computes the value the generated code is
+ * expected to produce.  Each argument is parenthesized so that an
+ * argument which is itself an expression cannot regroup with the
+ * operator of the expansion.
+ */
+#define add(a, b) ((a) + (b))
+#define sub(a, b) ((a) - (b))
+#define mul(a, b) ((a) * (b))
+#define div(a, b) ((a) / (b))
+#define rem(a, b) ((a) % (b))
+#define and(a, b) ((a) & (b))
+#define or(a, b) ((a) | (b))
+#define xor(a, b) ((a) ^ (b))
+
+/*
+ * Debug tracing.  With DEBUG defined, emits a call to puts() whose
+ * argument is the five macro arguments stringified and joined by single
+ * spaces.  Without DEBUG it expands to nothing.  The expansion carries no
+ * trailing ';' -- call sites supply it.
+ */
+#if defined(DEBUG)
+#define dump_args(N, X, L, R, V)\
+ jit_calli_1(_jit, puts,\
+ jit_operand_imm(JIT_OPERAND_ABI_POINTER,\
+ (jit_imm_t)#N " " #X " " #L " " #R " " #V))
+#else
+#define dump_args(N, X, L, R, V)
+#endif
+
+/*
+ * Alias mapping the name jit_rshi_ui onto jit_rshi_u.
+ * NOTE(review): alu2 in this file pastes jit_<N>r (the register form), so
+ * nothing here appears to expand to jit_rshi_ui -- confirm whether the
+ * alias is still needed.
+ */
+#define jit_rshi_ui jit_rshi_u
+
+/*
+ * Emit one register-register ALU check:
+ *   JIT_R1 = L; JIT_R2 = R; jit_<N>r(JIT_R0, JIT_R1, JIT_R2);
+ *   the jit_beqi on JIT_R0 == V is patched to land just past the
+ *   call to abort().
+ * N selects the emitter, V is the expected result computed on the host,
+ * and X is only used as a label by dump_args.  The braces give the local
+ * reloc `r` its own scope so the macro can be expanded repeatedly.
+ */
+#define alu2(N, X, L, R, V) \
+{ \
+ dump_args(N, X, L, R, V); \
+ jit_movi(_jit, JIT_R1, L); \
+ jit_movi(_jit, JIT_R2, R); \
+ jit_##N##r(_jit, JIT_R0, JIT_R1, JIT_R2); \
+ jit_reloc_t r = jit_beqi(_jit, JIT_R0, V); \
+ jit_calli_0(_jit, abort); \
+ jit_patch_here(_jit, r); \
+}
+
+/*
+ * Run alu2 for operation N with left operand 3 and right operand
+ * M##1 .. M##26, where M is a table prefix (aB, bB, cB or dB).  The
+ * expected value N(3, M##k) is evaluated by the host operator macro of the
+ * same name (add, sub, ...).
+ */
+#define alu1(N, M) \
+ alu2(N, N##M##1, 3, (M##1), N(3, M##1)) \
+ alu2(N, N##M##2, 3, (M##2), N(3, M##2)) \
+ alu2(N, N##M##3, 3, (M##3), N(3, M##3)) \
+ alu2(N, N##M##4, 3, (M##4), N(3, M##4)) \
+ alu2(N, N##M##5, 3, (M##5), N(3, M##5)) \
+ alu2(N, N##M##6, 3, (M##6), N(3, M##6)) \
+ alu2(N, N##M##7, 3, (M##7), N(3, M##7)) \
+ alu2(N, N##M##8, 3, (M##8), N(3, M##8)) \
+ alu2(N, N##M##9, 3, (M##9), N(3, M##9)) \
+ alu2(N, N##M##10, 3, (M##10), N(3, M##10)) \
+ alu2(N, N##M##11, 3, (M##11), N(3, M##11)) \
+ alu2(N, N##M##12, 3, (M##12), N(3, M##12)) \
+ alu2(N, N##M##13, 3, (M##13), N(3, M##13)) \
+ alu2(N, N##M##14, 3, (M##14), N(3, M##14)) \
+ alu2(N, N##M##15, 3, (M##15), N(3, M##15)) \
+ alu2(N, N##M##16, 3, (M##16), N(3, M##16)) \
+ alu2(N, N##M##17, 3, (M##17), N(3, M##17)) \
+ alu2(N, N##M##18, 3, (M##18), N(3, M##18)) \
+ alu2(N, N##M##19, 3, (M##19), N(3, M##19)) \
+ alu2(N, N##M##20, 3, (M##20), N(3, M##20)) \
+ alu2(N, N##M##21, 3, (M##21), N(3, M##21)) \
+ alu2(N, N##M##22, 3, (M##22), N(3, M##22)) \
+ alu2(N, N##M##23, 3, (M##23), N(3, M##23)) \
+ alu2(N, N##M##24, 3, (M##24), N(3, M##24)) \
+ alu2(N, N##M##25, 3, (M##25), N(3, M##25)) \
+ alu2(N, N##M##26, 3, (M##26), N(3, M##26))
+
+/* Exercise operation N against all four operand tables (aB, bB, cB, dB). */
+#define alu(N) \
+ alu1(N, aB) \
+ alu1(N, bB) \
+ alu1(N, cB) \
+ alu1(N, dB)
+
+/*
+ * Single shift checks; N is the shift count.
+ *   _lsh:  1 shifted left by N
+ *   _rsh:  signed right shift of the word that has only its top bit set;
+ *          the expected value is the host's `>>` on a negative long
+ *   _rush: unsigned right shift of the same bit pattern
+ *
+ * The signed values are built by shifting an unsigned 1 and converting
+ * the result to long.  Left-shifting a signed 1 into the sign bit is
+ * undefined behaviour in C, whereas the unsigned shift is well defined.
+ */
+#define _lsh(N) \
+ alu2(lsh, L##N, 1, N, ((long)(1UL<<N)))
+
+#if __WORDSIZE == 64
+#define _rsh(N) \
+ alu2(rsh, R##N, ((long)(1UL<<63)), N, (((long)(1UL<<63))>>N))
+
+#define _rush(N) \
+ alu2(rsh_u, R##N, (1UL<<63), N, ((1UL<<63)>>N))
+#else
+#define _rsh(N) \
+ alu2(rsh, R##N, ((long)(1UL<<31)), N, (((long)(1UL<<31))>>N))
+
+#define _rush(N) \
+ alu2(rsh_u, R##N, (1UL<<31), N, ((1UL<<31)>>N))
+#endif
+
+/*
+ * Shift counts 32..63 for the shift family selected by X (pasted into
+ * _<X>sh).  Expands to nothing when __WORDSIZE == 32.
+ */
+#if __WORDSIZE == 32
+# define xsh64(X) /**/
+#else
+# define xsh64(X) \
+ _##X##sh(32) \
+ _##X##sh(33) \
+ _##X##sh(34) \
+ _##X##sh(35) \
+ _##X##sh(36) \
+ _##X##sh(37) \
+ _##X##sh(38) \
+ _##X##sh(39) \
+ _##X##sh(40) \
+ _##X##sh(41) \
+ _##X##sh(42) \
+ _##X##sh(43) \
+ _##X##sh(44) \
+ _##X##sh(45) \
+ _##X##sh(46) \
+ _##X##sh(47) \
+ _##X##sh(48) \
+ _##X##sh(49) \
+ _##X##sh(50) \
+ _##X##sh(51) \
+ _##X##sh(52) \
+ _##X##sh(53) \
+ _##X##sh(54) \
+ _##X##sh(55) \
+ _##X##sh(56) \
+ _##X##sh(57) \
+ _##X##sh(58) \
+ _##X##sh(59) \
+ _##X##sh(60) \
+ _##X##sh(61) \
+ _##X##sh(62) \
+ _##X##sh(63)
+#endif
+
+/*
+ * Sweep the shift family selected by X over every shift count: 0..31
+ * here, followed by xsh64(X) for the counts 32..63 on 64-bit builds.
+ */
+#define xsh(X) \
+ _##X##sh(0) \
+ _##X##sh(1) \
+ _##X##sh(2) \
+ _##X##sh(3) \
+ _##X##sh(4) \
+ _##X##sh(5) \
+ _##X##sh(6) \
+ _##X##sh(7) \
+ _##X##sh(8) \
+ _##X##sh(9) \
+ _##X##sh(10) \
+ _##X##sh(11) \
+ _##X##sh(12) \
+ _##X##sh(13) \
+ _##X##sh(14) \
+ _##X##sh(15) \
+ _##X##sh(16) \
+ _##X##sh(17) \
+ _##X##sh(18) \
+ _##X##sh(19) \
+ _##X##sh(20) \
+ _##X##sh(21) \
+ _##X##sh(22) \
+ _##X##sh(23) \
+ _##X##sh(24) \
+ _##X##sh(25) \
+ _##X##sh(26) \
+ _##X##sh(27) \
+ _##X##sh(28) \
+ _##X##sh(29) \
+ _##X##sh(30) \
+ _##X##sh(31) \
+ xsh64(X)
+
+/*
+ * Entry points for the shift sweeps: xsh pastes its argument into
+ * _<X>sh, so l, r and ru select _lsh, _rsh and _rush respectively.
+ * NOTE(review): run_test in this file invokes lsh() and rsh() but not
+ * rsh_u() -- confirm whether that is intentional.
+ */
+#define lsh() \
+ xsh(l)
+
+#define rsh() \
+ xsh(r)
+
+#define rsh_u() \
+ xsh(ru)
+
+/*
+ * Emit a call to memset(buf, V, M64 + 8), i.e. refill the whole scratch
+ * buffer with the byte V.  The expansion already ends in ';', so call
+ * sites do not add one.
+ */
+#define reset(V) \
+ jit_calli_3(_jit, memset, \
+ jit_operand_imm(JIT_OPERAND_ABI_POINTER, (jit_imm_t)buf),\
+ jit_operand_imm(JIT_OPERAND_ABI_INT32, V), \
+ jit_operand_imm(JIT_OPERAND_ABI_UINT32, M64 + 8));
+
+/*
+ * Emit one indexed integer store of type suffix T: JIT_R0 = V (value),
+ * JIT_R1 = O (offset), then jit_stxr<T> with JIT_R1, JIT_V0 and JIT_R0.
+ * JIT_V0 holds the base address set up by the ldst_* macros.  N is not
+ * used by the expansion.
+ */
+#define stx(T, N, O, V) \
+ jit_movi(_jit, JIT_R0, V); \
+ jit_movi(_jit, JIT_R1, O); \
+ jit_stxr##T(_jit, JIT_R1, JIT_V0, JIT_R0);
+
+/*
+ * Store V with type suffix T at the offsets M##B3 .. M##B26 (M is a or b),
+ * i.e. at +/-2^3 .. +/-2^26 -- every one a multiple of 8.
+ */
+#define stx8(T, M, V) \
+ stx(T, 3, (M##B3), V) \
+ stx(T, 4, (M##B4), V) \
+ stx(T, 5, (M##B5), V) \
+ stx(T, 6, (M##B6), V) \
+ stx(T, 7, (M##B7), V) \
+ stx(T, 8, (M##B8), V) \
+ stx(T, 9, (M##B9), V) \
+ stx(T, 10, (M##B10), V) \
+ stx(T, 11, (M##B11), V) \
+ stx(T, 12, (M##B12), V) \
+ stx(T, 13, (M##B13), V) \
+ stx(T, 14, (M##B14), V) \
+ stx(T, 15, (M##B15), V) \
+ stx(T, 16, (M##B16), V) \
+ stx(T, 17, (M##B17), V) \
+ stx(T, 18, (M##B18), V) \
+ stx(T, 19, (M##B19), V) \
+ stx(T, 20, (M##B20), V) \
+ stx(T, 21, (M##B21), V) \
+ stx(T, 22, (M##B22), V) \
+ stx(T, 23, (M##B23), V) \
+ stx(T, 24, (M##B24), V) \
+ stx(T, 25, (M##B25), V) \
+ stx(T, 26, (M##B26), V)
+
+/* stx4: the stx8 offsets plus M##B2 (+/-4), so every offset is a multiple of 4 */
+#define stx4(T, M, V) \
+ stx(T, 2, (M##B2), V) \
+ stx8(T, M, V)
+
+/* stx2: the stx4 offsets plus M##B1 (+/-2), so every offset is a multiple of 2 */
+#define stx2(T, M, V) \
+ stx(T, 1, (M##B1), V) \
+ stx4(T, M, V)
+
+/*
+ * Emit one integer load check of type suffix T: clear JIT_R0, load it via
+ * jit_ldxi<T> from base JIT_V0 with immediate offset O, then compare it
+ * with V; the jit_beqi is patched to land just past the call to abort().
+ * N and M are only passed to dump_args.
+ */
+#define ldx(T, N, M, O, V) \
+{ \
+ dump_args(T, N, M, O, V); \
+ jit_movi(_jit, JIT_R0, 0); \
+ jit_ldxi##T(_jit, JIT_R0, JIT_V0, O); \
+ jit_reloc_t r = jit_beqi(_jit, JIT_R0, V); \
+ jit_calli_0(_jit, abort); \
+ jit_patch_here(_jit, r); \
+}
+
+/*
+ * Check that a load of type suffix T yields V at each of the offsets
+ * M##B3 .. M##B26 (the same offsets stx8 writes).
+ */
+#define ldx8(T, M, V) \
+ ldx(T, 3, M, (M##B3), V) \
+ ldx(T, 4, M, (M##B4), V) \
+ ldx(T, 5, M, (M##B5), V) \
+ ldx(T, 6, M, (M##B6), V) \
+ ldx(T, 7, M, (M##B7), V) \
+ ldx(T, 8, M, (M##B8), V) \
+ ldx(T, 9, M, (M##B9), V) \
+ ldx(T, 10, M, (M##B10), V) \
+ ldx(T, 11, M, (M##B11), V) \
+ ldx(T, 12, M, (M##B12), V) \
+ ldx(T, 13, M, (M##B13), V) \
+ ldx(T, 14, M, (M##B14), V) \
+ ldx(T, 15, M, (M##B15), V) \
+ ldx(T, 16, M, (M##B16), V) \
+ ldx(T, 17, M, (M##B17), V) \
+ ldx(T, 18, M, (M##B18), V) \
+ ldx(T, 19, M, (M##B19), V) \
+ ldx(T, 20, M, (M##B20), V) \
+ ldx(T, 21, M, (M##B21), V) \
+ ldx(T, 22, M, (M##B22), V) \
+ ldx(T, 23, M, (M##B23), V) \
+ ldx(T, 24, M, (M##B24), V) \
+ ldx(T, 25, M, (M##B25), V) \
+ ldx(T, 26, M, (M##B26), V)
+
+/* ldx4: the ldx8 offsets plus M##B2 (the same offsets stx4 writes) */
+#define ldx4(T, M, V) \
+ ldx(T, 2, M, (M##B2), V) \
+ ldx8(T, M, V)
+
+/* ldx2: the ldx4 offsets plus M##B1 (the same offsets stx2 writes) */
+#define ldx2(T, M, V) \
+ ldx(T, 1, M, (M##B1), V) \
+ ldx4(T, M, V)
+
+/*
+ * Emit one indexed floating-point store of type suffix T (_f or _d):
+ * JIT_F0 = V (value), JIT_R0 = O (offset), then jit_stxr<T> with JIT_R0,
+ * JIT_V0 and JIT_F0.  N is not used by the expansion.
+ */
+#define stf(T, N, O, V) \
+ jit_movi##T(_jit, JIT_F0, V); \
+ jit_movi(_jit, JIT_R0, O); \
+ jit_stxr##T(_jit, JIT_R0, JIT_V0, JIT_F0);
+
+/*
+ * Floating-point counterpart of stx8: store V at the offsets
+ * M##B3 .. M##B26 (M is a or b).
+ */
+#define stf8(T, M, V) \
+ stf(T, 3, (M##B3), V) \
+ stf(T, 4, (M##B4), V) \
+ stf(T, 5, (M##B5), V) \
+ stf(T, 6, (M##B6), V) \
+ stf(T, 7, (M##B7), V) \
+ stf(T, 8, (M##B8), V) \
+ stf(T, 9, (M##B9), V) \
+ stf(T, 10, (M##B10), V) \
+ stf(T, 11, (M##B11), V) \
+ stf(T, 12, (M##B12), V) \
+ stf(T, 13, (M##B13), V) \
+ stf(T, 14, (M##B14), V) \
+ stf(T, 15, (M##B15), V) \
+ stf(T, 16, (M##B16), V) \
+ stf(T, 17, (M##B17), V) \
+ stf(T, 18, (M##B18), V) \
+ stf(T, 19, (M##B19), V) \
+ stf(T, 20, (M##B20), V) \
+ stf(T, 21, (M##B21), V) \
+ stf(T, 22, (M##B22), V) \
+ stf(T, 23, (M##B23), V) \
+ stf(T, 24, (M##B24), V) \
+ stf(T, 25, (M##B25), V) \
+ stf(T, 26, (M##B26), V)
+
+/* stf4: the stf8 offsets plus M##B2 (+/-4) */
+#define stf4(T, M, V) \
+ stf(T, 2, (M##B2), V) \
+ stf8(T, M, V)
+
+/*
+ * Emit one floating-point load check of type suffix T (_f or _d): set
+ * JIT_F0 to 0, load it via jit_ldxi<T> from base JIT_V0 with immediate
+ * offset O, materialise the expected value V in JIT_F1 and compare the two
+ * registers; the jit_beqr<T> is patched to land just past the call to
+ * abort().  N and M are only passed to dump_args.
+ */
+#define ldf(T, N, M, O, V) \
+{ \
+ dump_args(T, N, M, O, V); \
+ jit_movi##T(_jit, JIT_F0, 0); \
+ jit_ldxi##T(_jit, JIT_F0, JIT_V0, O); \
+ jit_movi##T(_jit, JIT_F1, V); \
+ jit_reloc_t r = jit_beqr##T(_jit, JIT_F0, JIT_F1); \
+ jit_calli_0(_jit, abort); \
+ jit_patch_here(_jit, r); \
+}
+
+/*
+ * Floating-point counterpart of ldx8: check that a load yields V at each
+ * of the offsets M##B3 .. M##B26 (the same offsets stf8 writes).
+ */
+#define ldf8(T, M, V) \
+ ldf(T, 3, M, (M##B3), V) \
+ ldf(T, 4, M, (M##B4), V) \
+ ldf(T, 5, M, (M##B5), V) \
+ ldf(T, 6, M, (M##B6), V) \
+ ldf(T, 7, M, (M##B7), V) \
+ ldf(T, 8, M, (M##B8), V) \
+ ldf(T, 9, M, (M##B9), V) \
+ ldf(T, 10, M, (M##B10), V) \
+ ldf(T, 11, M, (M##B11), V) \
+ ldf(T, 12, M, (M##B12), V) \
+ ldf(T, 13, M, (M##B13), V) \
+ ldf(T, 14, M, (M##B14), V) \
+ ldf(T, 15, M, (M##B15), V) \
+ ldf(T, 16, M, (M##B16), V) \
+ ldf(T, 17, M, (M##B17), V) \
+ ldf(T, 18, M, (M##B18), V) \
+ ldf(T, 19, M, (M##B19), V) \
+ ldf(T, 20, M, (M##B20), V) \
+ ldf(T, 21, M, (M##B21), V) \
+ ldf(T, 22, M, (M##B22), V) \
+ ldf(T, 23, M, (M##B23), V) \
+ ldf(T, 24, M, (M##B24), V) \
+ ldf(T, 25, M, (M##B25), V) \
+ ldf(T, 26, M, (M##B26), V)
+/* ldf4: the ldf8 offsets plus M##B2 (the same offsets stf4 writes) */
+#define ldf4(T, M, V) \
+ ldf(T, 2, M, (M##B2), V) \
+ ldf8(T, M, V)
+
+/*
+ * 8-bit (_c) store/load round trip with the value 0x5a.
+ * Pass 1: base JIT_V0 = buf, positive offsets from the a table.
+ * Pass 2: base JIT_V0 = buf + M64, negative offsets from the b table.
+ * Each pass starts by refilling the buffer with 0xa5 via reset().
+ */
+#define ldst_c() \
+ reset(0xa5) \
+ jit_movi(_jit, JIT_V0, (jit_imm_t)buf); \
+ stx2(_c, a, 0x5a) \
+ ldx2(_c, a, 0x5a) \
+ reset(0xa5) \
+ jit_movi(_jit, JIT_V0, (jit_imm_t)(buf + M64)); \
+ stx2(_c, b, 0x5a) \
+ ldx2(_c, b, 0x5a)
+
+/*
+ * 8-bit store (_c) followed by an unsigned 8-bit load (_uc), with 0x5a as
+ * both the stored and the expected value.
+ * Pass 1: base JIT_V0 = buf, positive offsets from the a table.
+ * Pass 2: base JIT_V0 = buf + M64, negative offsets from the b table.
+ * Each pass starts by refilling the buffer with 0xa5 via reset(), as in
+ * every other ldst_* macro, so the second pass never reads bytes left
+ * behind by the first.
+ */
+#define ldst_uc() \
+ reset(0xa5) \
+ jit_movi(_jit, JIT_V0, (jit_imm_t)buf); \
+ stx2(_c, a, 0x5a) \
+ ldx2(_uc, a, 0x5a) \
+ reset(0xa5) \
+ jit_movi(_jit, JIT_V0, (jit_imm_t)(buf + M64)); \
+ stx2(_c, b, 0x5a) \
+ ldx2(_uc, b, 0x5a)
+
+/*
+ * 16-bit (_s) store/load round trip with the value 0x5a5a.
+ * Pass 1: base JIT_V0 = buf, positive offsets from the a table.
+ * Pass 2: base JIT_V0 = buf + M64, negative offsets from the b table.
+ * Each pass starts by refilling the buffer with 0xa5 via reset().
+ */
+#define ldst_s() \
+ reset(0xa5) \
+ jit_movi(_jit, JIT_V0, (jit_imm_t)buf); \
+ stx2(_s, a, 0x5a5a) \
+ ldx2(_s, a, 0x5a5a) \
+ reset(0xa5) \
+ jit_movi(_jit, JIT_V0, (jit_imm_t)(buf + M64)); \
+ stx2(_s, b, 0x5a5a) \
+ ldx2(_s, b, 0x5a5a)
+
+/*
+ * 16-bit store (_s) followed by an unsigned 16-bit load (_us), with
+ * 0x5a5a as both the stored and the expected value.
+ * Pass 1: base JIT_V0 = buf, positive offsets from the a table.
+ * Pass 2: base JIT_V0 = buf + M64, negative offsets from the b table.
+ * Each pass starts by refilling the buffer with 0xa5 via reset().
+ */
+#define ldst_us() \
+ reset(0xa5) \
+ jit_movi(_jit, JIT_V0, (jit_imm_t)buf); \
+ stx2(_s, a, 0x5a5a) \
+ ldx2(_us, a, 0x5a5a) \
+ reset(0xa5) \
+ jit_movi(_jit, JIT_V0, (jit_imm_t)(buf + M64)); \
+ stx2(_s, b, 0x5a5a) \
+ ldx2(_us, b, 0x5a5a)
+
+/*
+ * 32-bit (_i) store/load round trip with the value 0x5a5a5a5a.
+ * Pass 1: base JIT_V0 = buf, positive offsets from the a table.
+ * Pass 2: base JIT_V0 = buf + M64, negative offsets from the b table.
+ * Each pass starts by refilling the buffer with 0xa5 via reset().
+ */
+#define ldst_i() \
+ reset(0xa5) \
+ jit_movi(_jit, JIT_V0, (jit_imm_t)buf); \
+ stx4(_i, a, 0x5a5a5a5a) \
+ ldx4(_i, a, 0x5a5a5a5a) \
+ reset(0xa5) \
+ jit_movi(_jit, JIT_V0, (jit_imm_t)(buf + M64)); \
+ stx4(_i, b, 0x5a5a5a5a) \
+ ldx4(_i, b, 0x5a5a5a5a)
+
+/*
+ * 32-bit store (_i) followed by an unsigned 32-bit load (_ui), with
+ * 0x5a5a5a5a as both the stored and the expected value.  Only expanded
+ * when __WORDSIZE == 64 (see run_test).
+ * Pass 1: base JIT_V0 = buf, positive offsets from the a table.
+ * Pass 2: base JIT_V0 = buf + M64, negative offsets from the b table.
+ * Each pass starts by refilling the buffer with 0xa5 via reset().
+ */
+#define ldst_ui() \
+ reset(0xa5) \
+ jit_movi(_jit, JIT_V0, (jit_imm_t)buf); \
+ stx4(_i, a, 0x5a5a5a5a) \
+ ldx4(_ui, a, 0x5a5a5a5a) \
+ reset(0xa5) \
+ jit_movi(_jit, JIT_V0, (jit_imm_t)(buf + M64)); \
+ stx4(_i, b, 0x5a5a5a5a) \
+ ldx4(_ui, b, 0x5a5a5a5a)
+
+/*
+ * 64-bit (_l) store/load round trip with the value 0x5a5a5a5a5a5a5a5a.
+ * Only expanded when __WORDSIZE == 64 (see run_test).
+ * Pass 1: base JIT_V0 = buf, positive offsets from the a table.
+ * Pass 2: base JIT_V0 = buf + M64, negative offsets from the b table.
+ * Each pass starts by refilling the buffer with 0xa5 via reset().
+ */
+#define ldst_l() \
+ reset(0xa5) \
+ jit_movi(_jit, JIT_V0, (jit_imm_t)buf); \
+ stx8(_l, a, 0x5a5a5a5a5a5a5a5a) \
+ ldx8(_l, a, 0x5a5a5a5a5a5a5a5a) \
+ reset(0xa5) \
+ jit_movi(_jit, JIT_V0, (jit_imm_t)(buf + M64)); \
+ stx8(_l, b, 0x5a5a5a5a5a5a5a5a) \
+ ldx8(_l, b, 0x5a5a5a5a5a5a5a5a)
+
+/*
+ * Single-precision (_f) store/load round trip with the value 0.5.
+ * Pass 1: base JIT_V0 = buf, positive offsets from the a table.
+ * Pass 2: base JIT_V0 = buf + M64, negative offsets from the b table.
+ * Each pass starts by refilling the buffer with 0xa5 via reset().
+ */
+#define ldst_f() \
+ reset(0xa5) \
+ jit_movi(_jit, JIT_V0, (jit_imm_t)buf); \
+ stf4(_f, a, 0.5) \
+ ldf4(_f, a, 0.5) \
+ reset(0xa5) \
+ jit_movi(_jit, JIT_V0, (jit_imm_t)(buf + M64)); \
+ stf4(_f, b, 0.5) \
+ ldf4(_f, b, 0.5)
+
+/*
+ * Double-precision (_d) store/load round trip with the value 0.5.
+ * Pass 1: base JIT_V0 = buf, positive offsets from the a table.
+ * Pass 2: base JIT_V0 = buf + M64, negative offsets from the b table.
+ * Each pass starts by refilling the buffer with 0xa5 via reset().
+ */
+#define ldst_d() \
+ reset(0xa5) \
+ jit_movi(_jit, JIT_V0, (jit_imm_t)buf); \
+ stf8(_d, a, 0.5) \
+ ldf8(_d, a, 0.5) \
+ reset(0xa5) \
+ jit_movi(_jit, JIT_V0, (jit_imm_t)(buf + M64)); \
+ stf8(_d, b, 0.5) \
+ ldf8(_d, b, 0.5)
+
+/*
+ * Emit every ranger check into the arena and run the result.
+ *
+ * Returns 0 once the generated function has been executed.  If jit_end()
+ * yields no function, nothing is run and the size it reported is returned
+ * instead (presumably so main_compiler can retry with a bigger arena --
+ * confirm in test.h).
+ */
+static size_t
+run_test(jit_state_t *_jit, uint8_t *arena_base, size_t arena_size)
+{
+	jit_begin(_jit, arena_base, arena_size);
+	size_t frame = jit_enter_jit_abi(_jit, 3, 0, 0);
+
+	/* scratch memory whose address is baked into the load/store checks */
+	char *buf = malloc(M64 + 8);
+	ASSERT(buf);
+
+	alu(add)
+	alu(sub)
+	alu(mul)
+	alu(div)
+	alu(rem)
+	lsh()
+	rsh()
+	alu(and)
+	alu(or)
+	alu(xor)
+	ldst_c()
+	ldst_uc()
+	ldst_s()
+	ldst_us()
+	ldst_i()
+#if __WORDSIZE == 64
+	ldst_ui()
+	ldst_l()
+#endif
+	ldst_f()
+	ldst_d()
+
+	jit_leave_jit_abi(_jit, 3, 0, frame);
+	jit_ret(_jit);
+
+	size_t size = 0;
+	void (*function)(void) = jit_end(_jit, &size);
+
+	/* a successful run reports 0, whatever length jit_end stored */
+	if (function) {
+		(*function)();
+		size = 0;
+	}
+
+	free(buf);
+	return size;
+}
+
+/* Program entry point: hand run_test to the shared main_compiler driver. */
+int main(int argc, char *argv[])
+{
+	const int status = main_compiler(argc, argv, run_test);
+	return status;
+}
diff --git a/deps/lightening/tests/z_stack.c b/deps/lightening/tests/z_stack.c
new file mode 100644
index 0000000..0ee9590
--- /dev/null
+++ b/deps/lightening/tests/z_stack.c
@@ -0,0 +1,374 @@
+#include "test.h"
+
+/*
+ * Debug tracing.  With DEBUG defined, emits a call to puts() whose
+ * argument is the three macro arguments stringified and joined by single
+ * spaces.  Without DEBUG it expands to nothing.  Unlike a plain call, the
+ * DEBUG expansion already ends in ';', so it may be used with or without
+ * a trailing semicolon.
+ */
+#if defined(DEBUG)
+#define dump_args(N, M, T) \
+ jit_calli_1(_jit, puts, \
+ jit_operand_imm(JIT_OPERAND_ABI_POINTER, \
+ (jit_imm_t)#N " " #M " " #T));
+#else
+#define dump_args(N, M, T)
+#endif
+
+/*
+ * Element size in bytes for each type suffix; looked up by pasting the
+ * suffix onto "szof" (szof##T).  The _ui and _l sizes exist only on
+ * 64-bit builds.  szof_max is not referenced anywhere else in this file.
+ */
+#define szof_c 1
+#define szof_uc szof_c
+#define szof_s 2
+#define szof_us szof_s
+#define szof_i 4
+#if __WORDSIZE == 64
+# define szof_ui szof_i
+# define szof_l 8
+#endif
+#define szof_max 8
+
+/*
+ * JIT operand ABI kind for each type suffix; looked up by pasting the
+ * suffix onto "operand" (operand##T) in ARG and PUSH.
+ */
+#define operand_c JIT_OPERAND_ABI_INT8
+#define operand_uc JIT_OPERAND_ABI_UINT8
+#define operand_s JIT_OPERAND_ABI_INT16
+#define operand_us JIT_OPERAND_ABI_UINT16
+#define operand_i JIT_OPERAND_ABI_INT32
+#define operand_ui JIT_OPERAND_ABI_UINT32
+#define operand_l JIT_OPERAND_ABI_INT64
+#define operand_ul JIT_OPERAND_ABI_UINT64
+#define operand_f JIT_OPERAND_ABI_FLOAT
+#define operand_d JIT_OPERAND_ABI_DOUBLE
+
+/*
+ * Emit the helper routine fill<T>(ptr, count) and declare a local
+ * `void *fill<T>` holding its address.
+ *
+ * The routine loads ptr into JIT_V0 and count into JIT_R0, computes the
+ * end address JIT_V1 = ptr + count * szof<T>, then stores the counter
+ * values 0, 1, 2, ... as consecutive T-sized elements until JIT_V0
+ * reaches JIT_V1.
+ */
+#define FILL(T) \
+ void *fill##T = jit_address(_jit); \
+{ \
+ size_t frame = jit_enter_jit_abi(_jit, 2, 0, 0); \
+ jit_load_args_2(_jit, \
+ jit_operand_gpr(JIT_OPERAND_ABI_POINTER, JIT_V0), \
+ jit_operand_gpr(JIT_OPERAND_ABI_UINT32, JIT_R0)); \
+ \
+ jit_muli(_jit, JIT_R0, JIT_R0, szof##T); \
+ jit_addr(_jit, JIT_V1, JIT_V0, JIT_R0); \
+ jit_movi(_jit, JIT_R0, 0); \
+ \
+ void *loop = jit_address(_jit); \
+ jit_reloc_t done = jit_bger(_jit, JIT_V0, JIT_V1); \
+ jit_str##T(_jit, JIT_V0, JIT_R0); \
+ jit_addi(_jit, JIT_R0, JIT_R0, 1); \
+ jit_addi(_jit, JIT_V0, JIT_V0, szof##T); \
+ jit_jmpi(_jit, loop); \
+ \
+ jit_patch_here(_jit, done); \
+ jit_leave_jit_abi(_jit, 2, 0, frame); \
+ jit_ret(_jit); \
+}
+
+/*
+ * run_test only emits FILL for _c, _s, _i (and _l); the unsigned suffixes
+ * reuse the fill routine of the signed type with the same element size.
+ */
+#define fill_uc fill_c
+#define fill_us fill_s
+#define fill_ui fill_i
+
+/*
+ * Destination operands for the incoming arguments of test<T>_<N>.
+ *
+ * ARG(T, N) is a memory operand of kind operand<T> at
+ * JIT_SP - (N + 1) * szof<T>, i.e. argument N is saved just below the
+ * stack pointer.  ARGk(K, T) expands to the comma-separated list for
+ * arguments 0 .. k-1; K is pasted after "ARG" to select the per-argument
+ * macro (empty K selects ARG).  ARG_<t>(N) picks the list of length N for
+ * type suffix _<t>.
+ * NOTE(review): ARG_f and ARG_d pass K = F, which needs an ARGF macro
+ * that this file does not define -- confirm whether test.h provides it.
+ */
+#define ARG( T, N) jit_operand_mem(operand##T, JIT_SP, - ((N + 1) * szof##T))
+
+#define ARG1( K, T) ARG##K(T, 0)
+#define ARG2( K, T) ARG1( K, T), ARG##K(T, 1)
+#define ARG3( K, T) ARG2( K, T), ARG##K(T, 2)
+#define ARG4( K, T) ARG3( K, T), ARG##K(T, 3)
+#define ARG5( K, T) ARG4( K, T), ARG##K(T, 4)
+#define ARG6( K, T) ARG5( K, T), ARG##K(T, 5)
+#define ARG7( K, T) ARG6( K, T), ARG##K(T, 6)
+#define ARG8( K, T) ARG7( K, T), ARG##K(T, 7)
+#define ARG9( K, T) ARG8( K, T), ARG##K(T, 8)
+#define ARG10(K, T) ARG9( K, T), ARG##K(T, 9)
+#define ARG11(K, T) ARG10(K, T), ARG##K(T, 10)
+#define ARG12(K, T) ARG11(K, T), ARG##K(T, 11)
+#define ARG13(K, T) ARG12(K, T), ARG##K(T, 12)
+#define ARG14(K, T) ARG13(K, T), ARG##K(T, 13)
+#define ARG15(K, T) ARG14(K, T), ARG##K(T, 14)
+#define ARG16(K, T) ARG15(K, T), ARG##K(T, 15)
+#define ARG_c(N) ARG##N( , _c)
+#define ARG_uc(N) ARG##N( , _uc)
+#define ARG_s(N) ARG##N( , _s)
+#define ARG_us(N) ARG##N( , _us)
+#define ARG_i(N) ARG##N( , _i)
+#define ARG_ui(N) ARG##N( , _ui)
+#define ARG_l(N) ARG##N( , _l)
+#define ARG_f(N) ARG##N(F, _f)
+#define ARG_d(N) ARG##N(F, _d)
+
+/*
+ * Check one saved argument: load argument V from
+ * JIT_SP + arg_space - (V + 1) * szof<T> into JIT_R0 and element V of the
+ * buffer at JIT_V0 into JIT_R1, then compare them; the jit_beqr is patched
+ * to land just past the call to abort().  Relies on a variable named
+ * arg_space being in scope at the expansion site (DEFN provides it).
+ * N is not used by the expansion.
+ */
+#define CHK(N, T, V) \
+{ \
+ jit_ldxi##T(_jit, JIT_R0, JIT_SP, arg_space - ((V + 1) * szof##T)); \
+ jit_ldxi##T(_jit, JIT_R1, JIT_V0, (V * szof##T)); \
+ jit_reloc_t r = jit_beqr(_jit, JIT_R0, JIT_R1); \
+ jit_calli_0(_jit, abort); \
+ jit_patch_here(_jit, r); \
+}
+
+/*
+ * GETk(K, N, T, V) expands to one CHK##K per argument index 0 .. k-1; the
+ * V parameter is only forwarded down the chain and never reaches CHK.
+ * GET_<t>(N, M) selects the chain of length N for type suffix _<t>.
+ * NOTE(review): GET_f and GET_d pass K = F, which needs a CHKF macro that
+ * this file does not define -- confirm whether test.h provides it.
+ */
+#define GET1( K, N, T, V) CHK##K(N, T, 0)
+#define GET2( K, N, T, V) GET1( K, N, T, V) CHK##K(N, T, 1)
+#define GET3( K, N, T, V) GET2( K, N, T, V) CHK##K(N, T, 2)
+#define GET4( K, N, T, V) GET3( K, N, T, V) CHK##K(N, T, 3)
+#define GET5( K, N, T, V) GET4( K, N, T, V) CHK##K(N, T, 4)
+#define GET6( K, N, T, V) GET5( K, N, T, V) CHK##K(N, T, 5)
+#define GET7( K, N, T, V) GET6( K, N, T, V) CHK##K(N, T, 6)
+#define GET8( K, N, T, V) GET7( K, N, T, V) CHK##K(N, T, 7)
+#define GET9( K, N, T, V) GET8( K, N, T, V) CHK##K(N, T, 8)
+#define GET10(K, N, T, V) GET9( K, N, T, V) CHK##K(N, T, 9)
+#define GET11(K, N, T, V) GET10(K, N, T, V) CHK##K(N, T, 10)
+#define GET12(K, N, T, V) GET11(K, N, T, V) CHK##K(N, T, 11)
+#define GET13(K, N, T, V) GET12(K, N, T, V) CHK##K(N, T, 12)
+#define GET14(K, N, T, V) GET13(K, N, T, V) CHK##K(N, T, 13)
+#define GET15(K, N, T, V) GET14(K, N, T, V) CHK##K(N, T, 14)
+#define GET16(K, N, T, V) GET15(K, N, T, V) CHK##K(N, T, 15)
+
+#define GET_c(N, M) GET##N( , c##N, _c, M)
+#define GET_uc(N, M) GET##N( , uc##N, _uc, M)
+#define GET_s(N, M) GET##N( , s##N, _s, M)
+#define GET_us(N, M) GET##N( , us##N, _us, M)
+#define GET_i(N, M) GET##N( , i##N, _i, M)
+#define GET_ui(N, M) GET##N( , ui##N, _ui, M)
+#define GET_l(N, M) GET##N( , l##N, _l, M)
+#define GET_f(N, M) GET##N(F, f##N, _f, M)
+#define GET_d(N, M) GET##N(F, d##N, _d, M)
+
+/*
+ * Outgoing call arguments.  PUSH(T, V) is an immediate operand of kind
+ * operand<T> with value V.  PUSHk(K, T) expands to the comma-separated
+ * immediates 0 .. k-1 (PUSH0 to nothing) -- the same values fill<T>
+ * writes into the buffer.  PUSH_<t>(N) selects the list of length N for
+ * type suffix _<t>.
+ * NOTE(review): PUSH_f and PUSH_d pass K = F, which needs a PUSHF macro
+ * that this file does not define -- confirm whether test.h provides it.
+ */
+#define PUSH( T, V) jit_operand_imm(operand##T, V)
+#define PUSH0( K, T) /**/
+#define PUSH1( K, T) PUSH##K(T, 0)
+#define PUSH2( K, T) PUSH1( K, T), PUSH##K(T, 1)
+#define PUSH3( K, T) PUSH2( K, T), PUSH##K(T, 2)
+#define PUSH4( K, T) PUSH3( K, T), PUSH##K(T, 3)
+#define PUSH5( K, T) PUSH4( K, T), PUSH##K(T, 4)
+#define PUSH6( K, T) PUSH5( K, T), PUSH##K(T, 5)
+#define PUSH7( K, T) PUSH6( K, T), PUSH##K(T, 6)
+#define PUSH8( K, T) PUSH7( K, T), PUSH##K(T, 7)
+#define PUSH9( K, T) PUSH8( K, T), PUSH##K(T, 8)
+#define PUSH10(K, T) PUSH9( K, T), PUSH##K(T, 9)
+#define PUSH11(K, T) PUSH10(K, T), PUSH##K(T, 10)
+#define PUSH12(K, T) PUSH11(K, T), PUSH##K(T, 11)
+#define PUSH13(K, T) PUSH12(K, T), PUSH##K(T, 12)
+#define PUSH14(K, T) PUSH13(K, T), PUSH##K(T, 13)
+#define PUSH15(K, T) PUSH14(K, T), PUSH##K(T, 14)
+#define PUSH16(K, T) PUSH15(K, T), PUSH##K(T, 15)
+
+#define PUSH_c( N) PUSH##N( , _c)
+#define PUSH_uc(N) PUSH##N( , _uc)
+#define PUSH_s( N) PUSH##N( , _s)
+#define PUSH_us(N) PUSH##N( , _us)
+#define PUSH_i( N) PUSH##N( , _i)
+#define PUSH_ui(N) PUSH##N( , _ui)
+#define PUSH_l( N) PUSH##N( , _l)
+#define PUSH_f( N) PUSH##N(F, _f)
+#define PUSH_d( N) PUSH##N(F, _d)
+
+/*
+ * Bottom function of the call chain: emits test<T>_0, which only enters
+ * and leaves the JIT ABI frame (plus the optional debug trace), and
+ * declares a local `void *test<T>_0` holding its address.
+ */
+#define DEF0(T) \
+ void *test##T##_0 = jit_address(_jit); \
+{ \
+ size_t frame = jit_enter_jit_abi(_jit, 0, 0, 0); \
+ dump_args(0, 0, T); \
+ jit_leave_jit_abi(_jit, 0, 0, frame); \
+ jit_ret(_jit); \
+}
+
+/*
+ * Middle function of the call chain: emits test<T>_<N>, which receives a
+ * buffer pointer plus N arguments of type T and calls test<T>_<M>
+ * (DEF always uses M = N - 1).
+ *
+ * Steps of the generated routine:
+ *  1. load the buffer pointer into JIT_V0 and save the N arguments below
+ *     JIT_SP, then reserve that area (arg_space);
+ *  2. check every saved argument against the caller's buffer (GET/CHK);
+ *  3. snapshot the caller's buffer into a malloc'ed copy (JIT_V1);
+ *  4. reserve a stack buffer for the callee (JIT_V2), fill it with fill<T>
+ *     and call test<T>_<M> with matching immediates;
+ *  5. after the call, memcmp the snapshot against the caller's buffer and
+ *     abort on any difference, then free the snapshot.
+ *
+ * stack frame:
+ * | lightening reserved stuff - frame |
+ * |-----------------------------------| <- JIT_SP at entry
+ * | argument save area - arg_space |
+ * |-----------------------------------| <- JIT_SP during argument validation
+ * | stack buffer - stack |
+ * |-----------------------------------| <- JIT_SP during next call
+ *
+ * at entry, first push arguments in ascending order (0, 1, 2, ...)
+ * to stack, and afterwards move JIT_SP forward to not mess with the internal
+ * load_args stuff.
+ */
+#define DEFN(N, M, T) \
+ void *test##T##_##N = jit_address(_jit); \
+{ \
+ size_t frame = jit_enter_jit_abi(_jit, 3, 0, 0); \
+ jit_operand_t args[] = \
+ {jit_operand_gpr(JIT_OPERAND_ABI_POINTER, JIT_V0), \
+ ARG##T(N)}; \
+ jit_load_args(_jit, N + 1, args); \
+ \
+ size_t arg_space = jit_align_stack(_jit, N * szof##T); \
+ \
+ dump_args(N, M, T); \
+ \
+ /* validate arguments */ \
+ GET##T(N, M) \
+ \
+ /* heap buffer in %v1 */ \
+ jit_calli_1(_jit, malloc, \
+ jit_operand_imm(JIT_OPERAND_ABI_UINT32, N * szof##T)); \
+ jit_retval(_jit, JIT_V1); \
+ \
+ /* copy stack buffer to heap buffer */ \
+ jit_calli_3(_jit, memcpy, \
+ jit_operand_gpr(JIT_OPERAND_ABI_POINTER, JIT_V1), \
+ jit_operand_gpr(JIT_OPERAND_ABI_POINTER, JIT_V0), \
+ jit_operand_imm(JIT_OPERAND_ABI_UINT32, N * szof##T)); \
+ \
+ /* stack buffer for next function in %v2 */ \
+ size_t stack = jit_align_stack(_jit, M * szof##T); \
+ jit_movr(_jit, JIT_V2, JIT_SP); \
+ \
+ /* fill stack buffer for next function */ \
+ jit_calli_2(_jit, fill##T, \
+ jit_operand_gpr(JIT_OPERAND_ABI_POINTER, JIT_V2), \
+ jit_operand_imm(JIT_OPERAND_ABI_UINT32, M)); \
+ \
+ /* call next function */ \
+ jit_operand_t call_args[] = \
+ {jit_operand_gpr(JIT_OPERAND_ABI_POINTER, JIT_V2), \
+ PUSH##T(M)}; \
+ jit_calli(_jit, test##T##_##M, M + 1, call_args); \
+ \
+ /* validate stack buffer */ \
+ jit_calli_3(_jit, memcmp, \
+ jit_operand_gpr(JIT_OPERAND_ABI_POINTER, JIT_V1), \
+ jit_operand_gpr(JIT_OPERAND_ABI_POINTER, JIT_V0), \
+ jit_operand_imm(JIT_OPERAND_ABI_UINT32, N * szof##T)); \
+ jit_retval(_jit, JIT_R0); \
+ jit_reloc_t r = jit_beqi(_jit, JIT_R0, 0); \
+ jit_calli_0(_jit, abort); \
+ jit_patch_here(_jit, r); \
+ \
+ /* release heap buffer */ \
+ jit_calli_1(_jit, free, \
+ jit_operand_gpr(JIT_OPERAND_ABI_POINTER, JIT_V1)); \
+ jit_shrink_stack(_jit, arg_space); \
+ jit_shrink_stack(_jit, stack); \
+ jit_leave_jit_abi(_jit, 3, 0, frame); \
+ jit_ret(_jit); \
+}
+
+/*
+ * Top function of the call chain: emits test<T>_17, which takes no
+ * arguments itself.  It reserves a 16-element stack buffer (JIT_V2),
+ * fills it with fill<T>, snapshots it into a malloc'ed copy (JIT_V1),
+ * calls test<T>_16 with the buffer pointer and the immediates 0 .. 15,
+ * and afterwards aborts if memcmp reports that the stack buffer differs
+ * from the snapshot.
+ */
+#define DEFX(T) \
+ void *test##T##_17 = jit_address(_jit); \
+{ \
+ size_t frame = jit_enter_jit_abi(_jit, 3, 0, 0); \
+ size_t arg_space = jit_align_stack(_jit, 16 * szof##T); \
+ \
+ dump_args(17, top, T) \
+ \
+ /* heap buffer in %v1 */ \
+ jit_calli_1(_jit, malloc, \
+ jit_operand_imm(JIT_OPERAND_ABI_UINT32, 16 * szof##T)); \
+ jit_retval(_jit, JIT_V1); \
+ \
+ /* stack buffer for next function in %v2 */ \
+ size_t stack = jit_align_stack(_jit, 16 * szof##T); \
+ jit_movr(_jit, JIT_V2, JIT_SP); \
+ \
+ /* fill stack buffer for next function */ \
+ jit_calli_2(_jit, fill##T, \
+ jit_operand_gpr(JIT_OPERAND_ABI_POINTER, JIT_V2), \
+ jit_operand_imm(JIT_OPERAND_ABI_UINT32, 16)); \
+ \
+ /* copy stack buffer to heap buffer */ \
+ jit_calli_3(_jit, memcpy, \
+ jit_operand_gpr(JIT_OPERAND_ABI_POINTER, JIT_V1), \
+ jit_operand_gpr(JIT_OPERAND_ABI_POINTER, JIT_V2), \
+ jit_operand_imm(JIT_OPERAND_ABI_UINT32, 16 * szof##T)); \
+ \
+ /* call next function */ \
+ jit_operand_t args[] = \
+ {jit_operand_gpr(JIT_OPERAND_ABI_POINTER, JIT_V2), \
+ PUSH##T(16)}; \
+ jit_calli(_jit, test##T##_16, 17, args); \
+ \
+ /* validate stack buffer */ \
+ jit_calli_3(_jit, memcmp, \
+ jit_operand_gpr(JIT_OPERAND_ABI_POINTER, JIT_V1), \
+ jit_operand_gpr(JIT_OPERAND_ABI_POINTER, JIT_V2), \
+ jit_operand_imm(JIT_OPERAND_ABI_UINT32, 16 * szof##T)); \
+ jit_retval(_jit, JIT_R0); \
+ jit_reloc_t r = jit_beqi(_jit, JIT_R0, 0); \
+ jit_calli_0(_jit, abort); \
+ jit_patch_here(_jit, r); \
+ \
+ /* release heap buffer */ \
+ jit_calli_1(_jit, free, \
+ jit_operand_gpr(JIT_OPERAND_ABI_POINTER, JIT_V1)); \
+ /* technically speaking not necessary */ \
+ /* jit_leave_jit_abi will shrink stack for us */ \
+ jit_shrink_stack(_jit, arg_space); \
+ jit_shrink_stack(_jit, stack); \
+ jit_leave_jit_abi(_jit, 3, 0, frame); \
+ jit_ret(_jit); \
+}
+
+/*
+ * Emit the complete call chain for type suffix T: test<T>_0, then
+ * test<T>_1 .. test<T>_16 (each calling the one below it), and finally the
+ * top function test<T>_17.  The order matters: every DEFN refers to the
+ * variable declared by the previous expansion.
+ */
+#define DEF( T) \
+ DEF0( T) \
+ DEFN( 1, 0, T) \
+ DEFN( 2, 1, T) \
+ DEFN( 3, 2, T) \
+ DEFN( 4, 3, T) \
+ DEFN( 5, 4, T) \
+ DEFN( 6, 5, T) \
+ DEFN( 7, 6, T) \
+ DEFN( 8, 7, T) \
+ DEFN( 9, 8, T) \
+ DEFN(10, 9, T) \
+ DEFN(11, 10, T) \
+ DEFN(12, 11, T) \
+ DEFN(13, 12, T) \
+ DEFN(14, 13, T) \
+ DEFN(15, 14, T) \
+ DEFN(16, 15, T) \
+ DEFX(T)
+
+/* Emit an argument-less call to the top function test<T>_17 of a chain. */
+#define CALL(T) jit_calli_0(_jit, test##T##_17);
+
+/*
+ * Build and run the stack test.
+ *
+ * Layout of the generated code: a jump over the helper routines, the
+ * fill<T> helpers, one call chain per element type (DEF), and finally the
+ * entry code that calls the top function of every chain.
+ *
+ * Returns 0 once the generated function has been executed.  If jit_end()
+ * yields no function, nothing is run and the size it reported is returned
+ * instead (presumably so main_compiler can retry with a bigger arena --
+ * confirm in test.h).
+ */
+static size_t
+run_test(jit_state_t *_jit, uint8_t *arena_base, size_t arena_size)
+{
+	jit_begin(_jit, arena_base, arena_size);
+	/* the generated entry code ends in a plain jit_ret: no result */
+	void (*function)(void);
+
+	/* execution starts at the arena base, so skip the helpers below */
+	jit_reloc_t entry = jit_jmp(_jit);
+
+	FILL(_c)
+	FILL(_s)
+	FILL(_i)
+#if __WORDSIZE == 64
+	FILL(_l)
+#endif
+
+	DEF(_c)
+	DEF(_uc)
+	DEF(_s)
+	DEF(_us)
+	DEF(_i)
+#if __WORDSIZE == 64
+	DEF(_ui)
+	DEF(_l)
+#endif
+
+	jit_patch_here(_jit, entry);
+	/* not sure about the actual number of registers, but too many can't
+	 * hurt. */
+	size_t frame = jit_enter_jit_abi(_jit, 3, 0, 0);
+
+	CALL(_c)
+	CALL(_uc)
+	CALL(_s)
+	CALL(_us)
+	CALL(_i)
+#if __WORDSIZE == 64
+	CALL(_ui)
+	CALL(_l)
+#endif
+
+	jit_leave_jit_abi(_jit, 3, 0, frame);
+	jit_ret(_jit);
+
+	/* start from a defined value in case jit_end leaves it untouched */
+	size_t size = 0;
+	function = jit_end(_jit, &size);
+
+	if (!function)
+		return size;
+
+	(*function)();
+	return 0;
+}
+
+/* Program entry point: hand run_test to the shared main_compiler driver. */
+int main(int argc, char *argv[])
+{
+	const int status = main_compiler(argc, argv, run_test);
+	return status;
+}