diff options
Diffstat (limited to 'deps/lightening')
256 files changed, 50415 insertions, 0 deletions
diff --git a/deps/lightening/.gitignore b/deps/lightening/.gitignore new file mode 100644 index 0000000..d2a82cf --- /dev/null +++ b/deps/lightening/.gitignore @@ -0,0 +1,4 @@ +*.o ++* +/lightning.info +/tests/test-* diff --git a/deps/lightening/.gitlab-ci.yml b/deps/lightening/.gitlab-ci.yml new file mode 100644 index 0000000..955a8c8 --- /dev/null +++ b/deps/lightening/.gitlab-ci.yml @@ -0,0 +1,71 @@ +image: debian:stable + +before_script: + - apt-get update -qq + - apt-get install -y + make qemu binfmt-support qemu-user-static qemu-user + - update-binfmts --enable + +x86-64: + stage: test + script: + - dpkg --add-architecture amd64 + - apt-get update -qq + - apt-get install -y libc6-dev:amd64 gcc + - make -C tests test-native + +i686: + stage: test + script: + - dpkg --add-architecture i386 + - apt-get update -qq + - apt-get install -y gcc-i686-linux-gnu libc6-dev-i386-cross libc6:i386 + - make -C tests test-ia32 CC_IA32=i686-linux-gnu-gcc + +aarch64: + stage: test + script: + - dpkg --add-architecture arm64 + - apt-get update -qq + - apt-get install -y gcc-aarch64-linux-gnu libc6-dev-arm64-cross libc6:arm64 + - make -C tests test-aarch64 CC_AARCH64=aarch64-linux-gnu-gcc + +armhf: + stage: test + script: + - dpkg --add-architecture armhf + - apt-get update -qq + - apt-get install -y gcc-arm-linux-gnueabihf libc6-dev-armhf-cross libc6:armhf + - make -C tests test-armv7 CC_ARMv7="arm-linux-gnueabihf-gcc -marm" + +armhf-thumb: + stage: test + script: + - dpkg --add-architecture armhf + - apt-get update -qq + - apt-get install -y gcc-arm-linux-gnueabihf libc6-dev-armhf-cross libc6:armhf + - make -C tests test-armv7 CC_ARMv7="arm-linux-gnueabihf-gcc -mthumb" + +mipsel: + stage: test + script: + - dpkg --add-architecture mipsel + - apt-get update -qq + - apt-get install -y gcc-mipsel-linux-gnu libc6-dev-mipsel-cross libc6:mipsel + - make -C tests test-mipsel CC_MIPSEL="mipsel-linux-gnu-gcc" + +mips64el: + stage: test + script: + - dpkg --add-architecture mips64el + - 
apt-get update -qq + - apt-get install -y gcc-mips64el-linux-gnuabi64 libc6-dev-mips64el-cross libc6:mips64el + - make -C tests test-mips64el CC_MIPS64EL="mips64el-linux-gnuabi64-gcc" + +ppc64el: + stage: test + script: + - dpkg --add-architecture ppc64el + - apt-get update -qq + - apt-get install -y gcc-powerpc64le-linux-gnu libc6-dev-ppc64el-cross libc6:ppc64el + - make -C tests test-ppc64le CC_PPC64LE="powerpc64le-linux-gnu-gcc" diff --git a/deps/lightening/AUTHORS b/deps/lightening/AUTHORS new file mode 100644 index 0000000..2097c63 --- /dev/null +++ b/deps/lightening/AUTHORS @@ -0,0 +1,14 @@ +Paulo Cesar Pereira de Andrade <pcpa@gnu.org> + +Paolo Bonzini <bonzini@gnu.org> + +PPC assembler by Ian Piumarta <piumarta@inria.fr> + +i386 assembler by Ian Piumarta <piumarta@inria.fr> +and Gwenole Beauchesne <gb.public@free.fr> + +x86-64 backend by Matthew Flatt <mflatt@cs.utah.edu> + +Major PPC contributions by Laurent Michel <ldm@thorgal.homelinux.org> + +Major SPARC contributions by Ludovic Courtes <ludo@chbouib.org> diff --git a/deps/lightening/COPYING b/deps/lightening/COPYING new file mode 100644 index 0000000..4432540 --- /dev/null +++ b/deps/lightening/COPYING @@ -0,0 +1,676 @@ + + GNU GENERAL PUBLIC LICENSE + Version 3, 29 June 2007 + + Copyright (C) 2007 Free Software Foundation, Inc. <http://fsf.org/> + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + + Preamble + + The GNU General Public License is a free, copyleft license for +software and other kinds of works. + + The licenses for most software and other practical works are designed +to take away your freedom to share and change the works. By contrast, +the GNU General Public License is intended to guarantee your freedom to +share and change all versions of a program--to make sure it remains free +software for all its users. 
We, the Free Software Foundation, use the +GNU General Public License for most of our software; it applies also to +any other work released this way by its authors. You can apply it to +your programs, too. + + When we speak of free software, we are referring to freedom, not +price. Our General Public Licenses are designed to make sure that you +have the freedom to distribute copies of free software (and charge for +them if you wish), that you receive source code or can get it if you +want it, that you can change the software or use pieces of it in new +free programs, and that you know you can do these things. + + To protect your rights, we need to prevent others from denying you +these rights or asking you to surrender the rights. Therefore, you have +certain responsibilities if you distribute copies of the software, or if +you modify it: responsibilities to respect the freedom of others. + + For example, if you distribute copies of such a program, whether +gratis or for a fee, you must pass on to the recipients the same +freedoms that you received. You must make sure that they, too, receive +or can get the source code. And you must show them these terms so they +know their rights. + + Developers that use the GNU GPL protect your rights with two steps: +(1) assert copyright on the software, and (2) offer you this License +giving you legal permission to copy, distribute and/or modify it. + + For the developers' and authors' protection, the GPL clearly explains +that there is no warranty for this free software. For both users' and +authors' sake, the GPL requires that modified versions be marked as +changed, so that their problems will not be attributed erroneously to +authors of previous versions. + + Some devices are designed to deny users access to install or run +modified versions of the software inside them, although the manufacturer +can do so. This is fundamentally incompatible with the aim of +protecting users' freedom to change the software. 
The systematic +pattern of such abuse occurs in the area of products for individuals to +use, which is precisely where it is most unacceptable. Therefore, we +have designed this version of the GPL to prohibit the practice for those +products. If such problems arise substantially in other domains, we +stand ready to extend this provision to those domains in future versions +of the GPL, as needed to protect the freedom of users. + + Finally, every program is threatened constantly by software patents. +States should not allow patents to restrict development and use of +software on general-purpose computers, but in those that do, we wish to +avoid the special danger that patents applied to a free program could +make it effectively proprietary. To prevent this, the GPL assures that +patents cannot be used to render the program non-free. + + The precise terms and conditions for copying, distribution and +modification follow. + + TERMS AND CONDITIONS + + 0. Definitions. + + "This License" refers to version 3 of the GNU General Public License. + + "Copyright" also means copyright-like laws that apply to other kinds of +works, such as semiconductor masks. + + "The Program" refers to any copyrightable work licensed under this +License. Each licensee is addressed as "you". "Licensees" and +"recipients" may be individuals or organizations. + + To "modify" a work means to copy from or adapt all or part of the work +in a fashion requiring copyright permission, other than the making of an +exact copy. The resulting work is called a "modified version" of the +earlier work or a work "based on" the earlier work. + + A "covered work" means either the unmodified Program or a work based +on the Program. + + To "propagate" a work means to do anything with it that, without +permission, would make you directly or secondarily liable for +infringement under applicable copyright law, except executing it on a +computer or modifying a private copy. 
Propagation includes copying, +distribution (with or without modification), making available to the +public, and in some countries other activities as well. + + To "convey" a work means any kind of propagation that enables other +parties to make or receive copies. Mere interaction with a user through +a computer network, with no transfer of a copy, is not conveying. + + An interactive user interface displays "Appropriate Legal Notices" +to the extent that it includes a convenient and prominently visible +feature that (1) displays an appropriate copyright notice, and (2) +tells the user that there is no warranty for the work (except to the +extent that warranties are provided), that licensees may convey the +work under this License, and how to view a copy of this License. If +the interface presents a list of user commands or options, such as a +menu, a prominent item in the list meets this criterion. + + 1. Source Code. + + The "source code" for a work means the preferred form of the work +for making modifications to it. "Object code" means any non-source +form of a work. + + A "Standard Interface" means an interface that either is an official +standard defined by a recognized standards body, or, in the case of +interfaces specified for a particular programming language, one that +is widely used among developers working in that language. + + The "System Libraries" of an executable work include anything, other +than the work as a whole, that (a) is included in the normal form of +packaging a Major Component, but which is not part of that Major +Component, and (b) serves only to enable use of the work with that +Major Component, or to implement a Standard Interface for which an +implementation is available to the public in source code form. 
A +"Major Component", in this context, means a major essential component +(kernel, window system, and so on) of the specific operating system +(if any) on which the executable work runs, or a compiler used to +produce the work, or an object code interpreter used to run it. + + The "Corresponding Source" for a work in object code form means all +the source code needed to generate, install, and (for an executable +work) run the object code and to modify the work, including scripts to +control those activities. However, it does not include the work's +System Libraries, or general-purpose tools or generally available free +programs which are used unmodified in performing those activities but +which are not part of the work. For example, Corresponding Source +includes interface definition files associated with source files for +the work, and the source code for shared libraries and dynamically +linked subprograms that the work is specifically designed to require, +such as by intimate data communication or control flow between those +subprograms and other parts of the work. + + The Corresponding Source need not include anything that users +can regenerate automatically from other parts of the Corresponding +Source. + + The Corresponding Source for a work in source code form is that +same work. + + 2. Basic Permissions. + + All rights granted under this License are granted for the term of +copyright on the Program, and are irrevocable provided the stated +conditions are met. This License explicitly affirms your unlimited +permission to run the unmodified Program. The output from running a +covered work is covered by this License only if the output, given its +content, constitutes a covered work. This License acknowledges your +rights of fair use or other equivalent, as provided by copyright law. + + You may make, run and propagate covered works that you do not +convey, without conditions so long as your license otherwise remains +in force. 
You may convey covered works to others for the sole purpose +of having them make modifications exclusively for you, or provide you +with facilities for running those works, provided that you comply with +the terms of this License in conveying all material for which you do +not control copyright. Those thus making or running the covered works +for you must do so exclusively on your behalf, under your direction +and control, on terms that prohibit them from making any copies of +your copyrighted material outside their relationship with you. + + Conveying under any other circumstances is permitted solely under +the conditions stated below. Sublicensing is not allowed; section 10 +makes it unnecessary. + + 3. Protecting Users' Legal Rights From Anti-Circumvention Law. + + No covered work shall be deemed part of an effective technological +measure under any applicable law fulfilling obligations under article +11 of the WIPO copyright treaty adopted on 20 December 1996, or +similar laws prohibiting or restricting circumvention of such +measures. + + When you convey a covered work, you waive any legal power to forbid +circumvention of technological measures to the extent such circumvention +is effected by exercising rights under this License with respect to +the covered work, and you disclaim any intention to limit operation or +modification of the work as a means of enforcing, against the work's +users, your or third parties' legal rights to forbid circumvention of +technological measures. + + 4. Conveying Verbatim Copies. 
+ + You may convey verbatim copies of the Program's source code as you +receive it, in any medium, provided that you conspicuously and +appropriately publish on each copy an appropriate copyright notice; +keep intact all notices stating that this License and any +non-permissive terms added in accord with section 7 apply to the code; +keep intact all notices of the absence of any warranty; and give all +recipients a copy of this License along with the Program. + + You may charge any price or no price for each copy that you convey, +and you may offer support or warranty protection for a fee. + + 5. Conveying Modified Source Versions. + + You may convey a work based on the Program, or the modifications to +produce it from the Program, in the form of source code under the +terms of section 4, provided that you also meet all of these conditions: + + a) The work must carry prominent notices stating that you modified + it, and giving a relevant date. + + b) The work must carry prominent notices stating that it is + released under this License and any conditions added under section + 7. This requirement modifies the requirement in section 4 to + "keep intact all notices". + + c) You must license the entire work, as a whole, under this + License to anyone who comes into possession of a copy. This + License will therefore apply, along with any applicable section 7 + additional terms, to the whole of the work, and all its parts, + regardless of how they are packaged. This License gives no + permission to license the work in any other way, but it does not + invalidate such permission if you have separately received it. + + d) If the work has interactive user interfaces, each must display + Appropriate Legal Notices; however, if the Program has interactive + interfaces that do not display Appropriate Legal Notices, your + work need not make them do so. 
+ + A compilation of a covered work with other separate and independent +works, which are not by their nature extensions of the covered work, +and which are not combined with it such as to form a larger program, +in or on a volume of a storage or distribution medium, is called an +"aggregate" if the compilation and its resulting copyright are not +used to limit the access or legal rights of the compilation's users +beyond what the individual works permit. Inclusion of a covered work +in an aggregate does not cause this License to apply to the other +parts of the aggregate. + + 6. Conveying Non-Source Forms. + + You may convey a covered work in object code form under the terms +of sections 4 and 5, provided that you also convey the +machine-readable Corresponding Source under the terms of this License, +in one of these ways: + + a) Convey the object code in, or embodied in, a physical product + (including a physical distribution medium), accompanied by the + Corresponding Source fixed on a durable physical medium + customarily used for software interchange. + + b) Convey the object code in, or embodied in, a physical product + (including a physical distribution medium), accompanied by a + written offer, valid for at least three years and valid for as + long as you offer spare parts or customer support for that product + model, to give anyone who possesses the object code either (1) a + copy of the Corresponding Source for all the software in the + product that is covered by this License, on a durable physical + medium customarily used for software interchange, for a price no + more than your reasonable cost of physically performing this + conveying of source, or (2) access to copy the + Corresponding Source from a network server at no charge. + + c) Convey individual copies of the object code with a copy of the + written offer to provide the Corresponding Source. 
This + alternative is allowed only occasionally and noncommercially, and + only if you received the object code with such an offer, in accord + with subsection 6b. + + d) Convey the object code by offering access from a designated + place (gratis or for a charge), and offer equivalent access to the + Corresponding Source in the same way through the same place at no + further charge. You need not require recipients to copy the + Corresponding Source along with the object code. If the place to + copy the object code is a network server, the Corresponding Source + may be on a different server (operated by you or a third party) + that supports equivalent copying facilities, provided you maintain + clear directions next to the object code saying where to find the + Corresponding Source. Regardless of what server hosts the + Corresponding Source, you remain obligated to ensure that it is + available for as long as needed to satisfy these requirements. + + e) Convey the object code using peer-to-peer transmission, provided + you inform other peers where the object code and Corresponding + Source of the work are being offered to the general public at no + charge under subsection 6d. + + A separable portion of the object code, whose source code is excluded +from the Corresponding Source as a System Library, need not be +included in conveying the object code work. + + A "User Product" is either (1) a "consumer product", which means any +tangible personal property which is normally used for personal, family, +or household purposes, or (2) anything designed or sold for incorporation +into a dwelling. In determining whether a product is a consumer product, +doubtful cases shall be resolved in favor of coverage. 
For a particular +product received by a particular user, "normally used" refers to a +typical or common use of that class of product, regardless of the status +of the particular user or of the way in which the particular user +actually uses, or expects or is expected to use, the product. A product +is a consumer product regardless of whether the product has substantial +commercial, industrial or non-consumer uses, unless such uses represent +the only significant mode of use of the product. + + "Installation Information" for a User Product means any methods, +procedures, authorization keys, or other information required to install +and execute modified versions of a covered work in that User Product from +a modified version of its Corresponding Source. The information must +suffice to ensure that the continued functioning of the modified object +code is in no case prevented or interfered with solely because +modification has been made. + + If you convey an object code work under this section in, or with, or +specifically for use in, a User Product, and the conveying occurs as +part of a transaction in which the right of possession and use of the +User Product is transferred to the recipient in perpetuity or for a +fixed term (regardless of how the transaction is characterized), the +Corresponding Source conveyed under this section must be accompanied +by the Installation Information. But this requirement does not apply +if neither you nor any third party retains the ability to install +modified object code on the User Product (for example, the work has +been installed in ROM). + + The requirement to provide Installation Information does not include a +requirement to continue to provide support service, warranty, or updates +for a work that has been modified or installed by the recipient, or for +the User Product in which it has been modified or installed. 
Access to a +network may be denied when the modification itself materially and +adversely affects the operation of the network or violates the rules and +protocols for communication across the network. + + Corresponding Source conveyed, and Installation Information provided, +in accord with this section must be in a format that is publicly +documented (and with an implementation available to the public in +source code form), and must require no special password or key for +unpacking, reading or copying. + + 7. Additional Terms. + + "Additional permissions" are terms that supplement the terms of this +License by making exceptions from one or more of its conditions. +Additional permissions that are applicable to the entire Program shall +be treated as though they were included in this License, to the extent +that they are valid under applicable law. If additional permissions +apply only to part of the Program, that part may be used separately +under those permissions, but the entire Program remains governed by +this License without regard to the additional permissions. + + When you convey a copy of a covered work, you may at your option +remove any additional permissions from that copy, or from any part of +it. (Additional permissions may be written to require their own +removal in certain cases when you modify the work.) You may place +additional permissions on material, added by you to a covered work, +for which you have or can give appropriate copyright permission. 
+ + Notwithstanding any other provision of this License, for material you +add to a covered work, you may (if authorized by the copyright holders of +that material) supplement the terms of this License with terms: + + a) Disclaiming warranty or limiting liability differently from the + terms of sections 15 and 16 of this License; or + + b) Requiring preservation of specified reasonable legal notices or + author attributions in that material or in the Appropriate Legal + Notices displayed by works containing it; or + + c) Prohibiting misrepresentation of the origin of that material, or + requiring that modified versions of such material be marked in + reasonable ways as different from the original version; or + + d) Limiting the use for publicity purposes of names of licensors or + authors of the material; or + + e) Declining to grant rights under trademark law for use of some + trade names, trademarks, or service marks; or + + f) Requiring indemnification of licensors and authors of that + material by anyone who conveys the material (or modified versions of + it) with contractual assumptions of liability to the recipient, for + any liability that these contractual assumptions directly impose on + those licensors and authors. + + All other non-permissive additional terms are considered "further +restrictions" within the meaning of section 10. If the Program as you +received it, or any part of it, contains a notice stating that it is +governed by this License along with a term that is a further +restriction, you may remove that term. If a license document contains +a further restriction but permits relicensing or conveying under this +License, you may add to a covered work material governed by the terms +of that license document, provided that the further restriction does +not survive such relicensing or conveying. 
+ + If you add terms to a covered work in accord with this section, you +must place, in the relevant source files, a statement of the +additional terms that apply to those files, or a notice indicating +where to find the applicable terms. + + Additional terms, permissive or non-permissive, may be stated in the +form of a separately written license, or stated as exceptions; +the above requirements apply either way. + + 8. Termination. + + You may not propagate or modify a covered work except as expressly +provided under this License. Any attempt otherwise to propagate or +modify it is void, and will automatically terminate your rights under +this License (including any patent licenses granted under the third +paragraph of section 11). + + However, if you cease all violation of this License, then your +license from a particular copyright holder is reinstated (a) +provisionally, unless and until the copyright holder explicitly and +finally terminates your license, and (b) permanently, if the copyright +holder fails to notify you of the violation by some reasonable means +prior to 60 days after the cessation. + + Moreover, your license from a particular copyright holder is +reinstated permanently if the copyright holder notifies you of the +violation by some reasonable means, this is the first time you have +received notice of violation of this License (for any work) from that +copyright holder, and you cure the violation prior to 30 days after +your receipt of the notice. + + Termination of your rights under this section does not terminate the +licenses of parties who have received copies or rights from you under +this License. If your rights have been terminated and not permanently +reinstated, you do not qualify to receive new licenses for the same +material under section 10. + + 9. Acceptance Not Required for Having Copies. + + You are not required to accept this License in order to receive or +run a copy of the Program. 
Ancillary propagation of a covered work +occurring solely as a consequence of using peer-to-peer transmission +to receive a copy likewise does not require acceptance. However, +nothing other than this License grants you permission to propagate or +modify any covered work. These actions infringe copyright if you do +not accept this License. Therefore, by modifying or propagating a +covered work, you indicate your acceptance of this License to do so. + + 10. Automatic Licensing of Downstream Recipients. + + Each time you convey a covered work, the recipient automatically +receives a license from the original licensors, to run, modify and +propagate that work, subject to this License. You are not responsible +for enforcing compliance by third parties with this License. + + An "entity transaction" is a transaction transferring control of an +organization, or substantially all assets of one, or subdividing an +organization, or merging organizations. If propagation of a covered +work results from an entity transaction, each party to that +transaction who receives a copy of the work also receives whatever +licenses to the work the party's predecessor in interest had or could +give under the previous paragraph, plus a right to possession of the +Corresponding Source of the work from the predecessor in interest, if +the predecessor has it or can get it with reasonable efforts. + + You may not impose any further restrictions on the exercise of the +rights granted or affirmed under this License. For example, you may +not impose a license fee, royalty, or other charge for exercise of +rights granted under this License, and you may not initiate litigation +(including a cross-claim or counterclaim in a lawsuit) alleging that +any patent claim is infringed by making, using, selling, offering for +sale, or importing the Program or any portion of it. + + 11. Patents. 
+ + A "contributor" is a copyright holder who authorizes use under this +License of the Program or a work on which the Program is based. The +work thus licensed is called the contributor's "contributor version". + + A contributor's "essential patent claims" are all patent claims +owned or controlled by the contributor, whether already acquired or +hereafter acquired, that would be infringed by some manner, permitted +by this License, of making, using, or selling its contributor version, +but do not include claims that would be infringed only as a +consequence of further modification of the contributor version. For +purposes of this definition, "control" includes the right to grant +patent sublicenses in a manner consistent with the requirements of +this License. + + Each contributor grants you a non-exclusive, worldwide, royalty-free +patent license under the contributor's essential patent claims, to +make, use, sell, offer for sale, import and otherwise run, modify and +propagate the contents of its contributor version. + + In the following three paragraphs, a "patent license" is any express +agreement or commitment, however denominated, not to enforce a patent +(such as an express permission to practice a patent or covenant not to +sue for patent infringement). To "grant" such a patent license to a +party means to make such an agreement or commitment not to enforce a +patent against the party. 
+ + If you convey a covered work, knowingly relying on a patent license, +and the Corresponding Source of the work is not available for anyone +to copy, free of charge and under the terms of this License, through a +publicly available network server or other readily accessible means, +then you must either (1) cause the Corresponding Source to be so +available, or (2) arrange to deprive yourself of the benefit of the +patent license for this particular work, or (3) arrange, in a manner +consistent with the requirements of this License, to extend the patent +license to downstream recipients. "Knowingly relying" means you have +actual knowledge that, but for the patent license, your conveying the +covered work in a country, or your recipient's use of the covered work +in a country, would infringe one or more identifiable patents in that +country that you have reason to believe are valid. + + If, pursuant to or in connection with a single transaction or +arrangement, you convey, or propagate by procuring conveyance of, a +covered work, and grant a patent license to some of the parties +receiving the covered work authorizing them to use, propagate, modify +or convey a specific copy of the covered work, then the patent license +you grant is automatically extended to all recipients of the covered +work and works based on it. + + A patent license is "discriminatory" if it does not include within +the scope of its coverage, prohibits the exercise of, or is +conditioned on the non-exercise of one or more of the rights that are +specifically granted under this License. 
You may not convey a covered +work if you are a party to an arrangement with a third party that is +in the business of distributing software, under which you make payment +to the third party based on the extent of your activity of conveying +the work, and under which the third party grants, to any of the +parties who would receive the covered work from you, a discriminatory +patent license (a) in connection with copies of the covered work +conveyed by you (or copies made from those copies), or (b) primarily +for and in connection with specific products or compilations that +contain the covered work, unless you entered into that arrangement, +or that patent license was granted, prior to 28 March 2007. + + Nothing in this License shall be construed as excluding or limiting +any implied license or other defenses to infringement that may +otherwise be available to you under applicable patent law. + + 12. No Surrender of Others' Freedom. + + If conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not +excuse you from the conditions of this License. If you cannot convey a +covered work so as to satisfy simultaneously your obligations under this +License and any other pertinent obligations, then as a consequence you may +not convey it at all. For example, if you agree to terms that obligate you +to collect a royalty for further conveying from those to whom you convey +the Program, the only way you could satisfy both those terms and this +License would be to refrain entirely from conveying the Program. + + 13. Use with the GNU Affero General Public License. + + Notwithstanding any other provision of this License, you have +permission to link or combine any covered work with a work licensed +under version 3 of the GNU Affero General Public License into a single +combined work, and to convey the resulting work. 
The terms of this +License will continue to apply to the part which is the covered work, +but the special requirements of the GNU Affero General Public License, +section 13, concerning interaction through a network will apply to the +combination as such. + + 14. Revised Versions of this License. + + The Free Software Foundation may publish revised and/or new versions of +the GNU General Public License from time to time. Such new versions will +be similar in spirit to the present version, but may differ in detail to +address new problems or concerns. + + Each version is given a distinguishing version number. If the +Program specifies that a certain numbered version of the GNU General +Public License "or any later version" applies to it, you have the +option of following the terms and conditions either of that numbered +version or of any later version published by the Free Software +Foundation. If the Program does not specify a version number of the +GNU General Public License, you may choose any version ever published +by the Free Software Foundation. + + If the Program specifies that a proxy can decide which future +versions of the GNU General Public License can be used, that proxy's +public statement of acceptance of a version permanently authorizes you +to choose that version for the Program. + + Later license versions may give you additional or different +permissions. However, no additional obligations are imposed on any +author or copyright holder as a result of your choosing to follow a +later version. + + 15. Disclaimer of Warranty. + + THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY +APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT +HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY +OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, +THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +PURPOSE. 
THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM +IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF +ALL NECESSARY SERVICING, REPAIR OR CORRECTION. + + 16. Limitation of Liability. + + IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING +WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS +THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY +GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE +USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF +DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD +PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), +EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF +SUCH DAMAGES. + + 17. Interpretation of Sections 15 and 16. + + If the disclaimer of warranty and limitation of liability provided +above cannot be given local legal effect according to their terms, +reviewing courts shall apply local law that most closely approximates +an absolute waiver of all civil liability in connection with the +Program, unless a warranty or assumption of liability accompanies a +copy of the Program in return for a fee. + + END OF TERMS AND CONDITIONS + + How to Apply These Terms to Your New Programs + + If you develop a new program, and you want it to be of the greatest +possible use to the public, the best way to achieve this is to make it +free software which everyone can redistribute and change under these terms. + + To do so, attach the following notices to the program. It is safest +to attach them to the start of each source file to most effectively +state the exclusion of warranty; and each file should have at least +the "copyright" line and a pointer to where the full notice is found. 
+ + <one line to give the program's name and a brief idea of what it does.> + Copyright (C) <year> <name of author> + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + +Also add information on how to contact you by electronic and paper mail. + + If the program does terminal interaction, make it output a short +notice like this when it starts in an interactive mode: + + <program> Copyright (C) <year> <name of author> + This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'. + This is free software, and you are welcome to redistribute it + under certain conditions; type `show c' for details. + +The hypothetical commands `show w' and `show c' should show the appropriate +parts of the General Public License. Of course, your program's commands +might be different; for a GUI interface, you would use an "about box". + + You should also get your employer (if you work as a programmer) or school, +if any, to sign a "copyright disclaimer" for the program, if necessary. +For more information on this, and how to apply and follow the GNU GPL, see +<http://www.gnu.org/licenses/>. + + The GNU General Public License does not permit incorporating your program +into proprietary programs. If your program is a subroutine library, you +may consider it more useful to permit linking proprietary applications with +the library. 
If this is what you want to do, use the GNU Lesser General +Public License instead of this License. But first, please read +<http://www.gnu.org/philosophy/why-not-lgpl.html>. + diff --git a/deps/lightening/COPYING.DOC b/deps/lightening/COPYING.DOC new file mode 100644 index 0000000..1a86456 --- /dev/null +++ b/deps/lightening/COPYING.DOC @@ -0,0 +1,355 @@ + GNU Free Documentation License + Version 1.1, March 2000 + + Copyright (C) 2000 Free Software Foundation, Inc. + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + + +0. PREAMBLE + +The purpose of this License is to make a manual, textbook, or other +written document "free" in the sense of freedom: to assure everyone +the effective freedom to copy and redistribute it, with or without +modifying it, either commercially or noncommercially. Secondarily, +this License preserves for the author and publisher a way to get +credit for their work, while not being considered responsible for +modifications made by others. + +This License is a kind of "copyleft", which means that derivative +works of the document must themselves be free in the same sense. It +complements the GNU General Public License, which is a copyleft +license designed for free software. + +We have designed this License in order to use it for manuals for free +software, because free software needs free documentation: a free +program should come with manuals providing the same freedoms that the +software does. But this License is not limited to software manuals; +it can be used for any textual work, regardless of subject matter or +whether it is published as a printed book. We recommend this License +principally for works whose purpose is instruction or reference. + + +1. 
APPLICABILITY AND DEFINITIONS + +This License applies to any manual or other work that contains a +notice placed by the copyright holder saying it can be distributed +under the terms of this License. The "Document", below, refers to any +such manual or work. Any member of the public is a licensee, and is +addressed as "you". + +A "Modified Version" of the Document means any work containing the +Document or a portion of it, either copied verbatim, or with +modifications and/or translated into another language. + +A "Secondary Section" is a named appendix or a front-matter section of +the Document that deals exclusively with the relationship of the +publishers or authors of the Document to the Document's overall subject +(or to related matters) and contains nothing that could fall directly +within that overall subject. (For example, if the Document is in part a +textbook of mathematics, a Secondary Section may not explain any +mathematics.) The relationship could be a matter of historical +connection with the subject or with related matters, or of legal, +commercial, philosophical, ethical or political position regarding +them. + +The "Invariant Sections" are certain Secondary Sections whose titles +are designated, as being those of Invariant Sections, in the notice +that says that the Document is released under this License. + +The "Cover Texts" are certain short passages of text that are listed, +as Front-Cover Texts or Back-Cover Texts, in the notice that says that +the Document is released under this License. 
+ +A "Transparent" copy of the Document means a machine-readable copy, +represented in a format whose specification is available to the +general public, whose contents can be viewed and edited directly and +straightforwardly with generic text editors or (for images composed of +pixels) generic paint programs or (for drawings) some widely available +drawing editor, and that is suitable for input to text formatters or +for automatic translation to a variety of formats suitable for input +to text formatters. A copy made in an otherwise Transparent file +format whose markup has been designed to thwart or discourage +subsequent modification by readers is not Transparent. A copy that is +not "Transparent" is called "Opaque". + +Examples of suitable formats for Transparent copies include plain +ASCII without markup, Texinfo input format, LaTeX input format, SGML +or XML using a publicly available DTD, and standard-conforming simple +HTML designed for human modification. Opaque formats include +PostScript, PDF, proprietary formats that can be read and edited only +by proprietary word processors, SGML or XML for which the DTD and/or +processing tools are not generally available, and the +machine-generated HTML produced by some word processors for output +purposes only. + +The "Title Page" means, for a printed book, the title page itself, +plus such following pages as are needed to hold, legibly, the material +this License requires to appear in the title page. For works in +formats which do not have any title page as such, "Title Page" means +the text near the most prominent appearance of the work's title, +preceding the beginning of the body of the text. + + +2. 
VERBATIM COPYING + +You may copy and distribute the Document in any medium, either +commercially or noncommercially, provided that this License, the +copyright notices, and the license notice saying this License applies +to the Document are reproduced in all copies, and that you add no other +conditions whatsoever to those of this License. You may not use +technical measures to obstruct or control the reading or further +copying of the copies you make or distribute. However, you may accept +compensation in exchange for copies. If you distribute a large enough +number of copies you must also follow the conditions in section 3. + +You may also lend copies, under the same conditions stated above, and +you may publicly display copies. + + +3. COPYING IN QUANTITY + +If you publish printed copies of the Document numbering more than 100, +and the Document's license notice requires Cover Texts, you must enclose +the copies in covers that carry, clearly and legibly, all these Cover +Texts: Front-Cover Texts on the front cover, and Back-Cover Texts on +the back cover. Both covers must also clearly and legibly identify +you as the publisher of these copies. The front cover must present +the full title with all words of the title equally prominent and +visible. You may add other material on the covers in addition. +Copying with changes limited to the covers, as long as they preserve +the title of the Document and satisfy these conditions, can be treated +as verbatim copying in other respects. + +If the required texts for either cover are too voluminous to fit +legibly, you should put the first ones listed (as many as fit +reasonably) on the actual cover, and continue the rest onto adjacent +pages. 
+ +If you publish or distribute Opaque copies of the Document numbering +more than 100, you must either include a machine-readable Transparent +copy along with each Opaque copy, or state in or with each Opaque copy +a publicly-accessible computer-network location containing a complete +Transparent copy of the Document, free of added material, which the +general network-using public has access to download anonymously at no +charge using public-standard network protocols. If you use the latter +option, you must take reasonably prudent steps, when you begin +distribution of Opaque copies in quantity, to ensure that this +Transparent copy will remain thus accessible at the stated location +until at least one year after the last time you distribute an Opaque +copy (directly or through your agents or retailers) of that edition to +the public. + +It is requested, but not required, that you contact the authors of the +Document well before redistributing any large number of copies, to give +them a chance to provide you with an updated version of the Document. + + +4. MODIFICATIONS + +You may copy and distribute a Modified Version of the Document under +the conditions of sections 2 and 3 above, provided that you release +the Modified Version under precisely this License, with the Modified +Version filling the role of the Document, thus licensing distribution +and modification of the Modified Version to whoever possesses a copy +of it. In addition, you must do these things in the Modified Version: + +A. Use in the Title Page (and on the covers, if any) a title distinct + from that of the Document, and from those of previous versions + (which should, if there were any, be listed in the History section + of the Document). You may use the same title as a previous version + if the original publisher of that version gives permission. +B. 
List on the Title Page, as authors, one or more persons or entities + responsible for authorship of the modifications in the Modified + Version, together with at least five of the principal authors of the + Document (all of its principal authors, if it has less than five). +C. State on the Title page the name of the publisher of the + Modified Version, as the publisher. +D. Preserve all the copyright notices of the Document. +E. Add an appropriate copyright notice for your modifications + adjacent to the other copyright notices. +F. Include, immediately after the copyright notices, a license notice + giving the public permission to use the Modified Version under the + terms of this License, in the form shown in the Addendum below. +G. Preserve in that license notice the full lists of Invariant Sections + and required Cover Texts given in the Document's license notice. +H. Include an unaltered copy of this License. +I. Preserve the section entitled "History", and its title, and add to + it an item stating at least the title, year, new authors, and + publisher of the Modified Version as given on the Title Page. If + there is no section entitled "History" in the Document, create one + stating the title, year, authors, and publisher of the Document as + given on its Title Page, then add an item describing the Modified + Version as stated in the previous sentence. +J. Preserve the network location, if any, given in the Document for + public access to a Transparent copy of the Document, and likewise + the network locations given in the Document for previous versions + it was based on. These may be placed in the "History" section. + You may omit a network location for a work that was published at + least four years before the Document itself, or if the original + publisher of the version it refers to gives permission. +K. 
In any section entitled "Acknowledgements" or "Dedications", + preserve the section's title, and preserve in the section all the + substance and tone of each of the contributor acknowledgements + and/or dedications given therein. +L. Preserve all the Invariant Sections of the Document, + unaltered in their text and in their titles. Section numbers + or the equivalent are not considered part of the section titles. +M. Delete any section entitled "Endorsements". Such a section + may not be included in the Modified Version. +N. Do not retitle any existing section as "Endorsements" + or to conflict in title with any Invariant Section. + +If the Modified Version includes new front-matter sections or +appendices that qualify as Secondary Sections and contain no material +copied from the Document, you may at your option designate some or all +of these sections as invariant. To do this, add their titles to the +list of Invariant Sections in the Modified Version's license notice. +These titles must be distinct from any other section titles. + +You may add a section entitled "Endorsements", provided it contains +nothing but endorsements of your Modified Version by various +parties--for example, statements of peer review or that the text has +been approved by an organization as the authoritative definition of a +standard. + +You may add a passage of up to five words as a Front-Cover Text, and a +passage of up to 25 words as a Back-Cover Text, to the end of the list +of Cover Texts in the Modified Version. Only one passage of +Front-Cover Text and one of Back-Cover Text may be added by (or +through arrangements made by) any one entity. If the Document already +includes a cover text for the same cover, previously added by you or +by arrangement made by the same entity you are acting on behalf of, +you may not add another; but you may replace the old one, on explicit +permission from the previous publisher that added the old one. 
+ +The author(s) and publisher(s) of the Document do not by this License +give permission to use their names for publicity for or to assert or +imply endorsement of any Modified Version. + + +5. COMBINING DOCUMENTS + +You may combine the Document with other documents released under this +License, under the terms defined in section 4 above for modified +versions, provided that you include in the combination all of the +Invariant Sections of all of the original documents, unmodified, and +list them all as Invariant Sections of your combined work in its +license notice. + +The combined work need only contain one copy of this License, and +multiple identical Invariant Sections may be replaced with a single +copy. If there are multiple Invariant Sections with the same name but +different contents, make the title of each such section unique by +adding at the end of it, in parentheses, the name of the original +author or publisher of that section if known, or else a unique number. +Make the same adjustment to the section titles in the list of +Invariant Sections in the license notice of the combined work. + +In the combination, you must combine any sections entitled "History" +in the various original documents, forming one section entitled +"History"; likewise combine any sections entitled "Acknowledgements", +and any sections entitled "Dedications". You must delete all sections +entitled "Endorsements." + + +6. COLLECTIONS OF DOCUMENTS + +You may make a collection consisting of the Document and other documents +released under this License, and replace the individual copies of this +License in the various documents with a single copy that is included in +the collection, provided that you follow the rules of this License for +verbatim copying of each of the documents in all other respects. 
+ +You may extract a single document from such a collection, and distribute +it individually under this License, provided you insert a copy of this +License into the extracted document, and follow this License in all +other respects regarding verbatim copying of that document. + + +7. AGGREGATION WITH INDEPENDENT WORKS + +A compilation of the Document or its derivatives with other separate +and independent documents or works, in or on a volume of a storage or +distribution medium, does not as a whole count as a Modified Version +of the Document, provided no compilation copyright is claimed for the +compilation. Such a compilation is called an "aggregate", and this +License does not apply to the other self-contained works thus compiled +with the Document, on account of their being thus compiled, if they +are not themselves derivative works of the Document. + +If the Cover Text requirement of section 3 is applicable to these +copies of the Document, then if the Document is less than one quarter +of the entire aggregate, the Document's Cover Texts may be placed on +covers that surround only the Document within the aggregate. +Otherwise they must appear on covers around the whole aggregate. + + +8. TRANSLATION + +Translation is considered a kind of modification, so you may +distribute translations of the Document under the terms of section 4. +Replacing Invariant Sections with translations requires special +permission from their copyright holders, but you may include +translations of some or all Invariant Sections in addition to the +original versions of these Invariant Sections. You may include a +translation of this License provided that you also include the +original English version of this License. In case of a disagreement +between the translation and the original English version of this +License, the original English version will prevail. + + +9. 
TERMINATION + +You may not copy, modify, sublicense, or distribute the Document except +as expressly provided for under this License. Any other attempt to +copy, modify, sublicense or distribute the Document is void, and will +automatically terminate your rights under this License. However, +parties who have received copies, or rights, from you under this +License will not have their licenses terminated so long as such +parties remain in full compliance. + + +10. FUTURE REVISIONS OF THIS LICENSE + +The Free Software Foundation may publish new, revised versions +of the GNU Free Documentation License from time to time. Such new +versions will be similar in spirit to the present version, but may +differ in detail to address new problems or concerns. See +http://www.gnu.org/copyleft/. + +Each version of the License is given a distinguishing version number. +If the Document specifies that a particular numbered version of this +License "or any later version" applies to it, you have the option of +following the terms and conditions either of that specified version or +of any later version that has been published (not as a draft) by the +Free Software Foundation. If the Document does not specify a version +number of this License, you may choose any version ever published (not +as a draft) by the Free Software Foundation. + + +ADDENDUM: How to use this License for your documents + +To use this License in a document you have written, include a copy of +the License in the document and put the following copyright and +license notices just after the title page: + + Copyright (c) YEAR YOUR NAME. + Permission is granted to copy, distribute and/or modify this document + under the terms of the GNU Free Documentation License, Version 1.1 + or any later version published by the Free Software Foundation; + with the Invariant Sections being LIST THEIR TITLES, with the + Front-Cover Texts being LIST, and with the Back-Cover Texts being LIST. 
+ A copy of the license is included in the section entitled "GNU + Free Documentation License". + +If you have no Invariant Sections, write "with no Invariant Sections" +instead of saying which ones are invariant. If you have no +Front-Cover Texts, write "no Front-Cover Texts" instead of +"Front-Cover Texts being LIST"; likewise for Back-Cover Texts. + +If your document contains nontrivial examples of program code, we +recommend releasing these examples in parallel under your choice of +free software license, such as the GNU General Public License, +to permit their use in free software. diff --git a/deps/lightening/COPYING.LESSER b/deps/lightening/COPYING.LESSER new file mode 100644 index 0000000..fc8a5de --- /dev/null +++ b/deps/lightening/COPYING.LESSER @@ -0,0 +1,165 @@ + GNU LESSER GENERAL PUBLIC LICENSE + Version 3, 29 June 2007 + + Copyright (C) 2007 Free Software Foundation, Inc. <http://fsf.org/> + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + + + This version of the GNU Lesser General Public License incorporates +the terms and conditions of version 3 of the GNU General Public +License, supplemented by the additional permissions listed below. + + 0. Additional Definitions. + + As used herein, "this License" refers to version 3 of the GNU Lesser +General Public License, and the "GNU GPL" refers to version 3 of the GNU +General Public License. + + "The Library" refers to a covered work governed by this License, +other than an Application or a Combined Work as defined below. + + An "Application" is any work that makes use of an interface provided +by the Library, but which is not otherwise based on the Library. +Defining a subclass of a class defined by the Library is deemed a mode +of using an interface provided by the Library. + + A "Combined Work" is a work produced by combining or linking an +Application with the Library. 
The particular version of the Library +with which the Combined Work was made is also called the "Linked +Version". + + The "Minimal Corresponding Source" for a Combined Work means the +Corresponding Source for the Combined Work, excluding any source code +for portions of the Combined Work that, considered in isolation, are +based on the Application, and not on the Linked Version. + + The "Corresponding Application Code" for a Combined Work means the +object code and/or source code for the Application, including any data +and utility programs needed for reproducing the Combined Work from the +Application, but excluding the System Libraries of the Combined Work. + + 1. Exception to Section 3 of the GNU GPL. + + You may convey a covered work under sections 3 and 4 of this License +without being bound by section 3 of the GNU GPL. + + 2. Conveying Modified Versions. + + If you modify a copy of the Library, and, in your modifications, a +facility refers to a function or data to be supplied by an Application +that uses the facility (other than as an argument passed when the +facility is invoked), then you may convey a copy of the modified +version: + + a) under this License, provided that you make a good faith effort to + ensure that, in the event an Application does not supply the + function or data, the facility still operates, and performs + whatever part of its purpose remains meaningful, or + + b) under the GNU GPL, with none of the additional permissions of + this License applicable to that copy. + + 3. Object Code Incorporating Material from Library Header Files. + + The object code form of an Application may incorporate material from +a header file that is part of the Library. 
You may convey such object +code under terms of your choice, provided that, if the incorporated +material is not limited to numerical parameters, data structure +layouts and accessors, or small macros, inline functions and templates +(ten or fewer lines in length), you do both of the following: + + a) Give prominent notice with each copy of the object code that the + Library is used in it and that the Library and its use are + covered by this License. + + b) Accompany the object code with a copy of the GNU GPL and this license + document. + + 4. Combined Works. + + You may convey a Combined Work under terms of your choice that, +taken together, effectively do not restrict modification of the +portions of the Library contained in the Combined Work and reverse +engineering for debugging such modifications, if you also do each of +the following: + + a) Give prominent notice with each copy of the Combined Work that + the Library is used in it and that the Library and its use are + covered by this License. + + b) Accompany the Combined Work with a copy of the GNU GPL and this license + document. + + c) For a Combined Work that displays copyright notices during + execution, include the copyright notice for the Library among + these notices, as well as a reference directing the user to the + copies of the GNU GPL and this license document. + + d) Do one of the following: + + 0) Convey the Minimal Corresponding Source under the terms of this + License, and the Corresponding Application Code in a form + suitable for, and under terms that permit, the user to + recombine or relink the Application with a modified version of + the Linked Version to produce a modified Combined Work, in the + manner specified by section 6 of the GNU GPL for conveying + Corresponding Source. + + 1) Use a suitable shared library mechanism for linking with the + Library. 
A suitable mechanism is one that (a) uses at run time + a copy of the Library already present on the user's computer + system, and (b) will operate properly with a modified version + of the Library that is interface-compatible with the Linked + Version. + + e) Provide Installation Information, but only if you would otherwise + be required to provide such information under section 6 of the + GNU GPL, and only to the extent that such information is + necessary to install and execute a modified version of the + Combined Work produced by recombining or relinking the + Application with a modified version of the Linked Version. (If + you use option 4d0, the Installation Information must accompany + the Minimal Corresponding Source and Corresponding Application + Code. If you use option 4d1, you must provide the Installation + Information in the manner specified by section 6 of the GNU GPL + for conveying Corresponding Source.) + + 5. Combined Libraries. + + You may place library facilities that are a work based on the +Library side by side in a single library together with other library +facilities that are not Applications and are not covered by this +License, and convey such a combined library under terms of your +choice, if you do both of the following: + + a) Accompany the combined library with a copy of the same work based + on the Library, uncombined with any other library facilities, + conveyed under the terms of this License. + + b) Give prominent notice with the combined library that part of it + is a work based on the Library, and explaining where to find the + accompanying uncombined form of the same work. + + 6. Revised Versions of the GNU Lesser General Public License. + + The Free Software Foundation may publish revised and/or new versions +of the GNU Lesser General Public License from time to time. Such new +versions will be similar in spirit to the present version, but may +differ in detail to address new problems or concerns. 
+ + Each version is given a distinguishing version number. If the +Library as you received it specifies that a certain numbered version +of the GNU Lesser General Public License "or any later version" +applies to it, you have the option of following the terms and +conditions either of that published version or of any later version +published by the Free Software Foundation. If the Library as you +received it does not specify a version number of the GNU Lesser +General Public License, you may choose any version of the GNU Lesser +General Public License ever published by the Free Software Foundation. + + If the Library as you received it specifies that a proxy can decide +whether future versions of the GNU Lesser General Public License shall +apply, that proxy's public statement of acceptance of any version is +permanent authorization for you to choose that version for the +Library. diff --git a/deps/lightening/ChangeLog b/deps/lightening/ChangeLog new file mode 100644 index 0000000..cc7c8e9 --- /dev/null +++ b/deps/lightening/ChangeLog @@ -0,0 +1,17 @@ +-*- text -*- + +Starting from October 30, 2018, the Lightening project no longer stores +change logs in `ChangeLog' files. Instead, changes are detailed in the +version control system's logs. They can be seen by downloading a copy +of the Git repository: + + $ git clone https://gitlab.com/wingo/lightening + $ git log + +Alternatively, they can be seen on the web, using the Gitweb interface +at: + + https://gitlab.com/wingo/lightening + +Change logs from upstream GNU Lightning are still available in +ChangeLog.lightning. 
diff --git a/deps/lightening/ChangeLog.lightning b/deps/lightening/ChangeLog.lightning new file mode 100644 index 0000000..19b3335 --- /dev/null +++ b/deps/lightening/ChangeLog.lightning @@ -0,0 +1,4018 @@ +2018-04-20 Paulo Andrade <pcpa@gnu.org> + + * include/lightning/jit_private.h: Add new register classes to + flag float registers and double only registers, required for sparc64 + where only low 32 bit fpr registers can be used for single precision + operations. + Add new 128 bit jit_regset_t type for sparc64 register set. + + * include/lightning/jit_sparc.h, lib/jit_sparc-cpu.c, lib/jit_sparc-fpu.c, + lib/jit_sparc-sz.c, lib/jit_sparc.c: Update for 64 bits sparc. + + * lib/lightning.c: Update for new jit_regset_t required for sparc64. + +2018-02-26 Paulo Andrade <pcpa@gnu.org> + + * check/lightning.c, include/lightning.h: Add the new jit_va_push + interface. That should be called when passing a va_list to a C + function. This is required because on Alpha a va_list is passed + by value, and lightning does not know about data types, so, cannot + understand it is pushing a va_list as argument. + + * lib/jit_names.c, lib/lightning.c: Minor changes for the new + jit_code_va_push. + + * check/cva_list.c: Update only test case using jit_va_push, to + pass a va_list to a C function. + + doc/body.texi: Better documentation of the varargs interface. + + * jit_alpha.c, jit_alpha-cpu.c: Update to properly push a + C va_list and correctly calculate varargs offset. + + * lib/jit_aarch64-sz.c, lib/jit_aarch64.c, lib/jit_alpha-sz.c, + lib/jit_arm-sz.c, lib/jit_arm.c, lib/jit_hppa-sz.c, lib/jit_hppa.c, + lib/jit_ia64-sz.c, lib/jit_ia64.c, lib/jit_mips-sz.c, lib/jit_mips.c, + lib/jit_ppc-sz.c, lib/jit_ppc.c, lib/jit_s390-sz.c, lib/jit_s390.c, + lib/jit_sparc-sz.c, lib/jit_sparc.c, lib/jit_x86-sz.c, lib/jit_x86.c: + Update for the new jit_va_push interface. 
+ +2018-02-22 Paulo Andrade <pcpa@gnu.org> + + * lib/jit_alpha-cpu.c: Always set t12 to the address of the + current function, to properly work on all systems. Previously + the shortcut did only work on Tru64. For Linux and glibc the + change is required. + +2018-02-22 Paulo Andrade <pcpa@gnu.org> + + * lib/jit_aarch64.c, lib/jit_alpha.c, lib/jit_arm.c, + lib/jit_mips.c, lib/jit_ppc.c, lib/jit_sparc.c, lib/jit_x86.c: + Correct wrong logic in usage of jit_live in jit_retr. The + problem is that if a temporary is required during epilog, + the return register might be allocated, so, jit_live must always + be used. + +2018-01-31 Paulo Andrade <pcpa@gnu.org> + + * lib/lightning.c: Avoid deep recursions when computing live + register ranges. + +2018-01-31 Paulo Andrade <pcpa@gnu.org> + + * lib/jit_mips-cpu.c: Correct frame size and varargs + initialization for the n32 abi. + * lib/jit_mips.c, lib/jit_mips-fpu.c: Correct 32 bit abis + in big-endian. + +2017-09-13 Paulo Andrade <pcpa@gnu.org> + + * configure.ac: Add check for binutils 2.29 prototype to the + disassembler function. + * lib/jit_disasm.c: Adapt for binutils 2.29 change. + +2017-06-09 Paulo Andrade <pcpa@gnu.org> + + * include/lightning/jit_private.h, lib/lightning.c: Add a + second pass from start when computing register live ranges. + This should be used temporarily, and is required for certain + loop constructs, with several consecutive blocks not referencing + a live register. + +2016-05-05 Paulo Andrade <pcpa@gnu.org> + + * lib/lightning.c: Correct wrong movr simplification, + remove no longer needed code to set return registers live + and update live register set when reaching a label boundary, + but do not descend if the block has been already visited. + The later need some tuning for complex code generation, where + it will still have issues. 
+ +2015-11-30 Paulo Andrade <pcpa@gnu.org> + + * doc/body.texi: Change documentation to no longer say + it is a variant of the Fibonacci sequence, and document + a proper implementation. + Thanks to Jon Arintok for pointing out that the Fibonacci + sequence generation was incorrect. It was documented, but + still confusing. + + * check/fib.tst, check/fib.ok, check/bp.tst, check/bp.ok, + doc/ifib.c, doc/rbif.c: Implement a proper Fibonacci + sequence implementation. + +2015-07-03 Paulo Andrade <pcpa@gnu.org> + + * lib/jit_mips-cpu.c: Correct definition of htonr_ul. + Correct prolog/epilog/va* routines to work on o64 abi. + + * lib/jit_mips-fpu.c: Correct load of double literal + argument when not using a data buffer. + Remove alignment correction in vaarg_d if using the + new mips abi. + + * lib/jit_mips.c: Correct code to allow creating variadic + jit functions when using the new mips abi. + + * lib/jit_rewind.c: Minor adjust for rewind when using + the new mips abi, if there are varargs arguments in + registers. + +2015-06-06 Paulo Andrade <pcpa@gnu.org> + + * lib/jit_ia64-cpu.c: Search backward for the last output + register used, otherwise would stop too early if a float + argument used the slot. + Correct offset of first va_list argument, and use proper + va_list abi. + + * lib/jit_ia64-fpu.c: Add new functions to move a gpr + to a fpr register, to counterpart the ones that move a + fpr to a gpr. These are required to properly implement + jit_getarg*_{f,d} on complex prototypes, or variadic + jit functions. + + * lib/jit_ia64-sz.c: Update for support to jit variadic + functions. + + * lib/jit_ia64.c: Implement proper abi for variadic + jit functions. + +2015-06-04 Paulo Andrade <pcpa@gnu.org> + + * lib/jit_rewind.c: New file implementing generic functions + to "rewind", or rewrite IR code sequences. + + * include/lightning.h: Add several new codes, that previously + were a function call, that would synthesize the operation. 
+ Now, there is a code for the operation, and a new flag to + know an operation is synthesized. + + * include/lightning/jit_private.h: Add several new macros to + help construct synthesized IR code sequences. + + * lib/Makefile.am: Update for lib/jit_rewind.c. + + * lib/jit_disasm.c: Update for a small rework on jit_node_t, + so that --enable-devel-disassembler does not need a change + in the layout of jit_node_t. + + * lib/jit_names.c: Update for the new codes. + + * lib/jit_print.c: Update to print more readable output, and + flag synthesized IR code sequences. + + * lib/jit_aarch64-sz.c, lib/jit_aarch64.c, + lib/jit_arm-sz.c, lib/jit_arm.c, lib/jit_x86-sz.c, + lib/jit_x86.c: Update for new synthesized IR code sequences. + + * lib/jit_ppc-cpu.c, lib/jit_ppc-fpu., lib/jit_ppc-sz.c, + lib/jit_ppc.c, lib/jit_mips-cpu.c, lib/jit_mips-fpu.c, + lib/jit_mips-sz.c, lib/jit_mips.c, lib/jit_s390-fpu.c, + lib/jit_s390-sz.c, lib/jit_s390.c: Update for new synthesized + IR code sequences and correct bugs in the initial varargs + implementation support. + + * lib/jit_alpha-sz.c, lib/jit_alpha.c, lib/jit_hppa-sz.c, + lib/jit_hppa.c, lib/jit_ia64-sz.c, lib/jit_ia64.c, + lib/jit_sparc-sz.c, lib/jit_sparc.c: Add generic, untested + support for the new synthesized IR code sequences. Known + most likely broken right now, and should be corrected once + access to these hosts is available. + + * lib/lightning.c: Update for new IR codes, and add support + for not yet existing instructions that change third argument. + + * size.c: Change to use different tables for LE and BE PowerPC. + Correct a wrong endif for x32. + +2015-05-25 Paulo Andrade <pcpa@gnu.org> + + * check/cva_list.c: New file implementing a test to ensure + the value returned by jit_va_start is a valid C va_list. + + * check/va_list.ok: New simple helper file, as now the + va_list.tst test is enabled. + + * check/va_list.tst: Rewritten for an extensive variadic + jit functions test. 
+ + * check/Makefile.am: Update for the new tests. + + * lib/jit_arm-cpu.c, lib/jit_arm-swf.c, lib/jit_arm-vfp.c, + lib/jit_arm.c: Correct broken software float in a previous + commit. Note that the hard float abi implementation is known + broken at this time, for special cases involving variadic + functions, and should be corrected next. + + lib/jit_x86-cpu.c, lib/jit_x86-sz.c, lib/jit_x86.c: Correct + the jit_va_list_t semantics to match C va_list. + +2015-05-24 Paulo Andrade <pcpa@gnu.org> + + * lib/Makefile.am: Bump library major. This is a preparation + for a rework that was due for quite some time, but that is + now required to properly implement variadic jit functions. + The rework is mainly required to know at prolog parsing, if + a function is variadic or not. This will benefit a few + backends, and is mandatory for the hard float arm abi. + The rework was already planned for quite some time, to + be able to use a variable stack framesize, and for leaf + functions optimization where applicable. + The change will be source compatible, but will change + some internals, and jit_code_t values, as some new will + be added. + The only behavior change is that, jit_arg_register_p may + change return value on hard float arm abi, if called before + or after jit_ellipsis. Common sense anyway, would say to + make that call after jit_ellipsis, but documentation + should be updated for it. + +2015-05-24 Paulo Andrade <pcpa@gnu.org> + + * lib/jit_aarch64-fpu.c, lib/jit_aarch64.c: Correct base + aarch64 varargs code. + +2015-05-24 Paulo Andrade <pcpa@gnu.org> + + * check/lightning.c: Clearly run check if clang is the system + compiler. + +2015-05-20 Paulo Andrade <pcpa@gnu.org> + + * lib/jit_sparc-cpu.c, lib/jit_sparc-fpu.c, lib/jit_sparc.c: + Add base support to jit vararg functions to the sparc backend. 
+ +2015-05-20 Paulo Andrade <pcpa@gnu.org> + + * lib/jit_alpha-cpu.c, lib/jit_alpha-fpu.c, lib/jit_alpha.c: + Add base support to jit vararg functions to the alpha backend. + +2015-05-19 Paulo Andrade <pcpa@gnu.org> + + * lib/jit_hppa-cpu.c, lib/jit_hppa-fpu.c, lib/jit_hppa.c: + Add base support to jit vararg functions to the hppa backend. + +2015-05-10 Paulo Andrade <pcpa@gnu.org> + + * lib/jit_ia64-cpu.c, lib/jit_ia64-fpu.c, lib/jit_ia64.c: + Add base support to jit vararg functions to the ia64 backend. + +2015-05-10 Paulo Andrade <pcpa@gnu.org> + + * lib/jit_ia64-fpu.c, lib/jit_ia64.c: Correct movi_d_w + and movi_f_w implementation to work when not using a + data buffer. This causes the check varargs.tst to + work when passing "-d" to the lightning test tool. + +2015-05-10 Paulo Andrade <pcpa@gnu.org> + + * lib/jit_ia64.c: Implement inline assembly cache flush, + required on multiprocessor systems. + +2015-05-06 Paulo Andrade <pcpa@gnu.org> + + * lib/jit_mips-cpu.c, lib/jit_mips-fpu.c, lib/jit_mips.c: + Add base support to jit vararg functions to the mips backend. + Currently only supported on the o32 abi, until access to a + n32 system is arranged. + +2015-05-05 Paulo Andrade <pcpa@gnu.org> + + * lib/jit_ppc-cpu.c, lib/jit_ppc-fpu.c, lib/jit_ppc.c: + Add base support to jit vararg functions to the PowerPC backend. + +2015-05-02 Paulo Andrade <pcpa@gnu.org> + + * lib/jit_s390-cpu.c, lib/jit_s390-fpu.c, lib/jit_s390.c: + Add base support to jit vararg functions to the s390 backend. + +2015-05-01 Paulo Andrade <pcpa@gnu.org> + + * lib/jit_arm-cpu.c, lib/jit_arm-swf.c, lib/jit_arm-vfp.c, + lib/jit_arm.c: Add base support to jit vararg + functions to the arm backend. + +2015-04-30 Paulo Andrade <pcpa@gnu.org> + + * lib/jit_aarch64-cpu.c, lib/jit_aarch64-fpu.c, + lib/jit_aarch64.c: Add base support to jit vararg + functions to the aarch64 backend. 
+ +2015-04-27 Paulo Andrade <pcpa@gnu.org> + + * include/lightning.h, include/lightning/jit_private.h, + lib/jit_names.c, lib/lightning.c: Add initial support + for the new jit_va_start, jit_va_arg, jit_va_arg_d, and + jit_va_end interfaces. The jit_va_start call is supposed + to return a va_list compatible pointer, but not yet + decided if it will be "declared" stdarg compatible, + as for now only x86 support has been added (and should + be compatible), but issues may arise on other backends. + + * check/lightning.c: Add wrappers to call the new jit_va_* + interfaces. + + * lib/jit_x86-cpu.c, lib/jit_x86.c: Implement the new + jit_va_* for x86. + + * lib/jit_x86-sz.c: Add fields, but not yet fully updated, + as this is an intermediate commit. + + * lib/jit_aarch64-sz.c, lib/jit_aarch64.c, + lib/jit_alpha-sz.c, lib/jit_alpha.c, + lib/jit_arm-sz.c, lib/jit_arm.c, + lib/jit_hppa-sz.c, lib/jit_hppa.c, + lib/jit_ia64-sz.c, lib/jit_ia64.c, + lib/jit_mips-sz.c, lib/jit_mips.c, + lib/jit_ppc-sz.c, lib/jit_ppc.c, + lib/jit_s390-sz.c, lib/jit_s390.c, + lib/jit_sparc-sz.c, lib/jit_sparc.c: Prepare for the + new jit_va_* interfaces. Not yet implemented, and will + cause an assertion if used. + + * check/va_list.tst: Simple early test case, that works + on x86_64, x32, ix86, cygwin, and cygwin64. 
+ +2015-02-17 Paulo Andrade <pcpa@gnu.org> + + * include/lightning.h, include/lightning/jit_private.h, + lib/jit_aarch64-cpu.c, lib/jit_aarch64.c, + lib/jit_alpha-cpu.c, lib/jit_alpha.c, + lib/jit_arm-cpu.c, lib/jit_arm.c, + lib/jit_hppa-cpu.c, lib/jit_hppa.c, + lib/jit_ia64-cpu.c, lib/jit_ia64.c, + lib/jit_mips-cpu.c, lib/jit_mips.c, + lib/jit_ppc-cpu.c, lib/jit_ppc.c, + lib/jit_s390-cpu.c, lib/jit_s390.c, + lib/jit_sparc-cpu.c, lib/jit_sparc.c, + lib/jit_x86-cpu.c, lib/jit_x86.c: Implement the new + jit_allocar(offs, size) interface, that receives + two integer registers arguments, allocates space + dynamically in the stack, returns the offset in + the first argument, and uses the second argument + for the size in bytes of the memory to be allocated. + + * check/allocar.ok, check/allocar.tst: New files + implementing test cases for the new jit_allocar + interface. + + * check/Makefile.am, check/lightning.c: Update for + the new test case and interface. + + * doc/body.texi: Add documentation of the new + interface. + +2015-02-17 Paulo Andrade <pcpa@gnu.org> + + * include/lightning/jit_x86.h, lib/jit_x86-cpu.c, + lib/jit_x86-x87.c: No longer make st(7) available. + Need to keep one x87 slots empty to avoid exceptions. + This has the side effect of no longer needing the + hackish emms instruction before a function call. + +2015-02-16 Paulo Andrade <pcpa@gnu.org> + + * lib/lightning.c: Remove the jit_regno_patch bitfield + register fields before actual emit, as it is only really + used before emit, otherwise, on special conditions it + may consider live registers as dead during code emit. + +2015-02-15 Paulo Andrade <pcpa@gnu.org> + + * lib/jit_x86-cpu.c, lib/jit_x86-sse.c, lib/jit_x86-x87.c: + Correct encoding of ldxr* stxr* in the x32 abi. If the + displacement register is negative, it would generate + a 64 bit instruction with a 32 bit unsigned displacement. 
+ + * check/ranger.tst, check/ranger.ok: New files, implementing + a test case for negative loads and stores. This is range.tst + converted to use registers instead of immediate offsets. + + check/Makefile.am: Update for the new test case. + +2015-02-07 Paulo Andrade <pcpa@gnu.org> + + * lib/jit_size.c: Preventively use at least 144 bytes + if JIT_INSTR_MAX is less than it. The logic is not + guaranteed to be 100% precise, it is mostly heuristics + to allocate a buffer with as close as possible size, + but a wrong value may cause code generation to write + past the end of the buffer. + +2015-02-03 Paulo Andrade <pcpa@gnu.org> + + * lib/lightning.c: Correct the reason the bug in + simplify_stxi was not triggered before, it was due to + incorrectly resetting the value->code field, what was + causing it to never properly optimize: + stxi Im0 Rb0 Rt0 + ldxi Rt1 Rb1 Im1 + when Rb0 == Rb1, Rt0 == Rt1 and Im0 == Im1 + There was another possible issue, that has been also + addressed in this commit, that would be the case of + Rbn == Rtn, where no redundancy removal is possible. + +2015-02-03 Paulo Andrade <pcpa@gnu.org> + + * lib/lightning.c: Correct wrong check in simplify_stxi. + The test was incorrectly comparing the target register + and the displacement offset. This was a time bomb bug, + that would trigger in code like: + stxi Im0 Rb0 Rt0 + stxi Im1 Rb1 Rt1 + if Rb0 == Rb1 && Rt0 == Rt1 && Im0 == Rt1, that is, + the wrong check was Im0 == Rt1, instead of the supposed + Im0 == Im1 (that was what the code meant to do). It + was removing the second stxi assuming it was redundantly + generated; as that is not an uncommon pattern on + translators generating jit. 
+ +2015-02-02 Paulo Andrade <pcpa@gnu.org> + + * configure.ac, include/lightning/jit_private.h, + lib/jit_aarch64.c, lib/jit_alpha.c, lib/jit_arm.c, + lib/jit_disasm.c, lib/jit_hppa.c, lib/jit_ia64.c, + lib/jit_mips.c, lib/jit_ppc.c, lib/jit_print.c, + lib/jit_s390.c, lib/jit_sparc.c, lib/jit_x86.c: Add a new + --enable-devel-disassembler option, that should be used + during development, or lightning debug. This option + intermixes previous jit_print and jit_disassemble + output, making it easier to visualize what lightning + call was used, and what code was generated. + +2015-01-31 Paulo Andrade <pcpa@gnu.org> + + * lib/jit_arm-cpu.c, lib/jit_arm.c: Only limit to 24 bit + displacement non conditional jump in the same jit_state_t. + +2015-01-19 Paulo Andrade <pcpa@gnu.org> + + * doc/body.texi: Reorder documentation, making jit_frame + and jit_tramp the lightning response to the need of + trampolines, continuations and tail call optimizations. + A pseudo code example of a factorial function was added. + Also added a section for description of the available + predicates. + + * doc/fact.c: New file, implementing a simple example of + a translation of a trivial, recursive, tail call optimization + into lightning calls. This is the conversion to functional C + code of the example in doc/body.texi. + + * doc/Makefile.am: Update for the next test case. + +2015-01-17 Paulo Andrade <pcpa@gnu.org> + + * include/lightning.h, lib/jit_aarch64.c, + lib/jit_alpha.c, lib/jit_arm-vfp.c, lib/jit_arm.c, + lib/jit_hppa.c, lib/jit_ia64.c, lib/jit_mips.c, + lib/jit_ppc.c, lib/jit_s390.c, lib/jit_sparc.c, + lib/jit_x86.c: Add the new jit_arg_register_p predicate. + The predicate is expected to be used to know if an + argument is in a register, what would need special + handling if code that can overwrite non callee save + registers is executed. + + * check/carg.c: New test case to check consistency and + expected usage of jit_arg_register_p. 
+ + * check/Makefile.am: Update for new test case. + +2015-01-17 Paulo Andrade <pcpa@gnu.org> + + * include/lightning/jit_aarch64.h, + include/lightning/jit_alpha.h, + include/lightning/jit_arm.h, + include/lightning/jit_hppa.h, + include/lightning/jit_mips.h, + include/lightning/jit_ppc.h, + include/lightning/jit_s390.h, + include/lightning/jit_sparc.h, + include/lightning/jit_x86.h, + lib/jit_aarch64.c, lib/jit_alpha.c, + lib/jit_arm.c, lib/jit_hppa.c, + lib/jit_ia64.c, lib/jit_mips.c, + lib/jit_ppc.c, lib/jit_s390.c, + lib/jit_sparc.c, lib/jit_x86.c: Remove jit_arg_reg_p and + jit_arg_f_reg_p from a public header, and define it only + on port specific files where an integer offset is used + to qualify an argument identifier. Exported code expects + an opaque pointer (but of jit_node_t* type) to "qualify" + an argument identifier. + This patch, and the code review/simplification done during + it also corrected some bugs: + o Inconsistent jit_arg_d value of double argument after 3 + integer arguments in arm for jit_functions; tested, C + functions were being properly called. + o Inconsistent use of getarg_{f,d} and putarg*_{f,d} on + s390 (32-bit) that happened to not have a proper test + case, as it would only happen for jit functions, and + tested, called C functions had proper arguments. + o Corrected a "last minute" correction that did not go + to the committed version, and would not compile on hppa, + due to bad _jit_putargi_d prototype definition. + +2015-01-17 Paulo Andrade <pcpa@gnu.org> + + * doc/body.texi: Correct wrong/outdated information for + hton*, pusharg* and ret*, and add missing documentation + for rsb*, qmul*, qdvi* and putarg*. + +2015-01-15 Paulo Andrade <pcpa@gnu.org> + + * configure.ac, lib/jit_disasm.c: Rewrite workaround + to apparent problem to initialize powerpc disassembler. 
+ +2015-01-15 Paulo Andrade <pcpa@gnu.org> + + * include/lightning.h, lib/jit_aarch64.c, + lib/jit_alpha.c, lib/jit_arm.c, lib/jit_hppa.c, + lib/jit_ia64.c, lib/jit_mips.c, lib/jit_ppc.c, + lib/jit_s390.c, lib/jit_sparc.c, lib/jit_x86.c: + Implement jit_putarg*. It works as a mix of jit_getarg* + and jit_pusharg*, in the way that the first argument is + a register or immediate, and the second is a pointer + returned by jit_arg*. The use of the interface is to change + values of arguments to the current jit function. + + * check/put.ok, check/put.tst: New test cases exercising + the new jit_putarg* interface. + + * check/Makefile.am, check/lightning.c: Update for the + new test case and interface. + +2015-01-08 Paulo Andrade <pcpa@gnu.org> + + * include/lightning/jit_s390.h, lib/jit_s390-cpu.c, + lib/jit_s390-fpu.c, lib/jit_s390-sz.c, lib/jit_s390.c: + Renamed s390x* files to s390*. + + * check/float.tst, check/lightning.c, configure.ac, + include/lightning.h, include/lightning/Makefile.am, + lib/Makefile.am, lib/jit_s390.c, lib/jit_size.c, + lib/lightning.c: Update for renamed files. + +2015-01-08 Paulo Andrade <pcpa@gnu.org> + + * include/lightning.h, include/lightning/jit_private.h, + include/lightning/jit_s390x.h, lib/jit_disasm.c, + lib/jit_s390x-cpu.c, lib/jit_s390x-fpu.c, lib/jit_s390x-sz.c, + lib/jit_s390x.c, lib/jit_size.c, lib/lightning.c: + Add support for generating jit for s390 32 bit. This change + also removed %f15 from the list of temporaries fpr registers; + it was not being used, but if it were, it would corrupt the + stack frame because the spill address would overwrite gpr + offsets. + +2014-12-26 Paulo Andrade <pcpa@gnu.org> + + * lib/jit_ppc-cpu.c, lib/jit_ppc.c: Correct some endianness issues + on the powerpc le backend. + +2014-12-26 Paulo Andrade <pcpa@gnu.org> + + * lib/jit_ppc-cpu.c: Add mcrxr instruction emulation, + as this instruction has been phased out, and should be + implemented as a kernel trap. 
+ +2014-12-26 Paulo Andrade <pcpa@gnu.org> + + * lib/jit_arm.c: Better check for need to flush constants + before the pool being no longer reachable. + +2014-12-25 Paulo Andrade <pcpa@gnu.org> + + * include/lightning.h: Split jit_htonr in the new 3 interfaces + jit_htonr_us, jit_htonr_ui and jit_htonr_ul, the later only + available on 64 bit. The plain/untyped jit_htonr macro call + maps to the wordsize one. + * lib/jit_aarch64-cpu.c, lib/jit_aarch64-sz.c, lib/jit_aarch64.c, + lib/jit_alpha-cpu.c, lib/jit_alpha-sz.c, lib/jit_alpha.c, + lib/jit_arm-cpu.c, lib/jit_arm-sz.c, lib/jit_arm.c, + lib/jit_hppa-cpu.c, lib/jit_hppa-sz.c, lib/jit_hppa.c, + lib/jit_ia64-cpu.c, lib/jit_ia64-sz.c, lib/jit_ia64.c, + lib/jit_mips-cpu.c, lib/jit_mips-sz.c, lib/jit_mips.c, + lib/jit_ppc-cpu.c, lib/jit_ppc-sz.c, lib/jit_ppc.c, + lib/jit_s390x-cpu.c, lib/jit_s390x-sz.c, lib/jit_s390x.c, + lib/jit_sparc-cpu.c, lib/jit_sparc-sz.c, lib/jit_sparc.c, + lib/jit_x86-cpu.c, lib/jit_x86-sz.c, lib/jit_x86.c: + Update backends for the new jit_htonr*. + * check/lightning.c, lib/jit_names.c, lib/lightning.c: + Update for the new jit_htonr* interfaces. + * check/Makefile.am: Update for new test cases. + * check/hton.ok, check/hton.tst: New test cases. + +2014-12-24 Paulo Andrade <pcpa@gnu.org> + + * include/lightning/jit_private.h, include/lightning/jit_x86.h, + lib/jit_disasm.c, lib/jit_x86-cpu.c, lib/jit_x86-sse.c, + lib/jit_x86-sz.c, lib/jit_x86-x87.c, lib/jit_x86.c, + size.c: Implement support for the x32 abi. Built and + tested on Gentoo default/linux/amd64/13.0/x32 profile. + +2014-12-24 Paulo Andrade <pcpa@gnu.org> + + * lib/jit_names.c: Add missing rsbi_f and rsbi_d strings. + +2014-12-21 Paulo Andrade <pcpa@gnu.org> + + * lib/jit_arm.c: Call __clear_cache for every page. + This should only be required for older boards or + toolchain setup, but has been reported to be required + for lightning at some point. 
+ +2014-12-21 Paulo Andrade <pcpa@gnu.org> + + * lib/jit_arm.c: Correct check to guard overflow of index + of constants from program counter. + +2014-11-24 Paulo Andrade <pcpa@gnu.org> + + * lib/lightning.c: Remove an optimization to callee save + registers that may incorrectly remove a jit_movr under + special conditions. + +2014-11-20 Paulo Andrade <pcpa@gnu.org> + + * include/lightning/jit_ppc.h, lib/jit_ppc-cpu.c, + lib/jit_ppc.c: Add initial powerpc le support. + +2014-11-20 Paulo Andrade <pcpa@gnu.org> + + * lib/jit_disasm.c: Change thumb or arm disassemble based on + jit code before disassembly. + + * lib/jit_arm-cpu.c: Correct reversed arguments to LDRD and + STRD instructions, and correct checking for support of those. + + * lib/jit_arm-swf.c: Correct wrong use of LDRD and STRD and + only use those if the register is even. + + * check/check.arm.swf.sh, check/check.arm4.swf.sh: New files + to test LDRD and STRD, as well as the alternate code path + when those are not available, in the .arm4. test case. + + * check/Makefile.am: Update for the new test cases. + +2014-11-08 Paulo Andrade <pcpa@gnu.org> + + * include/lightning/jit_private.h, lib/jit_aarch64.c, + lib/jit_alpha.c, lib/jit_arm.c, lib/jit_hppa.c, + lib/jit_ia64.c, lib/jit_mips.c, lib/jit_ppc.c, + lib/jit_s390x.c, lib/jit_sparc.c, lib/jit_x86.c: + Implement a private jit_flush call, that flushes + the cache, if applicable, aligning down to the + previous and up to the next page boundary. + +2014-11-08 Paulo Andrade <pcpa@gnu.org> + + * check/ctramp.c: New file. It just repeats the test + of tramp.tst, but using two jit_state_t, what should + test possible issues with two contexts, and also validate + jit_tramp works on backends with function descriptions. + + * check/Makefile.am: Update for new test case. + +2014-11-03 Paulo Andrade <pcpa@gnu.org> + + * include/lightning/jit_mips.h: Do not make the t9 register + JIT_R11 (or JIT_R7 for n32 or n64 abi) available. 
Previously + it caused problems if one expects it to not be changed in a + function call. For example, calling a jit function, where it + really does not need to be changed. + +2014-10-26 Paulo Andrade <pcpa@gnu.org> + + * lib/jit_aarch64.c, lib/jit_alpha.c, lib/jit_arm.c, + lib/jit_hppa.c, lib/jit_ia64.c, lib/jit_mips.c, lib/jit_ppc.c, + lib/jit_s390x.c, lib/jit_sparc.c, lib/jit_x86.c: Add an + assertion to all code generation "drivers" to ensure + _jitc->regarg is empty or in an expected state, after + translation of a lightning instruction to native code. + This change was a brute force test to find out other cases + of a temporary not being released (like was happening with + _bmsi and _bmci on x86), but no other case was found, + after running make check, with assertions enabled, on all + backends. + +2014-10-26 Paulo Andrade <pcpa@gnu.org> + + * lib/jit_x86-cpu.c: Correct a register allocation leak in + _bmsi and _bmci. + +2014-10-25 Paulo Andrade <pcpa@gnu.org> + + * lib/jit_disasm.c: Do not cause a fatal error if init_jit + fails in the jit_init_debug call. + +2014-10-24 Paulo Andrade <pcpa@gnu.org> + + * lib/jit_ia64.c, lib/jit_ppc.c: Correct handling of function + descriptor when first prolog is a jit_tramp prolog. The + test case was using the same jit_context_t, so was not + triggering this condition. + + * lib/jit_ppc-cpu.c: Properly handle jump displacements that + do not fit on 24 powerpc. This required changing from previous + "mtlr reg, blr" to "mtctr reg, bctr" to properly handle + the logic to "hide" function descriptors, but that would + also be required as the proper jit_jmpr when/if implementing + optimizations to leaf functions (was working with blr because + it is saved/reloaded in prolog/epilog). + +2014-10-21 Paulo Andrade <pcpa@gnu.org> + + * include/lightning.h, lib/lightning.c: Add three predicates + to query information about labels. 
jit_forward_p(label) + will return non zero if the label is "forward", that is, + needs a call to jit_link(label), jit_indirect_p(label) + that returns non zero if the label was created with the + jit_indirect() call, and jit_target_p(label) that will + return non zero if there is at least one jump patched + to land at that label. + +2014-10-18 Paulo Andrade <pcpa@gnu.org> + + * check/range.ok, check/range.tst: New test case designed + to catch incorrect code generation, usually due to incorrect + test of immediate size. The test checks a large amount of + encodings in "power of two" boundaries. This test exercises + a significant amount of code paths that was previously not + tested. + + * check/Makefile.am: Add range test to make check target. + + * lib/jit_aarch64-cpu.c: Correct wrong address calculation + for stxi_c, stxi_s, stxi_i and stxi_l when the offset is + too large. + + * lib/jit_mips-fpu.c: Correct wrong size test to check if + an immediate can be encoded in a float or double store. + + * lib/jit_s390x-cpu.c: Correct inverted encoding to stxi_s + when the offset cannot be encoded, and falls back to an + alternate encoding in 2 instructions. + +2014-10-17 Paulo Andrade <pcpa@gnu.org> + + * check/alu_rsb.ok, check/alu_rsb.tst: New files implementing + tests for jit_rsb*. 
+ + * check/Makefile.am, check/lightning.c, include/lightning.h, + lib/jit_aarch64-cpu.c, lib/jit_aarch64-fpu.c, lib/jit_aarch64-sz.c, + lib/jit_aarch64.c, lib/jit_alpha-cpu.c, lib/jit_alpha-fpu.c, + lib/jit_alpha-sz.c, lib/jit_alpha.c, lib/jit_arm-cpu.c, + lib/jit_arm-swf.c, lib/jit_arm-sz.c, lib/jit_arm-vfp.c, + lib/jit_arm.c, lib/jit_hppa-cpu.c, lib/jit_hppa-fpu.c, + lib/jit_hppa-sz.c, lib/jit_hppa.c, lib/jit_ia64-cpu.c, + lib/jit_ia64-fpu.c, lib/jit_ia64-sz.c, lib/jit_ia64.c, + lib/jit_mips-cpu.c, lib/jit_mips-fpu.c, lib/jit_mips-sz.c, + lib/jit_mips.c, lib/jit_names.c, lib/jit_ppc-cpu.c, + lib/jit_ppc-fpu.c, lib/jit_ppc-sz.c, lib/jit_ppc.c, + lib/jit_s390x-cpu.c, lib/jit_s390x-fpu.c, lib/jit_s390x-sz.c, + lib/jit_s390x.c, lib/jit_sparc-cpu.c, lib/jit_sparc-fpu.c, + lib/jit_sparc-sz.c, lib/jit_sparc.c, lib/jit_x86-cpu.c, + lib/jit_x86-sse.c, lib/jit_x86-sz.c, lib/jit_x86-x87.c, + lib/jit_x86.c, lib/lightning.c: Implement jit_rsb*. This + was a missing lightning 1.x interface, that on most + backends is synthesized, but on a few backends (hppa and ia64), + it can generate better code as on those there is, or the + only instruction with an immediate is in "rsb" format + (left operand). + +2014-10-17 Paulo Andrade <pcpa@gnu.org> + + * lib/jit_names.c: New file with single definition of string + representation of lightning IR codes. + + * size.c: Modified to append the code name in a C comment + after the maximum instruction size. + + * lib/jit_print.c: Minor change to not duplicate jit_names.c + contents. + + * lib/jit_aarch64-sz.c, lib/jit_alpha-sz.c, lib/jit_arm-sz.c, + lib/jit_hppa-sz.c, lib/jit_ia64-sz.c, lib/jit_mips-sz.c, + lib/jit_ppc-sz.c, lib/jit_s390x-sz.c, lib/jit_sparc-sz.c, + lib/jit_x86-sz.c: Rewritten to add string representation of + IR codes in a C comment. 
+ +2014-10-14 Paulo Andrade <pcpa@gnu.org> + + * lib/jit_aarch64-cpu.c, lib/jit_alpha-cpu.c, lib/jit_arm-cpu.c, + lib/jit_hppa-cpu.c, lib/jit_mips-cpu.c, lib/jit_ppc-cpu.c, + lib/jit_sparc-cpu.c: Implement or correct the internal + nop(count) call that receives an argument that tells the + modulo bytes to align the code for the next instruction. + + * include/lightning.h, lib/lightning.c, lib/jit_aarch64.c, + lib/jit_alpha.c, lib/jit_arm.c, lib/jit_hppa.c, lib/jit_ia64.c, + lib/jit_mips.c, lib/jit_ppc.c, lib/jit_s390x.c, lib/jit_sparc.c, + lib/jit_x86.c: Implement the new jit_align() call that receive + an argument, that tells the modulo, in bytes, to align the + next instruction. In most backends the only value that makes + a difference is a value that matches sizeof(void*), as all + other values usually are already automatically aligned in + labels, but not guaranteed to be aligned at word size bytes. + + * check/align.ok, check/align.tst: New files, implementing + a simple test for the new jit_align() interface. + + * check/Makefile.am, check/lightning.c, lib/jit_aarch64-sz.c, + lib/jit_alpha-sz.c, lib/jit_arm-sz.c, lib/jit_hppa-sz.c, + lib/jit_ia64-sz.c, lib/jit_mips-sz.c, lib/jit_ppc-sz.c, + lib/jit_print.c, lib/jit_s390x-sz.c, lib/jit_sparc-sz.c, + lib/jit_x86-sz.c: Update for the new jit_code_align code and + the jit_align() interface. + +2014-10-13 Paulo Andrade <pcpa@gnu.org> + + * include/lightning.h, lib/jit_size.c, size.c: Use a + symbolic value for the last IR code. 
+ +2014-10-12 Paulo Andrade <pcpa@gnu.org> + + * include/lightning.h, include/lightning/jit_private.h, + lib/jit_aarch64-cpu.c, lib/jit_alpha-cpu.c, lib/jit_arm-cpu.c, + lib/jit_hppa-cpu.c, lib/jit_ia64-cpu.c, lib/jit_mips-cpu.c, + lib/jit_ppc-cpu.c, lib/jit_s390x-cpu.c, lib/jit_sparc-cpu.c, + lib/jit_x86-cpu.c, lib/lightning.c: Implement the new + jit_frame and jit_tramp interfaces, that allow writing + trampoline like calls, where a single dispatcher jit buffer + is written, and later other jit buffers are created, with + the same stack frame layout as the dispatcher. This is the + logic that GNU Smalltalk used in lightning 1.x, and is required + to make a sane port for lightning 2.x. + + * lib/jit_ia64-cpu.c: Implement support for jit_frame and jit_tramp, + and also correct wrong encoding for B4 instructions, that + implement jmpr, as well as correct reverse logic in _jmpr, + that was moving the branch register to the jump register, + and not vice-versa. + Also, if a stack frame is to be assumed, always assume it may + call a function with up to 8 arguments, regardless of the + hint frame argument. + + * lib/jit_arm.c: Add a new must_align_p() interface to ensure + function prologs are always aligned. This condition was + previously always true, somewhat by accident, but with + jit_tramp it is not guaranteed. + + * lib/jit_ia64-cpu.c, lib/jit_ppc.c: Add minor special handling + required to implement jit_tramp, where a function descriptor + should not be added before a prolog, as jit_tramp means omit + prolog. + + * check/lightning.c: Update test driver for the new interfaces. + + * check/Makefile.am, check/tramp.tst, check/tramp.ok: Add + a simple test and example of the jit_frame and jit_tramp + usage implementing a simple Fibonacci function using a + simulation of an interpreter stack and how it would handle + state in language specific variables. + + * doc/body.texi: Add documentation for jit_frame and + jit_tramp. 
+ +2014-09-29 Paulo Andrade <pcpa@gnu.org> + + * lib/jit_aarch64.c, lib/jit_alpha.c, lib/jit_arm.c, + lib/jit_hppa.c, lib/jit_ia64.c, lib/jit_mips.c, + lib/jit_ppc.c, lib/jit_s390x.c, lib/jit_sparc.c, + lib/jit_x86.c, lib/lightning.c: Allow jit_jmpi on a + target that is not a node. This may lead to hard to + debug code generation, but is a required feature for + certain generators, like the ones that used lightning + 1.2x. Note that previously, but not really well + documented, it was instructed to use: + jit_movi(rn, addr); jit_jmpr(rn); + but now, plain: + jit_patch_abs(jit_jmpi(), addr); + should also work. + +2014-09-24 Paulo Andrade <pcpa@gnu.org> + + * lib/jit_x86-sz.c: Generate information about instruction + lengths for more precise calculation of buffer size on + Windows x64. This change is specially important because + the maximum instruction length is larger than other + systems, what could cause an out of bounds write on + special conditions without this update. + +2014-09-24 Paulo Andrade <pcpa@gnu.org> + + * check/lightning.c: Add workaround to conflicting global + optind variable in cygwin binutils that have an internal + getopt* implementation. + + * lib/jit_x86-cpu.c: Add a simple define ffsl ffs if building + for 32 bit and there is no ffsl function. + +2014-09-24 Paulo Andrade <pcpa@gnu.org> + + * check/lightning.c: Add a hopefully temporary kludge to not use + sprintf and sscanf returned by dlsym. This is required to pass + the varargs test. + + * include/lightning/jit_private.h: Use symbolic name for first + integer register argument, as this is different in sysv and + win64 abi. + + * include/lightning/jit_x86.h: Add conditionals and definitions + for Windows x64 (under __CYGWIN__ preprocessor conditional). + + * lib/jit_x86-cpu.c: Correct one instruction encoding bug, that + was working by accident. 
Only use rax to rdx for some byte + operations to work on compatibility mode (that is, to generate + the proper encoding, instead of actually generating encoding + for high byte registers, e.g. %bh). + Add proper prolog and epilog for windows x64. + + * lib/jit_x86-sse.c: Correct a swapped rex prefix for float + operations. + + * lib/jit_x86.c: Adjust to support Windows x64 abi. + + * check/check.x87.nodata.sh: New file, previously used but that + was missing from git. + +2014-09-07 Paulo Andrade <pcpa@gnu.org> + + * lib/lightning.c: Mark all registers advertised as live, as + per jit_callee_save_p as live whenever reaching a jump that + cannot be tracked. This is a rethink of the previous commit, + and is a better approach, otherwise there would not be much + sense on relying on jit_callee_save_p if it could not be + trusted. + + * check/jmpr.tst, check/jmpr.ok: New files implementing a very + simple test case, that would actually cause an assertion on + code before the change to only mark as live when reaching a + jump that could not be tracked, the actually advertised as callee + save registers. + + * check/Makefile.am: Update for new jmpr test case. + +2014-09-01 Paulo Andrade <pcpa@gnu.org> + + * lib/lightning.c: Do not mark all registers in unknown state + as live on jit_jmpr, or jit_jmpi to an absolute address. Instead, + treat it as a function call, and only consider JIT_Vn registers + as possibly live. + +2014-08-29 Paulo Andrade <pcpa@gnu.org> + + * doc/body.texi: Add a proper info menu entry for + GNU lightning. + + * doc/version.texi: Regenerate. 
+ +2014-08-16 Paulo Andrade <pcpa@gnu.org> + + * lib/jit_aarch64-cpu.c, lib/jit_aarch64-fpu.c, + lib/jit_arm-cpu.c, lib/jit_arm-vfp.c, + lib/jit_hppa-cpu.c, lib/jit_hppa-fpu.c, + lib/jit_ia64-cpu.c, lib/jit_ia64-fpu.c, + lib/jit_mips-cpu.c, lib/jit_mips-fpu.c, + lib/jit_ppc-cpu.c, lib/jit_ppc-fpu.c, + lib/jit_s390x-cpu.c, lib/jit_s390x-fpu.c, + lib/jit_s390x.c, lib/jit_sparc-cpu.c, + lib/jit_x86-cpu.c, lib/jit_x86-sse.c, + lib/jit_x86-x87.c: Review generation of all branch + instructions and always adds the jit_class_nospill + bitfield for temporary registers that cannot be spilled + because the reload would be after a conditional jump; the + patch only adds an extra assertion. These conditions do + not happen on documented lightning usage, but can happen + if one uses the not exported jit_get_reg and jit_unget_reg + calls and cause enough register starvation. + +2014-08-16 Paulo Andrade <pcpa@gnu.org> + + * lib/jit_alpha.c: Correct wrong bitmask of most argument + float register arguments, that were being set as callee + save instead of argument registers class. + +2014-08-16 Paulo Andrade <pcpa@gnu.org> + + * lib/jit_arm-sz.c: Regenerate table of known maximum + instruction sizes for the software float fallback, + that implements "virtual" float registers in the stack + and operations as calls to libgcc. + + * size.c: Correct typo in the generated jit_arm-sz.c file. + +2014-08-10 Paulo Andrade <pcpa@gnu.org> + + * include/lightning/jit_alpha.h, lib/jit_alpha-cpu.c, + lib/jit_alpha-fpu.c, lib/jit_alpha-sz.c, lib/jit_alpha.c: + New files implementing a lightning Alpha port. Thanks + to Trent Nelson and snakebit.net staff for providing access + to an Alpha system. + + * check/float.tst, check/lightning.c, configure.ac, + include/lightning.h, include/lightning/Makefile.am, + include/lightning/jit_private.h, lib/Makefile.am, + lib/jit_disasm.c, lib/jit_size.c, lib/lightning.c: + Minor changes to adapt for the new Alpha port. 
+ +2014-08-10 Paulo Andrade <pcpa@gnu.org> + + * lib/lightning.c: Always mark JIT_RET and JIT_FRET as + live in a function epilog. This is required because + on some ports a complex sequence, allocating one or more + registers, may be required to jump from a ret* to the + epilog, and the lightning api does not have annotations + to know if a function returns a value, or the type of + the return value. + +2014-08-10 Paulo Andrade <pcpa@gnu.org> + + * lib/lightning.c: Change the correct live bitmask of + return registers after a function call in jit_update. + +2014-08-10 Paulo Andrade <pcpa@gnu.org> + + * lib/lightning.c: Change assertions to have an int + result and correct a bad bit mask assertion. + +2014-08-10 Paulo Andrade <pcpa@gnu.org> + + * lib/jit_aarch64.c: Correct bad setup for assertion + of consistency before a patch. + +2014-08-10 Paulo Andrade <pcpa@gnu.org> + + * lib/jit_mips-cpu.c: Correct typo in the jit_bmsr + implementation that was using the wrong test result + register. + +2014-07-28 Paulo Andrade <pcpa@gnu.org> + + * lib/jit_memory.c: Do not call free on NULL pointers. + + * include/lightning/jit_private.h, lib/jit_note.c, + lib/lightning.c: Add a wrapper to memcpy and memmove + to not actually call those functions with a zero size + argument, and likely also a null src or dst. + +2014-07-27 Paulo Andrade <pcpa@gnu.org> + + * include/lightning/jit_private.h, lib/jit_disasm.c, + lib/lightning.c: Remove the global jit_progname variable. + It was being only used in jit_init_debug, that is called + from init_jit, so, just pass an argument. + +2014-07-27 Paulo Andrade <pcpa@gnu.org> + + * doc/body.texi: Add note that jit_set_memory_functions + should be called before init_jit, because init_jit + itself may call the memory wrappers. + +2014-04-22 Paulo Andrade <pcpa@gnu.org> + + * lib/jit_arm.c: Do not get confused with default settings + if /proc is not mounted on Linux specific code path. 
+ +2014-04-09 Paulo Andrade <pcpa@gnu.org> + + * include/lightning/jit_aarch64.h, include/lightning/jit_arm.h, + include/lightning/jit_hppa.h, include/lightning/jit_ia64.h, + include/lightning/jit_mips.h, include/lightning/jit_ppc.h, + include/lightning/jit_private.h, include/lightning/jit_s390x.h, + include/lightning/jit_sparc.h, include/lightning/jit_x86.h: + Do not add jit_regset_t, JIT_RA0, and JIT_FA0 to the installed + header file. These types and definitions are supposed to be + only used internally. + +2014-04-05 Paulo Andrade <pcpa@gnu.org> + + * lib/jit_arm-cpu.c: Only adjust stack pointer in prolog if + need stack space, that is, do not emit a nop instruction + subtracting zero from the stack pointer. + +2014-04-04 Paulo Andrade <pcpa@gnu.org> + + * lib/jit_disasm.c: Correct a crash in the doc/printf example + on arm due to releasing the data_info information in + jit_clear_state. This is a special case for arm only, and + actually, only armv5 or older uses the data_info buffer, + or when forcing arm instruction set mode besides thumb + available. + +2014-12-03 Paulo Andrade <pcpa@gnu.org> + + * doc/body.texi: Write detailed description and examples for + jit_get_memory_functions, jit_set_memory_functions, + jit_get_code, jit_set_code, jit_get_data and jit_set_data. + +2014-12-03 Paulo Andrade <pcpa@gnu.org> + + * include/lightning.h, include/lightning/jit_private.h, + lib/lightning.c: Implement the new jit_set_data() interface, + and the new jit_get_data() helper. Like jit_set_code(), + jit_realize() should be called before jit_set_data(). + The most common usage should be jit_set_data(JIT_DISABLE_DATA + | JIT_DISABLE_NOTE), to force synthesize any float/double + constant in the stack and not generate any debug information. + + * lib/jit_note.c: Minor change to debug note generation as + now it uses an alternate temporary data buffer during constants + and debug generation to accommodate the possibility of the user + setting an alternate data buffer. 
+ + * lib/jit_hppa-fpu.c, lib/jit_s390x.c, lib/jit_s390x-cpu.c, + lib/jit_s390x-fpu.c, lib/jit_sparc.c, lib/jit_sparc-fpu.c, + lib/jit_x86-sse.c, lib/jit_x86-x87.c: Implement jit_set_data. + + * lib/jit_hppa-sz.c, lib/jit_sparc-sz.c, lib/jit_x86-sz.c, + lib/jit_s390x-sz.c: Update for several instructions that now + have a different maximum length due to jit_set_data. + + * lib/jit_mips-fpu.c: Implement jit_set_data, but missing + validation on n32 and n64 abis (and/or big endian). + + * lib/jit_mips-sz.c: Update for changes in o32. + + * lib/jit_ppc-fpu.c: Implement jit_set_data, but missing + validation on Darwin PPC. + + * lib/jit_ppc-sz.c: Update for changes in powerpc 32 and + 64 bit. + + * lib/jit_ia64-fpu.c: Implement untested jit_set_data. + + * TODO: Add note to list ports that were not tested for the + new jit_set_data() feature, due to no longer having access + to them. + + * check/nodata.c: New file implementing a simple test exercising + several different conditions created by jit_set_data(). + + * check/check.nodata.sh: New file implementing a wrapper + over the existing *.tst files, that runs all tests without + using a data buffer for constants; only meaningful (and + enabled) on architectures that used to store float/double + constants on a read only data buffer. + + * configure.ac, check/Makefile.am: Update for the new test + cases. + + * check/lightning.c: Implement the new "-d" option that + sets an internal flag to call jit_set_data() disable + constants and debug, that is, using only a pure code + buffer. + +2014-11-03 Paulo Andrade <pcpa@gnu.org> + + * include/lightning.h, include/lightning/jit_private.h, + lib/lightning.c: Implement the new jit_set_code() interface, + that allows instructing lightning to use an alternate code + buffer. The new jit_realize() function should be called + before jit_set_code(), and usually call jit_get_code() + to query the amount of bytes expected to be required for + the code. 
+ + * lib/jit_size.c: Minor update to have less chances of + miscalculating the code buffer by starting the counter + with the size of the longest instruction instead of zero, + as code emit fails if at any moment less than the longest + instruction bytes are available. + + * check/setcode.c: New file implementing some basic tests + of the new jit_set_code() interface. + + * check/Makefile.am: Update for newer test case. + +2014-06-03 Paulo Andrade <pcpa@gnu.org> + + * include/lightning.h, lib/lightning.c: Add the new + jit_indirect() call, that returns a special label node, + and tells lightning that the label may be the target of + an indirect jump. + + * doc/body.texi: Document the new jit_indirect() call, and + add examples of different ways to create labels and branches. + +2014-23-02 Paulo Andrade <pcpa@gnu.org> + + * lib/jit_x86.c: Rewrite previous patch to inline save/restore + because clobbering %ebx in x86 is treated as an error + (jit_x86.c:239:5: error: PIC register clobbered by 'ebx' in 'asm'). + +2014-19-02 Paulo Andrade <pcpa@gnu.org> + + * lib/jit_x86.c: Rewrite incorrect inline assembly that could + truncate a variable in a callee save register. Now it simply + tells gcc that the register is clobbered, instead of using a + *32 bit* swap with a temporary variable. The problem only + happens when compiling with optimization. + +2014-19-02 Paulo Andrade <pcpa@gnu.org> + + * include/lightning/jit_aarch64.h, include/lightning/jit_arm.h, + include/lightning/jit_hppa.h, include/lightning/jit_ia64.h, + include/lightning/jit_mips.h, include/lightning/jit_ppc.h, + include/lightning/jit_s390x.h, include/lightning/jit_sparc.h, + include/lightning/jit_x86.h: Change jit_regset_t to an + unsigned type, to allow safe right shift. + + * lib/lightning.c: Rewrite jit_regset_scan1 to allow easier + compiler optimization. + +2013-12-03 Paulo Andrade <pcpa@gnu.org> + + * lib/jit_x86-x87.c: Correct wrong optimization when + loading the log(2) constant. 
+ +2013-12-03 Paulo Andrade <pcpa@gnu.org> + + * lib/jit_x86-cpu.c: Use the emms instruction before + calling any function. This is particularly important + when using c99 complex functions as it can easily + overflow the x87 stack due to the way lightning uses + the x87 stack as a flat register file. + +2013-12-02 Paulo Andrade <pcpa@gnu.org> + + * lib/jit_x86-x87.c: Correct wrong code generation due + to comparing the base and not the value register with + %st(0) in stxi_f. + +2013-12-02 Paulo Andrade <pcpa@gnu.org> + + * lib/jit_x86-x87.c, lib/jit_x86.c: Use 8 bytes aligned + stack offset for float/double x87 to/from sse move. + +2013-11-27 Paulo Andrade <pcpa@gnu.org> + + * configure.ac, lib/jit_arm-swf.c, lib/jit_arm.c: Add + changes that should at least allow building lightning + on Apple iOS7. + +2013-10-08 Paulo Andrade <pcpa@gnu.org> + + * lib/jit_ppc-cpu.c: Correct wrong shortcut for ldxi_l with + a zero offset, that was calling ldr_i instead of ldr_l. + +2013-10-08 Paulo Andrade <pcpa@gnu.org> + + * include/lightning/jit_arm.h, lib/jit_arm-cpu.c: Do not use + by default load/store instructions that map to ldrt/strt. + There is already the long displacement version for positive + offsets, and when using a (shorter) negative offset it does + not map to ldrt/strt. At least on qemu strt may cause + reproducible, but unexpected SIGILL. + +2013-10-08 Paulo Andrade <pcpa@gnu.org> + + * lib/jit_arm-vfp.c: Correct wrong load/store offset + calculation when the displacement is constant but too + large to use an instruction with an immediate offset. + +2013-10-07 Paulo Andrade <pcpa@gnu.org> + + * check/self.c: Extend tests to validate jit_callee_save_p + does not cause an assertion on valid arguments, and test + extra registers defined on some backends. + + * configure.ac: Do not ignore environment CFLAGS when + checking if need to test runtime configurable options, + like use x87 when sse2 is available, arm instruction set + instead of thumb, etc. 
+ + * include/lightning/jit_arm.h: Correct wrong jit_f macro + definition. + + * include/lightning/jit_ia64.h, include/lightning/jit_ppc.h: + Correct wrong jit_r macro definition. + + * lib/jit_x86-x87.c, lib/jit_x86.c: Actually use the + reserved stack space for integer to/from float conversion. + The stack space was also changed to ensure it is 8 bytes + aligned. Also, for Solaris x86 in 32 bit mode, an alternate + truncr_d was implemented because for some reason it is + failing with SIGILL if using the "fisttpl" instructions, + that must be available on p6 or newer, but for the sake of + making all tests pass, implement a 486 or newer sequence + if "sun" is defined. + +2013-10-03 Paulo Andrade <pcpa@gnu.org> + + * include/lightning/jit_mips.h, lib/jit_mips-cpu.c, + lib/jit_mips-sz.c, lib/jit_mips.c, size: Build and + pass all test cases on Irix big endian mips using + the 64 bit abi. + +2013-10-02 Paulo Andrade <pcpa@gnu.org> + + * include/lightning/jit_mips.h: Add proper mips abi detection. + +2013-09-30 Paulo Andrade <pcpa@gnu.org> + + * lib/jit_print.c: Do not crash if calling jit_print from + gdb before actually emitting code. + + * lib/lightning.c: Correct misplaced check for already + visited blocks on conditional branches, what was preventing + proper merge live bit masks of forward blocks. + +2013-09-30 Paulo Andrade <pcpa@gnu.org> + + * lib/jit_x86-cpu.c: Correct not properly tested case of using + %r12 as index register, what was causing an invalid assertion. + %r12 is mapped to the "extra" JIT_R3 register, and test cases + only test "standard" lightning registers. + +2013-09-28 Paulo Andrade <pcpa@gnu.org> + + * lib/jit_ia64.c: Minor change to force collecting the maximum + instruction length in the --enable-devel-get-jit-size build + mode. 
The actual generated file did not change because the + sampling was large enough that it had already collected proper + information in the previously slightly buggy code (not forcing + a sync of the instructions that could be combined). + +2013-09-27 Paulo Andrade <pcpa@gnu.org> + + * lib/jit_arm.c: Correct build when disassembler is + disabled. + +2013-09-25 Paulo Andrade <pcpa@gnu.org> + + * lib/jit_ia64-cpu.c, lib/jit_ia64-fpu.c: Correct some + off by one range checks (that were only accepting values + one less than the maximum allowed) and an invalid test + condition check that was forcing it to always use + indirect jumps even when reachable with an immediate + displacement. + +2013-09-24 Paulo Andrade <pcpa@gnu.org> + + * lib/jit_aarch64-sz.c, lib/jit_arm-sz.c, lib/jit_hppa-sz.c, + lib/jit_ia64-sz.c, lib/jit_mips-sz.c, lib/jit_ppc-sz.c, + lib/jit_s390x-sz.c, lib/jit_size.c, lib/jit_sparc-sz.c, + lib/jit_x86-sz.c: New files implementing static tables + with longest known instructions length generated to match + a lightning instruction. These tables should make it easier + to make it very unlikely to ever miscalculate, or by too + much, the size of a code buffer. + + * lib/jit_size.c: New file that aids to either collect + jit code size information, or use the information depending + on build options. + + * size.c: New helper file that parses input for, and create + an initial jit_$arch-sz.c file, that needs some minor edit + for arches with multiple configurations. + + * configure.ac, Makefile.am: Add the new, devel mode only + --enable-devel-get-jit-size configure option, that sets + compile time flags to collect jit code size information, + that will be used as input for the "noinst size program". 
+ + * lib/jit_aarch64.c, lib/jit_arm.c, lib/jit_disasm.c, + lib/jit_hppa.c, lib/jit_ia64.c, lib/jit_memory.c, + lib/jit_mips.c, lib/jit_ppc.c, lib/jit_s390x.c, + lib/jit_sparc.c, lib/jit_x86.c, lib/lightning.c: Minor + changes for the --enable-devel-get-jit-size build mode, + as well as the "production build mode" with jit code + size information. + +2013-09-14 Paulo Andrade <pcpa@gnu.org> + + * include/lightning.h, lib/lightning.c: Add the new + jit_pointer_p interface, that returns a boolean value + telling if the pointer argument is inside the jit + code buffer. This is useful to avoid the need to add + extra labels and calls to jit_address to figure bounds + of code buffer, and still keep internal data private. + +2013-09-13 Paulo Andrade <pcpa@gnu.org> + + * include/lightning.h, include/lightning/jit_private.h, + lib/jit_note.c: Change the code argument of jit_get_note + to a jit_pointer_t and make jit_get_note a public interface. + It was intended so since start, as a way to map an offset + in the code to a function name, file name and line number + mapping. + +2013-09-11 Paulo Andrade <pcpa@gnu.org> + + * doc/body.texi: Correct reversed arguments in example of + usage in a (possibly) multi threaded, multiple jit_state_t + environments. + + * include/lightning/jit_arm.h, include/lightning/jit_private.h, + lib/jit_arm-cpu.c, lib/jit_arm.c: Make a previously, non + documented, global state private to the related jit_state_t + generating code. + +2013-09-10 Paulo Andrade <pcpa@gnu.org> + + * check/self.c, check/self.ok: New files implementing simple + consistency check assertions. At first validating some macros + that use values from different sources agree. + + * check/Makefile.am: Update for the new test case. 
+ + * include/lightning.h, lib/lightning.c: Add the new + jit_callee_save_p() call, that is intended to be used when + writing complex code using lightning, so that one does not + need to verify what backend is being used, or have access to + private data, to query if a register is callee save or not; + on several backends the scratch registers are actually callee + save. + + * include/lightning/jit_aarch64.h, include/lightning/jit_arm.h, + include/lightning/jit_hppa.h, include/lightning/jit_mips.h, + include/lightning/jit_ppc.h, include/lightning/jit_sparc.h, + include/lightning/jit_x86.h: Add an explicit definition for + JIT_R3-JIT_Rn, JIT_V3-JIT_Vn and JIT_F6-JIT_Fn when applicable. + This allows one to write code based on "#if defined(JIT_XN)" + and therefore, not need to check what is the current backend + or have access to private data structures. This is particularly + useful when writing virtual machines with several specialized, + global registers. + + * lib/jit_ia64.c: Properly flag the callee save general + purpose registers as such, so that jit_callee_save_p() works + as intended. + +2013-09-10 Paulo Andrade <pcpa@gnu.org> + + * check/lightning.c, configure.ac: Conditionally use the + code written to workaround a bug in the Hercules emulator, + as isnan and isinf are not available at least on HP-UX ia64. + +2013-09-10 Paulo Andrade <pcpa@gnu.org> + + * lib/jit_s390x-cpu.c: Spill/reload correct callee save + float registers. + +2013-09-10 Paulo Andrade <pcpa@gnu.org> + + * lib/jit_hppa-cpu.c: Correct code to call a function stored + in a register or a patched function address. + +2013-09-10 Paulo Andrade <pcpa@gnu.org> + + * lib/jit_ia64-cpu.c: Correct incorrect logic when restoring + the value of the "r2" callee save register. + +2013-08-29 Paulo Andrade <pcpa@gnu.org> + + * lib/jit_arm-cpu.c, lib/jit_arm.c: Correct wrong test and update + of the thumb offset information, when checking if needing to + patch a jump from arm to thumb mode. 
The problem would happen when + remapping the code buffer, and the new address being lower than + the previous one. + +2013-08-26 Paulo Andrade <pcpa@gnu.org> + + * configure.ac: Extend FreeBSD test to also handle NetBSD. + + * lib/jit_x86-cpu.c: Correct wrongly defined offset type of + ldxi_ui. Problem detected when building on NetBSD. + + * lib/lightning.c: Adjust code to handle NetBSD mremap, + where arguments do not match Linux mremap. + +2013-08-26 Paulo Andrade <pcpa@gnu.org> + + * lib/jit_ppc.c: Correct C sequence point problem miscalculating + the actual function address in a function descriptor. Problem + happens with gcc 4.8.1 at least. + +2013-08-11 Paulo Andrade <pcpa@gnu.org> + + * lib/jit_s390x-cpu.c: Correct code checking if immediate + fits instruction, but using the negated value. + +2013-07-28 Paulo Andrade <pcpa@gnu.org> + + * include/lightning/jit_s390x.h, lib/jit_s390x-cpu.c, + lib/jit_s390x-fpu.c, lib/jit_s390x.c: New files + implementing the new s390x port. + + * configure.ac, include/lightning.h, + include/lightning/Makefile.am, + include/lightning/jit_private.h, + lib/Makefile.am, lib/jit_disasm.c, lib/lightning.c: + Minor adaptation for the new s390x backend. + + * check/float.tst: Update for the s390x result of + truncating +Inf to integer. + + * check/qalu_mul.tst: Add extra test cases to better test + high word of signed multiplication as the result is + adjust from unsigned multiplication on s390x. + +2013-07-28 Paulo Andrade <pcpa@gnu.org> + + * check/lightning.c: Do not assume casting a double NaN or + Inf to float will produce the expected float NaN or Inf. + This is not true at least under s390x. + +2013-07-28 Paulo Andrade <pcpa@gnu.org> + + * check/check.arm.sh, check/check.sh, check/check.swf.sh, + check/check.x87.sh: Properly check test programs output, + not just rely on the test program self testing the results + and not crashing. 
+ +2013-07-28 Paulo Andrade <pcpa@gnu.org> + + * lib/jit_aarch64.c: Remove unused macros left from cut&paste + of jit_arm.c. + +2013-07-16 Paulo Andrade <pcpa@gnu.org> + + * include/lightning/jit_aarch64.h, lib/jit_aarch64-cpu.c, + lib/jit_aarch64-fpu.c, lib/jit_aarch64.c: New files + implementing the new aarch64 port, as a new architecture, + not as an expansion of the existing armv[4-7] port. + + * check/lightning.c: Add aarch64 support and a small + change to recognize character constants as immediate + values. + + * check/float.tst: Add aarch64 preprocessor conditionals + to select proper expected value when converting [+-]Inf + and NaN to integer. + + * include/lightning/jit_arm.h, lib/jit_arm.c: Minor changes + to better match the new aarch64 files. + + * configure.ac, include/lightning.h, + include/lightning/Makefile.am, include/lightning/jit_private.h, + lib/Makefile.am, lib/lightning.c: Minor adjustments + for the aarch64 port. + +2013-07-08 Paulo Andrade <pcpa@gnu.org> + + * NEWS, THANKS, configure.ac, doc/version.texi: Update for + the 1.99a second alpha release. + +2013-06-25 Paulo Andrade <pcpa@gnu.org> + + * lib/jit_mips.c: Correct cut&paste error that caused wrong + stack offset calculation for double arguments in stack in + the o32 abi. + Correct typo in the __LITTLE_ENDIAN macro name, that came + from cut&paste error in the original typo in lib/jit_ppc.c. + + * lib/jit_ia64.c, lib/jit_ppc.c: Correct typo in the + __LITTLE_ENDIAN macro name. + +2013-06-22 Paulo Andrade <pcpa@gnu.org> + + * check/lightning.c, configure.ac, include/lightning.h, + lib/lightning.c: Add tests and quirks to build/detect + and/or work on Irix. + + * include/lightning/jit_mips.h, lib/jit_mips-cpu.c, + lib/jit_mips-fpu.c, lib/jit_mips.c: Adapt code to run + in big endian mips, using the n32 abi. 
+ +2013-06-18 Paulo Andrade <pcpa@gnu.org> + + * include/lightning.h: Minor extra preprocessor testing + to "detect" byte order on x86 solaris, that now builds + and pass all test cases. + +2013-06-18 Paulo Andrade <pcpa@gnu.org> + + * lib/jit_sparc-cpu.c: Correct compiler warning of value + used before assignment. The usage is bogus as the api + requires always patching jumps, but the random value used + could cause an assertion due to invalid displacement. + + * lib/jit_sparc.c: Always load and store double arguments + in stack as 2 float loads or stores, for safety, as unaligned + access is not allowed in Sparc Solaris. + +2013-06-14 Paulo Andrade <pcpa@gnu.org> + + * configure.ac: Force -mlp64 to CFLAGS on HP-UX ia64 port. + It is the only supported mode, and expects gcc as C compiler. + + * include/lightning.h, lib/jit_ia64-cpu.c, lib/jit_ia64.c: + Correct ia64 port to work on HP-UX that runs it in big endian + mode. + +2013-06-10 Paulo Andrade <pcpa@gnu.org> + + * lib/jit_hppa.c: Sanitize the cache synchronization inline + assembly code that was doing twice the work and redundantly + flushing the end address every loop iteration. + +2013-06-09 Paulo Andrade <pcpa@gnu.org> + + * configure.ac, check/Makefile.am, doc/Makefile.am: Do not + explicitly link to -ldl, but instead autodetect the library + with dlopen, dlsym, etc. + + * check/lightning.c: Add workaround to apparently buggy + getopt in HP-UX that sets optind to the wrong index, and + use RTLD_NEXT on HP-UX instead of RTLD_DEFAULT to dlsym + global symbols. + + * include/lightning.h: Rework definitions of wordsize and + byte order to detect proper values on HP-UX. + + * lib/lightning.c: Minor correction to use MAP_ANONYMOUS + instead of MAP_ANON on HP-UX. + + * lib/jit_hppa.c: Float arguments must be passed on integer + registers on HP-UX, not only for varargs functions. + Add code to properly clear instruction cache. This was + not required on Debian hppa port, but may have been working + by accident. 
+ + * lib/jit_hppa-cpu.c: Follow pattern of HP-UX binaries and + use bve,n instead of bv,n to return from functions. + + * lib/jit_hppa-fpu.c: For some reason "fst? frX,rX,(rY)" did + not work on the tested computer (HP-UX B.11.23 U 9000/785 HP-UX) + so the code was changed, at first for __hpux only to add the + base and offset register and use the instruction with an + immediate (zero) offset. + +2013-06-07 Paulo Andrade <pcpa@gnu.org> + + * check/lightning.c, lib/jit_disasm.c, lib/jit_ppc-cpu.c, + lib/jit_ppc-fpu.c, lib/jit_ppc.c, include/lightning.h, + include/lightning/jit_ppc.h, include/lightning/jit_private.h: + Adapt code to work on 32 bit AIX ppc using gcc. Most changes + are basically to adapt the elf64 logic to 32 bit, as it does + not use the same convention of 32 bit Darwin ppc. + + * check/stack.tst: Add a fake memcpy function to the test + case if running under AIX, as it is not available to dlsym. + + * configure.ac: Check for getopt.h header, not available in + AIX. + +2013-06-01 Paulo Andrade <pcpa@gnu.org> + + * include/lightning/jit_hppa.h, lib/jit_hppa-cpu.c, + lib/jit_hppa-fpu.c, lib/jit_hppa.c: New files implementing + the hppa port. Built on Debian Linux PA-RISC 2.0, 32 bit. + + * check/float.tst: Add preprocessor for hppa expected + values when converting NaN and +-Inf to an integer. + + * check/ldst.inc: Ensure double load/store tests use an + 8 byte aligned address by default. + + * lib/lightning.c: Correct a bug found during tests in + the new port, where qmul* and qdiv* were not properly + setting one of the result registers as modified in the + function, what would be a problem if the only "write" + usage were the qmul* or qdiv*. + + * check/varargs.tst, check/varargs.ok: Add one extra + interleaved integer/double test to validate proper code + generation in the extra case. 
+ + * check/lightning.c, configure.ac, include/lightning.h, + include/lightning/Makefile.am, + include/lightning/jit_private.h, lib/Makefile.am, + lib/jit_disasm.c: Update for the hppa port. + +2013-04-27 Paulo Andrade <pcpa@gnu.org> + + * check/varargs.tst: Correct misplaced .align directive + that was causing the double buffer to not be aligned at + 8 bytes. + * lib/jit_ia64-cpu.c: + Properly implement abi for excess arguments passed on + stack. + Simplify load/store with immediate displacement argument + with zero value. + Simplify some calls to "subi" changing to "addi" with + a negative argument. + Remove some #if 0'ed code, that could be useful in + special conditions, but the most useful one would be + to "optimize" "static" jit functions, but for the sake + of simplicity, jit functions are implemented in a way + that can be passed back to C code as C function pointers. + Add an attribute to prototypes of several unused functions. + These functions are defined for the sake of implementing all + Itanium documented instructions, but a significant amount of + them is not used by lightning. + * lib/jit_ia64-fpu.c: Simplify load/store with zero immediate + displacement and add unused attribute for functions not used + by lightning, but required to provide macros implementing all + Itanium documented instructions. + * lib/jit_ia64.c: Update for the properly implemented abi + for stack arguments. + * lib/lightning.c: Mark an unused function as such. + +2013-04-27 Paulo Andrade <pcpa@gnu.org> + + lib/jit_ia64-cpu.c: + Correct immediate range check of integer comparisons when + inverting arguments. + Correct gei_u that was not decrementing immediate when + inverting arguments. + Correct b?add* and b?sub* that were not properly updating + the result register. 
+ +2013-04-27 Paulo Andrade <pcpa@gnu.org> + + * lib/jit_ia64-cpu.c: Correct wrong mapping of 2 instructions + in "M-, stop, M-, stop" translation, that was ignoring the + last stop (implemented as a nop I- stop). + + * lib/jit_ia64-fpu.c: Properly implement fnorm.s and fnorm.d, + as well as the proper integer to float or double conversion. + +2013-04-27 Paulo Andrade <pcpa@gnu.org> + + * lib/jit_ia64-cpu.c: Correct bogus implementation of ldr_T + for signed integers, that was using ld1.s, ld2.s and ld4.s. + The ".s" stands for speculative load, not sign extend. + + * lib/jit_ia64-fpu.c: Correct bogus implementation of ldxr_T + for float and double. The third (actually, second) argument + is indeed added to the base register, but the base register + is modified. The actual M7 implementation was already correct, + just the ldxr_f and ldxr_d implementation that was kept in + a prototype state, misinterpreting what M7 does. + +2013-04-27 Paulo Andrade <pcpa@gnu.org> + + * lib/jit_ia64-cpu.c: Correct X2 pattern matching by preventing + it to attempt to require a stop between the L and the X + instruction; that is, check the registers and predicates + before emitting the L instruction, not after. + + * lib/jit_ia64-fpu.c: Slightly simplify and correct + divr_f and divr_d implementation. + + * check/lightning.c: Add __ia64__ preprocessor define + on Itanium. + + * check/alu.inc, check/clobber.tst, check/float.tst: Define + several macros conditionally to __ia64__. This is required + because __ia64__ jit generation can use way too much memory, + due to not implementing instruction reordering to avoid + as much as possible "stops", which causes way too many nops + to be generated, as well as the fact that division and + remainder require function calls, and float division + requires significant code to implement.
+ +2013-04-27 Paulo Andrade <pcpa@gnu.org> + + * include/lightning.h: Add new backend specific movr_w_d, + movr_d_w and movi_d_w codes as helpers to ia64 varargs + functions arguments. + + * lib/jit_ia64-cpu.c: + Correct wrong encoding of A5 small integers. + Correct define of "mux" instruction modifiers. + Correct ordering of arguments and predicates of cmp_xy + implementation with immediate arguments; like most other + codes with an immediate, the immediate is the second, not + the third argument. + + * lib/jit_ia64-fpu.c: Actual implementation of the code + to move to/from gpr to/from fpr, to implement varargs abi. + + * lib/jit_ia64.c: Make fpr argument registers not allocatable + as temporaries, no need for the extra checks when there are + plenty registers. + + * lib/jit_print.c, lib/lightning.c: Minor updates for the + new movr_w_d, movr_d_w and movi_d_w codes. + +2013-04-26 Paulo Andrade <pcpa@gnu.org> + + * lib/jit_ia64-cpu.c, lib/jit_ia64-fpu.c: Correct code to + also insert a stop to break an instruction group if a + register is written more than once in the same group. + This may happen if a register is argument and result of + some lightning call (not a real instruction). The most + common case should be code in the pattern: + movl rn=largenum + ... + mov rn=smallnum + where "rn" would end up holding "largenum". + But the problem possibly could happen in other circumstances. + +2013-04-26 Paulo Andrade <pcpa@gnu.org> + + * include/lightning/jit_ia64.h, lib/jit_ia64-cpu.c, + lib/jit_ia64-fpu.c, lib/jit_ia64.c: + Relocate JIT_Rn registers to the local registers, as, like + float registers, div/rem and sqrt are implemented as function + calls, and may overwrite non saved scratch registers. + Change patch_at to receive a jit_code_t instead of a + jit_node_t, so that it is easier to "inline" patches when + some instruction requires complex code to implement, e.g. + uneq and ltgt. 
+ Correct arguments to FMA and FMA like instructions that, + due to a cut&paste error were passing the wrong argument + to the related F- implementation function. + Rewrite ltgt to return the proper result if one (or both) + of the arguments is unordered. + +2013-04-26 Paulo Andrade <pcpa@gnu.org> + + * include/lightning/jit_ia64.h, include/lightning/jit_private.h, + lib/jit_ia64-cpu.c, lib/jit_ia64-fpu.c, lib/jit_ia64.c, + lib/lightning.c: Rework code to detect need of a "stop" to + also handle predicates, as if a predicate is written, it + cannot be read in the same instruction group. + Use a single jit_regset_t variable for all registers when + checking need for a stop (increment value by 128 for + float registers). + Correct wrong "subi" implementation, as the code executed + is r0=im-r1, not r0=r1-im. + Use standard lightning 6 fpr registers, and rework to + use callee save float registers, that may be spill/reloaded + in prolog/epilog. This is required because some jit + instructions implementations need to call functions; currently + integer div/mod and float sqrt, what may change the value of + scratch float registers. + Rework point of "sync" of branches that need to return a + patch'able address, because the need for a "stop" before a + predicate read causes all branches to be the instruction + in slot 0, as there is no template to "stop" and branch + in the same instruction "bundle". + +2013-04-25 Paulo Andrade <pcpa@gnu.org> + + * include/lightning/jit_ia64.h, lib/jit_ia64-cpu.c, + lib/jit_ia64-fpu.c, lib/jit_ia64.c: New files implementing + the basic infrastructure of an Itanium port. The code + compiles and can generate jit for basic hello world like + functions. + + * check/lightning.c, configure.ac, include/lightning.h, + include/lightning/Makefile.am, include/lightning/jit_private.h, + lib/Makefile.am, lib/lightning.c: Update for the Itanium + port. 
+ + * lib/jit_mips-cpu.c, lib/jit_mips.c: Correct typo and + make the jit_carry register local to the jit_state_t. + This matches code reviewed in the Itanium port, that + should use the same base logic to handle carry/borrow. + +2013-04-10 Paulo Andrade <pcpa@gnu.org> + + * include/lightning/jit_private.h, lib/jit_arm.c, + lib/jit_mips-cpu.c, lib/jit_mips.c, lib/jit_ppc-cpu.c, + lib/jit_ppc.c, lib/jit_print.c, lib/jit_sparc-cpu.c, + lib/jit_sparc.c, lib/jit_x86-cpu.c, lib/jit_x86.c, + lib/lightning.c: Change all jit_regset macros to take + a pointer argument, to avoid structure copies when + adding a port to an architecture with more than 64 + registers. + +2013-04-08 Paulo Andrade <pcpa@gnu.org> + + * lib/jit_arm.c, lib/jit_ppc.c: Do not rely on __clear_cache + aligning to the next page boundary the end argument. It may + actually truncate it. + +2013-03-29 Paulo Andrade <pcpa@gnu.org> + + * include/lightning/jit_private.h, lib/jit_arm.c, lib/jit_memory.c, + lib/jit_mips.c, lib/jit_ppc.c, lib/jit_sparc.c, lib/jit_x86.c, + lib/lightning.c: Do not start over jit generation if can grow + the code buffer with mremap without moving the base pointer. + +2013-03-29 Paulo Andrade <pcpa@gnu.org> + + * lib/jit_memory.c: Implement a simple memory allocation wrapper + to allow overriding calls to malloc/calloc/realloc/free, as well + as ensuring all memory containing pointers is zero or points to + allocated memory. + + * include/lightning.h, include/lightning/jit_private.h: Definitions + for the memory allocation wrapper. + + * lib/Makefile.am: Update for new jit_memory.c file. + + * lib/jit_arm.c, lib/jit_disasm.c, lib/jit_mips.c, lib/jit_note.c, + lib/jit_ppc.c, lib/jit_sparc.c, lib/jit_x86.c, lib/lightning.c: + Use the new memory allocation wrapper code. + +2013-03-22 Paulo Andrade <pcpa@gnu.org> + + * configure.ac, include/lightning/jit_private.h, lib/lightning.c: + Remove dependency on gmp. 
Only a simple bitmap was required, and + that was not enough reason to force linking to gmp and possible + complications caused by it. + +2013-03-10 Paulo Andrade <pcpa@gnu.org> + + * include/lightning.h: Add check for __powerpc__ defined + in Linux, while Darwin defines __ppc__. + + * include/lightning/jit_ppc.h: Adjust register definitions + for Darwin 32 bit and Linux 64 bit ppc usage and/or ABI. + + * include/lightning/jit_private.h: Add proper check for + Linux __powerpc__ and a data definition for a workaround + to properly handle code that starts with a jump to a "main" + label. + + * lib/jit_disasm.c: Add extra disassembler initialization + for __powerpc64__. + + * lib/jit_ppc-cpu.c: Add extra macros and functions, and + correct/adapt previous ones to handle powerpc64. + + * lib/jit_ppc-fpu.c: Adapt for 64 bit wordsize. Basically + add conversion from/to int32/int64 and proper handling of + load/store offsets too large for 32 bit. + + * lib/jit_ppc.c: Add calls to 64 bit codes and adaptation + for the PowerPC 64 bit Linux ABI. + + * lib/jit_arm.c, lib/jit_mips.c, lib/jit_sparc.c, lib/jit_x86.c, + lib/lightning.c: Correct off by one error when restarting jit + of a function due to finding too late that needs to spill/reload + some register. Problem was found by accident on a very special + condition during PowerPC 64 code adaptation. + +2013-03-08 Paulo Andrade <pcpa@gnu.org> + + * check/lightning.c: Add missing ppc preprocessor definition. + +2013-03-06 Paulo Andrade <pcpa@gnu.org> + + * check/float.tst: Comment out the int to negative infinity + test in mips for the moment because not all Loongson agrees + on the result. + + * lib/jit_disasm.c: Add a test instead of an assertion + when loading symbols for disassembly due to a failure with + a simple binutils build in Debian mipsel64.
+ +2013-03-06 Paulo Andrade <pcpa@gnu.org> + + * include/lightning/jit_private.h, lib/jit_arm-cpu.c, + lib/jit_arm.c, lib/jit_disasm.c, lib/jit_mips-cpu.c, + lib/jit_mips.c, lib/jit_note.c, lib/jit_ppc-cpu.c, + lib/jit_ppc.c, lib/jit_print.c, lib/jit_sparc-cpu.c, + lib/jit_sparc.c, lib/jit_x86-cpu.c, lib/jit_x86.c, + lib/lightning.c: Add an extra structure for data storage + during jit generation, and release it after generating + jit, to reduce a bit memory usage, and also to make it + easier to understand what data is available during + jit runtime. + +2013-03-06 Paulo Andrade <pcpa@gnu.org> + + * lib/lightning.c: Make data and code buffer readonly. + +2013-02-20 Paulo Andrade <pcpa@gnu.org> + + * doc/body.texi: Fool proof validate the examples of what + an assembly-language programmer would write and correct the + wrong sparc example. + +2013-02-19 Paulo Andrade <pcpa@gnu.org> + + * doc/body.texi: Add back the SPARC code generation example. + +2013-02-19 Paulo Andrade <pcpa@gnu.org> + + * check/lightning.c: Remove state flag to work with partial + sparc port, by just disassembling if there was incomplete + code generation. + + * jit_sparc-cpu.c: Correct wrong range check for immediate + integer constants (off by one bit shift). + Correct macro implementing equivalent "rd %y, rd" assembly. + Implement qmul* and qdiv*. + + * jit_sparc.c: Update for qmul* and qdiv* and remove logic + to handle incomplete code generation during sparc port. + +2013-02-18 Paulo Andrade <pcpa@gnu.org> + + * check/float.tst: Add sparc to list of known NaN and +-Inf + to integer conversion. + + * check/lightning.c: Define __sparc__ to preprocessor in + the sparc backend. + + * include/lightning/jit_private.h: Correct wrong definition + of emit_stxi_d, that has lived for a long time, but would + cause problems whenever needing to spill/reload a float + register. 
+ + * include/lightning/jit_sparc.h: Can only use %g2,%g3,%g4 + for scratch variables, as other "global" registers are + reserved for the system, e.g. libc. + Reorder float register naming to make it easier to + access odd float registers, so that generating code for + pusharg and getarg is easier for the IR. + + * lib/jit_mips-cpu.c, lib/jit_ppc-cpu.c: Update to match + new code in jit_sparc-cpu.c. It must call jit_get_reg + with jit_class_nospill if using the register to move + an unconditional branch address to it, as the reload + will not happen (actually could happen in the delay + slot...) + + * lib/jit_sparc-cpu.c: Correct wrong macro definition for + ldxr_s. + Properly implement div* and implement rem. Div* needs + to use the y register, and rem* needs to be synthesized. + Correct b?sub* macro definitions. + + * lib/jit_sparc-fpu.c: Correct reversed float to/from double + conversion. + Correct wrong jit_get_reg call asking for a gpr and then + using the fpr with that number. + Correct wrong branch displacement computation for + conditional branches. + + * lib/jit_sparc.c: Correct getarg_d and pushargi_d implementation. + Add rem* entries to the switch converting IR to machine code. + + * lib/lightning.c: Correct a problem detected when adding + the jit_class_nospill flag to jit_get_reg, that was caused + when having a branch to an "epilog" node, what would cause + the code to think all registers in unknown state were live, + while in truth, all registers in unknown state in the + "just after return" point are actually dead. + +2013-02-17 Paulo Andrade <pcpa@gnu.org> + + * include/lightning/jit_sparc.h, lib/jit_sparc-cpu.c, + lib/jit_sparc-fpu.c, lib/jit_sparc.c: New files implementing + the basic framework of the sparc port. + + * configure.ac, include/lightning.h, include/lightning/Makefile.am, + include/lightning/jit_private.h, lib/jit_disasm.c: Update + for the sparc port framework. + + * lib/jit_mips.c: Correct reversed retr/reti logic. 
+ + * lib/jit_ppc.c: Correct misspelled __LITTLE_ENDIAN. + + * lib/lightning.c: Always do byte hashing in hash_data, because + the logic to "compress" strings causes large pointers to not + be guaranteed aligned at 4 byte boundaries. + Update for the sparc port framework. + +2013-02-11 Paulo Andrade <pcpa@gnu.org> + + * lib/jit_arm.c: Correct jit_pushargi_f in the arm hardfp abi. + Most of the logic uses even numbered register numbers, so that + a float and a double can be used in the same register, but + the abi requires packing the float arguments, so jit_pushargi_f + needs to allocate a temporary register to modify only the + proper register argument (or be very smart to push two + immediate arguments if applicable). + +2013-02-11 Paulo Andrade <pcpa@gnu.org> + + * include/lightning.h, lib/lightning.c: Implement the new + jit_clear_state and jit_destroy_state calls. jit_clear_state + releases all memory not required during jit_execution; that + is, leaves only the mmap'ed data and code buffers allocated. + jit_destroy_state releases the mmap'ed buffers as well as + the jit_state_t object itself, that holds pointers to the + code and data buffers, as well as annotation pointers (for + disassembly or backtrace) in the data buffer. + + * lib/jit_note.c: Correct invalid vector offset access. + + * check/ccall.c, check/lightning.c, doc/ifib.c, doc/incr.c, + doc/printf.c, doc/rfib.c, doc/rpn.c: Use the new jit_clear_state + and jit_destroy_state calls, to demonstrate the new code to + release all jit memory. + + * doc/body.texi: Add basic documentation and usage description + of jit_clear_state and jit_destroy_state. + +2013-02-11 Paulo Andrade <pcpa@gnu.org> + + * include/lightning/jit_private.h, lib/jit_note.c, lib/lightning.c: + Store all annotation information in the mmap'ed area reserved for + read only data. 
This adds code to not allocate memory for jit_note_t + objects, and to relocate jit_line_t objects and their contents after + calculating annotation information. The jit_line_t objects are + relocated because it is not possible to always calculate + beforehand data layout because note information may be extended or + redundant entries removed, as well as allowed to be added in + non sequential order. + A bug was also corrected in _jit_set_note, that was causing it + to allocate new jit_line_t objects when not needed. It was still + working correctly, but allocating way more memory than required. + +2013-02-05 Paulo Andrade <pcpa@gnu.org> + + * include/lightning.h, lib/lightning.c: Add the new jit_live code + to explicitly mark a register as live. It is required to avoid + assuming functions always return a value in the gpr and fpr return + register, and to avoid the need of some very specialized codes + that vary too much from backend to backend, to instruct the + optimization code the return register is live. + + * lib/jit_arm.c, lib/jit_mips.c, lib/jit_ppc.c, lib/jit_print.c, + lib/jit_x86.c: Update for the new jit_live code. + + * check/ret.ok, check/ret.tst: New files implementing a simple + test case that would previously fail at least in ix86/x86_64. + + * check/Makefile.am: Update for new "ret" test case. + +2013-02-05 Paulo Andrade <pcpa@gnu.org> + + * lib/jit_ppc-cpu.c, lib/jit_ppc.c: Validate and correct + problems in the qmul and qdiv ppc implementation. + +2013-02-04 Paulo Andrade <pcpa@gnu.org> + + * include/lightning.h, include/lightning/jit_private.h, + lib/jit_arm-cpu.c, lib/jit_arm.c, lib/jit_mips-cpu.c, + lib/jit_mips.c, lib/jit_ppc-cpu.c, lib/jit_ppc.c, + lib/jit_x86-cpu.c, lib/jit_x86.c, lib/lightning.c: + Implement the new qmul and qdiv instructions that return signed + and unsigned lo/hi multiplication result and div/rem division result.
+ These should be useful for jit translation of code that needs to + know if a multiplication overflows (no branch opcode added) or if + a division is exact (easy check if remainder is zero). + + * check/lightning.c, lib/jit_print.c, check/Makefile.am, + check/all.tst: Update for the new qmul and qdiv instructions. + + * check/qalu.inc, check/qalu_div.ok, check/qalu_div.tst, + check/qalu_mul.ok, check/qalu_mul.tst: New files implementing + simple test cases for qmul and qdiv. + +2013-01-30 Paulo Andrade <pcpa@gnu.org> + + * doc/body.texi: Correct "jmpi" description that incorrectly + told it was possible to pass any address as jump target. The + only way to do that is "movi+jmpr". + +2013-01-30 Paulo Andrade <pcpa@gnu.org> + + * lib/jit_x86-cpu.c: Correct undefined behavior code. + http://gcc.gnu.org/bugzilla/show_bug.cgi?id=56143 + +2013-01-29 Paulo Andrade <pcpa@gnu.org> + + * configure.ac: Use AC_CONFIG_HEADERS instead of AC_CONFIG_HEADER + to have HAVE_CONFIG_H defined with latest aclocal. + + * include/lightning/jit_private.h, lib/lightning.c: Add new + abstraction to use an heuristic to calculate amount of space + required for jit generation, and code to reallocate buffer if + did miscalculate it. + + * lib/jit_arm.c, lib/jit_mips.c, lib/jit_ppc.c, lib/jit_x86.c: + Update to use new code to estimate and resize of required buffer + for jit code. + + * lib/jit_x86-cpu.c: Minor cosmetic change to avoid adding a + non required rex prefix when calling a function pointer stored + in a register. + +2013-01-24 Paulo Andrade <pcpa@gnu.org> + + * check/Makefile.am: "make debug" target should pass only + the main test tool program as argument for running gdb + + * configure.ac: Add the --enable-assertions options. + + * doc/Makefile.am, doc/body.texi, doc/lightning.texi: + Major rewrite of the documentation to match the current + implementation. + + * doc/version.texi: Automatic date update. 
+ + * doc/ifib.c, doc/incr.c, doc/printf.c, doc/rfib.c, doc/rpn.c: + Implementation of the documentation examples, that are also + compiled during a normal build. + + * doc/p-lightning.texi, doc/porting.texi, doc/toc.texi, + doc/u-lightning.texi, doc/using.texi: These files were + renamed in the documentation rewrite, as the documentation + was significantly trimmed due to full removal of the porting + chapters. Better porting documentation should be added but + for the moment it was just removed the documentation not + matching the implementation. + +2013-01-18 Paulo Andrade <pcpa@gnu.org> + + * lib/jit_note.c: Correct bounds check and wrong code keeping + a pointer that could be changed after a realloc call. + +2013-01-18 Paulo Andrade <pcpa@gnu.org> + + * check/3to2.tst, check/add.tst, check/allocai.tst, check/bp.tst, + check/call.tst, check/ccall.c, check/clobber.tst, check/divi.tst, + check/fib.tst, check/ldsti.tst, check/ldstr-c.tst, check/ldstr.tst, + check/ldstxi-c.tst, check/ldstxi.tst, check/ldstxr-c.tst, + check/ldstxr.tst, check/lightning.c, check/rpn.tst, check/stack.tst, + check/varargs.tst, include/lightning.h, + include/lightning/jit_private.h, lib/jit_arm.c, lib/jit_disasm.c, + lib/jit_mips.c, lib/jit_note.c, lib/jit_ppc.c, lib/jit_print.c, + lib/jit_x86.c, lib/lightning.c: Extend the "jit_note" abstraction + with the new "jit_name" call, that receives a string argument, and + should usually be called to mark boundaries of functions of code + generating jit (that is, it is not expected that the language + generating jit map its functions to jit functions). + +2013-01-17 Paulo Andrade <pcpa@gnu.org> + + * check/add.tst, check/allocai.tst, check/bp.tst, check/divi.tst, + check/fib.tst, check/lightning.c, include/lightning/jit_arm.h, + include/lightning/jit_mips.h, include/lightning/jit_ppc.h, + include/lightning/jit_private.h, include/lightning/jit_x86.h: + Make JIT_RET, JIT_FRET and JIT_SP private. 
These should not be + used in any operations due to frequently having special + constraints (usually JIT_FRET). JIT_FP must be made available + because it must be used as the base register to access stack + space allocated with jit_allocai. + +2013-01-14 Paulo Andrade <pcpa@gnu.org> + + * include/lightning.h, lib/lightning.c: Add an extra align + argument to the jit_data call (that should be made private), + so that it should not align strings at 8 bytes. + Correct the jit_note call to include the null ending byte + when adding label/note names to the "jit data section". + +2013-01-11 Paulo Andrade <pcpa@gnu.org> + + * lib/jit_note.c: New file implementing a simple string+integer + annotation, that should be used to map filename and line number + to offsets in the generated jit. + + * include/lightning.h, lib/lightning.c: Update for the new + note code. + Add an extra mandatory argument to init_jit, that is used + as argument to bfd_openr. + Change from generic void* to char* the argument to jit_note + and add an extra integer argument, to map to filename and + line number. + + * check/ccall.c, check/lightning.c, include/lightning/jit_private.h, + lib/jit_arm.c, lib/jit_disasm.c, lib/jit_mips.c, lib/jit_ppc.c, + lib/jit_print.c, lib/jit_x86.c, lib/Makefile.am: Update for the + new annotation code. + + * configure.ac, check/Makefile.am: Update to work with latest + automake. + +2013-01-09 Paulo Andrade <pcpa@gnu.org> + + * include/lightning.h, lib/jit_arm.c, lib/jit_mips-fpu.c, + lib/jit_mips.c, lib/jit_print.c, lib/jit_x86.c, lib/lightning.c: + Remove the jit_code_getarg_{f,d} and jit_code_pusharg{i,r}_{f,d} + calls, replacing them with the new, internal only, jit_movr_w_f, + jit_mov{r,i}_f_w, jit_movr_ww_d, and jit_mov{i,r}_d_ww, that + better describe the operation being done, and allow removing + the hackish code to detect special conditions for arm when + moving from/to vfp from/to a gpr register pair.
+ Rename jit_code_retval_{f,d} to jit_code_x86_retval_{f,d} as + it is specific to 32 bit x86, and used to move abi return + value in x87 register to a sse register. + +2013-01-05 Paulo Andrade <pcpa@gnu.org> + + * check/ccall.c, check/ccall.ok: New test case to validate + interleaved calls from/to C code and jit. + + * check/Makefile.am: Update for the new ccall test case. + + * include/lightning.h, lib/lightning.c: Add the new jit_address + call that returns the real/final address of a "note" in the + generated jit. It requires a jit_node_t as returned by the + jit_note call, and is only valid after calling jit_emit. + Add an intermediate solution to properly handle arm + soft and softfp modes that move a double to an integer register + pair. Currently it just adds extra tests for the condition, + but the proper solution should be to have extra lightning + codes for these conditions, codes which should be only used + by the backends that need it, and merged with the existing + jit_pusharg*_{f,d}. + + * include/lightning/jit_private.h: Add new jit_state_t flag + to know it finished jit_emit, so that calls to jit_address + are valid. + + * lib/jit_mips.c: Correct abi implementation so that the + new ccall test case passes. Major problem was using + _jit->function.self.arg{i,f} as boolean values, but that + would cause lightning.c:patch_registers() to incorrectly + assume only one register was used as argument when calling + jit_regarg_p(); _jit->function.self.arg{i,f} must be the + number of registers used as arguments (in all backends). + + * lib/jit_x86.c: Add workaround, by marking %rax as used, + to a special condition, when running out of registers and the + allocator trying to spill and reload %rax, but %rax was used + as a pointer to a function, which would cause the reload to + destroy the return value. This condition can be better + generalized, but the current solution is good enough.
+ + * include/lightning/jit_ppc.h, lib/jit_ppc-cpu.c, lib/jit_ppc.c: + Rewrite logic to handle arguments, as the original code was + written based on a SysV pdf about the generic powerpc ABI, + what did "invent" a new abi for the previous test cases, but + failed in the new ccall test in Darwin PPC. Now it properly + handles 13 float registers for arguments, as well as proper + computation of stack offsets when running out of registers + for arguments. + +2013-01-02 Paulo Andrade <pcpa@gnu.org> + + * check/float.tst: Correct test case to match ppc also + converting positive infinity to 0x7fffffff. + + * lib/jit_arm-swf.c: Correct typos with double underscores. + + * lib/lightning.c: Correct remaining wrong reverse jump logic. + +2012-12-29 Paulo Andrade <pcpa@gnu.org> + + * lib/lightning.c: Correct both, wrong and confusing logic + to compute the reverse of a jump. Now it properly matches + C semantics for "eq" (==) and "ne" (!=) and correct computation + of reverse of "uneq" as "gt". + + * check/branch.tst: Update "ne" float branch check that + previously happened to be wrongly tested with a NaN argument. + +2012-12-29 Paulo Andrade <pcpa@gnu.org> + + * check/float.ok, check/float.tst: New test cases implementing + extensive validation of float comparison and branch code + generation as well as integer conversion, involving NaN and + [+-]Inf. + + * lib/jit_arm-swf.c, lib/jit_x86-sse.c, lib/jit_x86-x87.c: + Correct bugs found by new float test case. + + * lib/jit_x86.c: Correct cut&paste error added in commit to + convert jit_arg* return value to a jit_node_t*, that would + cause it to not properly handle double arguments in ix86. + + * check/Makefile.am: Update for the new test case. 
+ +2012-12-28 Paulo Andrade <pcpa@gnu.org> + + * check/lightning.c, include/lightning.h, lib/jit_arm.c, + lib/jit_mips.c, lib/jit_ppc.c, lib/jit_print.c, lib/jit_x86.c, + lib/lightning.c: Change return value of jit_arg{,_f,_d} to + a jit_node_t* object, that should be used as argument to + jit_getarg_{c,uc,s,us,i,ui,l,f,d}. This just requires changing + from jit_int32_t to jit_pointer_t (or jit_node_t*) the "handle" + for the getarg calls, with the benefit that it makes it easy + to implement patching of the stack address of non register + arguments, this way allowing to implement variable size stack + frames if applicable; useful if there are too many registers and + jit functions uses only a few callee save registers. + +2012-12-27 Paulo Andrade <pcpa@gnu.org> + + * lib/jit_arm.c, lib/jit_mips-cpu.c, lib/jit_mips.c: Correct + regressions when patching jit_calli for a forward function. + + * lib/jit_ppc-cpu.c: Correct wrong arguments to ANDI opcode + in jit_getarg_u{c,s} implementation. + +2012-12-23 Paulo Andrade <pcpa@gnu.org> + + * check/call.ok, check/call.tst: New test cases to validate + simple typed argument and return values in function calls. + + * check/lightning.c: Properly handle jit_movi of labels for + backward and forward code labels. + + * check/Makefile.am: Update for new test case. + +2012-12-23 Paulo Andrade <pcpa@gnu.org> + + * check/carry.ok, check/carry.tst: New test case to validate + carry condition handling. + + * check/Makefile.am: Update for new test case. + +2012-12-22 Paulo Andrade <pcpa@gnu.org> + + * lib/jit_ppc-cpu.c, lib/jit_ppc.c: Implement logic for + jit_htonr for big endian, so that ppc (big endian) pass the + new clobber.tst test case. + +2012-12-22 Paulo Andrade <pcpa@gnu.org> + + * lib/jit_arm.c: Correct use of wrong argument offset + variable in armv7l or float/double argument for varargs + function in armv7hl. 
+ Correct jit_getarg* logic in software float mode to + match expected behavior in other backends, that is, if + a function is not called, it is safe to use a few lightning + calls before a next jit_getarg* call, as done in the test + case check/stack.tst. The proper solution should be to + extend the parser in lib/lightning.c to check if there is + some float operation that will call some (libgcc?) function, + but software float arm should be a very uncommon backend for + lightning, so, just load the already in place arguments + saved to stack, assuming the register argument was clobbered + (what should not be the case most times...). + +2012-12-22 Paulo Andrade <pcpa@gnu.org> + + * check/clobber.ok, check/clobber.tst: New test case doing + extensive validation tests to ensure registers not used in + a operation are not clobbered. + + * check/Makefile.am: Update for new test case. + +2012-12-21 Paulo Andrade <pcpa@gnu.org> + + * lib/lightning.c: Partially rewrite/revert code to compute + initial register live state at the start of a basic block. + The original logic was corrupted when adding optimizations + to do as few computations as possible in jit_update. The + reglive field must be always a known set of live registers + at the start of a basic block. The value that was incorrect + was the regmask field, that must be the set of registers + that are in unknown state, because they are not known live, + neither set (or possibly not set) in the basic block, and + *must* store the state at the start of the basic block. + +2012-12-20 Paulo Andrade <pcpa@gnu.org> + + * include/lightning/jit_ppc.h: Correct mismatch of JIT_F{1,5} + with enum codes, that were correct, and returned by jit_f(). + + * lib/jit_ppc-cpu.c, lib/jit_ppc-fpu.c, lib/jit_ppc.c: Properly + implement and better describe values when generating stack + frames. 
+ +2012-12-18 Paulo Andrade <pcpa@gnu.org> + + * check/stack.ok, check/stack.tst: New files to test data + integrity on a deep chain of stack frames. + + * lib/jit_arm.c, lib/jit_arm-cpu.c, lib/jit_mips.c, + lib/jit_mips-cpu.c, lib/jit_ppc.c, lib/jit_ppc-cpu.c, + lib/jit_x86.c, lib/jit_x86-cpu.c: Calculate _jit->function->stack + in the emit stage, otherwise it will calculate it wrong if + it needs to jit_allocai space to spill registers. + + * lib/lightning.c: Correct wrong offset when updating the + "current" jit function pointer in the code that may need to + allocate stack space to spill registers. + + * check/lightning.c: Correct off by one data space check. + + * check/Makefile.am: Update for new test case. + +2012-12-17 Paulo Andrade <pcpa@gnu.org> + + * check/fop_abs.ok, check/fop_abs.tst, check/fop_sqrt.ok, + check/fop_sqrt.tst: New files implementing simple test cases + for the extra float operations. + + * check/Makefile.am: Update for new test cases. + + * check/alu.inc: Add an extra macro to check for unordered + equality on tests where it is expected to use NaN as an + argument. + + * check/lightning.c: Minor change for proper/common argument + syntax handling omitting arguments to options. + +2012-12-17 Paulo Andrade <pcpa@gnu.org> + + * check/Makefile.am: Automatically generate pattern list + of tests with alternate jit generation options. This should + prevent typos and needing to change multiple places after + a change. + +2012-12-14 Paulo Andrade <pcpa@gnu.org> + + * check/lightning.c: Remove the ".cpu name value" syntax, + as it was not able to do proper changes before the jit + internal data structure was initialized. Now it supports + several getopt options to force using different jit + generation options, effectively replacing the previous + syntax. + + * check/run-test: Add simple extra logic to handle differently + named test scripts, used to test things like x87 coprocessor + in ix86, and arm instruction set or software float in armv7l.
+ + * configure.ac: Add some AC_RUN_IFELSE calls to figure at + compile time if can test different code generation options, + and update Makefile generation accordingly. + + * check/Makefile.am, lib/jit_arm.c, lib/jit_x86.c: Update to + properly work with the test tool updating the jit_cpu global + information. + + * check/check.arm.sh, check/check.swf.sh, check/check.x87.sh: + New wrapper files passing -mthumb=0, mvfp=0 and -mx87=1 to + the test tool, if applicable, so that it can validate alternate + code generation options on test hosts that support them. + +2012-12-14 Paulo Andrade <pcpa@gnu.org> + + * lib/jit_x86-x87.c, lib/jit_x86.c: Correct test cases in ix86 + when using the x87 coprocessor instead of sse2+. + +2012-12-14 Paulo Andrade <pcpa@gnu.org> + + * include/lightning.h, include/lightning/jit_private.h, + lib/jit_arm.c, lib/jit_mips.c, lib/jit_ppc.c, lib/jit_x86.c, + lib/lightning.c: Make jit_ellipsis implementation not + backend specific. It is not intended to handle va_list + like objects at runtime, as jit_arg* and jit_getarg* + return constant values resolved at parse time, so, effectively + it is not possible to create printf like jit functions, as + there is no va_start, va_arg, va_end, etc, abstraction. This + limitation should be kept for the sake of making new ports + easier. + +2012-12-14 Paulo Andrade <pcpa@gnu.org> + + * include/lightning.h, lib/lightning.c: Add two extra wrapper + functions to avoid need for excess pointer to/from word casts. + + * check/lightning.c: Only need for pointer to/from word cast + now is jit_movi, update accordingly. + +2012-12-13 Paulo Andrade <pcpa@gnu.org> + + * check/varargs.ok, check/varargs.tst: New test cases implementing + simple varargs calls with a large amount of arguments to exercise + excess arguments on stack. + + * include/lightning.h: Include config.h if HAVE_CONFIG_H is + defined. 
+ + * lib/jit_arm.c: Allocate a fpr register, not a gpr one for + temporary when pushing varargs arguments in the stack. + + * lib/jit_arm-swf.c: Correct code changing the wrong offset + in jit_absr_d and jit_negr_d in software float. + + * lib/jit_mips.c: Correct calculation of offsets of arguments + on stack. + + * lib/jit_ppc.c: Correct bogus logic for "next" offset of arguments + on stack and adjust for fixed offset of stack arguments. + +2012-12-12 Paulo Andrade <pcpa@gnu.org> + + * include/lightning.h, lib/jit_arm.c, lib/jit_mips.c, + lib/jit_ppc.c, lib/jit_x86.c, lib/lightning.c: Change jit_prepare + to no longer receive an argument. If receiving an argument, it + should be an ABI specifier, not a boolean if varargs or not, + and add the new jit_ellipsis call, to specify where the + ellipsis is in the C prototype of the function being called. + Note that currently it is not supported to define varargs + functions and it will be ignored if calling jit_ellipsis not + in a prepare/finish* block, but this should be addressed. + + * check/allocai.tst, check/alu_add.tst, check/alu_and.tst, + check/alu_com.tst, check/alu_div.tst, check/alu_lsh.tst, + check/alu_mul.tst, check/alu_neg.tst, check/alu_or.tst, + check/alu_rem.tst, check/alu_rsh.tst, check/alu_sub.tst, + check/alu_xor.tst, check/alux_add.tst, check/alux_sub.tst, + check/bp.tst, check/branch.tst, check/cvt.tst, check/divi.tst, + check/fib.tst, check/ldsti.tst, check/ldstr-c.tst, + check/ldstr.tst, check/ldstxi-c.tst, check/ldstxi.tst, + check/ldstxr-c.tst, check/ldstxr.tst, check/rpn.tst, + check/lightning.c: Update for the change to jit_prepare and + addition of jit_ellipsis. + +2012-12-11 Paulo Andrade <pcpa@gnu.org> + + * lib/jit_ppc-cpu.c: Make movr a function that checks arguments + so that other code can safely assume it is a noop if src and dst + are the same register. + Implement rem{r,i}{,_u} as a div{,u}/mul/sub. 
+ Correct ANDIS, ORIS and XORIS calls to cast the argument to + unsigned before the shift to avoid an assertion if the argument + had the topmost bit set. + Implement lshi, rshi and rshi_u as functions to test for a + zero argument, that would otherwise trigger an assertion when + computing the shift value. + Do a simple implementation of bm{s,c}{r,i} with a temporary, + "andr" of arguments and jump based on comparison with zero. + Correct typo in ldxi_c. + + * lib/jit_ppc-fpu.c: Correct wrong arguments to FDIV* and STF*. + + * lib/jit_ppc.c: Correct wrong check for 6 instead of 8 integer + arguments in registers. If calling a varargs function and + passing a float or double argument, also either store the + value in the stack or in integer registers, as varargs functions + do not fetch it from float registers. + Add "case" for new functions and incorrectly missing ones. + Call libgcc's __clear_cache, that should know what to do + if the hardware needs flushing cache before execution. + + * lib/lightning.c: Do a simple/trivial logic in jit_regset_scan1, + that should make it easier for the compiler to optimize it, and + that also corrects the previously wrong code for big endian, and + that was causing problems in ppc due to not saving all callee save + registers as it was not "finding" them in the regset due to the + little endian assumption bug. + +2012-12-11 Paulo Andrade <pcpa@gnu.org> + + * configure.ac: Only default to using the builtin disassembler + if on GNU/Linux. This should be temporary, due to requiring + /proc/self/exe. + Correctly check $target_cpu for powerpc. + + * include/lightning/jit_ppc.h: Correctly implement jit_v_num. + + * include/lightning/jit_private.h: Declare proper prototype + for jit_init_debug and jit_finish_debug. + + * lib/jit_ppc-cpu.c: Remove code to save/restore callee save + float registers, as it is not required since those float + registers are not usable currently. 
+ Change prolog and epilog generation to, at least comparing + code, match what gcc generates in "gcc -O0", but it is still + failing in Darwin PPC, apparently due to the __clear_cache + call not being enough, as frequently it will also fail to + execute, and the code buffer is all zeroes. + + * lib/lightning.c: Do not fail in jit_regset_scan1 calls due + to passing 64 as argument on computers with 64 registers. + +2012-12-10 Paulo Andrade <pcpa@gnu.org> + + * lib/jit_mips-cpu.c: Correct all current test cases. + Call the "xori" not the "XORI" macro for jit_xori implementation, + as the XORI macro handles only 16 bit unsigned values. + Call the "movr" macro, not the "movi" macro in the special + case of adding or subtracting zero. + Use the proper temporary register in the jit_andr implementation. + +2012-12-09 Paulo Andrade <pcpa@gnu.org> + + * check/alu.inc, check/alu_add.ok, check/alu_add.tst, + check/alu_and.ok, check/alu_and.tst, check/alu_com.ok, + check/alu_com.tst, check/alu_div.ok, check/alu_div.tst, + check/alu_lsh.ok, check/alu_lsh.tst, check/alu_mul.ok, + check/alu_mul.tst, check/alu_neg.ok, check/alu_neg.tst, + check/alu_or.ok, check/alu_or.tst, check/alu_rem.ok, + check/alu_rem.tst, check/alu_rsh.ok, check/alu_rsh.tst, + check/alu_sub.ok, check/alu_sub.tst, check/alu_xor.ok, + check/alu_xor.tst, check/alux_add.ok, check/alux_add.tst, + check/alux_sub.ok, check/alux_sub.tst, check/branch.ok, + check/branch.tst: New test cases for arithmetic and branch + tests. + + * check/Makefile.am: Update for new test cases. + + * include/lightning/jit_private.h: Make the jit_reg_free_p + macro shared by all backends. Previously was added for the + arm backend, but is useful in the x86_64 backend when checking + state of "special purpose register". 
+ Also add the new jit_class_named register class, that must be + or'ed with the register value if calling jit_get_reg expecting + a specific value, because the specific register value may be + zero, that previously was treated as no register requested. + + * lib/jit_arm-cpu.c: Correct argument order for T2_MVN. + + * lib/jit_arm-swf.c: Call the proper function for double + divide. The "software float" implementation just calls + libgcc functions. + + * lib/jit_arm.c: Return float/double values in the float + register if using the hard float ABI. + + * lib/jit_x86-cpu.c: Change the can_sign_extend_int_p macro + to not include -0x80000000L, because there is code that + "abuses" it and thinks it can negate the immediate value + after calling that macro. + Correct implementation of jit_subi that had a wrong code + patch logic doing subtraction with reversed arguments. + Correct REX prefix calculation in the jit_muli implementation. + Correct logic to get/unget %*ax and %*dx registers in divremr + and divremi. + Correct divremi that was using the symbolic, unique %*ax + value in one place (not using the _REGNO name suffix). + Correct cut&paste error causing it to use "xor" instead of + "or" in one code path of the jit_ori implementation. + Correct several flaws when clobbering registers and/or when + one of the arguments was %*cx in the rotshr wrapper function + implementing most shift operations. + + * lib/lightning.c: No longer expect that the backend be smart + enough to know what to do when asking for a named register + if that register is already an argument or is live. It fails + if it is an argument, or if register is live, fails if cannot + spill. + No longer incorrectly assume that eqr_{f,d} and ltgr_{f,d} are + safe to inverse value tests in jump thread optimization. + +2012-12-05 Paulo Andrade <pcpa@gnu.org> + + * check/Makefile.am, check/cvt.ok, check/cvt.tst: Add new + "cvt" test case to test conversion from/to int/float types. 
+ + * check/lightning.c: Only define truncr_{f,d}_l in 64 bit mode. + + * include/lightning.h: Correct typo that caused it to define + jit_truncr_{f,d}_l in 32 bit mode. + + * lib/jit_arm-cpu.c: Avoid assertion failure in the signed/unsigned + extend opcodes generation as it shares an interface for 3 argument + opcode generation. + + * lib/jit_x86-cpu.c: Correct wrong argument passed to + jit_unget_reg in the andi implementation and wrong byte + unsigned extend code generation. + + * lib/jit_x86-sse.c: Correct conversion from "word" to float or + double as is dependent on wordsize. + +2012-12-05 Paulo Andrade <pcpa@gnu.org> + + * check/ldstr-c.ok, check/ldstr-c.tst, check/ldstxi-c.ok, + check/ldstxi-c.tst, check/ldstxr-c.ok, check/ldstxr-c.tst: + New test case files testing load clobbering the base and/or + index register; + + * check/ldst.inc: New file with common definition for all the + ldst* test cases. + + check/Makefile.am, check/ldsti.tst, check/ldstr.tst, + check/ldstxi.tst, check/ldstxr.tst: Update for new common + definitions file and new register clobber ldst tests. + +2012-12-05 Paulo Andrade <pcpa@gnu.org> + + * lib/jit_mips-fpu.c: Correct wrong register order in stxr_{f,d} + in the mips backend. + +2012-12-05 Paulo Andrade <pcpa@gnu.org> + + * lib/jit_arm-vfp.c: Correct regression found in armv7l with + latest test cases. + +2012-12-05 Paulo Andrade <pcpa@gnu.org> + + * check/ldstxi.tst, check/ldstxr.tst: Correct wrong argument + order for 32 bit mode tests. + + * configure.ac: Correct check for ix86 target_cpu. + +2012-12-05 Paulo Andrade <pcpa@gnu.org> + + * check/ldstr.ok, check/ldstr.tst, check/ldsti.ok, + check/ldsti.tst, check/ldstxr.ok, check/ldstxr.tst, + check/ldstxi.ok, check/ldstxi.tst: + New test case files exercising a very large amount of + register combinations to verify load/store implementation. + + * check/Makefile.am: Update for new test cases. 
+ + * lib/jit_x86-cpu.c: Correct wrong argument order when + computing REX prefix for {ld,st}r_T codes; + +2012-12-04 Paulo Andrade <pcpa@gnu.org> + + * lib/jit_mips-fpu.c, lib/jit_mips.c: Implement missing mips + jit_sqrtr_{f,d} codes. + + * check/all.tst, include/lightning.h, lib/jit_print.c: Change + declaration order and call order in all.tst of {add,sub}c and + {add,sub}x. *c must be called before to set the carry and *x + second to use the carry and keep it set. The wrong call order + was causing all.tst to fail in mips, where a register is + allocated to keep a global carry state. + +2012-12-04 Paulo Andrade <pcpa@gnu.org> + + * include/lightning/jit_mips.h, lib/jit_mips-cpu.c, + lib/jit_mips-fpu.c, lib/jit_mips.c: Correct float/double + argument handling and make the mips backend pass the initial + test cases. + + * include/lightning.h, lib/jit_print.c, lib/lightning.c: + Add extra enum values for argument handling functions that + could not be abstracted to the current codes, that is, when + float values need to move from/to gpr from/to fpr. It would + be more tempting to add such primitives, but they would have + wordsize limitations, and it is not expected to add codes + with one gpr argument for 64 bit and two for 32 bit. + + * lib/jit_ppc.c: Check _jit->function before calling jit_epilog() + to avoid a runtime exception. + +2012-12-04 Paulo Andrade <pcpa@gnu.org> + + * include/lightning/jit_mips.h, lib/jit_mips.c: Update to + make the mips backend compile in a qemu image. + + * lib/jit_ppc.c: Minor adaptations to help in having the + ppc backend compilable. + +2012-12-03 Paulo Andrade <pcpa@gnu.org> + + * configure.ac, include/lightning/jit_private.h, lib/jit_arm-cpu.c, + lib/jit_arm-swf.c, lib/jit_arm.c, check/Makefile.am: Correct + implementation of the arm backend port to build and pass the + current test cases. Tested on armv7 with softfp abi. 
+ + * lib/jit_disasm.c: Rename and change prototype of static + disassemble function as in the arm backend it is required + to access state information stored in the jit_state_t object. + + * check/3to2.tst, check/add.tst: Correct test case code assuming + JIT_R0 and JIT_RET are the same, and even if they are the same, + the logic was incorrect because it must always call jit_retval* + to fetch a function call return before any other instruction. + The arm backend has a special condition if jit_retval is not + called, because "r0" is not JIT_R0, but is JIT_RET and *also* + the first argument for a called function, so JIT_RET must be + only used as an argument to jit_retval. + +2012-12-03 Paulo Andrade <pcpa@gnu.org> + + * check/all.tst, check/lightning.c: Only declare or use 64 bit + interfaces on 64 bit builds. + + * check/fib.tst: Use simpler logic to not need preprocessor + conditionals for 32 or 64 bit. + + * include/lightning.h: Only declare 64 bit macros on a 64 bit + build. Code using lightning must know about wordsize and the + jit generation limitations, also, this way it generates a + compile time failure, not a runtime assertion. + + * include/lightning/jit_x86.h: Correct typo in macro name. + + * lib/jit_arm.c, lib/jit_arm-cpu.c, lib/jit_mips.c, + lib/jit_mips-cpu.c, lib/jit_ppc.c, lib/jit_ppc-cpu.c, + lib/jit_x86.c, lib/jit_x86-cpu.c: Correct wrong code to get + current jit function pointer. + + * lib/lightning.c: Move call to the simplify() optimization + to after register liveness is known. Previous code did work + by accident but now with proper test cases the problem was + noticed. + + * lib/jit_disasm.c: Always cast bfd_vma to long long when + passing it as printf argument. + +2012-12-03 Paulo Andrade <pcpa@gnu.org> + + * configure.ac, check/Makefile.am, check/check.sh, + doc/Makefile.am, include/lightning/Makefile.am, + lib/Makefile.am: Correct make distcheck. 
+ +2012-12-02 Paulo Andrade <pcpa@gnu.org> + + * lib/jit_ppc.c: Assign copyright ownership to FSF. + + * lib/jit_x86-cpu.c: Correct integer multiplication that was + generating code with reversed register arguments. + + * check/rpn.ok, check/rpn.tst: New test case file. + +2012-12-02 Paulo Andrade <pcpa@gnu.org> + + * lib/jit_x86-cpu.c, lib/jit_x86-sse.c, lib/jit_x86-x87.c: + Actually change copyright owner to FSF as advertised. + + * lib/jit_arm-cpu.c, lib/jit_arm-swf.c, + lib/jit_arm-vfp.c, lib/jit_arm.c, + lib/jit_mips-cpu.c, lib/jit_mips-fpu.c, lib/jit_mips.c, + lib/jit_ppc-cpu.c, lib/jit_ppc-fpu.c, lib/jit_ppc.c: New + files implementing initial code for different jit backends. + + * include/lightning/jit_private.h: Add extra field to the + private jit_patch_t type, required by the arm port. + + * lib/Makefile.am: Update for the new backend implementation + files. + +2012-12-02 Paulo Andrade <pcpa@gnu.org> + + * check/Makefile.am: Add proper "make clean" rule and missing + check.sh to EXTRA_DIST. + +2012-12-02 Paulo Andrade <pcpa@gnu.org> + + * .gitignore: Update pattern of ignored files. + + * check/Makefile.am: Add rule to build liblightning.la dependency + in case of running "make check" before building the library. + +2012-12-02 Paulo Andrade <pcpa@gnu.org> + + * lightning/Makefile.am, lightning/asm-common.h, + lightning/core-common.h, lightning/fp-common.h, + lightning/funcs-common.h, lightning/i386/Makefile.frag, + lightning/i386/asm-32.h, lightning/i386/asm-64.h, + lightning/i386/asm.h, lightning/i386/core-32.h, + lightning/i386/core-64.h, lightning/i386/core.h, + lightning/i386/fp-32.h, lightning/i386/fp-64.h, + lightning/i386/fp.h, lightning/i386/funcs.h, + lightning/ppc/asm.h, lightning/ppc/core.h, + lightning/ppc/fp.h, lightning/ppc/funcs.h, + lightning/sparc/asm.h, lightning/sparc/core.h, + lightning/sparc/fp.h, lightning/sparc/funcs.h: + Removed. The core logic is used in the new code, and new mips + and arm ports will be added. 
At first, sparc will not be + supported as it has not yet been ported to the new engine. + +2012-12-02 Paulo Andrade <pcpa@gnu.org> + + * tests/Makefile.am, tests/3to2.c, tests/3to2.ok, tests/add.c, + tests/add.ok, tests/allocai.c, tests/allocai.ok, tests/bp.c, + tests/bp.ok, tests/divi.c, tests/divi.ok, tests/fib.c, tests/fib.ok, + tests/fibdelay.c, tests/fibdelay.ok, tests/fibit.c, tests/fibit.ok, + tests/funcfp.c, tests/funcfp.ok, tests/incr.c, tests/incr.ok, + tests/ldst.c, tests/ldst.ok, tests/ldxi.c, tests/ldxi.ok, + tests/modi.c, tests/modi.ok, tests/movi.c, tests/movi.ok, + tests/printf.c, tests/printf.ok, tests/printf2.c, tests/printf2.ok, + tests/ret.c, tests/ret.ok, tests/rpn.c, tests/rpn.ok, tests/rpnfp.c, + tests/rpnfp.ok, tests/sete.c, tests/sete.ok, tests/testfp.c, + tests/testfp.ok, tests-run-test: Removed previous test suite, in + favor of a newer one in the check subdirectory. + + * check/3to2.ok, check/3to2.tst, check/add.ok, check/add.tst, + check/allocai.ok, check/allocai.tst, check/bp.ok, check/bp.tst, + check/divi.ok, check/divi.tst, check/fib.ok, check/fib.tst: + New sample input for the new test program, loosely matching + several of the previous test cases. + + * check/Makefile.am: New test suite makefile. + + * check/check.sh, check/run-test: New wrapper files for the + new test suite. + + * check/lightning.c: New file. The main driver of the new test + suite, that compiles to a parser of a very simple assembly like + language, generates jit and executes it. + + * check/all.tst: New file. A generic debug and sample test file + with a directive to prevent it from being executed, and useful to + read disassembly of all possible instructions, using a fixed set + of registers. 
+ + * include/Makefile.am, include/lightning.h, + include/lightning/Makefile.am, include/lightning/jit_arm.h, + include/lightning/jit_mips.h, include/lightning/jit_ppc.h, + include/lightning/jit_private.h, include/lightning/jit_x86.h, + lib/Makefile.am, lib/jit_disasm.c, lib/jit_print.c, + lib/jit_x86-cpu.c, lib/jit_x86-sse.c, lib/jit_x86-x87.c, + lib/jit_x86.c, lib/lightning.c: New files. These files are + written from scratch, only by <pcpa@gnu.org>, and have now + copyright assignment to the FSF. This is the core of the new + lightning rework. Previously it was integrated in code with + a garbage collector and several custom types like vectors and + hash tables, so this first code merge with lightning converts + that code into a library extracting only the jit bits, and at + first only for x86_64 GNU/Linux. + + * lightning.h, m4/lightning.m4: Removed. These are no longer + required in the new lightning code. + + .gitignore, Makefile.am, configure.ac: Update for the new + lightning code. + +2012-12-02 Paulo Andrade <pcpa@gnu.org> + * .cvsignore: Removed for extra cleanup. + + * build-aux: Rename directory to m4. + + * m4: Renamed to "default" name and for consistency with merge + with code rework to be imported in lightning. + + * .gitignore, configure.ac, Makefile.am, doc/Makefile.am: + Update for build-aux to m4 rename. + +2012-12-01 Paulo Andrade <pcpa@gnu.org> + + * opcode/Makefile.am, opcode/Makefile.in, opcode/ansidecl.h, + opcode/bfd.h, opcode/dis-asm.h, opcode/dis-buf.c, opcode/disass.c, + opcode/i386-dis.c, opcode/i386.h, opcode/ppc-dis.c, opcode/ppc-opc.c, + opcode/ppc.h, opcode/sparc-dis.c, opcode/sparc-opc.c, opcode/sparc.h, + opcode/sysdep.h: Removed. Do not bundle GNU binutils files. + + * aclocal.m4, configure, Makefile.in, config.h.in, doc/Makefile.in, + lightning/Makefile.in, tests/Makefile.in: Removed. Do not maintain + autogenerated files that also generate too much diff noise when + regenerated in git. 
+ + * build-aux/help2man, build-aux/texinfo.tex, build-aux/texi2dvi: + Removed. Build environment must have an up to date version from + upstream installed. + + * build-aux/config.guess, build-aux/config.sub, build-aux/depcomp, + build-aux/install-sh, build-aux/mdate-sh, build-aux/missing: Removed. + Do not maintain a copy of automake files in git. Release tarballs + must use an up to date version. + + * lightningize.in, doc/lightningize.1: Removed. Do not encourage + bundling lightning in other packages. It should use a system package + or a proper third party subdirectory. + + * INSTALL: Removed. Autoreconf removes it and creates a symlink + when regenerating files, so, avoid conflicts in git and let + automake create the symlink. + + * .gitignore: Add INSTALL and autogenerated files. + + * configure.ac, Makefile.am: Update for removal of opcode subdir, + auto generated files and lightningize. + + * tests/Makefile.am, tests/3to2.c, tests/add.c, tests/bp.c, + tests/fib.c, tests/fibdelay.c, tests/fibit.c, tests/funcfp.c, + tests/incr.c, tests/printf.c, tests/rpn.c, tests/rpnfp.c, + tests/sete.c, tests/testfp.c: Update for removal of opcode subdir. + + * doc/Makefile.am: Update for removal of lightningize. + + * configure.ac, lightning/ppc/funcs.h, lightning/sparc/funcs.h, + lightning/i386/fp.h, lightning/i386/core.h, lightning/i386/asm.h, + tests/3to2.c, tests/add.c, tests/bp.c, tests/fib.c, tests/fibdelay.c, + tests/fibit.c, tests/funcfp.c, tests/incr.c, tests/printf.c, + tests/rpn.c, tests/rpnfp.c, tests/sete.c, tests/testfp.c: + Remove LIGHTNING_CROSS, it is half supported and incomplete. + + * tests/3to2.c, tests/funcfp.c, tests/rpnfp.c: Remove preprocessor + check on JIT_FPR. If no hardware registers are available, the backend + must provide an alternative for software float. + + * lightning/ppc/core.h, lightning/sparc/core.h, tests/Makefile.am: + Remove JIT_NEED_PUSH_POP. 
It is absolutely not trivial to implement + properly on some backends due to stack alignment constraints, and + whenever it is required, using jit_allocai and using a properly + aligned stack vector, or a heap buffer, is better. + + * tests/push-pop.c, tests/push-pop.ok: Removed due to + JIT_NEED_PUSH_POP no longer available. + +2011-02-28 Paolo Bonzini <bonzini@gnu.org> + + * lightning/i386/core-64.h: Add jit_add{c,x}{i,r}_l, jit_mulr_{l,ul}_, + fix jit_mul{i,r}_{l,ul}. + +2010-08-20 Paolo Bonzini <bonzini@gnu.org> + + * lightning/i386/fp-64.h: Return patch address from jit_bXYr_{f,d}. + Reported by Paulo César Pereira de Andrade. + * lightning/ppc/fp.h: Likewise. + * lightning/sparc/fp.h: Implement FP branches. + +2010-08-18 Paolo Bonzini <bonzini@gnu.org> + + * lightning/i386/fp-64.h: Fix jp in jit_bner_{f,d}. + +2010-08-18 Paolo Bonzini <bonzini@gnu.org> + + * lightning/i386/fp-32.h: Fix -D_ASM_SAFETY compilation. + Reported by Paulo César Pereira de Andrade. + +2010-08-15 Paolo Bonzini <bonzini@gnu.org> + + * tests/ldst.c: Update. + * tests/Makefile.am: Use -ffloat-store to compile it. + +2010-08-15 Paolo Bonzini <bonzini@gnu.org> + + * lightning/i386/core.h (jit_ldr_c, jit_ldxr_c, jit_ldr_s, + jit_ldxr_s): Move... + * lightning/i386/core-32.h: ... here. + * lightning/i386/core-64.h (jit_ldr_c, jit_ldxr_c, jit_ldr_s, + Use movsbq and movswq. + +2010-08-10 Paulo César Pereira de Andrade <pcpa@mandriva.com.br> + + * lightning/i386/core-32.h (jit_replace): Use MOVLrr, not MOVLir. + (jit_movbrm): Check index register as well. + * lightning/i386/fp-64.h: Add jit_extr_f_d and jit_extr_d_f. + * lightning/fp-common.h: Add jit_extr_f_d and jit_extr_d_f. + +2010-07-28 Paolo Bonzini <bonzini@gnu.org> + + * tests/Makefile.am: Add ldst test. + * tests/Makefile.in: Regenerate. + * tests/ldst.c: New. + * tests/ldst.ok: New. + +2010-07-28 Paolo Bonzini <bonzini@gnu.org> + + * THANKS: Add Paulo Cesar Pereira de Andrade. 
+ * doc/porting.texi: Fix ordering of arguments in jit_stxi. + * lightning/i386/core-32.h (jit_replace): Remove cmp argument. + * lightning/i386/fp-64.h (jit_movi_f): Fix. + +2010-07-26 Paolo Bonzini <bonzini@gnu.org> + + * lightning/i386/core-32.h (jit_replace): Move here (removed + 2009-03-01). + +2010-07-19 Paolo Bonzini <bonzini@gnu.org> + + * build-aux/lightning.m4: Always set and replace lightning_frag. + * Makefile.in: Regenerate. + * aclocal.m4: Regenerate. + * config.h.in: Regenerate. + * configure: Regenerate. + * doc/Makefile.in: Regenerate. + * doc/lightningize.1: Regenerate. + * doc/version.texi: Regenerate. + * lightning/Makefile.in: Regenerate. + * opcode/Makefile.in: Regenerate. + * tests/Makefile.in: Regenerate. + +2009-03-01 Paolo Bonzini <bonzini@gnu.org> + + * lightning/i386/core-64.h: Use Mike's macros for x86-64 too. + * lightning/i386/core.h: Remove jit_replace. + + 2009-02-27 Mike Spivey <mike@comlab.ox.ac.uk> + + * lightning/i386/core.h: Rewrite shift-handling macros. + * lightning/fp-common.h: Fix jit_extr_{f_d,d_f}. + +2009-02-17 Mike Spivey <mike@comlab.ox.ac.uk> + + * lightning/i386/core.h: Fix blunder in operand order. + +2009-02-17 Paolo Bonzini <bonzini@gnu.org> + + * lightning/i386/fp-32.h: Another fix to jit_fp_btest. + +2009-02-17 Paolo Bonzini <bonzini@gnu.org> + + * lightning/fp-common.h: Define double branches if missing. + * lightning/i386/asm.h: Define JC and JNC mnemonics. + * lightning/i386/fp-32.h: Fix jit_fp_btest. All reported + by Mike Spivey. + +2008-10-09 Paolo Bonzini <bonzini@gnu.org> + + * lightning/ppc/funcs.h (jit_flush_code): Subtract 1 from end. + Reported by Eli Barzilay and Matthew Flatt. + +2008-08-23 Nix <nix@esperi.org.uk> + + * lightning/i386/Makefile.frag: fp-32.h and fp-64.h are target files. + +2008-07-02 Laurent Michel <ldm@engr.uconn.edu> + + * lightning/ppc/funcs.h (jit_flush_code): modified the computation + of start/end. The pointer arithmetic was done without casting. 
It + prevented compilation with recent gcc versions. + * lightning/ppc/core.h (jit_pushr_i): The offset for the store was + incorrect. Should have been 4 bytes below SP (not above). + * lightning/ppc/core.h (jit_popr_i): The offset for the load was + incorrect. Should have been 0 (not +8). + +2008-06-17 Paolo Bonzini <bonzini@gnu.org> + + * lightning/i386/asm-64.h: Forward IMULQir to IMULQirr, + fix REXQ order for IMULQirr. + +2008-06-17 Paolo Bonzini <bonzini@gnu.org> + + * lightning/i386/core.h: Fix _rN vs. _rR. + +2008-06-16 Paolo Bonzini <bonzini@gnu.org> + + * lightning/i386/core.h: Use jit_save in jit_replace. Move JIT_R + definition... + * lightning/i386/core-32.h: ... here; define jit_save so that + the core.h has no effect on the 32-bit backend. + * lightning/i386/core-64.h: Place JIT_R1/JIT_R2 in R10/R11, + place outgoing arguments in the right spot from the beginning, + define jit_save, fix jit_reg8/jit_reg16. + +2008-06-15 Paolo Bonzini <bonzini@gnu.org> + + * lightning/i386/core-64.h: Rewrite argument passing to + support up to 6 arguments and generate less code. + +2008-06-14 Laurent Michel <ldm@thorgal.homelinux.org> + + * lightning/i386/core-64.h (jit_movi_l): When the operand is 0, + the XOR should be on a quadword. + * lightning/i386/core-64.h (jit_prolog): Keep 16-byte stack + alignment. + (jit_ret): Always use LEAVE. + +2008-06-13 Laurent Michel <ldm@thorgal.homelinux.org> + + * lightning/i386/core-64.h: Add (void) casts for C++ compatibility. + * lightning/i386/asm.h: Likewise. + +2008-06-12 Paolo Bonzini <bonzini@gnu.org> + + * lightning/i386/core.h: Move JIT_V definition... + * lightning/i386/core-32.h: ... here. + * lightning/i386/core-64.h: ... and here. Avoid dancing between + RSI/RDI and R12/R13, and place JIT_V1/JIT_V2 in R12/R13. + +2008-06-11 Paolo Bonzini <bonzini@gnu.org> + + * build-aux/lightning.m4: Adjust LIGHTNING_BACKENDS, don't + use suffix support to distinguish i386/x86_64. 
+ * lightning/i386/Makefile.frag: Use LIGHTNING_TARGET_FILES + to distribute *-32.h and *-64.h files now. + * lightning/i386/asm-i386: Moved to... + * lightning/i386/asm.h: Include the appropriate subtarget file. + * lightning/i386/core-i386: Moved to... + * lightning/i386/core.h: Include the appropriate subtarget file. + * lightning/i386/fp.h: New, include the appropriate subtarget file. + * lightning/i386/asm-32: Do not include asm-i386.h. + * lightning/i386/asm-64.h: Likewise. + * lightning/i386/core-32: Do not include core-i386.h. + * lightning/i386/core-64.h: Likewise. + * lightning/Makefile.am: Adjust for renamed files. + + * configure.ac: Define LIGHTNING_TARGET here. + * opcode/disass.c: Change list of valid LIGHTNING_TARGET values. + + * lightningize.in: Robustify against missing subtarget files. + +2008-06-11 Paolo Bonzini <bonzini@gnu.org> + + * lightning/i386/core-32.h: Use MOVLir instead of jit_movi_l + to implement jit_movi_p. + +2008-06-11 Paolo Bonzini <bonzini@gnu.org> + + * lightning/i386/core-32.h: Use separate __APPLE__ and SysV + prolog/ret macros. Subtract 12 bytes in __APPLE__ case to + keep stack aligned, and always use LEAVE in the epilog. + +2008-06-11 Paolo Bonzini <bonzini@gnu.org> + + * lightning/i386/core-i386.h: Fix C++ incompatibility. + +2008-06-10 Laurent Michel <ldm@engr.uconn.edu> + + * lightning/i386/core-i386.h: Fix jit_replace8 for + case when one of the operands is _EAX. + +2008-05-19 Paolo Bonzini <bonzini@gnu.org> + + * tests/run-test: Avoid CRLF issues on mingw. + +2008-03-21 Paolo Bonzini <bonzini@gnu.org> + + * lightning/i386/core-64.h: Fix jit_{ld,st}{,x}i_{i,l}. + Remove jit_ld{,x}i_ul. + * lightning/core-common.h: Make jit_ld{,x}{i,r}_ul + always a synonym of the _l variant. + * doc/porting.texi: Document this. + +2008-03-19 Paolo Bonzini <bonzini@gnu.org> + + * lightning/i386/core-64.h: Fix uses of jit_qop_. + +2008-03-19 Paolo Bonzini <bonzini@gnu.org> + + * lightning/i386/core-64.h: Add boolean operations. 
+ +2008-03-19 Paolo Bonzini <bonzini@gnu.org> + + * lightning/i386/asm-64.h: Add LEAQmr. + +2008-03-19 Paolo Bonzini <bonzini@gnu.org> + + * lightning/i386/core-64.h: Misc bugfixes. + +2008-03-19 Paolo Bonzini <bonzini@gnu.org> + + * lightning/i386/core-i386.c: Remove jit_ldr_i, jit_ldxr_i. + * lightning/i386/core-32.h: Add jit_ldr_i, jit_ldxr_i. + * lightning/i386/core-64.h: Add jit_ld{r,xr,i,xi}_{ui,l,ul}; + move jit_ldr_i, jit_ldxr_i, jit_str_l, jit_stxr_l with others. + +2008-03-19 Paolo Bonzini <bonzini@gnu.org> + + * lightning/asm-common.h: Add _s32P. + +2008-03-19 Paolo Bonzini <bonzini@gnu.org> + + * lightning/i386/core-64.h: Implement long mul/div/mod. + +2008-03-19 Paolo Bonzini <bonzini@gnu.org> + + * lightning/i386/asm-i386.h: Cast memory address to long for JCCim. + +2008-03-15 Paolo Bonzini <bonzini@gnu.org> + + * lightning/asm-common.h: Add underscores around __unused__ + attribute. + +2008-03-15 Paolo Bonzini <bonzini@gnu.org> + + * lightning/ppc/core.h: Avoid some "value computed is not used" + warnings. + * lightnings/tests/allocai.c: Silence other warnings. + +2008-03-14 Paolo Bonzini <bonzini@gnu.org> + + * lightningize.in: Fix some problems (not all). + +2008-03-14 Paolo Bonzini <bonzini@gnu.org> + + * lightning/i386/core-32.h: Avoid some "value computed is not used" + warnings; reported by Sam Steingold. + +2008-03-08 Paolo Bonzini <bonzini@gnu.org> + + * lightning/i386/core-32.h: Fix stxr_c(_EAX, _EBX, _ESI). + +2008-02-13 Paolo Bonzini <bonzini@gnu.org> + + * lightning/i386/asm-32.h: Avoid redefinition of _r1, reported by + Sam Steingold. + * lightning/i386/asm-64.h: Likewise. + +2008-02-08 Paolo Bonzini <bonzini@gnu.org> + + * lightning/i386/asm-i386.h: Don't define _VOID, reported + by Reini Urban. + +2008-02-03 Paolo Bonzini <bonzini@gnu.org> + + * build-aux/lightning.m4: Add --with-lightning-prefix option, suggested + by Sam Steingold. + +2008-01-14 Paolo Bonzini <bonzini@gnu.org> + + * lightning/i386/core-64.h: Use CALLsr, not CALLLsr. 
+ +2008-01-13 Paolo Bonzini <bonzini@gnu.org> + + * lightning/i386/core-i386.h: Move jit_calli and jit_callr... + * lightning/i386/core-32.h: ... here. + * lightning/i386/core-64.h: Redefine them. + +2008-01-05 Paolo Bonzini <bonzini@gnu.org> + + * lightning/i386/fp-32.h: Fix sub(a,0,a). + * lightning/tests/3to2.c: Add new testcases. + * lightning/tests/3to2.ok: Add new testcases. + +2008-01-02 Paolo Bonzini <bonzini@gnu.org> + + * lightning/i386/fp-32.h: Fix sub(a,b,a) with a ~= JIT_FPR0. + * lightning/tests/3to2.c: New. + * lightning/tests/3to2.ok: New. + +2007-11-07 Paolo Bonzini <bonzini@gnu.org> + + * opcode/Makefile.am: Fix AM_CPPFLAGS. + +2007-08-12 Paolo Bonzini <bonzini@gnu.org> + + * lightning/i386/core-i386.h: Improve encoding of set* instructions. + * lightning/i386/core-64.h: Fix jit_bra_l. + * tests/sete.c: New. + * tests/sete.ok: New. + +2007-06-29 Paolo Bonzini <bonzini@gnu.org> + + * tests/bp.c: Upgrade to GPL/LGPLv3. + * lightning/i386/asm-32.h: Upgrade to GPL/LGPLv3. + * lightning/i386/asm-64.h: Upgrade to GPL/LGPLv3. + * lightning/i386/core-32.h: Upgrade to GPL/LGPLv3. + * lightning/i386/core-64.h: Upgrade to GPL/LGPLv3. + * lightning/i386/fp-64.h: Upgrade to GPL/LGPLv3. + * lightning/sparc/asm.h: Upgrade to GPL/LGPLv3. + * lightning/sparc/core.h: Upgrade to GPL/LGPLv3. + * lightning/sparc/fp.h: Upgrade to GPL/LGPLv3. + * lightning/sparc/funcs.h: Upgrade to GPL/LGPLv3. + * lightning/i386/asm-i386.h: Upgrade to GPL/LGPLv3. + * lightning/i386/core-i386.h: Upgrade to GPL/LGPLv3. + * lightning/i386/fp-32.h: Upgrade to GPL/LGPLv3. + * lightning/i386/funcs.h: Upgrade to GPL/LGPLv3. + * lightning/ppc/asm.h: Upgrade to GPL/LGPLv3. + * lightning/ppc/core.h: Upgrade to GPL/LGPLv3. + * lightning/ppc/fp.h: Upgrade to GPL/LGPLv3. + * lightning/ppc/funcs.h: Upgrade to GPL/LGPLv3. + * lightning.h: Upgrade to GPL/LGPLv3. + * tests/add.c: Upgrade to GPL/LGPLv3. + * tests/fib.c: Upgrade to GPL/LGPLv3. + * tests/testfp.c: Upgrade to GPL/LGPLv3. 
+ * tests/fibdelay.c: Upgrade to GPL/LGPLv3. + * tests/fibit.c: Upgrade to GPL/LGPLv3. + * tests/funcfp.c: Upgrade to GPL/LGPLv3. + * tests/incr.c: Upgrade to GPL/LGPLv3. + * tests/printf.c: Upgrade to GPL/LGPLv3. + * tests/printf2.c: Upgrade to GPL/LGPLv3. + * tests/rpn.c: Upgrade to GPL/LGPLv3. + * tests/rpnfp.c: Upgrade to GPL/LGPLv3. + * lightning/asm-common.h: Upgrade to GPL/LGPLv3. + * lightning/core-common.h: Upgrade to GPL/LGPLv3. + * lightning/fp-common.h: Upgrade to GPL/LGPLv3. + * lightning/funcs-common.h: Upgrade to GPL/LGPLv3. + * opcode/dis-buf.c: Upgrade to GPL/LGPLv3. + * opcode/disass.c: Upgrade to GPL/LGPLv3. + * opcode/i386-dis.c: Upgrade to GPL/LGPLv3. + * opcode/sparc-dis.c: Upgrade to GPL/LGPLv3. + * opcode/sparc-opc.c: Upgrade to GPL/LGPLv3. + * lightningize.in: Upgrade to GPL/LGPLv3. + * opcode/bfd.h: Upgrade to GPL/LGPLv3. + * opcode/i386.h: Upgrade to GPL/LGPLv3. + * opcode/sparc.h: Upgrade to GPL/LGPLv3. + +2007-01-26 Thomas Girard <thomas.g.girard@free.fr> + + * lightning/Makefile.am: Add clean-local target. + +2006-12-02 Paolo Bonzini <bonzini@gnu.org> + + * lightning/i386/asm-i386.h: Add CVTTS?2SIL. + * lightning/i386/asm-64.h: Add CVTTS?2SIQ. + * lightning/i386/fp-64.h: Use it. + + * lightning/Makefile.am: Place files in nodist_lightning_HEADERS. + +2006-11-23 Paolo Bonzini <bonzini@gnu.org> + + * lightning/core-common.h: Add casts in "*i_p" variants. + * lightning/i386/asm-32.h: Add _r1. + * lightning/i386/asm-64.h: Likewise, and add SSE instructions. + * lightning/i386/asm-i386.h: Merge SSE instructions from Gwenole. + Use short form for 16-bit AX instructions. Remove _r1 + * lightning/i386/core-64.h: Add FP ABI support in its infancy. + * lightning/i386/core-i386.h: Move jit_arg_f and jit_arg_d... + * lightning/i386/core-32.h: ... and jit_prepare_f and jit_prepare_d... + * lightning/i386/fp-32.h: ... here. + * lightning/i386/fp-64.h: Write the code. + * lightning/sparc/fp.h: Fix jit_extr_{f_d,d_f} register order. 
+ +2006-11-22 Paolo Bonzini <bonzini@gnu.org> + + * lightning/i386/asm-i386.h: Move x86-64 instructions... + * lightning/i386/asm-64.h: ... here. + * lightning/i386/fp-32.h: Fix bugfixes worked around in froofyJIT. + Add JIT_FPRET. + * lightning/sparc/fp.h: Likewise. + * lightning/ppc/fp.h: Likewise. + * lightning/fp-common.h: Adjust for JIT_FPRET. + * tests/funcfp.c: Adjust for JIT_FPRET. + * tests/rpnfp.c: Adjust for JIT_FPRET. + +2006-11-20 Paolo Bonzini <bonzini@gnu.org> + + * lightning/i386/asm-i386.h: Add an underscore to macros without + a parameter. + +2006-11-20 Paolo Bonzini <bonzini@gnu.org> + + * lightning/i386/core-i386.h: Move jit_movip, jit_check8, jit_reg8, + jit_reg16, jit_movbrm... + * lightning/i386/core-32.h: ... here. + * lightning/i386/core-64.h: Redefine them. Fix other bugs. + + * tests/printf.c: Do not do a varargs call. + +2006-11-20 Paolo Bonzini <bonzini@gnu.org> + + * lightning/i386/asm-i386.h: Check in rewrite from Basilisk II. + * lightning/i386/asm-32.h: Adjust. + * lightning/i386/asm-64.h: Adjust. + * lightning/i386/fp-32.h: Adjust. + + * lightning/i386/core-32.h: Adjust. Add jit_{ld,ldx,st,stx}i*. + * lightning/i386/core-64.h: Adjust. Add jit_{ld,ldx,st,stx}i*. + * lightning/i386/core-i386.h: Adjust. Remove these patterns. + +2006-11-20 Paolo Bonzini <bonzini@gnu.org> + + * lightning/i386/asm-i386.h: Merge 64-bit cleanliness changes from + mzscheme. + Add SSE. + * lightning/i386/asm-64.h: Likewise. + +2006-11-20 Paolo Bonzini <bonzini@gnu.org> + Ludovic Courtes <ludo@chbouib.org> + + * lightning/i386/core-32.h: Disable jit_push and jit_pop if stack not + needed. + * lightning/i386/core-64.h: Disable jit_push and jit_pop if stack not + needed. + * lightning/sparc/core.h: Merge final implementation of jit_pushr and + jit_popr. + * lightning/ppc/core.h: Fix implementation of jit_pushr and jit_popr to + work (more or less) across function calls. + + * tests/push-pop.c, tests/push-pop.ok: New test. + * tests/Makefile.am: Run it. 
+ +2006-11-20 Paolo Bonzini <bonzini@gnu.org> + + * lightning/asm-common.h: Make 64-bit safe. + * lightning/i386/funcs.h: Make 64-bit safe. + + * lightning/i386/asm-64.h: More merge from mzscheme. + * lightning/i386/asm-i386.h: More merge from mzscheme. + * lightning/i386/core-32.h: More merge from mzscheme. + * lightning/i386/core-64.h: More merge from mzscheme. + * lightning/i386/core-i386.h: More merge from mzscheme. + + * tests/rpnfp.c, tests/testfp.c, tests/funcfp.c: Skip if no + floating-point support. + +2006-11-04 Paolo Bonzini <bonzini@gnu.org> + + * tests/rpn.c: Remove pushr/popr. + +2006-11-04 Paolo Bonzini <bonzini@gnu.org> + + * lightning/ppc/core.h: Implement jit_allocai, define JIT_FP to be R1. + * lightning/ppc/funcs.h: Store frame size into _jitl. Store R1 before + the STMW, so that the offset is unchanged when we patch the STMW. + * lightning/i386/core.h: Define JIT_FP to be EBP. + * lightning/i386/core-32.h: Implement jit_allocai, put LEAVE in the + epilog if jit_allocai was used. + * lightning/i386/core-64.h: Implement jit_allocai, put LEAVE in the + epilog if jit_allocai was used. + +2006-11-04 Ludovic Courtes <ludo@chbouib.org> + + * lightning/sparc/core.h: Implement jit_allocai. + * tests/allocai.c: New. + * tests/Makefile.am: Point to new tests. + +2006-11-03 Paolo Bonzini <bonzini@gnu.org> + + * lightning/ppc/core.h: Fix jit_bms using BNE rather than BGT. + "AND." does signed comparisons. + +2006-10-31 Paolo Bonzini <bonzini@gnu.org> + + * doc/porting.texi: Rename JIT_FP to JIT_AP. + * lightning/core-common.h: Likewise. + * lightning/i386/core-i386.h: Likewise. + * lightning/fp-common.h: Provide default versions of jit_getarg_[fd]. + * lightning/i386/fp-32.h: Don't provide jit_getarg_[fd]. + * lightning/ppc/fp.h: Likewise. + +2006-10-31 Ludovic Courtes <ludo@chbouib.org> + + * doc/using.texi (The instruction set): Clarified the use of `JIT_RET' and + documented `jit_retval'. 
+ * tests/ret.c (generate_function_proxy): After `jit_finish', use + `jit_retval_i' to move FUNC's return value into the correct register. + +2006-10-31 Paolo Bonzini <bonzini@gnu.org> + Ludovic Courtes <ludo@chbouib.org> + + * tests/divi.c, tests/divi.ok, tests/movi.c, tests/movi.ok: New. + * tests/ldxi.c: Ensure large pointer is generated. + * tests/Makefile.am: Point to new tests. + * lightning.h: Include funcs-common.h before funcs.h. + * lightning/sparc/core.h: Fix bugs in modi/divi. + +2006-10-30 Paolo Bonzini <bonzini@gnu.org> + + * lightning/Makefile.am: Use "ln -sf". + * lightning/core-common.h: Define jit_negr_l if necessary. + +2006-10-30 Paolo Bonzini <bonzini@gnu.org> + + * lightning/i386/asm.h (MOVS*, MOVZ*): Use correct _r[124] macros. + +2006-10-29 Paolo Bonzini <bonzini@gnu.org> + + * configure.ac: Use lightning.m4 macros. + * lightning.m4: Refactor to use common code in configure.ac. Move... + * build-aux/lightning.m4: ... here. + * lightningize.in: Support suffixes. + * opcode/disass.in: Adapt to changes in configure.ac. + + * lightning/ppc/funcs.h: Use __APPLE__ instead of _CALL_DARWIN. + * lightning/i386/core-32.h: Likewise. + +2006-10-26 Paolo Bonzini <bonzini@gnu.org> + + * configure.ac: Fix compilation test. + * lightning/Makefile.am: Symlink LIGHTNING_TARGET_FILES in + non-distribution mode. + * lightning/i386/Makefile.frag: Use LIGHTNING_TARGET_FILES. + +2006-10-26 Paolo Bonzini <bonzini@gnu.org> + + * configure.ac: Subst cpu. + * lightning/core-common.h: Make tests pass on i386. + * lightning/i386/asm-32.h: Make tests pass on i386. + * lightning/i386/asm-64.h: Make tests pass on i386. + * lightning/i386/asm-i386.h: Make tests pass on i386. + * lightning/i386/core-32.h: Make tests pass on i386. + * lightning/i386/core-64.h: Make tests pass on i386. + * lightning/i386/core-i386.h: Make tests pass on i386. + * tests/Makefile.am: Include files from cpu directory. 
+ +2006-10-26 Paolo Bonzini <bonzini@gnu.org> + + * lightning/i386/asm.h: Move to asm-i386.h + * lightning/i386/asm-32.h: New, from Matthew Flatt. + * lightning/i386/asm-64.h: New, from Matthew Flatt. + * lightning/i386/core.h: Move to core-i386.h + * lightning/i386/core-32.h: New, from Matthew Flatt. + * lightning/i386/core-64.h: New, from Matthew Flatt. + * lightning/i386/fp.h: Move to fp-32.h + * lightning/i386/fp-64.h: New, dummy. + * lightning/i386/Makefile.frag: New. + * lightning/Makefile.am: Support per-target Makefile fragments. + * configure.ac: Support per-target Makefile fragments and CPU suffixes. + +2006-10-16 Paolo Bonzini <bonzini@gnu.org> + + * lightning/i386/i386.h (jit_flush_code): Fix syntax error. :-( + +2006-07-06 Paolo Bonzini <bonzini@gnu.org> + Ludovic Courtes <ludovic.courtes@laas.fr> + + * doc/using.texi: Clarify "Using autoconf" section + and rename it to "Bundling lightning" + * lightning.m4: Work also if lightning is not bundled. + +2006-07-06 Paolo Bonzini <bonzini@gnu.org> + Ludovic Courtes <ludovic.courtes@laas.fr> + + * lightning/ppc/core.h (_jit_mod): Replace with... + (_jit_mod_big, _jit_mod_small): ... these. + (jit_modi_i, jit_modi_ui): Rewrite. + * tests/modi.c, tests/modi.ok: New tests. + +2006-05-18 Matthew Flatt <mflatt@cs.utah.edu> + + * lightning/i386/asm.h: Fix test for extending the mprotect area + towards lower addresses. + +2006-05-16 Bruno Haible <bruno@clisp.org> + + * lightning/asm-common.h: Don't use __func__ nor __FUNCTION__ if + not compiling with GNU C. + +2006-02-16 Paolo Bonzini <bonzini@gnu.org> + + * lightning/ppc/core.h: Fix jit_ldxi_* with big displacement. + +2006-01-23 Paolo Bonzini <bonzini@gnu.org> + + * configure.ac: Fix comments in config.h.in. + +2005-11-25 Paolo Bonzini <bonzini@gnu.org> + + * lightning/sparc/fp.h: Fix header comment. + * lightning/ppc/fp.h: Fix header comment. + +2005-04-27 Paolo Bonzini <bonzini@gnu.org> + + * lightning/i386/asm.h (JCm, JCSm, JNCm, JNCSm): New. 
+ +2004-11-26 Paolo Bonzini <bonzini@gnu.org> + + * lightning/ppc/funcs.h (_jit_epilog): Remove unused variable. + +2004-11-13 Paolo Bonzini <bonzini@gnu.org> + + * lightning/i386/funcs.h [__linux__]: Include sys/mman.h. + +2004-11-09 Paolo Bonzini <bonzini@gnu.org> + + * lightning/sparc/fp.h: Fix fp-to-integer conversions. + * lightning/ppc/testfp.c: Test fp-to-integer conversions + of integer numbers. + * lightning/ppc/testfp.ok: Adjust for the above. + +2004-11-08 Paolo Bonzini <bonzini@gnu.org> + + * lightning/ppc/testfp.c: Always flush code before + testing it. + +2004-11-08 Paolo Bonzini <bonzini@gnu.org> + + * lightning/ppc/fp.h: Do not clobber f31. + +2004-11-08 Paolo Bonzini <bonzini@gnu.org> + + * lightning.h: New name of... + * lightning-inst.h: ... this file. + * lightning.h.in: Removed. + + * opcodes/disass.c: Include config.h. + * tests/add.c: Include config.h. + * tests/bp.c: Include config.h. + * tests/fib.c: Include config.h. + * tests/fibdelay.c: Include config.h. + * tests/fibit.c: Include config.h. + * tests/funcfp.c: Include config.h. + * tests/incr.c: Include config.h. + * tests/printf.c: Include config.h. + * tests/printf2.c: Include config.h. + * tests/rpn.c: Include config.h. + * tests/rpnfp.c: Include config.h. + * tests/testfp.c: Include config.h. + +2004-10-12 Paolo Bonzini <bonzini@gnu.org> + + * lightning/i386/fp.h: Fix bugs in conditional branches. + +2004-10-10 Paolo Bonzini <bonzini@gnu.org> + + * lightning/i386/funcs.h: Fix pasto in jit_flush_code. + +2004-10-08 Paolo Bonzini <bonzini@gnu.org> + + * lightning/ppc/fp.h: Optimized conditional branches. + +2004-09-20 Paolo Bonzini <bonzini@gnu.org> + + * lightning/ppc/asm.h: Fix more typos. + +2004-09-20 Paolo Bonzini <bonzini@gnu.org> + + * lightning/ppc/asm.h: Fix typos, replace `26' with JIT_AUX. + +2004-09-20 Paolo Bonzini <bonzini@gnu.org> + + * lightning/ppc/fp.h: Added conditional branches. 
+ +2004-09-18 Laurent Michel <ldm@thorgal.homelinux.org> + + * lightning/ppc/fp.h (jit_unler_d, jit_unltr_d, jit_unger_d, + jit_ungtr_d, jit_ltgt_d, jit_uneq_d): Implemented missing tests + to fully support testfp. + (jit_floorr_d_i, jit_ceilr_d_i, jit_roundr_d_i, jit_truncr_d_i): + New macros. + * lightning/ppc/asm.h: Added missing opcodes FCTIWZ and MTFSFI. + * lightning/ppc/funcs.h (_jit_prolog): Fixed minor mistake in + the initialization of _jitl.nextarg_geti, relying on the + JIT_AUX macro as well to get the register offset. + +2004-09-07 Paolo Bonzini <bonzini@gnu.org> + + * lightning/ppc/funcs.h: Fix typo. + +2004-09-06 Paolo Bonzini <bonzini@gnu.org> + + * tests/funcfp.c: Use %g. Remove C99 variable declarations. + * tests/testfp.c: Don't use __builtin_nan. + + * lightning/ppc/core.h: Add three V registers. + * lightning/ppc/funcs.h: Adjust. + + * lightning/sparc/core.h: Some fixes related to FP argument passing. + Move R0 to %g2, use %o7 for JIT_BIG2. + * lightning/sparc/fp.h: Some fixes related to FP argument passing. + +2004-09-02 Paolo Bonzini <bonzini@gnu.org> + + * lightning/sparc/core.h: Add another V register, + move R0 to %o7. + +2004-07-15 Paolo Bonzini <bonzini@gnu.org> + + * lightning/i386/funcs.h: Implement jit_flush_code, + in order to support Fedora's exec-shield. + +2004-07-14 Paolo Bonzini <bonzini@gnu.org> + + * lightning/core-common.h: Add more jit_extr_*_* macros. + * lightning/doc/using.texi: Be clearer about the order + of arguments in jit_extr_*_*. + * lightning/doc/porting.texi: Add more jit_extr_*_* macros. + * lightning/i386/fp.h: Fix typo in jit_extr_i_d. + +2004-07-14 Paolo Bonzini <bonzini@gnu.org> + + * lightning/ppc/funcs.h: Adjust offset of LR into + stack frame if running under the Darwin ABI. + +2004-07-13 Paolo Bonzini <bonzini@gnu.org> + + * lightning/i386/fp.h: Rename jit_exti_d to jit_extr_i_d. + +2004-07-13 Paolo Bonzini <bonzini@gnu.org> + + * lightning/ppc/core.h: Fix thinko. 
+ + * lightning/i386/core.h: Fix jit_lti_ui. + * lightning/core-common.h: Add missing macros. + + * lightning/ppc/fp.h: Rename jit_neg_* to jit_negr_*. + * lightning/i386/fp.h: Rename jit_neg_* to jit_negr_*. + * lightning/sparc/fp.h: Rename jit_neg_* to jit_negr_*. + * lightning/fp-common.h: Rename jit_neg_* to jit_negr_*. + * doc/porting.texi: Add undocumented macros. + +2004-07-12 Paolo Bonzini <bonzini@gnu.org> + + * doc/porting.texi: Add missing macros. + +2004-07-12 Paolo Bonzini <bonzini@gnu.org> + + * lightning/ppc/funcs.h: Don't generate trampolines. + Separate prolog and epilog generation. + * lightning/ppc/core.h: Generate epilog explicitly. + Don't reserve r31 anymore. + * lightning/core-common.h: Remove call to jit_setup_code. + +2004-07-09 Paolo Bonzini <bonzini@gnu.org> + + * lightning/lightning.h.in: Avoid preprocessor warnings. + * lightning/lightning-inst.h: Likewise. + + * lightning/i386/core.h: Define JIT_R, JIT_R_NUM, JIT_V, + JIT_V_NUM. + * lightning/ppc/core.h: Likewise. + * lightning/sparc/core.h: Likewise. + * lightning/i386/fp.h: Define JIT_FPR, JIT_FPR_NUM. + * lightning/ppc/fp.h: Likewise. + * lightning/sparc/fp.h: Likewise. + * lightning/core-common.h: Define fixed register names. + * lightning/fp-common.h: Likewise for FP regs. + +2004-07-09 Paolo Bonzini <bonzini@gnu.org> + + * lightning/ppc/funcs.h: Fix location where return address + is stored. + * lightning/i386/asm.h: Add a trailing _ to opcodes without + any parameter. + * lightning/i386/core.h: Adjust for the above. + +2004-04-15 Paolo Bonzini <bonzini@gnu.org> + + * lightning/i386/fp.h: Change "and" to "_and" + to satisfy C++ compilers. + +2004-04-14 Paolo Bonzini <bonzini@gnu.org> + + * lightning/sparc/fp.h: Use memcpy to implement jit_movi. + * lightning/ppc/fp.h: Use memcpy to implement jit_movi. + Move floating-point opcodes... + * lightning/ppc/asm.h: ... here. + +2004-04-14 Paolo Bonzini <bonzini@gnu.org> + + * lightning/core-common.h: Add jit_finishr. 
+ * lightning/ppc/core.h: Add jit_callr and jit_finishr. + * lightning/i386/core.h: Add jit_callr. + * lightning/sparc/core.h: Add jit_callr. Fix typo. + +2004-04-14 Paolo Bonzini <bonzini@gnu.org> + + * lightning/i386/core.h: Fix pasto in jit_b*_ui. + +2004-03-30 Laurent Michel + + * lightning/ppc: Implement PowerPC floating point + (ChangeLog entry missing). + +2004-03-12 Paolo Bonzini <bonzini@gnu.org> + + * lightning/fp-common.h: Load/store macros are not the + same for floats and doubles anywhere, but jit_retval may be. + * lightning/i386/asm.h: Fix = mistaken for == in ESCrri. + * lightning/i386/core.h: Fix typo in jit_prepare_[fd]. + * lightning/i386/fp.h: Rewritten. + * tests/testfp.c: Add tests for unordered comparisons. + * tests/testfp.ok: Add results. + +2004-03-15 Paolo Bonzini <bonzini@gnu.org> + + Merge changes from Laurent Michel. + + * lightning/asm-common.h: Add _jit_I_noinc. + * lightning/core-common.h: Support jit_init, + jit_setup_code, jit_patch_at. Return patchable IP from + jit_movi_p. + * lightning/funcs-common.h: Provide defaults + for jit_setup_code, jit_start_pfx, jit_end_pfx + * lightning/i386/core.h: Add jit_patch_at, jit_patch_movi. + * lightning/ppc/core.h: Likewise. + * lightning/sparc/core.h: Likewise. + * lightning/ppc/asm.h: Fix generation of branch destination + displacements in _FB and _BB + * lightning/ppc/core.h: Generate trampolines in the user + area. + * lightning/ppc/funcs.h: Add a few casts. + * tests/bc.c: New testcase. + + * lightning/i386/asm.h: Wrap into #ifndef LIGHTNING_DEBUG. + * lightning/ppc/asm.h: Wrap into #ifndef LIGHTNING_DEBUG. + * lightning/sparc/asm.h: Wrap into #ifndef LIGHTNING_DEBUG. + + +2004-03-09 Paolo Bonzini <bonzini@gnu.org> + + * lightning/sparc/fp.h: Rewrite. Move macros for + FP code generation... + * lightning/sparc/asm.h: ... here. + * lightning/sparc/core.h: Rename jit_prepare to + jit_prepare_i, jit_retval to jit_retval_i. 
+ * lightning/ppc/core.h: Rename jit_prepare to + jit_prepare_i, jit_retval to jit_retval_i. + * lightning/i386/core.h: Rename jit_prepare to + jit_prepare_i, jit_retval to jit_retval_i. + * lightning/core-common.h: Provide backwards + compatible synonyms for the above. + * lightning/fp-common.h: Rewrite. + * lightning-inst.h: Include fp unconditionally. + * lightning.h.in: Include fp unconditionally. + * tests/Makefile.am: Enable fp tests. + * tests/fib.c: Use jit_retval_i. + * tests/fibit.c: Cast codeBuffer to char *. + * tests/funcfp.c: Use new fp macros. + * tests/printf.c: Use jit_retval_i. + * tests/rpnfp.c: Use new fp macros. + * tests/testfp.c: Use new fp macros. + +2004-03-02 Paolo Bonzini <bonzini@gnu.org> + + * lightning/i386/core.h: generate correct code when + doing lt/le/ge/etc. on ESI and EDI. Use MOVZX/MOVSX + where possible. + * lightning/i386/asm.h: Add macros for MOVZX/MOVSX. + Move macros for x87 here, and add many of them. + * lightning/i386/fp.h: Use new macros for x87. + +2004-02-06 Paolo Bonzini <bonzini@gnu.org> + + * lightning/i386/core.h: avoid generating MOV reg, reg. + * lightning/sparc/core.h: fix several bugs. + * lightning/ppc/core.h: fix several bugs. + * tests/rpn.c: rewritten. + +2004-01-08 Paolo Bonzini <bonzini@gnu.org> + + * tests/rpnfp.c: new example, suggested by Basile + Starynkevitch. + * tests/rpnfp.ok: new example. + +2003-12-12 Paolo Bonzini <bonzini@gnu.org> + + * tests/add.c: new test, suggested by Steve Dekorte. + * tests/add.c: new test. + +2003-11-14 Paolo Bonzini <bonzini@gnu.org> + John Redford <eirenik@hotmail.com> + + * lightning/asm-common.h: change the 'pc' field of _jit to + be a union of various data types, because ISO C99 doesn't + permit using ++ on a = cast. Change the incremented casts of + _jit.pc to be _jit.x.uc_pc, _jit.x.us_pc, etc. + * all files: change all non-cast instances of _jit.pc to be + _jit.x.pc. + * lightning/i386/core.h: remove casts from jit_might. 
+ +2003-05-25 Paolo Bonzini <bonzini@gnu.org> + + * lightning/i386/core.h: use JITSORRY in jit_replace + * lightning/asm-common.h: define JITSORRY + +2003-05-14 Paolo Bonzini <bonzini@gnu.org> + + * lightning/i386/core.h: fix missing comma in several + load/store macros. + * lightning/core-common.h: fix long/unsigned long/pointer + jit_pushr/jit_popr. + * lightning/ppc/funcs.h: correctly align stack pointer + +No changelogs for the assemblers (lightning directory) until 1.0 + +2003-03-27 Paolo Bonzini <bonzini@gnu.org> + + * tests/printf2.c: new test + +2001-05-03 Paolo Bonzini <bonzini@gnu.org> + + * tests/printf.c: made the message platform independent + +2001-01-19 Paolo Bonzini <bonzini@gnu.org> + + * configure.in: support cross-assembling + + * disass/bfd.h, disass/dis-asm.h, disass/dis-buf.c, + disass/i386-dis.c, disass/i386.h, disass/ppc-dis.c, + disass/ppc.h, disass/ppc-opc.c, disass/sparc-dis.c, + disass/sparc.h, disass/sparc-opc.c: new files, from GDB + + * disass/disass.c, disass/Makefile.am: new files + + * tests/fib.c, tests/fibit.c, tests/incr.c, tests/printf.c, + tests/rpn.c, tests/testfp.c, tests/Makefile.am: support + disassembling diff --git a/deps/lightening/NEWS b/deps/lightening/NEWS new file mode 100644 index 0000000..f56dd79 --- /dev/null +++ b/deps/lightening/NEWS @@ -0,0 +1,199 @@ +NEWS FROM 1.99 TO 1.99a + +o Lightning now builds and passes all test cases on AIX 7.1 powerpc, + HP-UX 11iv2 hppa, HP-UX 11iv3 ia64, Solaris 10 Sparc, Solaris 11 + x86_64, and Irix 6.5.30 mips (using n32 abi). + +NEWS FROM VERSION 1.3 TO 1.99 + +o The 1.99 version is a major lightning redesign and an + alpha version. + +o Except for some special power-user usage, the major + difference in the rework is that now function calls push + arguments from left to right, which is both more natural for + programmers, and also more natural to implement for architectures + that pass arguments in registers and have alignment constraints, + usually for 64 bit double arguments.
+ +o Add mips backend, implementing the o32 abi. + +o Added arm backend implementing all combinations of software float, + vfp, neon, arm and thumb instruction sets, softfp and hardfp abis, + armv5, armv6, and armv7. + +o Added sse2+ code generation for the 32 bit x86 backend. + +o Added sse3 and sse4.x optional code generation for the 64 bit + x86 backend, code generation based on detected cpu. + +o Reworked and added full lightning instruction set to ppc 32; + tested on ppc64 hardware and Darwin 32 operating system. + +o Added ppc64 backend, built and tested on Fedora ppc. + +o Reworked the sparc backend, built and tested on Debian sparc. + +o Added an ia64 backend, built and tested on Debian ia64. + +o Added an hppa backend, built and tested on Debian hppa. + +--- + +NEWS FROM VERSION 1.2 TO 1.3 + +o Initial support for x86-64 back-end (mostly untested). + +o lightning is more strict on casts from integer to pointer. + Be sure to use the _p variants when your immediates are + of pointer type. This was done to ease 64-bit cleanliness + tests. + +o Many bug fixes. + +o JIT_FPRET is used as JIT_RET to move return values. + jit_retval_[fd] is used to retrieve return values. + +o jit_pushr/jit_popr are deprecated; you need to #define + JIT_NEED_PUSH_POP prior to including lightning.h if you + want to use them. + +o Support for stack-allocated variables. Because of this, + backends defining JIT_FP should now rename it to JIT_AP. + JIT_FP is now a user-visible register used in ldxi/ldxr + to access stack-allocated variables. + + +--- + +NEWS FROM VERSION 1.1.2 TO 1.2 + +o Floating-point interface rewritten, uses a register file + architecture rather than a stack. + +o Many bug fixes. + +o jit_prepare and jit_retval are now jit_prepare_i and + jit_retval_i. + +o Support for Fedora Core 1's exec-shield feature. + +o PPC supports both SysV and Darwin ABIs. + +o More (and more complete) examples provided.
+ +--- + +NEWS FROM VERSION 1.1.1 TO 1.1.2 + +o This release fixes the bugs in PowerPC cache flushing and in + SPARC testing. + +--- + +NEWS FROM VERSION 1.1 TO 1.1.1 + +o Merge changes from Debian + +This version was released to have a distributable version of lightning +after the recent crack of the GNU FTP machines. It does not fix +outstanding bugs; I apologize for the inconvenience. + +--- + +NEWS FROM VERSION 1.0 TO 1.1 + +o Several bug fixes + +o improved infrastructure for embedding GNU lightning (lightningize + script) + +--- + +NEWS FROM VERSION 0.99 TO 1.0 + +o SPARC backend tested on GNU Smalltalk + + +--- + +NEWS FROM VERSION 0.98 TO 0.99 + +o Added floating point function support (thanks to Laurent Michel); + unfortunately this broke the PPC and SPARC floating point stuff + even more :-( + +--- + +NEWS FROM VERSION 0.97 to 0.98 + +o PPC backend tested on GNU Smalltalk + +o switched to autoconf 2.50 + +o new (much faster) PPC cache flushing code by John McIntosh + +--- + +NEWS FROM VERSION 0.96 to 0.97 + +o support for cross-assembling and for disassembling the code that the tests + generate + +o PPC microtests pass (tested directly by me), SPARC was said to work + +--- + +NEWS FROM VERSION 0.95 to 0.96 + +o fixed implementation of delay slots to be coherent with the manual + +--- + +NEWS FROM VERSION 0.94 to 0.95 + +o adc/sbc replaced with addc/addx/subc/subx to allow for more optimization + (inspired by the PPC instruction set). + +o A few fixes and far fewer warnings from the compiler + +o Automake-ized everything + +o i386 backend generates smaller code for bms/bmc/or/xor by using byte + or word versions if possible + +o Moved backends to separate directories + +--- + +NEWS FROM VERSION 0.93 to 0.94 + +o Manual builds as DVI file. + +--- + +NEWS FROM VERSION 0.92 to 0.93 + +o Floating-point front-end (began supporting PPC & SPARC). + +--- + +NEWS FROM VERSION 0.91 to 0.92 + +o Floating-point front-end (only x86 supported).
+ +--- + +NEWS FROM VERSION 0.9 to 0.91 + +o Carrying supported in addition/subtraction. + +o insn type changed to jit_insn. + +o Misc bug fixes. + +o Reentrancy supported. + +o SPARC run-time assembler rewritten. + +o The run-time assembler can be disabled for debugging purposes. diff --git a/deps/lightening/README.md b/deps/lightening/README.md new file mode 100644 index 0000000..515c3ee --- /dev/null +++ b/deps/lightening/README.md @@ -0,0 +1,57 @@ +# Lightening + +Lightening is a just-in-time code generation library derived from GNU +Lightning, adapted to the purposes of the GNU Guile project. + +## Use + +``` +gcc -flto -O2 -g -o lightening.o -c lightening/lightening.c +gcc -flto -O2 -g -o my-program lightening.o my-program.c +``` + +See the GNU Lightning manual for more on how to program against +Lightening (much of the details are the same). + +## What's the difference with GNU Lightning? + +This project is called Lightening because it's lighter-weight than GNU +Lightning. When you go to generate code at run-time with GNU Lightning, +what happens is that you build up a graph of nodes which GNU Lightning +"optimizes" before finally emitting machine code. These optimizations +can improve register allocation around call sites. However they are not +helpful from a Guile perspective, as they get in the way of register +allocation that we need to do; and they actually prevent access to all +the registers that we would like to have. + +Guile needs a simple, light-weight code generation library. The GNU +Lightning architecture-specific backends provide the bulk of this +functionality, and Lightening wraps it all in a lightweight API. + +## Supported targets + +Lightening can generate code for the x86-64, i686, ARMv7, and AArch64 +architectures. It supports the calling conventions of MS Windows, +GNU/Linux, and Mac OS. + +On i686, Lightening requires SSE support. 
On ARMv7, we require hardware +floating-point support (the VFP instructions), as well as the UDIV/SDIV +instructions. + +Lightening is automatically tested using GitLab's continuous integration +for the supported architectures, under GNU/Linux; for a list of +recent jobs, see [the CI +page](https://gitlab.com/wingo/lightening/-/jobs). + +## Future targets + +Lightening has some inherited code from GNU Lightning for MIPS, PPC64, +and s390. Patches to adapt this code to the Lightening code structure +are quite welcome. + +RISC-V support would be fun too. + +## Status + +Lightening has been used in GNU Guile since version 2.9.2 and seems to work +well. diff --git a/deps/lightening/THANKS b/deps/lightening/THANKS new file mode 100644 index 0000000..42bbfc6 --- /dev/null +++ b/deps/lightening/THANKS @@ -0,0 +1,19 @@ +Thanks to all the following people for their help in +improving GNU lightning: + +Paolo Bonzini <bonzini@gnu.org> +Eli Barzilay <eli@barzilay.org> +Ludovic Courtes <ludo@chbouib.org> +Matthew Flatt <mflatt@cs.utah.edu> +Laurent Michel <ldm@thorgal.homelinux.org> +Paulo Cesar Pereira de Andrade <pcpa@gnu.org> +Mike Spivey <mike@comlab.ox.ac.uk> +Basile Starynkevitch <basile@starynkevitch.net> +Sam Steingold <sds@gnu.org> +Jens Troeger <savage@light-speed.de> +Tom Tromey <tromey@redhat.com> +Trent Nelson <trent@snakebite.org> +Vitaly Magerya <vmagerya@gmail.com> +Brandon Invergo <brandon@gnu.org> +Holger Hans Peter Freyther <holger@moiji-mobile.com> +Jon Arintok <jon.arintok@gmail.com> diff --git a/deps/lightening/lightening.am b/deps/lightening/lightening.am new file mode 100644 index 0000000..2c9089e --- /dev/null +++ b/deps/lightening/lightening.am @@ -0,0 +1,58 @@ +# Copyright 2019 Free Software Foundation, Inc. +# +# This file is part of Lightening.
+# +# Lightening is free software; you can redistribute it and/or modify it +# under the terms of the GNU Lesser General Public License as published +# by the Free Software Foundation; either version 3, or (at your option) +# any later version. +# +# Lightening is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +# License for more details. +# + +lightening = $(srcdir)/lightening + +lightening_c_files = \ + $(lightening)/lightening/lightening.c + +lightening_extra_files = \ + $(lightening)/AUTHORS \ + $(lightening)/ChangeLog \ + $(lightening)/ChangeLog.lightning \ + $(lightening)/COPYING \ + $(lightening)/COPYING.DOC \ + $(lightening)/COPYING.LESSER \ + $(lightening)/lightening.am \ + $(lightening)/lightning.texi \ + $(lightening)/NEWS \ + $(lightening)/README.md \ + $(lightening)/THANKS \ + \ + $(lightening)/lightening.h \ + \ + $(lightening)/lightening/endian.h \ + \ + $(lightening)/lightening/aarch64.h \ + $(lightening)/lightening/arm.h \ + $(lightening)/lightening/mips.h \ + $(lightening)/lightening/ppc.h \ + $(lightening)/lightening/x86.h \ + \ + $(lightening)/lightening/aarch64.c \ + $(lightening)/lightening/aarch64-cpu.c \ + $(lightening)/lightening/aarch64-fpu.c \ + $(lightening)/lightening/arm.c \ + $(lightening)/lightening/arm-cpu.c \ + $(lightening)/lightening/arm-vfp.c \ + $(lightening)/lightening/mips.c \ + $(lightening)/lightening/mips-cpu.c \ + $(lightening)/lightening/mips-fpu.c \ + $(lightening)/lightening/ppc.c \ + $(lightening)/lightening/ppc-cpu.c \ + $(lightening)/lightening/ppc-fpu.c \ + $(lightening)/lightening/x86.c \ + $(lightening)/lightening/x86-cpu.c \ + $(lightening)/lightening/x86-sse.c diff --git a/deps/lightening/lightening.h b/deps/lightening/lightening.h new file mode 100644 index 0000000..309e350 --- /dev/null +++ b/deps/lightening/lightening.h @@ -0,0 +1,715 @@ +/* + * 
Copyright (C) 2012-2020 Free Software Foundation, Inc. + * + * This file is part of GNU lightning. + * + * GNU lightning is free software; you can redistribute it and/or modify it + * under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation; either version 3, or (at your option) + * any later version. + * + * GNU lightning is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public + * License for more details. + * + * Authors: + * Paulo Cesar Pereira de Andrade + * Andy Wingo + */ + +#ifndef _jit_h +#define _jit_h + +#include <unistd.h> +#include <stdlib.h> +#include <stdint.h> +#include <string.h> +#include <stddef.h> + +#include "lightening/endian.h" + +CHOOSE_32_64(typedef int32_t jit_word_t, + typedef int64_t jit_word_t); +CHOOSE_32_64(typedef uint32_t jit_uword_t, + typedef uint64_t jit_uword_t); +typedef float jit_float32_t; +typedef double jit_float64_t; +typedef void* jit_pointer_t; +typedef int jit_bool_t; + +typedef void* jit_addr_t; +typedef ptrdiff_t jit_off_t; +typedef intptr_t jit_imm_t; +typedef uintptr_t jit_uimm_t; + +typedef struct jit_gpr { uint8_t regno; } jit_gpr_t; +typedef struct jit_fpr { uint8_t regno; } jit_fpr_t; + +// Precondition: regno between 0 and 63, inclusive. 
+#define JIT_GPR(regno) ((jit_gpr_t) { regno }) +#define JIT_FPR(regno) ((jit_fpr_t) { regno }) + +static inline uint8_t jit_gpr_regno (jit_gpr_t reg) { return reg.regno; } +static inline uint8_t jit_fpr_regno (jit_fpr_t reg) { return reg.regno; } + +static inline jit_bool_t +jit_same_gprs (jit_gpr_t a, jit_gpr_t b) +{ + return jit_gpr_regno (a) == jit_gpr_regno (b); +} + +static inline jit_bool_t +jit_same_fprs (jit_fpr_t a, jit_fpr_t b) +{ + return jit_fpr_regno (a) == jit_fpr_regno (b); +} + +#if defined(__i386__) || defined(__x86_64__) +# include "lightening/x86.h" +#elif defined(__mips__) +# include "lightening/mips.h" +#elif defined(__arm__) +# include "lightening/arm.h" +#elif defined(__ppc__) || defined(__powerpc__) +# include "lightening/ppc.h" +#elif defined(__aarch64__) +# include "lightening/aarch64.h" +#elif defined(__s390__) || defined(__s390x__) +# include "lightening/s390.h" +#endif + +#ifndef JIT_EXTRA_SPACE +#define JIT_EXTRA_SPACE 0 +#endif + +#ifndef JIT_JMP_MAX_SIZE +#define JIT_JMP_MAX_SIZE sizeof(uint32_t) +#endif + +#ifndef JIT_LITERAL_MAX_SIZE +#define JIT_LITERAL_MAX_SIZE (sizeof(uintptr_t) * 2) +#endif + +#ifndef JIT_INST_MAX_SIZE +#define JIT_INST_MAX_SIZE sizeof(uint32_t) +#endif + +#ifndef JIT_CALL_STACK_ALIGN_WORD +#define JIT_CALL_STACK_ALIGN_WORD 1 +#endif + +enum jit_reloc_kind +{ + JIT_RELOC_ABSOLUTE, + JIT_RELOC_REL8, + JIT_RELOC_REL16, + JIT_RELOC_REL32, + JIT_RELOC_REL64, +#ifdef JIT_NEEDS_LITERAL_POOL + JIT_RELOC_JMP_WITH_VENEER, + JIT_RELOC_JCC_WITH_VENEER, + JIT_RELOC_LOAD_FROM_POOL, +#endif +#ifdef JIT_USE_IMMEDIATE_RELOC + JIT_RELOC_IMMEDIATE, +#endif + JIT_RELOC_MASK = 15, + JIT_RELOC_FLAG_0 = 16, +}; + +typedef struct jit_reloc +{ + uint8_t kind; + uint8_t inst_start_offset; + uint8_t pc_base_offset; + uint8_t rsh; + uint32_t offset; +} jit_reloc_t; + +#if defined(__GNUC__) && (__GNUC__ >= 4) +# define JIT_API extern __attribute__ ((__visibility__("hidden"))) +#else +# define JIT_API extern +#endif + +typedef struct 
jit_state jit_state_t; + +enum jit_operand_abi +{ + JIT_OPERAND_ABI_UINT8, + JIT_OPERAND_ABI_INT8, + JIT_OPERAND_ABI_UINT16, + JIT_OPERAND_ABI_INT16, + JIT_OPERAND_ABI_UINT32, + JIT_OPERAND_ABI_INT32, + JIT_OPERAND_ABI_UINT64, + JIT_OPERAND_ABI_INT64, + JIT_OPERAND_ABI_POINTER, + JIT_OPERAND_ABI_FLOAT, + JIT_OPERAND_ABI_DOUBLE, + JIT_OPERAND_ABI_WORD = CHOOSE_32_64(JIT_OPERAND_ABI_INT32, + JIT_OPERAND_ABI_INT64) +}; + +enum jit_operand_kind +{ + JIT_OPERAND_KIND_IMM, + JIT_OPERAND_KIND_GPR, + JIT_OPERAND_KIND_FPR, + JIT_OPERAND_KIND_MEM, +#ifdef JIT_PASS_DOUBLES_IN_GPR_PAIRS + JIT_OPERAND_KIND_GPR_PAIR, +#endif +}; + +typedef struct jit_operand +{ + enum jit_operand_abi abi; + enum jit_operand_kind kind; + union + { + intptr_t imm; + struct { jit_gpr_t gpr; ptrdiff_t addend; } gpr; + struct { jit_fpr_t fpr; +#if JIT_PASS_FLOATS_IN_GPRS + jit_gpr_t gpr; +#endif + } fpr; + struct { jit_gpr_t base; ptrdiff_t offset; ptrdiff_t addend; } mem; +#if JIT_PASS_DOUBLES_IN_GPR_PAIRS + struct { jit_gpr_t l; jit_gpr_t h; } gpr_pair; +#endif + } loc; +} jit_operand_t; + +static inline jit_operand_t +jit_operand_imm (enum jit_operand_abi abi, jit_imm_t imm) +{ + return (jit_operand_t){ abi, JIT_OPERAND_KIND_IMM, { .imm = imm } }; +} + +static inline jit_operand_t +jit_operand_gpr_with_addend (enum jit_operand_abi abi, jit_gpr_t gpr, + ptrdiff_t addend) +{ + return (jit_operand_t){ abi, JIT_OPERAND_KIND_GPR, + { .gpr = { gpr, addend } } }; +} + +static inline jit_operand_t +jit_operand_gpr (enum jit_operand_abi abi, jit_gpr_t gpr) +{ + return jit_operand_gpr_with_addend (abi, gpr, 0); +} + +static inline jit_operand_t +jit_operand_fpr (enum jit_operand_abi abi, jit_fpr_t fpr) +{ + return (jit_operand_t){ abi, JIT_OPERAND_KIND_FPR, { .fpr = { fpr } } }; +} + +static inline jit_operand_t +jit_operand_mem_with_addend (enum jit_operand_abi abi, jit_gpr_t base, + ptrdiff_t offset, ptrdiff_t addend) +{ + return (jit_operand_t){ abi, JIT_OPERAND_KIND_MEM, + { .mem = { base, offset, 
addend } } }; +} + +static inline jit_operand_t +jit_operand_mem (enum jit_operand_abi abi, jit_gpr_t base, ptrdiff_t offset) +{ + return jit_operand_mem_with_addend (abi, base, offset, 0); +} + +#ifdef JIT_PASS_DOUBLES_IN_GPR_PAIRS +static inline jit_operand_t +jit_operand_gpr_pair(enum jit_operand_abi abi, jit_gpr_t l, jit_gpr_t h) +{ + return (jit_operand_t){abi, JIT_OPERAND_KIND_GPR_PAIR, + { .gpr_pair = { l, h } } }; +} +#endif + +static inline jit_operand_t +jit_operand_addi (jit_operand_t op, ptrdiff_t addend) +{ + switch (op.kind) { + case JIT_OPERAND_KIND_GPR: + return jit_operand_gpr_with_addend (op.abi, op.loc.gpr.gpr, + op.loc.gpr.addend + addend); + case JIT_OPERAND_KIND_MEM: + return jit_operand_mem_with_addend (op.abi, op.loc.mem.base, + op.loc.mem.offset, + op.loc.mem.addend + addend); + default: + abort (); + } +} + +JIT_API jit_bool_t init_jit(void); + +JIT_API jit_state_t *jit_new_state(void* (*alloc_fn)(size_t), + void (*free_fn)(void*)); +JIT_API void jit_destroy_state(jit_state_t*); + +JIT_API void jit_begin(jit_state_t*, uint8_t*, size_t); +JIT_API jit_bool_t jit_has_overflow(jit_state_t*); +JIT_API void jit_reset(jit_state_t*); +JIT_API void* jit_end(jit_state_t*, size_t*); + +JIT_API void jit_align(jit_state_t*, unsigned); + +JIT_API jit_pointer_t jit_address(jit_state_t*); +typedef void (*jit_function_pointer_t)(); +JIT_API jit_function_pointer_t jit_address_to_function_pointer(jit_pointer_t); +JIT_API void jit_patch_here(jit_state_t*, jit_reloc_t); +JIT_API void jit_patch_there(jit_state_t*, jit_reloc_t, jit_pointer_t); + +JIT_API void jit_move_operands (jit_state_t *_jit, jit_operand_t *dst, + jit_operand_t *src, size_t argc); + +JIT_API size_t jit_align_stack (jit_state_t *_jit, size_t expand); +JIT_API void jit_shrink_stack (jit_state_t *_jit, size_t diff); + +JIT_API size_t jit_enter_jit_abi (jit_state_t *_jit, + size_t v, size_t vf, size_t frame_size); +JIT_API void jit_leave_jit_abi (jit_state_t *_jit, + size_t v, size_t vf, size_t 
frame_size); + +/* Note that all functions that take jit_operand_t args[] use the args + as scratch space while shuffling values into position. */ +JIT_API void jit_calli(jit_state_t *, jit_pointer_t f, + size_t argc, jit_operand_t args[]); +JIT_API void jit_callr(jit_state_t *, jit_gpr_t f, + size_t argc, jit_operand_t args[]); +JIT_API void jit_locate_args(jit_state_t*, size_t argc, jit_operand_t args[]); +JIT_API void jit_load_args(jit_state_t*, size_t argc, jit_operand_t dst[]); + +static inline void +jit_calli_0(jit_state_t *_jit, jit_pointer_t f) +{ + return jit_calli(_jit, f, 0, NULL); +} + +static inline void +jit_calli_1(jit_state_t *_jit, jit_pointer_t f, jit_operand_t arg) +{ + jit_operand_t args[] = { arg }; + return jit_calli(_jit, f, 1, args); +} + +static inline void +jit_calli_2(jit_state_t *_jit, jit_pointer_t f, jit_operand_t a, + jit_operand_t b) +{ + jit_operand_t args[] = { a, b }; + return jit_calli(_jit, f, 2, args); +} + +static inline void +jit_calli_3(jit_state_t *_jit, jit_pointer_t f, jit_operand_t a, + jit_operand_t b, jit_operand_t c) +{ + jit_operand_t args[] = { a, b, c }; + return jit_calli(_jit, f, 3, args); +} + +static inline void +jit_callr_0(jit_state_t *_jit, jit_gpr_t f) +{ + return jit_callr(_jit, f, 0, NULL); +} + +static inline void +jit_callr_1(jit_state_t *_jit, jit_gpr_t f, jit_operand_t arg) +{ + jit_operand_t args[] = { arg }; + return jit_callr(_jit, f, 1, args); +} + +static inline void +jit_callr_2(jit_state_t *_jit, jit_gpr_t f, jit_operand_t a, jit_operand_t b) +{ + jit_operand_t args[] = { a, b }; + return jit_callr(_jit, f, 2, args); +} + +static inline void +jit_callr_3(jit_state_t *_jit, jit_gpr_t f, jit_operand_t a, jit_operand_t b, + jit_operand_t c) +{ + jit_operand_t args[] = { a, b, c }; + return jit_callr(_jit, f, 3, args); +} + +static inline void +jit_load_args_1(jit_state_t *_jit, jit_operand_t a) +{ + jit_operand_t args[] = { a }; + return jit_load_args(_jit, 1, args); +} + +static inline void 
+jit_load_args_2(jit_state_t *_jit, jit_operand_t a, jit_operand_t b) +{ + jit_operand_t args[] = { a, b }; + return jit_load_args(_jit, 2, args); +} + +static inline void +jit_load_args_3(jit_state_t *_jit, jit_operand_t a, jit_operand_t b, + jit_operand_t c) +{ + jit_operand_t args[] = { a, b, c }; + return jit_load_args(_jit, 3, args); +} + +#define JIT_PROTO_0(stem, ret) \ + ret jit_##stem (jit_state_t* _jit) +#define JIT_PROTO_1(stem, ret, ta) \ + ret jit_##stem (jit_state_t* _jit, jit_##ta##_t a) +#define JIT_PROTO_2(stem, ret, ta, tb) \ + ret jit_##stem (jit_state_t* _jit, jit_##ta##_t a, jit_##tb##_t b) +#define JIT_PROTO_3(stem, ret, ta, tb, tc) \ + ret jit_##stem (jit_state_t* _jit, jit_##ta##_t a, jit_##tb##_t b, jit_##tc##_t c) +#define JIT_PROTO_4(stem, ret, ta, tb, tc, td) \ + ret jit_##stem (jit_state_t* _jit, jit_##ta##_t a, jit_##tb##_t b, jit_##tc##_t c, jit_##td##_t d) + +#define JIT_PROTO_RFF__(stem) JIT_PROTO_2(stem, jit_reloc_t, fpr, fpr) +#define JIT_PROTO_RGG__(stem) JIT_PROTO_2(stem, jit_reloc_t, gpr, gpr) +#define JIT_PROTO_RG___(stem) JIT_PROTO_1(stem, jit_reloc_t, gpr) +#define JIT_PROTO_RGi__(stem) JIT_PROTO_2(stem, jit_reloc_t, gpr, imm) +#define JIT_PROTO_RGu__(stem) JIT_PROTO_2(stem, jit_reloc_t, gpr, uimm) +#define JIT_PROTO_R____(stem) JIT_PROTO_0(stem, jit_reloc_t) +#define JIT_PROTO__FFF_(stem) JIT_PROTO_3(stem, void, fpr, fpr, fpr) +#define JIT_PROTO__FF__(stem) JIT_PROTO_2(stem, void, fpr, fpr) +#define JIT_PROTO__FGG_(stem) JIT_PROTO_3(stem, void, fpr, gpr, gpr) +#define JIT_PROTO__FG__(stem) JIT_PROTO_2(stem, void, fpr, gpr) +#define JIT_PROTO__FGo_(stem) JIT_PROTO_3(stem, void, fpr, gpr, off) +#define JIT_PROTO__F___(stem) JIT_PROTO_1(stem, void, fpr) +#define JIT_PROTO__Fd__(stem) JIT_PROTO_2(stem, void, fpr, float64) +#define JIT_PROTO__Ff__(stem) JIT_PROTO_2(stem, void, fpr, float32) +#define JIT_PROTO__Fp__(stem) JIT_PROTO_2(stem, void, fpr, pointer) +#define JIT_PROTO__GF__(stem) JIT_PROTO_2(stem, void, gpr, fpr) 
+#define JIT_PROTO__GGF_(stem) JIT_PROTO_3(stem, void, gpr, gpr, fpr) +#define JIT_PROTO__GGGG(stem) JIT_PROTO_4(stem, void, gpr, gpr, gpr, gpr) +#define JIT_PROTO__GGG_(stem) JIT_PROTO_3(stem, void, gpr, gpr, gpr) +#define JIT_PROTO__GGGi(stem) JIT_PROTO_4(stem, void, gpr, gpr, gpr, imm) +#define JIT_PROTO__GGGu(stem) JIT_PROTO_4(stem, void, gpr, gpr, gpr, uimm) +#define JIT_PROTO__GG__(stem) JIT_PROTO_2(stem, void, gpr, gpr) +#define JIT_PROTO__GGi_(stem) JIT_PROTO_3(stem, void, gpr, gpr, imm) +#define JIT_PROTO__GGo_(stem) JIT_PROTO_3(stem, void, gpr, gpr, off) +#define JIT_PROTO__GGu_(stem) JIT_PROTO_3(stem, void, gpr, gpr, uimm) +#define JIT_PROTO__G___(stem) JIT_PROTO_1(stem, void, gpr) +#define JIT_PROTO__Gi__(stem) JIT_PROTO_2(stem, void, gpr, imm) +#define JIT_PROTO__Gp__(stem) JIT_PROTO_2(stem, void, gpr, pointer) +#define JIT_PROTO______(stem) JIT_PROTO_0(stem, void) +#define JIT_PROTO__i___(stem) JIT_PROTO_1(stem, void, imm) +#define JIT_PROTO__oGF_(stem) JIT_PROTO_3(stem, void, off, gpr, fpr) +#define JIT_PROTO__oGG_(stem) JIT_PROTO_3(stem, void, off, gpr, gpr) +#define JIT_PROTO__pF__(stem) JIT_PROTO_2(stem, void, pointer, fpr) +#define JIT_PROTO__pG__(stem) JIT_PROTO_2(stem, void, pointer, gpr) +#define JIT_PROTO__p___(stem) JIT_PROTO_1(stem, void, pointer) + +#define FOR_EACH_INSTRUCTION(M) \ + M(_GGG_, addr) \ + M(_FFF_, addr_f) \ + M(_FFF_, addr_d) \ + M(_GGi_, addi) \ + M(_GGG_, addcr) \ + M(_GGi_, addci) \ + M(_GGG_, addxr) \ + M(_GGi_, addxi) \ + M(_GGG_, subr) \ + M(_FFF_, subr_f) \ + M(_FFF_, subr_d) \ + M(_GGi_, subi) \ + M(_GGG_, subcr) \ + M(_GGi_, subci) \ + M(_GGG_, subxr) \ + M(_GGi_, subxi) \ + M(_GGG_, mulr) \ + M(_FFF_, mulr_f) \ + M(_FFF_, mulr_d) \ + M(_GGi_, muli) \ + M(_GGGG, qmulr) \ + M(_GGGi, qmuli) \ + M(_GGGG, qmulr_u) \ + M(_GGGu, qmuli_u) \ + M(_GGG_, divr) \ + M(_FFF_, divr_f) \ + M(_FFF_, divr_d) \ + M(_GGi_, divi) \ + M(_GGG_, divr_u) \ + M(_GGu_, divi_u) \ + M(_GGGG, qdivr) \ + M(_GGGi, qdivi) \ + M(_GGGG, qdivr_u) \ + 
M(_GGGu, qdivi_u) \ + M(_GGG_, remr) \ + M(_GGi_, remi) \ + M(_GGG_, remr_u) \ + M(_GGu_, remi_u) \ + \ + M(_GGG_, andr) \ + M(_GGu_, andi) \ + M(_GGG_, orr) \ + M(_GGu_, ori) \ + M(_GGG_, xorr) \ + M(_GGu_, xori) \ + \ + M(_GGG_, lshr) \ + M(_GGu_, lshi) \ + M(_GGG_, rshr) \ + M(_GGu_, rshi) \ + M(_GGG_, rshr_u) \ + M(_GGu_, rshi_u) \ + \ + M(_GG__, negr) \ + M(_GG__, comr) \ + \ + M(_GG__, movr) \ + M(_Gi__, movi) \ + M(RG___, mov_addr) \ + M(_GG__, extr_c) \ + M(_GG__, extr_uc) \ + M(_GG__, extr_s) \ + M(_GG__, extr_us) \ + WHEN_64(M(_GG__, extr_i)) \ + WHEN_64(M(_GG__, extr_ui)) \ + \ + M(_GG__, bswapr_us) \ + M(_GG__, bswapr_ui) \ + WHEN_64(M(_GG__, bswapr_ul)) \ + \ + M(_GG__, ldr_c) \ + M(_Gp__, ldi_c) \ + M(_GG__, ldr_uc) \ + M(_Gp__, ldi_uc) \ + M(_GG__, ldr_s) \ + M(_Gp__, ldi_s) \ + M(_GG__, ldr_us) \ + M(_Gp__, ldi_us) \ + M(_GG__, ldr_i) \ + M(_Gp__, ldi_i) \ + WHEN_64(M(_GG__, ldr_ui)) \ + WHEN_64(M(_Gp__, ldi_ui)) \ + WHEN_64(M(_GG__, ldr_l)) \ + WHEN_64(M(_Gp__, ldi_l)) \ + M(_FG__, ldr_f) \ + M(_Fp__, ldi_f) \ + M(_FG__, ldr_d) \ + M(_Fp__, ldi_d) \ + \ + M(_GGG_, ldxr_c) \ + M(_GGo_, ldxi_c) \ + M(_GGG_, ldxr_uc) \ + M(_GGo_, ldxi_uc) \ + M(_GGG_, ldxr_s) \ + M(_GGo_, ldxi_s) \ + M(_GGG_, ldxr_us) \ + M(_GGo_, ldxi_us) \ + M(_GGG_, ldxr_i) \ + M(_GGo_, ldxi_i) \ + WHEN_64(M(_GGG_, ldxr_ui)) \ + WHEN_64(M(_GGo_, ldxi_ui)) \ + WHEN_64(M(_GGG_, ldxr_l)) \ + WHEN_64(M(_GGo_, ldxi_l)) \ + M(_FGG_, ldxr_f) \ + M(_FGo_, ldxi_f) \ + M(_FGG_, ldxr_d) \ + M(_FGo_, ldxi_d) \ + \ + M(_GG__, ldr_atomic) \ + M(_GG__, str_atomic) \ + M(_GGG_, swap_atomic) \ + M(_GGGG, cas_atomic) \ + \ + M(_GG__, str_c) \ + M(_pG__, sti_c) \ + M(_GG__, str_s) \ + M(_pG__, sti_s) \ + M(_GG__, str_i) \ + M(_pG__, sti_i) \ + WHEN_64(M(_GG__, str_l)) \ + WHEN_64(M(_pG__, sti_l)) \ + M(_GF__, str_f) \ + M(_pF__, sti_f) \ + M(_GF__, str_d) \ + M(_pF__, sti_d) \ + \ + M(_GGG_, stxr_c) \ + M(_oGG_, stxi_c) \ + M(_GGG_, stxr_s) \ + M(_oGG_, stxi_s) \ + M(_GGG_, stxr_i) \ + M(_oGG_, 
stxi_i) \ + WHEN_64(M(_GGG_, stxr_l)) \ + WHEN_64(M(_oGG_, stxi_l)) \ + M(_GGF_, stxr_f) \ + M(_oGF_, stxi_f) \ + M(_GGF_, stxr_d) \ + M(_oGF_, stxi_d) \ + \ + M(RGG__, bltr) \ + M(RFF__, bltr_f) \ + M(RFF__, bltr_d) \ + M(RGi__, blti) \ + M(RGG__, bltr_u) \ + M(RGu__, blti_u) \ + M(RGG__, bler) \ + M(RFF__, bler_f) \ + M(RFF__, bler_d) \ + M(RGi__, blei) \ + M(RGG__, bler_u) \ + M(RGu__, blei_u) \ + M(RGG__, beqr) \ + M(RFF__, beqr_f) \ + M(RFF__, beqr_d) \ + M(RGi__, beqi) \ + M(RGG__, bger) \ + M(RFF__, bger_f) \ + M(RFF__, bger_d) \ + M(RGi__, bgei) \ + M(RGG__, bger_u) \ + M(RGu__, bgei_u) \ + M(RGG__, bgtr) \ + M(RFF__, bgtr_f) \ + M(RFF__, bgtr_d) \ + M(RGi__, bgti) \ + M(RGG__, bgtr_u) \ + M(RGu__, bgti_u) \ + M(RGG__, bner) \ + M(RFF__, bner_f) \ + M(RFF__, bner_d) \ + M(RGi__, bnei) \ + \ + M(RFF__, bunltr_f) \ + M(RFF__, bunltr_d) \ + M(RFF__, bunler_f) \ + M(RFF__, bunler_d) \ + M(RFF__, buneqr_f) \ + M(RFF__, buneqr_d) \ + M(RFF__, bunger_f) \ + M(RFF__, bunger_d) \ + M(RFF__, bungtr_f) \ + M(RFF__, bungtr_d) \ + M(RFF__, bltgtr_f) \ + M(RFF__, bltgtr_d) \ + M(RFF__, bordr_f) \ + M(RFF__, bordr_d) \ + M(RFF__, bunordr_f) \ + M(RFF__, bunordr_d) \ + \ + M(RGG__, bmsr) \ + M(RGu__, bmsi) \ + M(RGG__, bmcr) \ + M(RGu__, bmci) \ + \ + M(RGG__, boaddr) \ + M(RGi__, boaddi) \ + M(RGG__, boaddr_u) \ + M(RGu__, boaddi_u) \ + M(RGG__, bxaddr) \ + M(RGi__, bxaddi) \ + M(RGG__, bxaddr_u) \ + M(RGu__, bxaddi_u) \ + M(RGG__, bosubr) \ + M(RGi__, bosubi) \ + M(RGG__, bosubr_u) \ + M(RGu__, bosubi_u) \ + M(RGG__, bxsubr) \ + M(RGi__, bxsubi) \ + M(RGG__, bxsubr_u) \ + M(RGu__, bxsubi_u) \ + \ + M(_G___, jmpr) \ + M(_p___, jmpi) \ + M(R____, jmp) \ + \ + M(_p___, jmpi_with_link) \ + M(_____, pop_link_register) \ + M(_____, push_link_register) \ + \ + M(_____, ret) \ + M(_G___, retr) \ + M(_F___, retr_f) \ + M(_F___, retr_d) \ + M(_i___, reti) \ + M(_G___, retval_c) \ + M(_G___, retval_uc) \ + M(_G___, retval_s) \ + M(_G___, retval_us) \ + M(_G___, retval_i) \ + 
WHEN_64(M(_G___, retval_ui)) \ + WHEN_64(M(_G___, retval_l)) \ + M(_F___, retval_f) \ + M(_F___, retval_d) \ + \ + M(_____, breakpoint) \ + \ + M(_FF__, negr_f) \ + M(_FF__, negr_d) \ + M(_FF__, absr_f) \ + M(_FF__, absr_d) \ + M(_FF__, sqrtr_f) \ + M(_FF__, sqrtr_d) \ + \ + M(_GF__, truncr_f_i) \ + M(_FG__, extr_f) \ + M(_FG__, extr_d) \ + M(_FF__, extr_d_f) \ + M(_FF__, extr_f_d) \ + M(_FF__, movr_f) \ + M(_FF__, movr_d) \ + M(_Ff__, movi_f) \ + M(_Fd__, movi_d) \ + M(_GF__, truncr_d_i) \ + WHEN_64(M(_GF__, truncr_f_l)) \ + WHEN_64(M(_GF__, truncr_d_l)) \ + /* EOL */ + +#define DECLARE_INSTRUCTION(kind, stem) JIT_API JIT_PROTO_##kind(stem); +FOR_EACH_INSTRUCTION(DECLARE_INSTRUCTION) +#undef DECLARE_INSTRUCTION + +#if __WORDSIZE == 32 +# define jit_ldr(j,u,v) jit_ldr_i(j,u,v) +# define jit_ldi(j,u,v) jit_ldi_i(j,u,v) +# define jit_ldxr(j,u,v,w) jit_ldxr_i(j,u,v,w) +# define jit_ldxi(j,u,v,w) jit_ldxi_i(j,u,v,w) +# define jit_str(j,u,v) jit_str_i(j,u,v) +# define jit_sti(j,u,v) jit_sti_i(j,u,v) +# define jit_stxr(j,u,v,w) jit_stxr_i(j,u,v,w) +# define jit_stxi(j,u,v,w) jit_stxi_i(j,u,v,w) +# define jit_retval(j,u) jit_retval_i(j,u) +# define jit_bswapr(j,u,v) jit_bswapr_ui(j,u,v) +# define jit_truncr_d(j,u,v) jit_truncr_d_i(j,u,v) +# define jit_truncr_f(j,u,v) jit_truncr_f_i(j,u,v) +#else +# define jit_ldr(j,u,v) jit_ldr_l(j,u,v) +# define jit_ldi(j,u,v) jit_ldi_l(j,u,v) +# define jit_ldxr(j,u,v,w) jit_ldxr_l(j,u,v,w) +# define jit_ldxi(j,u,v,w) jit_ldxi_l(j,u,v,w) +# define jit_str(j,u,v) jit_str_l(j,u,v) +# define jit_sti(j,u,v) jit_sti_l(j,u,v) +# define jit_stxr(j,u,v,w) jit_stxr_l(j,u,v,w) +# define jit_stxi(j,u,v,w) jit_stxi_l(j,u,v,w) +# define jit_retval(j,u) jit_retval_l(j,u) +# define jit_bswapr(j,u,v) jit_bswapr_ul(j,u,v) +# define jit_truncr_d(j,u,v) jit_truncr_d_l(j,u,v) +# define jit_truncr_f(j,u,v) jit_truncr_f_l(j,u,v) +#endif + +void jit_begin_data(jit_state_t *, size_t max_size_or_zero); +void jit_end_data(jit_state_t *); +void 
jit_emit_u8(jit_state_t *, uint8_t); +void jit_emit_u16(jit_state_t *, uint16_t); +void jit_emit_u32(jit_state_t *, uint32_t); +void jit_emit_u64(jit_state_t *, uint64_t); +void jit_emit_ptr(jit_state_t *, void *); +jit_reloc_t jit_emit_addr(jit_state_t *); + +#endif /* _jit_h */ diff --git a/deps/lightening/lightening/aarch64-cpu.c b/deps/lightening/lightening/aarch64-cpu.c new file mode 100644 index 0000000..2094e35 --- /dev/null +++ b/deps/lightening/lightening/aarch64-cpu.c @@ -0,0 +1,2584 @@ +/* + * Copyright (C) 2013-2017, 2019 Free Software Foundation, Inc. + * + * This file is part of GNU lightning. + * + * GNU lightning is free software; you can redistribute it and/or modify it + * under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation; either version 3, or (at your option) + * any later version. + * + * GNU lightning is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public + * License for more details. 
+ * + * Authors: + * Paulo Cesar Pereira de Andrade + */ + +#if __BYTE_ORDER != __LITTLE_ENDIAN +#error AArch64 requires little-endian host +#endif + +static int32_t +logical_immediate(jit_word_t imm) +{ + /* There are 5334 possible immediate values, but to avoid the + * need of either too complex code or large lookup tables, + * only check for (simply) encodable common/small values */ + switch (imm) { + case -16: return 0xf3b; + case -15: return 0xf3c; + case -13: return 0xf3d; + case -9: return 0xf3e; + case -8: return 0xf7c; + case -7: return 0xf7d; + case -5: return 0xf7e; + case -4: return 0xfbd; + case -3: return 0xfbe; + case -2: return 0xffe; + case 1: return 0x000; + case 2: return 0xfc0; + case 3: return 0x001; + case 4: return 0xf80; + case 6: return 0xfc1; + case 7: return 0x002; + case 8: return 0xf40; + case 12: return 0xf81; + case 14: return 0xfc2; + case 15: return 0x003; + case 16: return 0xf00; + default: return -1; + } +} + +static void +oxxx(jit_state_t *_jit, int32_t Op, int32_t Rd, int32_t Rn, int32_t Rm) +{ + uint32_t inst = Op; + inst = write_Rd_bitfield(inst, Rd); + inst = write_Rn_bitfield(inst, Rn); + inst = write_Rm_bitfield(inst, Rm); + emit_u32_with_pool(_jit, inst); +} + +static void +oxxi(jit_state_t *_jit, int32_t Op, int32_t Rd, int32_t Rn, int32_t Imm12) +{ + uint32_t inst = Op; + inst = write_Rd_bitfield(inst, Rd); + inst = write_Rn_bitfield(inst, Rn); + inst = write_imm12_bitfield(inst, Imm12); + emit_u32_with_pool(_jit, inst); +} + +static void +oxx9(jit_state_t *_jit, int32_t Op, int32_t Rd, int32_t Rn, int32_t Simm9) +{ + uint32_t inst = Op; + inst = write_Rd_bitfield(inst, Rd); + inst = write_Rn_bitfield(inst, Rn); + inst = write_simm9_bitfield(inst, Simm9); + emit_u32_with_pool(_jit, inst); +} + +static uint32_t +encode_ox19(int32_t Op, int32_t Rd) +{ + uint32_t inst = Op; + inst = write_Rd_bitfield(inst, Rd); + return inst; +} + +static uint32_t +encode_oc19(int32_t Op, int32_t Cc) +{ + uint32_t inst = Op; + inst = 
write_cond2_bitfield(inst, Cc); + return inst; +} + +static uint32_t +encode_o26(int32_t Op) +{ + uint32_t inst = Op; + return inst; +} + +static void +ox_x(jit_state_t *_jit, int32_t Op, int32_t Rd, int32_t Rm) +{ + uint32_t inst = Op; + inst = write_Rd_bitfield(inst, Rd); + inst = write_Rm_bitfield(inst, Rm); + emit_u32_with_pool(_jit, inst); +} + +static void +o_xx(jit_state_t *_jit, int32_t Op, int32_t Rd, int32_t Rn) +{ + uint32_t inst = Op; + inst = write_Rd_bitfield(inst, Rd); + inst = write_Rn_bitfield(inst, Rn); + emit_u32_with_pool(_jit, inst); +} + +static void +oxx_(jit_state_t *_jit, int32_t Op, int32_t Rn, int32_t Rm) +{ + uint32_t inst = Op; + inst = write_Rn_bitfield(inst, Rn); + inst = write_Rm_bitfield(inst, Rm); + emit_u32_with_pool(_jit, inst); +} + +static void +o_x_(jit_state_t *_jit, int32_t Op, int32_t Rn) +{ + uint32_t inst = Op; + inst = write_Rn_bitfield(inst, Rn); + emit_u32_with_pool(_jit, inst); +} + +static void +ox_h(jit_state_t *_jit, int32_t Op, int32_t Rd, int32_t Imm16) +{ + uint32_t inst = Op; + inst = write_Rd_bitfield(inst, Rd); + inst = write_imm16_bitfield(inst, Imm16); + emit_u32_with_pool(_jit, inst); +} + +static void +oxxrs(jit_state_t *_jit, int32_t Op, + int32_t Rd, int32_t Rn, int32_t R, int32_t S) +{ + uint32_t inst = Op; + inst = write_Rd_bitfield(inst, Rd); + inst = write_Rn_bitfield(inst, Rn); + inst = write_immr_bitfield(inst, R); + inst = write_imms_bitfield(inst, S); + emit_u32_with_pool(_jit, inst); +} + +#define XZR_REGNO 0x1f +#define WZR_REGNO XZR_REGNO +#define LSL_12 0x00400000 +#define MOVI_LSL_16 0x00200000 +#define MOVI_LSL_32 0x00400000 +#define MOVI_LSL_48 0x00600000 +#define XS 0x80000000 /* Wn -> Xn */ +#define BCC_EQ 0x0 +#define BCC_NE 0x1 +#define BCC_CS 0x2 +#define BCC_HS BCC_CS +#define BCC_CC 0x3 +#define BCC_LO BCC_CC +#define BCC_MI 0x4 +#define BCC_PL 0x5 +#define BCC_VS 0x6 +#define BCC_VC 0x7 +#define BCC_HI 0x8 +#define BCC_LS 0x9 +#define BCC_GE 0xa +#define BCC_LT 0xb +#define BCC_GT 
0xc +#define BCC_LE 0xd +#define BCC_AL 0xe +#define BCC_NV 0xf +/* adapted and cut down to only tested and required by lightening, + * from data in binutils/aarch64-tbl.h */ +#define A64_ADCS 0x3a000000 +#define A64_SBCS 0x7a000000 +#define A64_ADDI 0x11000000 +#define A64_ADDSI 0xb1000000 +#define A64_SUBI 0x51000000 +#define A64_SUBSI 0x71000000 +#define A64_ADD 0x0b000000 +#define A64_ADDS 0x2b000000 +#define A64_SUB 0x4b000000 +#define A64_NEG 0x4b0003e0 +#define A64_SUBS 0x6b000000 +#define A64_CMP 0x6b00001f +#define A64_SBFM 0x93400000 +#define A64_UBFM 0x53400000 +#define A64_UBFX 0x53000000 +#define A64_B 0x14000000 +#define A64_BL 0x94000000 +#define A64_BR 0xd61f0000 +#define A64_BLR 0xd63f0000 +#define A64_RET 0xd65f0000 +#define A64_CBZ 0x34000000 +#define A64_CBNZ 0x35000000 +#define A64_B_C 0x54000000 +#define A64_REV 0xdac00c00 +#define A64_UDIV 0x1ac00800 +#define A64_SDIV 0x1ac00c00 +#define A64_LSL 0x1ac02000 +#define A64_LSR 0x1ac02400 +#define A64_ASR 0x1ac02800 +#define A64_MUL 0x1b007c00 +#define A64_SMULH 0x9b407c00 +#define A64_UMULH 0x9bc07c00 +#define A64_LDAR 0xc8dffc00 +#define A64_STLR 0xc89ffc00 +#define A64_LDAXR 0xc85ffc00 +#define A64_STLXR 0xc800fc00 +#define A64_STRBI 0x39000000 +#define A64_LDRBI 0x39400000 +#define A64_LDRSBI 0x39800000 +#define A64_STRI 0xf9000000 +#define A64_LDRI 0xf9400000 +#define A64_LDRI_LITERAL 0x58000000 +#define A64_STRHI 0x79000000 +#define A64_LDRHI 0x79400000 +#define A64_LDRSHI 0x79800000 +#define A64_STRWI 0xb9000000 +#define A64_LDRWI 0xb9400000 +#define A64_LDRSWI 0xb9800000 +#define A64_STRB 0x38206800 +#define A64_LDRB 0x38606800 +#define A64_LDRSB 0x38e06800 +#define A64_STR 0xf8206800 +#define A64_LDR 0xf8606800 +#define A64_STRH 0x78206800 +#define A64_LDRH 0x78606800 +#define A64_LDRSH 0x78a06800 +#define A64_STRW 0xb8206800 +#define A64_LDRW 0xb8606800 +#define A64_LDRSW 0xb8a06800 +#define A64_STURB 0x38000000 +#define A64_LDURB 0x38400000 +#define A64_LDURSB 0x38800000 +#define 
A64_STUR 0xf8000000 +#define A64_LDUR 0xf8400000 +#define A64_STURH 0x78000000 +#define A64_LDURH 0x78400000 +#define A64_LDURSH 0x78800000 +#define A64_STURW 0xb8000000 +#define A64_LDURW 0xb8400000 +#define A64_LDURSW 0xb8800000 +#define A64_ANDI 0x12400000 +#define A64_ORRI 0x32400000 +#define A64_EORI 0x52400000 +#define A64_ANDSI 0x72000000 +#define A64_AND 0x0a000000 +#define A64_ORR 0x2a000000 +#define A64_MOV 0x2a0003e0 /* AKA orr Rd,xzr,Rm */ +#define A64_MVN 0x2a2003e0 +#define A64_UXTW 0x2a0003e0 /* AKA MOV */ +#define A64_EOR 0x4a000000 +#define A64_ANDS 0x6a000000 +#define A64_MOVN 0x12800000 +#define A64_MOVZ 0x52800000 +#define A64_MOVK 0x72800000 +#define A64_BRK 0xd4200000 + +static void +SBFM(jit_state_t *_jit, int32_t Rd, int32_t Rn, int32_t ImmR, int32_t ImmS) +{ + return oxxrs(_jit, A64_SBFM|XS,Rd,Rn,ImmR,ImmS); +} + +static void +UBFM(jit_state_t *_jit, int32_t Rd, int32_t Rn, int32_t ImmR, int32_t ImmS) +{ + return oxxrs(_jit, A64_UBFM|XS,Rd,Rn,ImmR,ImmS); +} + +static void +UBFX(jit_state_t *_jit, int32_t Rd, int32_t Rn, int32_t ImmR, int32_t ImmS) +{ + return oxxrs(_jit, A64_UBFX,Rd,Rn,ImmR,ImmS); +} + +static void +CMP(jit_state_t *_jit, int32_t Rn, int32_t Rm) +{ + return oxx_(_jit, A64_CMP|XS,Rn,Rm); +} + +static void +CMPI(jit_state_t *_jit, int32_t Rn, int32_t Imm12) +{ + return oxxi(_jit, A64_SUBSI|XS,XZR_REGNO,Rn,Imm12); +} + +static void +CMPI_12(jit_state_t *_jit, int32_t Rn, int32_t Imm12) +{ + return oxxi(_jit, A64_SUBSI|XS|LSL_12,XZR_REGNO,Rn,Imm12); +} + +static void +CMNI(jit_state_t *_jit, int32_t Rn, int32_t Imm12) +{ + return oxxi(_jit, A64_ADDSI|XS,XZR_REGNO,Rn,Imm12); +} + +static void +CMNI_12(jit_state_t *_jit, int32_t Rn, int32_t Imm12) +{ + return oxxi(_jit, A64_ADDSI|XS|LSL_12,XZR_REGNO,Rn,Imm12); +} + +static void +TST(jit_state_t *_jit, int32_t Rn, int32_t Rm) +{ + return oxxx(_jit, A64_ANDS|XS,XZR_REGNO,Rn,Rm); +} + +/* actually should use oxxrs but logical_immediate returns proper encoding */ +static void 
+TSTI(jit_state_t *_jit, int32_t Rn, int32_t Imm12) +{ + return oxxi(_jit, A64_ANDSI,XZR_REGNO,Rn,Imm12); +} + +static void +MOV(jit_state_t *_jit, int32_t Rd, int32_t Rm) +{ + return ox_x(_jit, A64_MOV|XS,Rd,Rm); +} + +static void +MVN(jit_state_t *_jit, int32_t Rd, int32_t Rm) +{ + return ox_x(_jit, A64_MVN|XS,Rd,Rm); +} + +static void +NEG(jit_state_t *_jit, int32_t Rd, int32_t Rm) +{ + return ox_x(_jit, A64_NEG|XS,Rd,Rm); +} + +static void +MOVN(jit_state_t *_jit, int32_t Rd, int32_t Imm16) +{ + return ox_h(_jit, A64_MOVN|XS,Rd,Imm16); +} + +static void +MOVN_16(jit_state_t *_jit, int32_t Rd, int32_t Imm16) +{ + return ox_h(_jit, A64_MOVN|XS|MOVI_LSL_16,Rd,Imm16); +} + +static void +MOVN_32(jit_state_t *_jit, int32_t Rd, int32_t Imm16) +{ + return ox_h(_jit, A64_MOVN|XS|MOVI_LSL_32,Rd,Imm16); +} + +static void +MOVN_48(jit_state_t *_jit, int32_t Rd, int32_t Imm16) +{ + return ox_h(_jit, A64_MOVN|XS|MOVI_LSL_48,Rd,Imm16); +} + +static void +MOVZ(jit_state_t *_jit, int32_t Rd, int32_t Imm16) +{ + return ox_h(_jit, A64_MOVZ|XS,Rd,Imm16); +} + +static void +MOVZ_16(jit_state_t *_jit, int32_t Rd, int32_t Imm16) +{ + return ox_h(_jit, A64_MOVZ|XS|MOVI_LSL_16,Rd,Imm16); +} + +static void +MOVZ_32(jit_state_t *_jit, int32_t Rd, int32_t Imm16) +{ + return ox_h(_jit, A64_MOVZ|XS|MOVI_LSL_32,Rd,Imm16); +} + +static void +MOVZ_48(jit_state_t *_jit, int32_t Rd, int32_t Imm16) +{ + return ox_h(_jit, A64_MOVZ|XS|MOVI_LSL_48,Rd,Imm16); +} + +static void +MOVK_16(jit_state_t *_jit, int32_t Rd, int32_t Imm16) +{ + return ox_h(_jit, A64_MOVK|XS|MOVI_LSL_16,Rd,Imm16); +} + +static void +MOVK_32(jit_state_t *_jit, int32_t Rd, int32_t Imm16) +{ + return ox_h(_jit, A64_MOVK|XS|MOVI_LSL_32,Rd,Imm16); +} + +static void +MOVK_48(jit_state_t *_jit, int32_t Rd, int32_t Imm16) +{ + return ox_h(_jit, A64_MOVK|XS|MOVI_LSL_48,Rd,Imm16); +} + +static void +ADD(jit_state_t *_jit, int32_t Rd, int32_t Rn, int32_t Rm) +{ + return oxxx(_jit, A64_ADD|XS,Rd,Rn,Rm); +} + +static void +ADDI(jit_state_t 
*_jit, int32_t Rd, int32_t Rn, int32_t Imm12) +{ + return oxxi(_jit, A64_ADDI|XS,Rd,Rn,Imm12); +} + +static void +ADDI_12(jit_state_t *_jit, int32_t Rd, int32_t Rn, int32_t Imm12) +{ + return oxxi(_jit, A64_ADDI|XS|LSL_12,Rd,Rn,Imm12); +} + +static void +ADDS(jit_state_t *_jit, int32_t Rd, int32_t Rn, int32_t Rm) +{ + return oxxx(_jit, A64_ADDS|XS,Rd,Rn,Rm); +} + +static void +ADDSI(jit_state_t *_jit, int32_t Rd, int32_t Rn, int32_t Imm12) +{ + return oxxi(_jit, A64_ADDSI|XS,Rd,Rn,Imm12); +} + +static void +ADDSI_12(jit_state_t *_jit, int32_t Rd, int32_t Rn, int32_t Imm12) +{ + return oxxi(_jit, A64_ADDSI|XS|LSL_12,Rd,Rn,Imm12); +} + +static void +ADCS(jit_state_t *_jit, int32_t Rd, int32_t Rn, int32_t Rm) +{ + return oxxx(_jit, A64_ADCS|XS,Rd,Rn,Rm); +} + +static void +SUB(jit_state_t *_jit, int32_t Rd, int32_t Rn, int32_t Rm) +{ + return oxxx(_jit, A64_SUB|XS,Rd,Rn,Rm); +} + +static void +SUBI(jit_state_t *_jit, int32_t Rd, int32_t Rn, int32_t Imm12) +{ + return oxxi(_jit, A64_SUBI|XS,Rd,Rn,Imm12); +} + +static void +SUBI_12(jit_state_t *_jit, int32_t Rd, int32_t Rn, int32_t Imm12) +{ + return oxxi(_jit, A64_SUBI|XS|LSL_12,Rd,Rn,Imm12); +} + +static void +SUBS(jit_state_t *_jit, int32_t Rd, int32_t Rn, int32_t Rm) +{ + return oxxx(_jit, A64_SUBS|XS,Rd,Rn,Rm); +} + +static void +SUBSI(jit_state_t *_jit, int32_t Rd, int32_t Rn, int32_t Imm12) +{ + return oxxi(_jit, A64_SUBSI|XS,Rd,Rn,Imm12); +} + +static void +SUBSI_12(jit_state_t *_jit, int32_t Rd, int32_t Rn, int32_t Imm12) +{ + return oxxi(_jit, A64_SUBSI|XS|LSL_12,Rd,Rn,Imm12); +} + +static void +SBCS(jit_state_t *_jit, int32_t Rd, int32_t Rn, int32_t Rm) +{ + return oxxx(_jit, A64_SBCS|XS,Rd,Rn,Rm); +} + +static void +MUL(jit_state_t *_jit, int32_t Rd, int32_t Rn, int32_t Rm) +{ + return oxxx(_jit, A64_MUL|XS,Rd,Rn,Rm); +} + +static void +SMULH(jit_state_t *_jit, int32_t Rd, int32_t Rn, int32_t Rm) +{ + return oxxx(_jit, A64_SMULH,Rd,Rn,Rm); +} + +static void +UMULH(jit_state_t *_jit, int32_t Rd, int32_t Rn, 
int32_t Rm) +{ + return oxxx(_jit, A64_UMULH,Rd,Rn,Rm); +} + +static void +SDIV(jit_state_t *_jit, int32_t Rd, int32_t Rn, int32_t Rm) +{ + return oxxx(_jit, A64_SDIV|XS,Rd,Rn,Rm); +} + +static void +UDIV(jit_state_t *_jit, int32_t Rd, int32_t Rn, int32_t Rm) +{ + return oxxx(_jit, A64_UDIV|XS,Rd,Rn,Rm); +} + +static void +LSL(jit_state_t *_jit, int32_t Rd, int32_t Rn, int32_t Rm) +{ + return oxxx(_jit, A64_LSL|XS,Rd,Rn,Rm); +} + +static void +LSLI(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t i0) +{ + return UBFM(_jit, r0,r1,(64-i0)&63,63-i0); +} + +static void +ASR(jit_state_t *_jit, int32_t Rd, int32_t Rn, int32_t Rm) +{ + return oxxx(_jit, A64_ASR|XS,Rd,Rn,Rm); +} + +static void +ASRI(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t i0) +{ + return SBFM(_jit, r0,r1,i0,63); +} + +static void +LSR(jit_state_t *_jit, int32_t Rd, int32_t Rn, int32_t Rm) +{ + return oxxx(_jit, A64_LSR|XS,Rd,Rn,Rm); +} + +static void +LSRI(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t i0) +{ + return UBFM(_jit, r0,r1,i0,63); +} + +static void +AND(jit_state_t *_jit, int32_t Rd, int32_t Rn, int32_t Rm) +{ + return oxxx(_jit, A64_AND|XS,Rd,Rn,Rm); +} + +/* actually should use oxxrs but logical_immediate returns proper encoding */; +static void +ANDI(jit_state_t *_jit, int32_t Rd, int32_t Rn, int32_t Imm12) +{ + return oxxi(_jit, A64_ANDI|XS,Rd,Rn,Imm12); +} + +static void +ORR(jit_state_t *_jit, int32_t Rd, int32_t Rn, int32_t Rm) +{ + return oxxx(_jit, A64_ORR|XS,Rd,Rn,Rm); +} + +/* actually should use oxxrs but logical_immediate returns proper encoding */ +static void +ORRI(jit_state_t *_jit, int32_t Rd, int32_t Rn, int32_t Imm12) +{ + return oxxi(_jit, A64_ORRI|XS,Rd,Rn,Imm12); +} + +static void +EOR(jit_state_t *_jit, int32_t Rd, int32_t Rn, int32_t Rm) +{ + return oxxx(_jit, A64_EOR|XS,Rd,Rn,Rm); +} + +/* actually should use oxxrs but logical_immediate returns proper encoding */ +static void +EORI(jit_state_t *_jit, int32_t Rd, int32_t Rn, int32_t Imm12) +{ + return 
oxxi(_jit, A64_EORI|XS,Rd,Rn,Imm12); +} + +static void +SXTB(jit_state_t *_jit, int32_t Rd, int32_t Rn) +{ + return SBFM(_jit, Rd,Rn,0,7); +} + +static void +SXTH(jit_state_t *_jit, int32_t Rd, int32_t Rn) +{ + return SBFM(_jit, Rd,Rn,0,15); +} + +static void +SXTW(jit_state_t *_jit, int32_t Rd, int32_t Rn) +{ + return SBFM(_jit, Rd,Rn,0,31); +} + +static void +UXTB(jit_state_t *_jit, int32_t Rd, int32_t Rn) +{ + return UBFX(_jit, Rd,Rn,0,7); +} + +static void +UXTH(jit_state_t *_jit, int32_t Rd, int32_t Rn) +{ + return UBFX(_jit, Rd,Rn,0,15); +} + +static void +UXTW(jit_state_t *_jit, int32_t Rd, int32_t Rm) +{ + return ox_x(_jit, A64_UXTW,Rd,Rm); +} + +static void +REV(jit_state_t *_jit, int32_t Rd, int32_t Rn) +{ + return o_xx(_jit, A64_REV,Rd,Rn); +} + +static void +LDAR(jit_state_t *_jit, int32_t Rt, int32_t Rn) +{ + return o_xx(_jit, A64_LDAR, Rt, Rn); +} + +static void +STLR(jit_state_t *_jit, int32_t Rt, int32_t Rn) +{ + return o_xx(_jit, A64_STLR, Rt, Rn); +} + +static void +LDAXR(jit_state_t *_jit, int32_t Rt, int32_t Rn) +{ + return o_xx(_jit, A64_LDAXR, Rt, Rn); +} + +static void +STLXR(jit_state_t *_jit, int32_t Rt, int32_t Rn, int32_t Rm) +{ + return oxxx(_jit, A64_STLXR, Rt, Rn, Rm); +} + +static void +LDRSB(jit_state_t *_jit, int32_t Rt, int32_t Rn, int32_t Rm) +{ + return oxxx(_jit, A64_LDRSB,Rt,Rn,Rm); +} + +static void +LDRSBI(jit_state_t *_jit, int32_t Rt, int32_t Rn, int32_t Imm12) +{ + return oxxi(_jit, A64_LDRSBI,Rt,Rn,Imm12); +} + +static void +LDURSB(jit_state_t *_jit, int32_t Rt, int32_t Rn, int32_t Imm9) +{ + return oxx9(_jit, A64_LDURSB,Rt,Rn,Imm9); +} + +static void +LDRB(jit_state_t *_jit, int32_t Rt, int32_t Rn, int32_t Rm) +{ + return oxxx(_jit, A64_LDRB,Rt,Rn,Rm); +} + +static void +LDRBI(jit_state_t *_jit, int32_t Rt, int32_t Rn, int32_t Imm12) +{ + return oxxi(_jit, A64_LDRBI,Rt,Rn,Imm12); +} + +static void +LDURB(jit_state_t *_jit, int32_t Rt, int32_t Rn, int32_t Imm9) +{ + return oxx9(_jit, A64_LDURB,Rt,Rn,Imm9); +} + +static 
void +LDRSH(jit_state_t *_jit, int32_t Rt, int32_t Rn, int32_t Rm) +{ + return oxxx(_jit, A64_LDRSH,Rt,Rn,Rm); +} + +static void +LDRSHI(jit_state_t *_jit, int32_t Rt, int32_t Rn, int32_t Imm12) +{ + return oxxi(_jit, A64_LDRSHI,Rt,Rn,Imm12); +} + +static void +LDURSH(jit_state_t *_jit, int32_t Rt, int32_t Rn, int32_t Imm9) +{ + return oxx9(_jit, A64_LDURSH,Rt,Rn,Imm9); +} + +static void +LDRH(jit_state_t *_jit, int32_t Rt, int32_t Rn, int32_t Rm) +{ + return oxxx(_jit, A64_LDRH,Rt,Rn,Rm); +} + +static void +LDRHI(jit_state_t *_jit, int32_t Rt, int32_t Rn, int32_t Imm12) +{ + return oxxi(_jit, A64_LDRHI,Rt,Rn,Imm12); +} + +static void +LDURH(jit_state_t *_jit, int32_t Rt, int32_t Rn, int32_t Imm9) +{ + return oxx9(_jit, A64_LDURH,Rt,Rn,Imm9); +} + +static void +LDRSW(jit_state_t *_jit, int32_t Rt, int32_t Rn, int32_t Rm) +{ + return oxxx(_jit, A64_LDRSW,Rt,Rn,Rm); +} + +static void +LDRSWI(jit_state_t *_jit, int32_t Rt, int32_t Rn, int32_t Imm12) +{ + return oxxi(_jit, A64_LDRSWI,Rt,Rn,Imm12); +} + +static void +LDURSW(jit_state_t *_jit, int32_t Rt, int32_t Rn, int32_t Imm9) +{ + return oxx9(_jit, A64_LDURSW,Rt,Rn,Imm9); +} + +static void +LDRW(jit_state_t *_jit, int32_t Rt, int32_t Rn, int32_t Rm) +{ + return oxxx(_jit, A64_LDRW,Rt,Rn,Rm); +} + +static void +LDRWI(jit_state_t *_jit, int32_t Rt, int32_t Rn, int32_t Imm12) +{ + return oxxi(_jit, A64_LDRWI,Rt,Rn,Imm12); +} + +static void +LDURW(jit_state_t *_jit, int32_t Rt, int32_t Rn, int32_t Imm9) +{ + return oxx9(_jit, A64_LDURW,Rt,Rn,Imm9); +} + +static void +LDR(jit_state_t *_jit, int32_t Rt, int32_t Rn, int32_t Rm) +{ + return oxxx(_jit, A64_LDR,Rt,Rn,Rm); +} + +static void +LDRI(jit_state_t *_jit, int32_t Rt, int32_t Rn, int32_t Imm12) +{ + return oxxi(_jit, A64_LDRI,Rt,Rn,Imm12); +} + +static void +LDUR(jit_state_t *_jit, int32_t Rt, int32_t Rn, int32_t Imm9) +{ + return oxx9(_jit, A64_LDUR,Rt,Rn,Imm9); +} + +static void +STRB(jit_state_t *_jit, int32_t Rt, int32_t Rn, int32_t Rm) +{ + return oxxx(_jit, 
A64_STRB,Rt,Rn,Rm); +} + +static void +STRBI(jit_state_t *_jit, int32_t Rt, int32_t Rn, int32_t Imm12) +{ + return oxxi(_jit, A64_STRBI,Rt,Rn,Imm12); +} + +static void +STURB(jit_state_t *_jit, int32_t Rt, int32_t Rn, int32_t Imm9) +{ + return oxx9(_jit, A64_STURB,Rt,Rn,Imm9); +} + +static void +STRH(jit_state_t *_jit, int32_t Rt, int32_t Rn, int32_t Rm) +{ + return oxxx(_jit, A64_STRH,Rt,Rn,Rm); +} + +static void +STRHI(jit_state_t *_jit, int32_t Rt, int32_t Rn, int32_t Imm12) +{ + return oxxi(_jit, A64_STRHI,Rt,Rn,Imm12); +} + +static void +STURH(jit_state_t *_jit, int32_t Rt, int32_t Rn, int32_t Imm9) +{ + return oxx9(_jit, A64_STURH,Rt,Rn,Imm9); +} + +static void +STRW(jit_state_t *_jit, int32_t Rt, int32_t Rn, int32_t Rm) +{ + return oxxx(_jit, A64_STRW,Rt,Rn,Rm); +} + +static void +STRWI(jit_state_t *_jit, int32_t Rt, int32_t Rn, int32_t Imm12) +{ + return oxxi(_jit, A64_STRWI,Rt,Rn,Imm12); +} + +static void +STURW(jit_state_t *_jit, int32_t Rt, int32_t Rn, int32_t Imm9) +{ + return oxx9(_jit, A64_STURW,Rt,Rn,Imm9); +} + +static void +STR(jit_state_t *_jit, int32_t Rt, int32_t Rn, int32_t Rm) +{ + return oxxx(_jit, A64_STR,Rt,Rn,Rm); +} + +static void +STRI(jit_state_t *_jit, int32_t Rt, int32_t Rn, int32_t Imm12) +{ + return oxxi(_jit, A64_STRI,Rt,Rn,Imm12); +} + +static void +STUR(jit_state_t *_jit, int32_t Rt, int32_t Rn, int32_t Imm9) +{ + return oxx9(_jit, A64_STUR,Rt,Rn,Imm9); +} + +static jit_reloc_t +B(jit_state_t *_jit) +{ + return emit_jmp(_jit, encode_o26(A64_B)); +} + +static jit_reloc_t +BL(jit_state_t *_jit) +{ + return emit_jmp(_jit, encode_o26(A64_BL)); +} + +static void +BR(jit_state_t *_jit, int32_t Rn) +{ + return o_x_(_jit, A64_BR,Rn); +} + +static void +BLR(jit_state_t *_jit, int32_t Rn) +{ + return o_x_(_jit, A64_BLR,Rn); +} + +static void +RET(jit_state_t *_jit) +{ + return o_x_(_jit, A64_RET,jit_gpr_regno(_LR)); +} + +static jit_reloc_t +B_C(jit_state_t *_jit, int32_t Cc) +{ + return emit_jcc(_jit, encode_oc19(A64_B_C, Cc)); +} + 
+static jit_reloc_t +CBZ(jit_state_t *_jit, int32_t Rd) +{ + return emit_jcc(_jit, encode_ox19(A64_CBZ|XS,Rd)); +} + +static jit_reloc_t +CBNZ(jit_state_t *_jit, int32_t Rd) +{ + return emit_jcc(_jit, encode_ox19(A64_CBNZ|XS,Rd)); +} + +static void +NOP(jit_state_t *_jit) +{ + return emit_u32_with_pool(_jit, 0xd503201f); +} + +static void +BRK(jit_state_t *_jit) +{ + emit_u32_with_pool(_jit, A64_BRK); +} + +static jit_reloc_t +movi_from_pool(jit_state_t *_jit, int32_t Rt) +{ + return emit_load_from_pool(_jit, encode_ox19(A64_LDRI_LITERAL, Rt)); +} + +static void +emit_veneer(jit_state_t *_jit, jit_pointer_t target) +{ + jit_gpr_t tmp = get_temp_gpr(_jit); + uint32_t ldr = encode_ox19(A64_LDRI_LITERAL, jit_gpr_regno(tmp)); + uint32_t br = write_Rn_bitfield(A64_BR, jit_gpr_regno(tmp)); + uint32_t *loc = _jit->pc.ui; + emit_u32(_jit, ldr); + emit_u32(_jit, br); + unget_temp_gpr(_jit); + if (_jit->overflow) + return; + // Patch load to here, divided by 4. + patch_load_from_pool_offset(loc, _jit->pc.ui - loc); + emit_u64(_jit, (uint64_t) target); +} + +static void +movr(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + if (r0 != r1) { + // Stack pointer requires special handling + if (r1 == jit_gpr_regno(_X31) || r0 == jit_gpr_regno(_X31)) + ADDI(_jit, r0, r1, 0); + else + MOV(_jit, r0, r1); + } +} + +static void +addr(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2) +{ + return ADD(_jit,r0,r1,r2); +} + +static void +addcr(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2) +{ + return ADDS(_jit,r0,r1,r2); +} + +static void +addxr(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2) +{ + return ADCS(_jit,r0,r1,r2); +} + +static void +subr(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2) +{ + return SUB(_jit,r0,r1,r2); +} + +static void +subcr(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2) +{ + return SUBS(_jit,r0,r1,r2); +} + +static void +subxr(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2) +{ + return SBCS(_jit,r0,r1,r2); +} + +static 
void +mulr(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2) +{ + return MUL(_jit,r0,r1,r2); +} + +static void +divr(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2) +{ + return SDIV(_jit,r0,r1,r2); +} + +static void +divr_u(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2) +{ + return UDIV(_jit,r0,r1,r2); +} + +static void +iqdivr(jit_state_t *_jit, jit_bool_t sign, + int32_t r0, int32_t r1, int32_t r2, int32_t r3) +{ + int32_t rg0, rg1; + if (r0 == r2 || r0 == r3) { + rg0 = jit_gpr_regno(get_temp_gpr(_jit)); + } else { + rg0 = r0; + } + if (r1 == r2 || r1 == r3) { + rg1 = jit_gpr_regno(get_temp_gpr(_jit)); + } else { + rg1 = r1; + } + if (sign) + divr(_jit, rg0, r2, r3); + else + divr_u(_jit, rg0, r2, r3); + mulr(_jit, rg1, r3, rg0); + subr(_jit, rg1, r2, rg1); + if (rg0 != r0) { + movr(_jit, r0, rg0); + unget_temp_gpr(_jit); + } + if (rg1 != r1) { + movr(_jit, r1, rg1); + unget_temp_gpr(_jit); + } +} + +static void +qdivr(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2, int32_t r3) +{ + return iqdivr(_jit,1,r0,r1,r2,r3); +} + +static void +qdivr_u(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2, int32_t r3) +{ + return iqdivr(_jit,0,r0,r1,r2,r3); +} + +static void +lshr(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2) +{ + return LSL(_jit,r0,r1,r2); +} + +static void +rshr(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2) +{ + return ASR(_jit,r0,r1,r2); +} + +static void +rshr_u(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2) +{ + return LSR(_jit,r0,r1,r2); +} + +static void +negr(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + return NEG(_jit,r0,r1); +} + +static void +comr(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + return MVN(_jit,r0,r1); +} + +static void +andr(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2) +{ + return AND(_jit,r0,r1,r2); +} + +static void +orr(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2) +{ + return ORR(_jit,r0,r1,r2); +} + +static void +xorr(jit_state_t *_jit, int32_t r0, 
int32_t r1, int32_t r2) +{ + return EOR(_jit,r0,r1,r2); +} + +static void +ldr_c(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + return LDRSBI(_jit,r0,r1,0); +} + +static void +ldr_s(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + return LDRSHI(_jit,r0,r1,0); +} + +static void +ldr_i(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + return LDRSWI(_jit,r0,r1,0); +} + +static void +ldxr_s(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2) +{ + return LDRSH(_jit,r0,r1,r2); +} + +static void +ldxr_i(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2) +{ + return LDRSW(_jit,r0,r1,r2); +} + +static void +ldxr_l(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2) +{ + return LDR(_jit,r0,r1,r2); +} + +static void +str_c(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + return STRBI(_jit,r1,r0,0); +} + +static void +str_s(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + return STRHI(_jit,r1,r0,0); +} + +static void +str_i(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + return STRWI(_jit,r1,r0,0); +} + +static void +str_l(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + return STRI(_jit,r1,r0,0); +} + +static void +stxr_c(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2) +{ + return STRB(_jit,r2,r1,r0); +} + +static void +stxr_s(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2) +{ + return STRH(_jit,r2,r1,r0); +} + +static void +stxr_i(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2) +{ + return STRW(_jit,r2,r1,r0); +} + +static void +stxr_l(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2) +{ + return STR(_jit,r2,r1,r0); +} + +static void +bswapr_ul(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + return REV(_jit,r0,r1); +} + +static void +extr_c(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + return SXTB(_jit,r0,r1); +} + +static void +extr_uc(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + return UXTB(_jit,r0,r1); +} + +static void +extr_s(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + return SXTH(_jit,r0,r1); +} + +static void 
+extr_us(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + return UXTH(_jit,r0,r1); +} + +static void +extr_i(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + return SXTW(_jit,r0,r1); +} + +static void +extr_ui(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + return UXTW(_jit,r0,r1); +} + +static void +movi(jit_state_t *_jit, int32_t r0, jit_word_t i0) +{ + jit_word_t n0 = ~i0, ibit = 0, nbit = 0; + if (i0 & 0x000000000000ffffL) ibit |= 1; + if (i0 & 0x00000000ffff0000L) ibit |= 2; + if (i0 & 0x0000ffff00000000L) ibit |= 4; + if (i0 & 0xffff000000000000L) ibit |= 8; + if (n0 & 0x000000000000ffffL) nbit |= 1; + if (n0 & 0x00000000ffff0000L) nbit |= 2; + if (n0 & 0x0000ffff00000000L) nbit |= 4; + if (n0 & 0xffff000000000000L) nbit |= 8; + switch (ibit) { + case 0: + MOVZ (_jit, r0, 0); + break; + case 1: + MOVZ (_jit, r0, i0 & 0xffff); + break; + case 2: + MOVZ_16(_jit, r0, (i0 >> 16) & 0xffff); + break; + case 3: + MOVZ (_jit, r0, i0 & 0xffff); + MOVK_16(_jit, r0, (i0 >> 16) & 0xffff); + break; + case 4: + MOVZ_32(_jit, r0, (i0 >> 32) & 0xffff); + break; + case 5: + MOVZ (_jit, r0, i0 & 0xffff); + MOVK_32(_jit, r0, (i0 >> 32) & 0xffff); + break; + case 6: + MOVZ_16(_jit, r0, (i0 >> 16) & 0xffff); + MOVK_32(_jit, r0, (i0 >> 32) & 0xffff); + break; + case 7: + if (nbit == 8) { + MOVN_48(_jit, r0, (n0 >> 48) & 0xffff); + } else { + MOVZ (_jit, r0, i0 & 0xffff); + MOVK_16(_jit, r0, (i0 >> 16) & 0xffff); + MOVK_32(_jit, r0, (i0 >> 32) & 0xffff); + } + break; + case 8: + MOVZ_48(_jit, r0, (i0 >> 48) & 0xffff); + break; + case 9: + MOVZ (_jit, r0, i0 & 0xffff); + MOVK_48(_jit, r0, (i0 >> 48) & 0xffff); + break; + case 10: + MOVZ_16(_jit, r0, (i0 >> 16) & 0xffff); + MOVK_48(_jit, r0, (i0 >> 48) & 0xffff); + break; + case 11: + if (nbit == 4) { + MOVN_32(_jit, r0, (n0 >> 32) & 0xffff); + } else { + MOVZ (_jit, r0, i0 & 0xffff); + MOVK_16(_jit, r0, (i0 >> 16) & 0xffff); + MOVK_48(_jit, r0, (i0 >> 48) & 0xffff); + } + break; + case 12: + MOVZ_32(_jit, r0, (i0 >> 32) & 0xffff); + 
MOVK_48(_jit, r0, (i0 >> 48) & 0xffff); + break; + case 13: + if (nbit == 2) { + MOVN_16(_jit, r0, (n0 >> 16) & 0xffff); + } else { + MOVZ (_jit, r0, i0 & 0xffff); + MOVK_32(_jit, r0, (i0 >> 32) & 0xffff); + MOVK_48(_jit, r0, (i0 >> 48) & 0xffff); + } + break; + case 14: + if (nbit == 1) { + MOVN (_jit, r0, (n0) & 0xffff); + } else { + MOVZ_16(_jit, r0, (i0 >> 16) & 0xffff); + MOVK_32(_jit, r0, (i0 >> 32) & 0xffff); + MOVK_48(_jit, r0, (i0 >> 48) & 0xffff); + } + break; + case 15: + if (nbit == 0) { + MOVN (_jit, r0, 0); + } else if (nbit == 1) { + MOVN (_jit, r0, n0 & 0xffff); + } else if (nbit == 8) { + MOVN_48(_jit, r0, (n0 >> 48) & 0xffff); + } else { + MOVZ (_jit, r0, i0 & 0xffff); + MOVK_16(_jit, r0, (i0 >> 16) & 0xffff); + MOVK_32(_jit, r0, (i0 >> 32) & 0xffff); + MOVK_48(_jit, r0, (i0 >> 48) & 0xffff); + } + break; + default: + abort(); + } +} + +static jit_reloc_t +bccr(jit_state_t *_jit, int32_t cc, int32_t r0, int32_t r1) +{ + CMP(_jit, r0, r1); + return B_C(_jit, cc); +} + +static jit_reloc_t +bcci(jit_state_t *_jit, int32_t cc, int32_t r0, jit_word_t i1) +{ + jit_word_t is = i1 >> 12; + jit_word_t in = -i1; + jit_word_t iS = in >> 12; + if ( i1 >= 0 && i1 <= 0xfff) { + CMPI (_jit, r0, i1); + } else if ((is << 12) == i1 && is >= 0 && is <= 0xfff) { + CMPI_12(_jit, r0, is); + } else if ( in >= 0 && in <= 0xfff) { + CMNI (_jit, r0, in); + } else if ((iS << 12) == is && iS >= 0 && iS <= 0xfff) { + CMNI_12(_jit, r0, iS); + } else { + jit_gpr_t reg = get_temp_gpr(_jit); + movi(_jit, jit_gpr_regno(reg), i1); + CMP(_jit, r0, jit_gpr_regno(reg)); + unget_temp_gpr(_jit); + } + return B_C(_jit, cc); +} + +static jit_reloc_t +bltr(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + return bccr(_jit,BCC_LT,r0,r1); +} + +static jit_reloc_t +blti(jit_state_t *_jit, int32_t r0, jit_word_t i1) +{ + return bcci(_jit,BCC_LT,r0,i1); +} + +static jit_reloc_t +bltr_u(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + return bccr(_jit,BCC_CC,r0,r1); +} + +static jit_reloc_t 
+blti_u(jit_state_t *_jit, int32_t r0, jit_word_t i1) +{ + return bcci(_jit,BCC_CC,r0,i1); +} + +static jit_reloc_t +bler(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + return bccr(_jit,BCC_LE,r0,r1); +} + +static jit_reloc_t +blei(jit_state_t *_jit, int32_t r0, jit_word_t i1) +{ + return bcci(_jit,BCC_LE,r0,i1); +} + +static jit_reloc_t +bler_u(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + return bccr(_jit,BCC_LS,r0,r1); +} + +static jit_reloc_t +blei_u(jit_state_t *_jit, int32_t r0, jit_word_t i1) +{ + return bcci(_jit,BCC_LS,r0,i1); +} + +static jit_reloc_t +beqr(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + return bccr(_jit,BCC_EQ,r0,r1); +} + +static jit_reloc_t +bger(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + return bccr(_jit,BCC_GE,r0,r1); +} + +static jit_reloc_t +bgei(jit_state_t *_jit, int32_t r0, jit_word_t i1) +{ + return bcci(_jit,BCC_GE,r0,i1); +} + +static jit_reloc_t +bger_u(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + return bccr(_jit,BCC_CS,r0,r1); +} + +static jit_reloc_t +bgei_u(jit_state_t *_jit, int32_t r0, jit_word_t i1) +{ + return bcci(_jit,BCC_CS,r0,i1); +} + +static jit_reloc_t +bgtr(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + return bccr(_jit,BCC_GT,r0,r1); +} + +static jit_reloc_t +bgti(jit_state_t *_jit, int32_t r0, jit_word_t i1) +{ + return bcci(_jit,BCC_GT,r0,i1); +} + +static jit_reloc_t +bgtr_u(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + return bccr(_jit,BCC_HI,r0,r1); +} + +static jit_reloc_t +bgti_u(jit_state_t *_jit, int32_t r0, jit_word_t i1) +{ + return bcci(_jit,BCC_HI,r0,i1); +} + +static jit_reloc_t +bner(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + return bccr(_jit,BCC_NE,r0,r1); +} + +static void +addi(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0) +{ + jit_word_t is = i0 >> 12; + jit_word_t in = -i0; + jit_word_t iS = in >> 12; + if ( i0 >= 0 && i0 <= 0xfff) { + ADDI (_jit, r0, r1, i0); + } else if ((is << 12) == i0 && is >= 0 && is <= 0xfff) { + ADDI_12(_jit, r0, r1, is); + } else if 
( in >= 0 && in <= 0xfff) { + SUBI (_jit, r0, r1, in); + } else if ((iS << 12) == is && iS >= 0 && iS <= 0xfff) { + SUBI_12(_jit, r0, r1, iS); + } else { + int32_t r2 = (r0 == r1) ? jit_gpr_regno(get_temp_gpr(_jit)) : r0; + movi(_jit, r2, i0); + addr(_jit, r0, r1, r2); + if (r0 == r1) + unget_temp_gpr(_jit); + } +} + +static void +addci(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0) +{ + jit_word_t is = i0 >> 12; + jit_word_t in = -i0; + jit_word_t iS = in >> 12; + if ( i0 >= 0 && i0 <= 0xfff) { + ADDSI (_jit, r0, r1, i0); + } else if ((is << 12) == i0 && is >= 0 && is <= 0xfff) { + ADDSI_12(_jit, r0, r1, is); + } else if ( in >= 0 && in <= 0xfff) { + SUBSI (_jit, r0, r1, in); + } else if ((iS << 12) == is && iS >= 0 && iS <= 0xfff) { + SUBSI_12(_jit, r0, r1, iS); + } else { + int32_t r2 = (r0 == r1) ? jit_gpr_regno(get_temp_gpr(_jit)) : r0; + movi(_jit, r2, i0); + addcr(_jit, r0, r1, r2); + if (r0 == r1) + unget_temp_gpr(_jit); + } +} + +static void +addxi(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0) +{ + int32_t r2 = (r0 == r1) ? jit_gpr_regno(get_temp_gpr(_jit)) : r0; + movi(_jit, r2, i0); + addxr(_jit, r0, r1, r2); + if (r0 == r1) + unget_temp_gpr(_jit); +} + +static void +subi(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0) +{ + jit_word_t is = i0 >> 12; + if ( i0 >= 0 && i0 <= 0xfff) { + SUBI (_jit, r0, r1, i0); + } else if ((is << 12) == i0 && is >= 0 && is <= 0xfff) { + SUBI_12(_jit, r0, r1, is); + } else { + int32_t r2 = (r0 == r1) ? jit_gpr_regno(get_temp_gpr(_jit)) : r0; + movi(_jit, r2, i0); + subr(_jit, r0, r1, r2); + if (r0 == r1) + unget_temp_gpr(_jit); + } +} + +static void +subci(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0) +{ + jit_word_t is = i0 >> 12; + if ( i0 >= 0 && i0 <= 0xfff) { + SUBSI (_jit, r0, r1, i0); + } else if ((is << 12) == i0 && is >= 0 && is <= 0xfff) { + SUBSI_12(_jit, r0, r1, is); + } else { + int32_t r2 = (r0 == r1) ? 
jit_gpr_regno(get_temp_gpr(_jit)) : r0; + movi(_jit, r2, i0); + subcr(_jit, r0, r1, r2); + if (r0 == r1) + unget_temp_gpr(_jit); + } +} + +static void +subxi(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0) +{ + int32_t r2 = (r0 == r1) ? jit_gpr_regno(get_temp_gpr(_jit)) : r0; + movi(_jit, r2, i0); + subxr(_jit, r0, r1, r2); + if (r0 == r1) + unget_temp_gpr(_jit); +} + +static jit_reloc_t +baddr(jit_state_t *_jit, int32_t cc, int32_t r0, int32_t r1) +{ + addcr(_jit, r0, r0, r1); + return B_C(_jit, cc); +} + +static jit_reloc_t +baddi(jit_state_t *_jit, int32_t cc, int32_t r0, jit_word_t i1) +{ + addci(_jit, r0, r0, i1); + return B_C(_jit, cc); +} + +static jit_reloc_t +boaddr(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + return baddr(_jit,BCC_VS,r0,r1); +} + +static jit_reloc_t +boaddi(jit_state_t *_jit, int32_t r0, jit_word_t i1) +{ + return baddi(_jit,BCC_VS,r0,i1); +} + +static jit_reloc_t +boaddr_u(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + return baddr(_jit,BCC_HS,r0,r1); +} + +static jit_reloc_t +boaddi_u(jit_state_t *_jit, int32_t r0, jit_word_t i1) +{ + return baddi(_jit,BCC_HS,r0,i1); +} + +static jit_reloc_t +bxaddr(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + return baddr(_jit,BCC_VC,r0,r1); +} + +static jit_reloc_t +bxaddi(jit_state_t *_jit, int32_t r0, jit_word_t i1) +{ + return baddi(_jit,BCC_VC,r0,i1); +} + +static jit_reloc_t +bxaddr_u(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + return baddr(_jit,BCC_LO,r0,r1); +} + +static jit_reloc_t +bxaddi_u(jit_state_t *_jit, int32_t r0, jit_word_t i1) +{ + return baddi(_jit,BCC_LO,r0,i1); +} + +static jit_reloc_t +bsubr(jit_state_t *_jit, int32_t cc, int32_t r0, int32_t r1) +{ + subcr(_jit, r0, r0, r1); + return B_C(_jit, cc); +} + +static jit_reloc_t +bsubi(jit_state_t *_jit, int32_t cc, int32_t r0, jit_word_t i1) +{ + subci(_jit, r0, r0, i1); + return B_C(_jit, cc); +} + +static jit_reloc_t +bosubr(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + return bsubr(_jit,BCC_VS,r0,r1); +} + 
+static jit_reloc_t +bosubi(jit_state_t *_jit, int32_t r0, jit_word_t i1) +{ + return bsubi(_jit,BCC_VS,r0,i1); +} + +static jit_reloc_t +bosubr_u(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + return bsubr(_jit,BCC_LO,r0,r1); +} + +static jit_reloc_t +bosubi_u(jit_state_t *_jit, int32_t r0, jit_word_t i1) +{ + return bsubi(_jit,BCC_LO,r0,i1); +} + +static jit_reloc_t +bxsubr(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + return bsubr(_jit,BCC_VC,r0,r1); +} + +static jit_reloc_t +bxsubi(jit_state_t *_jit, int32_t r0, jit_word_t i1) +{ + return bsubi(_jit,BCC_VC,r0,i1); +} + +static jit_reloc_t +bxsubr_u(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + return bsubr(_jit,BCC_HS,r0,r1); +} + +static jit_reloc_t +bxsubi_u(jit_state_t *_jit, int32_t r0, jit_word_t i1) +{ + return bsubi(_jit,BCC_HS,r0,i1); +} + +static jit_reloc_t +bmxr(jit_state_t *_jit, int32_t cc, int32_t r0, int32_t r1) +{ + TST(_jit, r0, r1); + return B_C(_jit, cc); +} + +static jit_reloc_t +bmxi(jit_state_t *_jit, int32_t cc, int32_t r0, jit_word_t i1) +{ + int32_t imm; + imm = logical_immediate(i1); + if (imm != -1) { + TSTI(_jit, r0, imm); + } else { + jit_gpr_t reg = get_temp_gpr(_jit); + movi(_jit, jit_gpr_regno(reg), i1); + TST(_jit, r0, jit_gpr_regno(reg)); + unget_temp_gpr(_jit); + } + return B_C(_jit, cc); +} + +static jit_reloc_t +bmsr(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + return bmxr(_jit,BCC_NE,r0,r1); +} + +static jit_reloc_t +bmsi(jit_state_t *_jit, int32_t r0, jit_word_t i1) +{ + return bmxi(_jit,BCC_NE,r0,i1); +} + +static jit_reloc_t +bmcr(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + return bmxr(_jit,BCC_EQ,r0,r1); +} + +static jit_reloc_t +bmci(jit_state_t *_jit, int32_t r0, jit_word_t i1) +{ + return bmxi(_jit,BCC_EQ,r0,i1); +} + +static void +jmpr(jit_state_t *_jit, int32_t r0) +{ + return BR(_jit, r0); +} + +static void +callr(jit_state_t *_jit, int32_t r0) +{ + return BLR(_jit,r0); +} + +static void +nop(jit_state_t *_jit, int32_t i0) +{ + for (; i0 > 0; i0 -= 4) 
+ NOP(_jit); + ASSERT(i0 == 0); +} + +static void +muli(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0) +{ + int32_t r2 = (r0 == r1) ? jit_gpr_regno(get_temp_gpr(_jit)) : r0; + movi(_jit, r2, i0); + mulr(_jit, r0, r1, r2); + if (r0 == r1) + unget_temp_gpr(_jit); +} + +static void +qmulr(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2, int32_t r3) +{ + jit_gpr_t reg; + if (r0 == r2 || r0 == r3) { + reg = get_temp_gpr(_jit); + mulr(_jit, jit_gpr_regno(reg), r2, r3); + } else { + mulr(_jit, r0, r2, r3); + } + SMULH(_jit, r1, r2, r3); + if (r0 == r2 || r0 == r3) { + movr(_jit, r0, jit_gpr_regno(reg)); + unget_temp_gpr(_jit); + } +} + +static void +qmuli(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2, jit_word_t i0) +{ + jit_gpr_t reg = get_temp_gpr(_jit); + movi(_jit, jit_gpr_regno(reg), i0); + qmulr(_jit, r0, r1, r2, jit_gpr_regno(reg)); + unget_temp_gpr(_jit); +} + +static void +qmulr_u(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2, int32_t r3) +{ + jit_gpr_t reg; + if (r0 == r2 || r0 == r3) { + reg = get_temp_gpr(_jit); + mulr(_jit, jit_gpr_regno(reg), r2, r3); + } else { + mulr(_jit, r0, r2, r3); + } + UMULH(_jit, r1, r2, r3); + if (r0 == r2 || r0 == r3) { + movr(_jit, r0, jit_gpr_regno(reg)); + unget_temp_gpr(_jit); + } +} + +static void +qmuli_u(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2, jit_word_t i0) +{ + jit_gpr_t reg = get_temp_gpr(_jit); + movi(_jit, jit_gpr_regno(reg), i0); + qmulr_u(_jit, r0, r1, r2, jit_gpr_regno(reg)); + unget_temp_gpr(_jit); +} + +static void +divi(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0) +{ + int32_t r2 = (r0 == r1) ? jit_gpr_regno(get_temp_gpr(_jit)) : r0; + movi(_jit, r2, i0); + divr(_jit, r0, r1, r2); + if (r0 == r1) + unget_temp_gpr(_jit); +} + +static void +divi_u(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0) +{ + int32_t r2 = (r0 == r1) ? 
jit_gpr_regno(get_temp_gpr(_jit)) : r0; + movi(_jit, r2, i0); + divr_u(_jit, r0, r1, r2); + if (r0 == r1) + unget_temp_gpr(_jit); +} + +static void +qdivi(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2, jit_word_t i0) +{ + jit_gpr_t reg = get_temp_gpr(_jit); + movi(_jit, jit_gpr_regno(reg), i0); + qdivr(_jit, r0, r1, r2, jit_gpr_regno(reg)); + unget_temp_gpr(_jit); +} + +static void +qdivi_u(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2, jit_word_t i0) +{ + jit_gpr_t reg = get_temp_gpr(_jit); + movi(_jit, jit_gpr_regno(reg), i0); + qdivr_u(_jit, r0, r1, r2, jit_gpr_regno(reg)); + unget_temp_gpr(_jit); +} + +static void +remr(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2) +{ + if (r0 == r1 || r0 == r2) { + jit_gpr_t reg = get_temp_gpr(_jit); + divr(_jit, jit_gpr_regno(reg), r1, r2); + mulr(_jit, jit_gpr_regno(reg), r2, jit_gpr_regno(reg)); + subr(_jit, r0, r1, jit_gpr_regno(reg)); + unget_temp_gpr(_jit); + } else { + divr(_jit, r0, r1, r2); + mulr(_jit, r0, r2, r0); + subr(_jit, r0, r1, r0); + } +} + +static void +remi(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0) +{ + jit_gpr_t reg = get_temp_gpr(_jit); + movi(_jit, jit_gpr_regno(reg), i0); + remr(_jit, r0, r1, jit_gpr_regno(reg)); + unget_temp_gpr(_jit); +} + +static void +remr_u(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2) +{ + if (r0 == r1 || r0 == r2) { + jit_gpr_t reg = get_temp_gpr(_jit); + divr_u(_jit, jit_gpr_regno(reg), r1, r2); + mulr(_jit, jit_gpr_regno(reg), r2, jit_gpr_regno(reg)); + subr(_jit, r0, r1, jit_gpr_regno(reg)); + unget_temp_gpr(_jit); + } else { + divr_u(_jit, r0, r1, r2); + mulr(_jit, r0, r2, r0); + subr(_jit, r0, r1, r0); + } +} + +static void +remi_u(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0) +{ + jit_gpr_t reg = get_temp_gpr(_jit); + movi(_jit, jit_gpr_regno(reg), i0); + remr_u(_jit, r0, r1, jit_gpr_regno(reg)); + unget_temp_gpr(_jit); +} + +static void +lshi(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0) +{ + if (i0 
== 0) { + movr(_jit, r0, r1); + } else { + ASSERT(i0 > 0 && i0 < 64); + LSLI(_jit, r0, r1, i0); + } +} + +static void +rshi(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0) +{ + if (i0 == 0) { + movr(_jit, r0, r1); + } else { + ASSERT(i0 > 0 && i0 < 64); + ASRI(_jit, r0, r1, i0); + } +} + +static void +rshi_u(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0) +{ + if (i0 == 0) { + movr(_jit, r0, r1); + } else { + ASSERT(i0 > 0 && i0 < 64); + LSRI(_jit, r0, r1, i0); + } +} + +static void +andi(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0) +{ + int32_t imm; + if (i0 == 0) { + movi(_jit, r0, 0); + } else if (i0 == -1){ + movr(_jit, r0, r1); + } else { + imm = logical_immediate(i0); + if (imm != -1) { + ANDI(_jit, r0, r1, imm); + } else { + int32_t r2 = (r0 == r1) ? jit_gpr_regno(get_temp_gpr(_jit)) : r0; + movi(_jit, r2, i0); + andr(_jit, r0, r1, r2); + if (r0 == r1) + unget_temp_gpr(_jit); + } + } +} + +static void +ori(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0) +{ + int32_t imm; + if (i0 == 0) { + movr(_jit, r0, r1); + } else if (i0 == -1) { + movi(_jit, r0, -1); + } else { + imm = logical_immediate(i0); + if (imm != -1) { + ORRI(_jit, r0, r1, imm); + } else { + int32_t r2 = (r0 == r1) ? jit_gpr_regno(get_temp_gpr(_jit)) : r0; + movi(_jit, r2, i0); + orr(_jit, r0, r1, r2); + if (r0 == r1) + unget_temp_gpr(_jit); + } + } +} + +static void +xori(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0) +{ + int32_t imm; + if (i0 == 0) { + movr(_jit, r0, r1); + } else if (i0 == -1) { + comr(_jit, r0, r1); + } else { + imm = logical_immediate(i0); + if (imm != -1) { + EORI(_jit, r0, r1, imm); + } else { + int32_t r2 = (r0 == r1) ? 
jit_gpr_regno(get_temp_gpr(_jit)) : r0; + movi(_jit, r2, i0); + xorr(_jit, r0, r1, r2); + if (r0 == r1) + unget_temp_gpr(_jit); + } + } +} + +static void +bswapr_us(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + bswapr_ul(_jit, r0, r1); + rshi_u(_jit, r0, r0, 48); +} + +static void +bswapr_ui(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + bswapr_ul(_jit, r0, r1); + rshi_u(_jit, r0, r0, 32); +} + +static void +ldi_c(jit_state_t *_jit, int32_t r0, jit_word_t i0) +{ + movi(_jit, r0, i0); + ldr_c(_jit, r0, r0); +} + +static void +ldr_uc(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + LDRBI(_jit, r0, r1, 0); +#if 0 + extr_uc(_jit, r0, r0); +#endif +} + +static void +ldi_uc(jit_state_t *_jit, int32_t r0, jit_word_t i0) +{ + movi(_jit, r0, i0); + ldr_uc(_jit, r0, r0); +} + +static void +ldi_s(jit_state_t *_jit, int32_t r0, jit_word_t i0) +{ + movi(_jit, r0, i0); + ldr_s(_jit, r0, r0); +} + +static void +ldr_us(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + LDRHI(_jit, r0, r1, 0); +#if 0 + extr_us(_jit, r0, r0); +#endif +} + +static void +ldi_us(jit_state_t *_jit, int32_t r0, jit_word_t i0) +{ + movi(_jit, r0, i0); + ldr_us(_jit, r0, r0); +} + +static void +ldi_i(jit_state_t *_jit, int32_t r0, jit_word_t i0) +{ + movi(_jit, r0, i0); + ldr_i(_jit, r0, r0); +} + +static void +ldr_ui(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + LDRWI(_jit, r0, r1, 0); +#if 0 + extr_ui(_jit, r0, r0); +#endif +} + +static void +ldi_ui(jit_state_t *_jit, int32_t r0, jit_word_t i0) +{ + movi(_jit, r0, i0); + ldr_ui(_jit, r0, r0); +} + +static void +ldr_l(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + LDRI(_jit, r0, r1, 0); +} + +static void +ldi_l(jit_state_t *_jit, int32_t r0, jit_word_t i0) +{ + movi(_jit, r0, i0); + ldr_l(_jit, r0, r0); +} + +static void +ldxr_c(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2) +{ + LDRSB(_jit, r0, r1, r2); + extr_c(_jit, r0, r0); +} + +static void +ldxi_c(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0) +{ + if (i0 >= 0 && i0 <= 4095) { + 
LDRSBI(_jit, r0, r1, i0); + } else if (i0 > -256 && i0 < 0) { + LDURSB(_jit, r0, r1, i0); + } else { + int32_t r2 = (r0 == r1) ? jit_gpr_regno(get_temp_gpr(_jit)) : r0; + movi(_jit, r2, i0); + LDRSB(_jit, r0, r1, r2); + if (r0 == r1) + unget_temp_gpr(_jit); + } + extr_c(_jit, r0, r0); +} + +static void +ldxr_uc(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2) +{ + LDRB(_jit, r0, r1, r2); +#if 0 + extr_uc(_jit, r0, r0); +#endif +} + +static void +ldxi_uc(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0) +{ + if (i0 >= 0 && i0 <= 4095) { + LDRBI(_jit, r0, r1, i0); + } else if (i0 > -256 && i0 < 0) { + LDURB(_jit, r0, r1, i0); + } else { + int32_t r2 = (r0 == r1) ? jit_gpr_regno(get_temp_gpr(_jit)) : r0; + addi(_jit, r2, r1, i0); + ldr_uc(_jit, r0, r2); + if (r0 == r1) + unget_temp_gpr(_jit); + } +#if 0 + extr_uc(_jit, r0, r0); +#endif +} + +static void +ldxi_s(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0) +{ + ASSERT(!(i0 & 1)); + if (i0 >= 0 && i0 <= 8191) { + LDRSHI(_jit, r0, r1, i0 >> 1); + } else if (i0 > -256 && i0 < 0) { + LDURSH(_jit, r0, r1, i0); + } else { + int32_t r2 = (r0 == r1) ? jit_gpr_regno(get_temp_gpr(_jit)) : r0; + movi(_jit, r2, i0); + LDRSH(_jit, r0, r1, r2); + if (r0 == r1) + unget_temp_gpr(_jit); + } +} + +static void +ldxr_us(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2) +{ + LDRH(_jit, r0, r1, r2); +#if 0 + extr_us(_jit, r0, r0); +#endif +} + +static void +ldxi_us(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0) +{ + ASSERT(!(i0 & 1)); + if (i0 >= 0 && i0 <= 8191) { + LDRHI(_jit, r0, r1, i0 >> 1); + } else if (i0 > -256 && i0 < 0) { + LDURH(_jit, r0, r1, i0); + } else { + int32_t r2 = (r0 == r1) ? 
jit_gpr_regno(get_temp_gpr(_jit)) : r0; + movi(_jit, r2, i0); + LDRH(_jit, r0, r1, r2); + if (r0 == r1) + unget_temp_gpr(_jit); + } +#if 0 + extr_us(_jit, r0, r0); +#endif +} + +static void +ldxi_i(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0) +{ + ASSERT(!(i0 & 3)); + if (i0 >= 0 && i0 <= 16383) { + LDRSWI(_jit, r0, r1, i0 >> 2); + } else if (i0 > -256 && i0 < 0) { + LDURSW(_jit, r0, r1, i0); + } else { + int32_t r2 = (r0 == r1) ? jit_gpr_regno(get_temp_gpr(_jit)) : r0; + addi(_jit, r2, r1, i0); + ldr_i(_jit, r0, r2); + if (r0 == r1) + unget_temp_gpr(_jit); + } +} + +static void +ldxr_ui(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2) +{ + LDRW(_jit, r0, r1, r2); +#if 0 + extr_ui(_jit, r0, r0); +#endif +} + +static void +ldxi_ui(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0) +{ + ASSERT(!(i0 & 3)); + if (i0 >= 0 && i0 <= 16383) { + LDRWI(_jit, r0, r1, i0 >> 2); + } else if (i0 > -256 && i0 < 0) { + LDURW(_jit, r0, r1, i0); + } else { + int32_t r2 = (r0 == r1) ? jit_gpr_regno(get_temp_gpr(_jit)) : r0; + movi(_jit, r2, i0); + LDRW(_jit, r0, r1, r2); + if (r0 == r1) + unget_temp_gpr(_jit); + } +#if 0 + extr_ui(_jit, r0, r0); +#endif +} + +static void +ldxi_l(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0) +{ + ASSERT(!(i0 & 7)); + if (i0 >= 0 && i0 <= 32767) { + LDRI(_jit, r0, r1, i0 >> 3); + } else if (i0 > -256 && i0 < 0) { + LDUR(_jit, r0, r1, i0); + } else { + int32_t r2 = (r0 == r1) ? 
jit_gpr_regno(get_temp_gpr(_jit)) : r0; + addi(_jit, r2, r1, i0); + ldr_l(_jit, r0, r2); + if (r0 == r1) + unget_temp_gpr(_jit); + } +} + +static void +sti_c(jit_state_t *_jit, jit_word_t i0, int32_t r0) +{ + jit_gpr_t reg = get_temp_gpr(_jit); + movi(_jit, jit_gpr_regno(reg), i0); + str_c(_jit, jit_gpr_regno(reg), r0); + unget_temp_gpr(_jit); +} + +static void +sti_s(jit_state_t *_jit, jit_word_t i0, int32_t r0) +{ + jit_gpr_t reg = get_temp_gpr(_jit); + movi(_jit, jit_gpr_regno(reg), i0); + str_s(_jit, jit_gpr_regno(reg), r0); + unget_temp_gpr(_jit); +} + +static void +sti_i(jit_state_t *_jit, jit_word_t i0, int32_t r0) +{ + jit_gpr_t reg = get_temp_gpr(_jit); + movi(_jit, jit_gpr_regno(reg), i0); + str_i(_jit, jit_gpr_regno(reg), r0); + unget_temp_gpr(_jit); +} + +static void +sti_l(jit_state_t *_jit, jit_word_t i0, int32_t r0) +{ + jit_gpr_t reg = get_temp_gpr(_jit); + movi(_jit, jit_gpr_regno(reg), i0); + str_l(_jit, jit_gpr_regno(reg), r0); + unget_temp_gpr(_jit); +} + +static void +stxi_c(jit_state_t *_jit, jit_word_t i0, int32_t r0, int32_t r1) +{ + if (i0 >= 0 && i0 <= 4095) { + STRBI(_jit, r1, r0, i0); + } else if (i0 > -256 && i0 < 0) { + STURB(_jit, r1, r0, i0); + } else { + jit_gpr_t reg = get_temp_gpr(_jit); + addi(_jit, jit_gpr_regno(reg), r0, i0); + str_c(_jit, jit_gpr_regno(reg), r1); + unget_temp_gpr(_jit); + } +} + +static void +stxi_s(jit_state_t *_jit, jit_word_t i0, int32_t r0, int32_t r1) +{ + ASSERT(!(i0 & 1)); + if (i0 >= 0 && i0 <= 8191) { + STRHI(_jit, r1, r0, i0 >> 1); + } else if (i0 > -256 && i0 < 0) { + STURH(_jit, r1, r0, i0); + } else { + jit_gpr_t reg = get_temp_gpr(_jit); + addi(_jit, jit_gpr_regno(reg), r0, i0); + str_s(_jit, jit_gpr_regno(reg), r1); + unget_temp_gpr(_jit); + } +} + +static void +stxi_i(jit_state_t *_jit, jit_word_t i0, int32_t r0, int32_t r1) +{ + ASSERT(!(i0 & 3)); + if (i0 >= 0 && i0 <= 16383) { + STRWI(_jit, r1, r0, i0 >> 2); + } else if (i0 > -256 && i0 < 0) { + STURW(_jit, r1, r0, i0); + } else { + 
jit_gpr_t reg = get_temp_gpr(_jit);
+    addi(_jit, jit_gpr_regno(reg), r0, i0);
+    str_i(_jit, jit_gpr_regno(reg), r1);
+    unget_temp_gpr(_jit);
+  }
+}
+
+/* Emit a store of r1 to the address r0 + i0.  i0 must be a multiple of 8
+ * (asserted).  Offsets 0..32767 use STRI with the offset divided by 8,
+ * offsets -255..-1 use STUR with the byte offset, and any other offset is
+ * added to r0 in a temporary register which is then passed to str_l. */
+static void
+stxi_l(jit_state_t *_jit, jit_word_t i0, int32_t r0, int32_t r1)
+{
+  ASSERT(!(i0 & 7));
+  if (i0 >= 0 && i0 <= 32767) {
+    STRI(_jit, r1, r0, i0 >> 3);
+  } else if (i0 > -256 && i0 < 0) {
+    STUR(_jit, r1, r0, i0);
+  } else {
+    jit_gpr_t reg = get_temp_gpr(_jit);
+    addi(_jit, jit_gpr_regno(reg), r0, i0);
+    str_l(_jit, jit_gpr_regno(reg), r1);
+    unget_temp_gpr(_jit);
+  }
+}
+
+/* Load a to-be-patched value into r0 through movi_from_pool and return the
+ * relocation it produced. */
+static jit_reloc_t
+mov_addr(jit_state_t *_jit, int32_t r0)
+{
+  return movi_from_pool(_jit, r0);
+}
+
+/* Emit a branch taken when r0 == i1 and return its relocation.  A
+ * comparison against zero uses CBZ; any other immediate goes through bcci
+ * with BCC_EQ. */
+static jit_reloc_t
+beqi(jit_state_t *_jit, int32_t r0, jit_word_t i1)
+{
+  if (i1 == 0) {
+    return CBZ(_jit, r0);
+  } else {
+    return bcci(_jit, BCC_EQ, r0, i1);
+  }
+}
+
+/* Emit a branch taken when r0 != i1 and return its relocation.  A
+ * comparison against zero uses CBNZ; any other immediate goes through bcci
+ * with BCC_NE. */
+static jit_reloc_t
+bnei(jit_state_t *_jit, int32_t r0, jit_word_t i1)
+{
+  if (i1 == 0) {
+    return CBNZ(_jit, r0);
+  } else {
+    return bcci(_jit, BCC_NE, r0, i1);
+  }
+}
+
+/* Emit an unconditional B and return its relocation for later patching. */
+static jit_reloc_t
+jmp(jit_state_t *_jit)
+{
+  return B(_jit);
+}
+
+/* Emit an unconditional jump and patch it to target the address i0.
+ * This function returns void, so the patch call is a plain statement:
+ * ISO C does not allow `return expr;' in a void function. */
+static void
+jmpi(jit_state_t *_jit, jit_word_t i0)
+{
+  jit_patch_there(_jit, jmp(_jit), (void*)i0);
+}
+
+/* Emit a BL and return its relocation for later patching. */
+static jit_reloc_t
+call(jit_state_t *_jit)
+{
+  return BL(_jit);
+}
+
+/* Emit a call and patch it to target the address i0.  As in jmpi, the
+ * patch call is a plain statement because this function returns void. */
+static void
+calli(jit_state_t *_jit, jit_word_t i0)
+{
+  jit_patch_there(_jit, call(_jit), (void*)i0);
+}
+
+/* On this backend a jump-with-link is exactly calli; nothing is returned. */
+static void
+jmpi_with_link(jit_state_t *_jit, jit_word_t i0)
+{
+  calli(_jit, i0);
+}
+
+/* Intentionally empty: no code is emitted by this backend. */
+static void
+push_link_register(jit_state_t *_jit)
+{
+}
+
+/* Intentionally empty: no code is emitted by this backend. */
+static void
+pop_link_register(jit_state_t *_jit)
+{
+}
+
+/* Emit a RET instruction. */
+static void
+ret(jit_state_t *_jit)
+{
+  RET(_jit);
+}
+
+/* Move register r into X0, then emit a return. */
+static void
+retr(jit_state_t *_jit, int32_t r)
+{
+  movr(_jit, jit_gpr_regno(_X0), r);
+  ret(_jit);
+}
+
+/* Load the immediate i into X0, then emit a return. */
+static void
+reti(jit_state_t *_jit, int32_t i)
+{
+  movi(_jit, jit_gpr_regno(_X0), i);
+  ret(_jit);
+}
+
+/* Fetch a return value from X0 into r0, passing it through extr_c. */
+static void
+retval_c(jit_state_t *_jit, int32_t r0)
+{
+  extr_c(_jit, r0, jit_gpr_regno(_X0));
+}
+
+static void
+retval_uc(jit_state_t *_jit,
int32_t r0) +{ + extr_uc(_jit, r0, jit_gpr_regno(_X0)); +} + +static void +retval_s(jit_state_t *_jit, int32_t r0) +{ + extr_s(_jit, r0, jit_gpr_regno(_X0)); +} + +static void +retval_us(jit_state_t *_jit, int32_t r0) +{ + extr_us(_jit, r0, jit_gpr_regno(_X0)); +} + +static void +retval_i(jit_state_t *_jit, int32_t r0) +{ + extr_i(_jit, r0, jit_gpr_regno(_X0)); +} + +static void +retval_ui(jit_state_t *_jit, int32_t r0) +{ + extr_ui(_jit, r0, jit_gpr_regno(_X0)); +} + +static void +retval_l(jit_state_t *_jit, int32_t r0) +{ + movr(_jit, r0, jit_gpr_regno(_X0)); +} + +static uint32_t* +jmp_without_veneer(jit_state_t *_jit) +{ + uint32_t *loc = _jit->pc.ui; + emit_u32(_jit, encode_o26(A64_B)); + return loc; +} + +static void +patch_jmp_without_veneer(jit_state_t *_jit, uint32_t *loc) +{ + patch_jmp_offset(loc, _jit->pc.ui - loc); +} + +static void +ldr_atomic(jit_state_t *_jit, int32_t dst, int32_t loc) +{ + LDAR(_jit, dst, loc); +} + +static void +str_atomic(jit_state_t *_jit, int32_t loc, int32_t val) +{ + STLR(_jit, val, loc); +} + +static void +swap_atomic(jit_state_t *_jit, int32_t dst, int32_t loc, int32_t val) +{ + int32_t result = jit_gpr_regno(get_temp_gpr(_jit)); + int32_t dst_or_tmp; + if (dst == val || dst == loc) + dst_or_tmp = jit_gpr_regno(get_temp_gpr(_jit)); + else + dst_or_tmp = dst; + + void *retry = jit_address(_jit); + LDAXR(_jit, dst_or_tmp, loc); + STLXR(_jit, val, loc, result); + jit_patch_there(_jit, bnei(_jit, result, 0), retry); + movr(_jit, dst, dst_or_tmp); + + if (dst == val || dst == loc) unget_temp_gpr(_jit); + unget_temp_gpr(_jit); +} + +static void +cas_atomic(jit_state_t *_jit, int32_t dst, int32_t loc, int32_t expected, + int32_t desired) +{ + int32_t dst_or_tmp; + if (dst == loc || dst == expected || dst == desired) + dst_or_tmp = jit_gpr_regno(get_temp_gpr(_jit)); + else + dst_or_tmp = dst; + void *retry = jit_address(_jit); + LDAXR(_jit, dst_or_tmp, loc); + jit_reloc_t bad = bner(_jit, dst_or_tmp, expected); + int result = 
jit_gpr_regno(get_temp_gpr(_jit)); + STLXR(_jit, desired, loc, result); + jit_patch_there(_jit, bnei(_jit, result, 0), retry); + unget_temp_gpr(_jit); + jit_patch_here(_jit, bad); + movr(_jit, dst, dst_or_tmp); + + if (dst == loc || dst == expected || dst == desired) + unget_temp_gpr(_jit); +} + +static void +breakpoint(jit_state_t *_jit) +{ + BRK(_jit); +} diff --git a/deps/lightening/lightening/aarch64-fpu.c b/deps/lightening/lightening/aarch64-fpu.c new file mode 100644 index 0000000..6297342 --- /dev/null +++ b/deps/lightening/lightening/aarch64-fpu.c @@ -0,0 +1,810 @@ +/* + * Copyright (C) 2013-2019 Free Software Foundation, Inc. + * + * This file is part of GNU lightning. + * + * GNU lightning is free software; you can redistribute it and/or modify it + * under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation; either version 3, or (at your option) + * any later version. + * + * GNU lightning is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public + * License for more details. 
+ * + * Authors: + * Paulo Cesar Pereira de Andrade + */ + +static void +osvvv(jit_state_t *_jit, int32_t Op, int32_t Sz, int32_t Rd, int32_t Rn, + int32_t Rm) +{ + uint32_t inst = Op; + inst = write_size_bitfield(inst, Sz); + inst = write_Rd_bitfield(inst, Rd); + inst = write_Rn_bitfield(inst, Rn); + inst = write_Rm_bitfield(inst, Rm); + emit_u32_with_pool(_jit, inst); +} + +static void +osvv_(jit_state_t *_jit, int32_t Op, int32_t Sz, int32_t Rd, int32_t Rn) +{ + uint32_t inst = Op; + inst = write_size_bitfield(inst, Sz); + inst = write_Rd_bitfield(inst, Rd); + inst = write_Rn_bitfield(inst, Rn); + emit_u32_with_pool(_jit, inst); +} + +static void +os_vv(jit_state_t *_jit, int32_t Op, int32_t Sz, int32_t Rn, int32_t Rm) +{ + uint32_t inst = Op; + inst = write_size_bitfield(inst, Sz); + inst = write_Rn_bitfield(inst, Rn); + inst = write_Rm_bitfield(inst, Rm); + emit_u32_with_pool(_jit, inst); +} + +#define A64_SCVTF 0x1e220000 +#define A64_FMOVWV 0x1e260000 +#define A64_FMOVVW 0x1e270000 +#define A64_FMOVXV 0x9e260000 +#define A64_FMOVVX 0x9e270000 +#define A64_FCVTZS 0x1e380000 +#define A64_FCMPE 0x1e202010 +#define A64_FMOV 0x1e204000 +#define A64_FABS 0x1e20c000 +#define A64_FNEG 0x1e214000 +#define A64_FSQRT 0x1e21c000 +#define A64_FCVTS 0x1e224000 +#define A64_FCVTD 0x1e22c000 +#define A64_FMUL 0x1e200800 +#define A64_FDIV 0x1e201800 +#define A64_FADD 0x1e202800 +#define A64_FSUB 0x1e203800 + +static void +FCMPES(jit_state_t *_jit, int32_t Rn, int32_t Rm) +{ + os_vv(_jit, A64_FCMPE, 0, Rn, Rm); +} + +static void +FCMPED(jit_state_t *_jit, int32_t Rn, int32_t Rm) +{ + os_vv(_jit, A64_FCMPE, 1, Rn, Rm); +} + +static void +FMOVS(jit_state_t *_jit, int32_t Rd, int32_t Rn) +{ + osvv_(_jit, A64_FMOV, 0, Rd, Rn); +} + +static void +FMOVD(jit_state_t *_jit, int32_t Rd, int32_t Rn) +{ + osvv_(_jit, A64_FMOV, 1, Rd, Rn); +} + +static void +FMOVWS(jit_state_t *_jit, int32_t Rd, int32_t Rn) +{ + osvv_(_jit, A64_FMOVWV, 0, Rd, Rn); +} + +static void +FMOVSW(jit_state_t 
*_jit, int32_t Rd, int32_t Rn) +{ + osvv_(_jit, A64_FMOVVW, 0, Rd, Rn); +} + +static void +FMOVXD(jit_state_t *_jit, int32_t Rd, int32_t Rn) +{ + osvv_(_jit, A64_FMOVXV, 1, Rd, Rn); +} + +static void +FMOVDX(jit_state_t *_jit, int32_t Rd, int32_t Rn) +{ + osvv_(_jit, A64_FMOVVX, 1, Rd, Rn); +} + +static void +FCVT_SD(jit_state_t *_jit, int32_t Rd, int32_t Rn) +{ + osvv_(_jit, A64_FCVTS, 1, Rd, Rn); +} + +static void +FCVT_DS(jit_state_t *_jit, int32_t Rd, int32_t Rn) +{ + osvv_(_jit, A64_FCVTD, 0, Rd, Rn); +} + +static void +SCVTFS(jit_state_t *_jit, int32_t Rd, int32_t Rn) +{ + osvv_(_jit, A64_SCVTF|XS, 0, Rd, Rn); +} + +static void +SCVTFD(jit_state_t *_jit, int32_t Rd, int32_t Rn) +{ + osvv_(_jit, A64_SCVTF|XS, 1, Rd, Rn); +} + +static void +FCVTSZ_WS(jit_state_t *_jit, int32_t Rd, int32_t Rn) +{ + osvv_(_jit, A64_FCVTZS, 0, Rd, Rn); +} + +static void +FCVTSZ_WD(jit_state_t *_jit, int32_t Rd, int32_t Rn) +{ + osvv_(_jit, A64_FCVTZS, 1, Rd, Rn); +} + +static void +FCVTSZ_XS(jit_state_t *_jit, int32_t Rd, int32_t Rn) +{ + osvv_(_jit, A64_FCVTZS|XS, 0, Rd, Rn); +} + +static void +FCVTSZ_XD(jit_state_t *_jit, int32_t Rd, int32_t Rn) +{ + osvv_(_jit, A64_FCVTZS|XS, 1, Rd, Rn); +} + +static void +FABSS(jit_state_t *_jit, int32_t Rd, int32_t Rn) +{ + osvv_(_jit, A64_FABS, 0, Rd, Rn); +} + +static void +FABSD(jit_state_t *_jit, int32_t Rd, int32_t Rn) +{ + osvv_(_jit, A64_FABS, 1, Rd, Rn); +} + +static void +FNEGS(jit_state_t *_jit, int32_t Rd, int32_t Rn) +{ + osvv_(_jit, A64_FNEG, 0, Rd, Rn); +} + +static void +FNEGD(jit_state_t *_jit, int32_t Rd, int32_t Rn) +{ + osvv_(_jit, A64_FNEG, 1, Rd, Rn); +} + +static void +FSQRTS(jit_state_t *_jit, int32_t Rd, int32_t Rn) +{ + osvv_(_jit, A64_FSQRT, 0, Rd, Rn); +} + +static void +FSQRTD(jit_state_t *_jit, int32_t Rd, int32_t Rn) +{ + osvv_(_jit, A64_FSQRT, 1, Rd, Rn); +} + +static void +FADDS(jit_state_t *_jit, int32_t Rd, int32_t Rn, int32_t Rm) +{ + osvvv(_jit, A64_FADD, 0, Rd, Rn, Rm); +} + +static void +FADDD(jit_state_t 
*_jit, int32_t Rd, int32_t Rn, int32_t Rm) +{ + osvvv(_jit, A64_FADD, 1, Rd, Rn, Rm); +} + +static void +FSUBS(jit_state_t *_jit, int32_t Rd, int32_t Rn, int32_t Rm) +{ + osvvv(_jit, A64_FSUB, 0, Rd, Rn, Rm); +} + +static void +FSUBD(jit_state_t *_jit, int32_t Rd, int32_t Rn, int32_t Rm) +{ + osvvv(_jit, A64_FSUB, 1, Rd, Rn, Rm); +} + +static void +FMULS(jit_state_t *_jit, int32_t Rd, int32_t Rn, int32_t Rm) +{ + osvvv(_jit, A64_FMUL, 0, Rd, Rn, Rm); +} + +static void +FMULD(jit_state_t *_jit, int32_t Rd, int32_t Rn, int32_t Rm) +{ + osvvv(_jit, A64_FMUL, 1, Rd, Rn, Rm); +} + +static void +FDIVS(jit_state_t *_jit, int32_t Rd, int32_t Rn, int32_t Rm) +{ + osvvv(_jit, A64_FDIV, 0, Rd, Rn, Rm); +} + +static void +FDIVD(jit_state_t *_jit, int32_t Rd, int32_t Rn, int32_t Rm) +{ + osvvv(_jit, A64_FDIV, 1, Rd, Rn, Rm); +} + +static void +truncr_f_l(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + FCVTSZ_XS(_jit, r0, r1); +} + +static void +truncr_d_l(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + FCVTSZ_XD(_jit, r0, r1); +} + +static void +addr_f(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2) +{ + FADDS(_jit, r0, r1, r2); +} + +static void +subr_f(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2) +{ + FSUBS(_jit, r0, r1, r2); +} + +static void +mulr_f(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2) +{ + FMULS(_jit, r0, r1, r2); +} + +static void +divr_f(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2) +{ + FDIVS(_jit, r0, r1, r2); +} + +static void +absr_f(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + FABSS(_jit, r0, r1); +} + +static void +negr_f(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + FNEGS(_jit, r0, r1); +} + +static void +sqrtr_f(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + FSQRTS(_jit, r0, r1); +} + +static void +extr_f(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + SCVTFS(_jit, r0, r1); +} + +static void +extr_d_f(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + FCVT_SD(_jit, r0, r1); +} + +static jit_reloc_t +fbccr(jit_state_t 
*_jit, int32_t cc, int32_t r0, int32_t r1) +{ + FCMPES(_jit, r0, r1); + return B_C(_jit, cc); +} + +static jit_reloc_t +bltr_f(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + return fbccr(_jit, BCC_MI,r0, r1); +} + +static jit_reloc_t +bler_f(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + return fbccr(_jit, BCC_LS,r0, r1); +} + +static jit_reloc_t +beqr_f(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + return fbccr(_jit, BCC_EQ,r0, r1); +} + +static jit_reloc_t +bger_f(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + return fbccr(_jit, BCC_GE,r0, r1); +} + +static jit_reloc_t +bgtr_f(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + return fbccr(_jit, BCC_GT,r0, r1); +} + +static jit_reloc_t +bner_f(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + return fbccr(_jit, BCC_NE,r0, r1); +} + +static jit_reloc_t +bunltr_f(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + return fbccr(_jit, BCC_LT,r0, r1); +} + +static jit_reloc_t +bunler_f(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + return fbccr(_jit, BCC_LE,r0, r1); +} + +static jit_reloc_t +bunger_f(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + return fbccr(_jit, BCC_PL,r0, r1); +} + +static jit_reloc_t +bungtr_f(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + return fbccr(_jit, BCC_HI,r0, r1); +} + +static jit_reloc_t +bordr_f(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + return fbccr(_jit, BCC_VC,r0, r1); +} + +static jit_reloc_t +bunordr_f(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + return fbccr(_jit, BCC_VS, r0, r1); +} + +static void +addr_d(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2) +{ + FADDD(_jit, r0, r1, r2); +} + +static void +subr_d(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2) +{ + FSUBD(_jit, r0, r1, r2); +} + +static void +mulr_d(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2) +{ + FMULD(_jit, r0, r1, r2); +} + +static void +divr_d(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2) +{ + FDIVD(_jit, r0, r1, r2); +} + +static void +absr_d(jit_state_t *_jit, int32_t r0, 
int32_t r1) +{ + FABSD(_jit, r0, r1); +} + +static void +negr_d(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + FNEGD(_jit, r0, r1); +} + +static void +sqrtr_d(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + FSQRTD(_jit, r0, r1); +} + +static void +extr_d(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + SCVTFD(_jit, r0, r1); +} + +static void +extr_f_d(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + FCVT_DS(_jit, r0, r1); +} + +static jit_reloc_t +dbccr(jit_state_t *_jit, int32_t cc, int32_t r0, int32_t r1) +{ + FCMPED(_jit, r0, r1); + return B_C(_jit, cc); +} + +static jit_reloc_t +bltr_d(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + return dbccr(_jit, BCC_MI, r0, r1); +} + +static jit_reloc_t +bler_d(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + return dbccr(_jit, BCC_LS, r0, r1); +} + +static jit_reloc_t +beqr_d(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + return dbccr(_jit, BCC_EQ, r0, r1); +} + +static jit_reloc_t +bger_d(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + return dbccr(_jit, BCC_GE, r0, r1); +} + +static jit_reloc_t +bgtr_d(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + return dbccr(_jit, BCC_GT, r0, r1); +} + +static jit_reloc_t +bner_d(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + return dbccr(_jit, BCC_NE, r0, r1); +} + +static jit_reloc_t +bunltr_d(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + return dbccr(_jit, BCC_LT, r0, r1); +} + +static jit_reloc_t +bunler_d(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + return dbccr(_jit, BCC_LE, r0, r1); +} + +static jit_reloc_t +bunger_d(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + return dbccr(_jit, BCC_PL, r0, r1); +} + +static jit_reloc_t +bungtr_d(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + return dbccr(_jit, BCC_HI, r0, r1); +} + +static jit_reloc_t +bordr_d(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + return dbccr(_jit, BCC_VC, r0, r1); +} + +static jit_reloc_t +bunordr_d(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + return dbccr(_jit, BCC_VS, r0, r1); +} + + +static void 
+truncr_f_i(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + FCVTSZ_WS(_jit, r0, r1); + extr_i(_jit, r0, r0); +} + +static void +truncr_d_i(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + FCVTSZ_WD(_jit, r0, r1); + extr_i(_jit, r0, r0); +} + +static void +ldr_f(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + jit_gpr_t reg = get_temp_gpr(_jit); + ldr_i(_jit, jit_gpr_regno(reg), r1); + FMOVSW(_jit, r0, jit_gpr_regno(reg)); + unget_temp_gpr(_jit); +} + +static void +ldi_f(jit_state_t *_jit, int32_t r0, jit_word_t i0) +{ + jit_gpr_t reg = get_temp_gpr(_jit); + ldi_i(_jit, jit_gpr_regno(reg), i0); + FMOVSW(_jit, r0, jit_gpr_regno(reg)); + unget_temp_gpr(_jit); +} + +static void +ldxr_f(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2) +{ + jit_gpr_t reg = get_temp_gpr(_jit); + ldxr_i(_jit, jit_gpr_regno(reg), r1, r2); + FMOVSW(_jit, r0, jit_gpr_regno(reg)); + unget_temp_gpr(_jit); +} + +static void +ldxi_f(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0) +{ + jit_gpr_t reg = get_temp_gpr(_jit); + ldxi_i(_jit, jit_gpr_regno(reg), r1, i0); + FMOVSW(_jit, r0, jit_gpr_regno(reg)); + unget_temp_gpr(_jit); +} + +static void +str_f(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + jit_gpr_t reg = get_temp_gpr(_jit); + FMOVWS(_jit, jit_gpr_regno(reg), r1); + str_i(_jit, r0, jit_gpr_regno(reg)); + unget_temp_gpr(_jit); +} + +static void +sti_f(jit_state_t *_jit, jit_word_t i0, int32_t r0) +{ + jit_gpr_t reg = get_temp_gpr(_jit); + FMOVWS(_jit, jit_gpr_regno(reg), r0); + sti_i(_jit, i0, jit_gpr_regno(reg)); + unget_temp_gpr(_jit); +} + +static void +stxr_f(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2) +{ + jit_gpr_t reg = get_temp_gpr(_jit); + FMOVWS(_jit, jit_gpr_regno(reg), r2); + stxr_i(_jit, r0, r1, jit_gpr_regno(reg)); + unget_temp_gpr(_jit); +} + +static void +stxi_f(jit_state_t *_jit, jit_word_t i0, int32_t r0, int32_t r1) +{ + jit_gpr_t reg = get_temp_gpr(_jit); + FMOVWS(_jit, jit_gpr_regno(reg), r1); + stxi_i(_jit, i0, r0, jit_gpr_regno(reg)); + 
unget_temp_gpr(_jit); +} + +static void +movr_f(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + if (r0 != r1) + FMOVS(_jit, r0, r1); +} + +static void +movi_f(jit_state_t *_jit, int32_t r0, float i0) +{ + union { + int32_t i; + float f; + } u; + u.f = i0; + if (u.i == 0) + FMOVSW(_jit, r0, WZR_REGNO); + else { + jit_gpr_t reg = get_temp_gpr(_jit); + /* prevent generating unused top 32 bits */ + movi(_jit, jit_gpr_regno(reg), ((jit_word_t)u.i) & 0xffffffff); + FMOVSW(_jit, r0, jit_gpr_regno(reg)); + unget_temp_gpr(_jit); + } +} + +static jit_reloc_t +buneqr_f(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + FCMPES(_jit, r0, r1); + jit_reloc_t unordered = B_C(_jit, BCC_VS); /* unordered satisfies condition */ + jit_reloc_t neq = B_C(_jit, BCC_NE); /* not equal (or unordered) does not satisfy */ + jit_patch_here(_jit, unordered); + jit_reloc_t ret = B(_jit); + jit_patch_here(_jit, neq); + return ret; +} + +static jit_reloc_t +bltgtr_f(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + FCMPES(_jit, r0, r1); + jit_reloc_t unordered = B_C(_jit, BCC_VS); /* jump over if unordered */ + jit_reloc_t eq = B_C(_jit, BCC_EQ); /* jump over if equal */ + jit_reloc_t ret = B(_jit); + jit_patch_here(_jit, unordered); + jit_patch_here(_jit, eq); + return ret; +} + +static void +ldr_d(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + jit_gpr_t reg = get_temp_gpr(_jit); + ldr_l(_jit, jit_gpr_regno(reg), r1); + FMOVDX(_jit, r0, jit_gpr_regno(reg)); + unget_temp_gpr(_jit); +} + +static void +ldi_d(jit_state_t *_jit, int32_t r0, jit_word_t i0) +{ + jit_gpr_t reg = get_temp_gpr(_jit); + ldi_l(_jit, jit_gpr_regno(reg), i0); + FMOVDX(_jit, r0, jit_gpr_regno(reg)); + unget_temp_gpr(_jit); +} + +static void +ldxr_d(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2) +{ + jit_gpr_t reg = get_temp_gpr(_jit); + ldxr_l(_jit, jit_gpr_regno(reg), r1, r2); + FMOVDX(_jit, r0, jit_gpr_regno(reg)); + unget_temp_gpr(_jit); +} + +static void +ldxi_d(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0) 
+{ + jit_gpr_t reg = get_temp_gpr(_jit); + ldxi_l(_jit, jit_gpr_regno(reg), r1, i0); + FMOVDX(_jit, r0, jit_gpr_regno(reg)); + unget_temp_gpr(_jit); +} + +static void +str_d(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + jit_gpr_t reg = get_temp_gpr(_jit); + FMOVXD(_jit, jit_gpr_regno(reg), r1); + str_l(_jit, r0, jit_gpr_regno(reg)); + unget_temp_gpr(_jit); +} + +static void +sti_d(jit_state_t *_jit, jit_word_t i0, int32_t r0) +{ + jit_gpr_t reg = get_temp_gpr(_jit); + FMOVXD(_jit, jit_gpr_regno(reg), r0); + sti_l(_jit, i0, jit_gpr_regno(reg)); + unget_temp_gpr(_jit); +} + +static void +stxr_d(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2) +{ + jit_gpr_t reg = get_temp_gpr(_jit); + FMOVXD(_jit, jit_gpr_regno(reg), r2); + stxr_l(_jit, r0, r1, jit_gpr_regno(reg)); + unget_temp_gpr(_jit); +} + +static void +stxi_d(jit_state_t *_jit, jit_word_t i0, int32_t r0, int32_t r1) +{ + jit_gpr_t reg = get_temp_gpr(_jit); + FMOVXD(_jit, jit_gpr_regno(reg), r1); + stxi_l(_jit, i0, r0, jit_gpr_regno(reg)); + unget_temp_gpr(_jit); +} + +static void +movr_d(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + if (r0 != r1) + FMOVD(_jit, r0, r1); +} + +static void +movi_d(jit_state_t *_jit, int32_t r0, double i0) +{ + union { + int64_t l; + double d; + } u; + u.d = i0; + if (u.l == 0) + FMOVDX(_jit, r0, XZR_REGNO); + else { + jit_gpr_t reg = get_temp_gpr(_jit); + movi(_jit, jit_gpr_regno(reg), u.l); + FMOVDX(_jit, r0, jit_gpr_regno(reg)); + unget_temp_gpr(_jit); + } +} + +static jit_reloc_t +buneqr_d(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + FCMPED(_jit, r0, r1); + jit_reloc_t unordered = B_C(_jit, BCC_VS); /* unordered satisfies condition */ + jit_reloc_t neq = B_C(_jit, BCC_NE); /* not equal (or unordered) does not satisfy */ + jit_patch_here(_jit, unordered); + jit_reloc_t ret = B(_jit); + jit_patch_here(_jit, neq); + return ret; +} + +static jit_reloc_t +bltgtr_d(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + FCMPED(_jit, r0, r1); + jit_reloc_t unordered = B_C(_jit, 
BCC_VS); /* jump over if unordered */ + jit_reloc_t eq = B_C(_jit, BCC_EQ); /* jump over if equal */ + jit_reloc_t ret = B(_jit); + jit_patch_here(_jit, unordered); + jit_patch_here(_jit, eq); + return ret; +} + +static void +retr_d(jit_state_t *_jit, int32_t r) +{ + movr_d(_jit, jit_fpr_regno(_D0), r); + ret(_jit); +} + +static void +retr_f(jit_state_t *_jit, int32_t r) +{ + movr_f(_jit, jit_fpr_regno(_D0), r); + ret(_jit); +} + +static void +retval_f(jit_state_t *_jit, int32_t r0) +{ + movr_f(_jit, r0, jit_fpr_regno(_D0)); +} + +static void +retval_d(jit_state_t *_jit, int32_t r0) +{ + movr_d(_jit, r0, jit_fpr_regno(_D0)); +} diff --git a/deps/lightening/lightening/aarch64.c b/deps/lightening/lightening/aarch64.c new file mode 100644 index 0000000..1fe523a --- /dev/null +++ b/deps/lightening/lightening/aarch64.c @@ -0,0 +1,235 @@ +/* + * Copyright (C) 2013-2020 Free Software Foundation, Inc. + * + * This file is part of GNU lightning. + * + * GNU lightning is free software; you can redistribute it and/or modify it + * under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation; either version 3, or (at your option) + * any later version. + * + * GNU lightning is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public + * License for more details. 
+ *
+ * Authors:
+ *      Paulo Cesar Pereira de Andrade
+ */
+
+/* libgcc */
+extern void __clear_cache(void *, void *);
+
+
+/* Extract the WIDTH-bit field that starts at bit SHIFT of WORD and
+ * sign-extend it to 32 bits.  The left shift is performed on the unsigned
+ * word so that no signed left shift can overflow; the following right
+ * shift on int32_t is what replicates the field's top bit (this assumes
+ * the compiler shifts negative values arithmetically). */
+static inline int32_t
+read_signed_bitfield(uint32_t word, uint8_t width, uint8_t shift)
+{
+  return (int32_t)(word << (32 - width - shift)) >> (32 - width);
+}
+
+/* Extract the WIDTH-bit field that starts at bit SHIFT of WORD,
+ * zero-extended. */
+static inline uint32_t
+read_unsigned_bitfield(uint32_t word, uint8_t width, uint8_t shift)
+{
+  return word << (32 - width - shift) >> (32 - width);
+}
+
+/* Return nonzero iff -2^(BITS-1) <= DIFF < 2^(BITS-1), i.e. DIFF fits in
+ * a signed field of BITS bits.  The bound is built from a positive 1 in
+ * ptrdiff_t because left-shifting a negative value is undefined in C. */
+static inline int
+in_signed_range(ptrdiff_t diff, uint8_t bits)
+{
+  ptrdiff_t limit = (ptrdiff_t)1 << (bits - 1);
+  return -limit <= diff && diff < limit;
+}
+
+/* Return nonzero iff VAL < 2^BITS.  The comparison is done in 64 bits so
+ * that the shift is defined for every BITS value the assertion lets
+ * through, not only for BITS < 31. */
+static inline int
+in_unsigned_range(uint32_t val, uint8_t bits)
+{
+  ASSERT(bits < __WORDSIZE);
+  return (uint64_t)val < ((uint64_t)1 << bits);
+}
+
+/* OR VAL into WORD at bit position SHIFT and return the result.  Asserts
+ * that the destination field is still zero and that VAL fits in WIDTH
+ * bits. */
+static inline uint32_t
+write_unsigned_bitfield(uint32_t word, uint32_t val, uint8_t width, uint8_t shift)
+{
+  ASSERT(read_unsigned_bitfield(word, width, shift) == 0);
+  ASSERT(in_unsigned_range(val, width));
+  return word | (val << shift);
+}
+
+/* Signed counterpart of write_unsigned_bitfield: VAL is masked down to its
+ * low WIDTH bits before being OR-ed into WORD at bit position SHIFT.
+ * Asserts that the destination field is still zero and that VAL fits in a
+ * signed WIDTH-bit field. */
+static inline int32_t
+write_signed_bitfield(uint32_t word, ptrdiff_t val, uint8_t width, uint8_t shift)
+{
+  ASSERT(read_signed_bitfield(word, width, shift) == 0);
+  ASSERT(in_signed_range(val, width));
+  return word | ((val & ((1 << width) - 1)) << shift);
+}
+
+/* DEFINE_ENCODER(name, width, shift, kind, val_t) defines the constants
+ * name_width and name_shift plus write_<name>_bitfield(word, val), which
+ * forwards to write_<kind>_bitfield with those two constants. */
+#define DEFINE_ENCODER(name, width, shift, kind, val_t)                 \
+  static const uint8_t name##_width = width;                            \
+  static const uint8_t name##_shift = shift;                            \
+  static uint32_t                                                       \
+  write_##name##_bitfield(uint32_t word, val_t val)                     \
+  {                                                                     \
+    return write_##kind##_bitfield(word, val, name##_width, name##_shift); \
+  }
+
+DEFINE_ENCODER(Rd, 5, 0, unsigned, uint32_t)
+DEFINE_ENCODER(Rm, 5, 16, unsigned, uint32_t)
+DEFINE_ENCODER(Rn, 5, 5, unsigned, uint32_t)
+DEFINE_ENCODER(cond2, 4, 0, unsigned, uint32_t)
+DEFINE_ENCODER(simm9, 9, 12, signed, ptrdiff_t)
+DEFINE_ENCODER(imm12, 12, 10, unsigned, uint32_t)
+DEFINE_ENCODER(imm16, 16, 5, unsigned, uint32_t)
+DEFINE_ENCODER(simm19, 19, 5, signed, ptrdiff_t)
+DEFINE_ENCODER(simm26, 26, 0, signed, ptrdiff_t)
+DEFINE_ENCODER(immr,
6, 16, unsigned, uint32_t) +DEFINE_ENCODER(imms, 6, 10, unsigned, uint32_t) +DEFINE_ENCODER(size, 2, 22, unsigned, uint32_t) + +#define DEFINE_PATCHABLE_INSTRUCTION(name, kind, RELOC, rsh) \ + static inline int32_t \ + read_##name##_offset(uint32_t *loc) \ + { \ + return read_signed_bitfield(*loc, kind##_width, kind##_shift); \ + } \ + static inline int \ + offset_in_##name##_range(ptrdiff_t diff, int flags); \ + static inline int \ + offset_in_##name##_range(ptrdiff_t diff, int flags) \ + { \ + return in_signed_range(diff, kind##_width); \ + } \ + static inline void \ + patch_##name##_offset(uint32_t *loc, ptrdiff_t diff) \ + { \ + *loc = write_##kind##_bitfield(*loc, diff); \ + } \ + static inline jit_reloc_t \ + emit_##name(jit_state_t *_jit, uint32_t inst) \ + { \ + while (1) { \ + jit_reloc_t ret = jit_reloc (_jit, JIT_RELOC_##RELOC, 0, \ + _jit->pc.uc, _jit->pc.uc, rsh); \ + if (add_pending_literal(_jit, ret, kind##_width - 1)) { \ + emit_u32(_jit, inst); \ + return ret; \ + } \ + } \ + } + +#define DEFINE_PATCHABLE_INSTRUCTIONS(name, kind, RELOC, rsh) \ + DEFINE_PATCHABLE_INSTRUCTION(name, kind, RELOC, rsh); \ + DEFINE_PATCHABLE_INSTRUCTION(veneer_##name, kind, RELOC, rsh); + +DEFINE_PATCHABLE_INSTRUCTIONS(jmp, simm26, JMP_WITH_VENEER, 2); +DEFINE_PATCHABLE_INSTRUCTIONS(jcc, simm19, JCC_WITH_VENEER, 2); +DEFINE_PATCHABLE_INSTRUCTION(load_from_pool, simm19, LOAD_FROM_POOL, 2); + +struct veneer +{ + uint32_t ldr; + uint32_t br; + uint64_t addr; +}; + +static void +patch_veneer(uint32_t *loc, jit_pointer_t addr) +{ + struct veneer *v = (struct veneer*) loc; + v->addr = (uint64_t) addr; +} + +#include "aarch64-cpu.c" +#include "aarch64-fpu.c" + +static const jit_gpr_t abi_gpr_args[] = { + _X0, _X1, _X2, _X3, _X4, _X5, _X6, _X7 +}; + +static const jit_fpr_t abi_fpr_args[] = { + _D0, _D1, _D2, _D3, _D4, _D5, _D6, _D7 +}; + +static const int abi_gpr_arg_count = sizeof(abi_gpr_args) / sizeof(abi_gpr_args[0]); +static const int abi_fpr_arg_count = 
sizeof(abi_fpr_args) / sizeof(abi_fpr_args[0]); + +struct abi_arg_iterator +{ + const jit_operand_t *args; + size_t argc; + + size_t arg_idx; + size_t gpr_idx; + size_t fpr_idx; + size_t stack_size; + size_t stack_padding; +}; + +static size_t page_size; + +jit_bool_t +jit_get_cpu(void) +{ + page_size = sysconf(_SC_PAGE_SIZE); + return 1; +} + +jit_bool_t +jit_init(jit_state_t *_jit) +{ + return 1; +} + +static size_t +jit_initial_frame_size (void) +{ + return 0; +} + +static void +reset_abi_arg_iterator(struct abi_arg_iterator *iter, size_t argc, + const jit_operand_t *args) +{ + memset(iter, 0, sizeof *iter); + iter->argc = argc; + iter->args = args; +} + +static void +next_abi_arg(struct abi_arg_iterator *iter, jit_operand_t *arg) +{ + ASSERT(iter->arg_idx < iter->argc); + enum jit_operand_abi abi = iter->args[iter->arg_idx].abi; + if (is_gpr_arg(abi) && iter->gpr_idx < abi_gpr_arg_count) { + *arg = jit_operand_gpr (abi, abi_gpr_args[iter->gpr_idx++]); + } else if (is_fpr_arg(abi) && iter->fpr_idx < abi_fpr_arg_count) { + *arg = jit_operand_fpr (abi, abi_fpr_args[iter->fpr_idx++]); + } else { + *arg = jit_operand_mem (abi, JIT_SP, iter->stack_size); + iter->stack_size += 8; + } + iter->arg_idx++; +} + +static void +jit_flush(void *fptr, void *tptr) +{ + jit_word_t f = (jit_word_t)fptr & -page_size; + jit_word_t t = (((jit_word_t)tptr) + page_size - 1) & -page_size; + __clear_cache((void *)f, (void *)t); +} + +static inline size_t +jit_stack_alignment(void) +{ + return 16; +} + +static void +jit_try_shorten(jit_state_t *_jit, jit_reloc_t reloc, jit_pointer_t addr) +{ +} + +static void* +bless_function_pointer(void *ptr) +{ + return ptr; +} diff --git a/deps/lightening/lightening/aarch64.h b/deps/lightening/lightening/aarch64.h new file mode 100644 index 0000000..219c615 --- /dev/null +++ b/deps/lightening/lightening/aarch64.h @@ -0,0 +1,170 @@ +/* + * Copyright (C) 2013-2017, 2019 Free Software Foundation, Inc. + * + * This file is part of GNU lightning. 
+ * + * GNU lightning is free software; you can redistribute it and/or modify it + * under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation; either version 3, or (at your option) + * any later version. + * + * GNU lightning is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public + * License for more details. + * + * Authors: + * Paulo Cesar Pereira de Andrade + */ + +#ifndef _jit_aarch64_h +#define _jit_aarch64_h + + +#define JIT_NEEDS_LITERAL_POOL 1 + +#define _X0 JIT_GPR(0) +#define _X1 JIT_GPR(1) +#define _X2 JIT_GPR(2) +#define _X3 JIT_GPR(3) +#define _X4 JIT_GPR(4) +#define _X5 JIT_GPR(5) +#define _X6 JIT_GPR(6) +#define _X7 JIT_GPR(7) +#define _X8 JIT_GPR(8) +#define _X9 JIT_GPR(9) +#define _X10 JIT_GPR(10) +#define _X11 JIT_GPR(11) +#define _X12 JIT_GPR(12) +#define _X13 JIT_GPR(13) +#define _X14 JIT_GPR(14) +#define _X15 JIT_GPR(15) +#define _X16 JIT_GPR(16) +#define _X17 JIT_GPR(17) +#define _X18 JIT_GPR(18) +#define _X19 JIT_GPR(19) +#define _X20 JIT_GPR(20) +#define _X21 JIT_GPR(21) +#define _X22 JIT_GPR(22) +#define _X23 JIT_GPR(23) +#define _X24 JIT_GPR(24) +#define _X25 JIT_GPR(25) +#define _X26 JIT_GPR(26) +#define _X27 JIT_GPR(27) +#define _X28 JIT_GPR(28) +#define _X29 JIT_GPR(29) +#define _X30 JIT_GPR(30) +#define _X31 JIT_GPR(31) + +#define _D0 JIT_FPR(0) +#define _D1 JIT_FPR(1) +#define _D2 JIT_FPR(2) +#define _D3 JIT_FPR(3) +#define _D4 JIT_FPR(4) +#define _D5 JIT_FPR(5) +#define _D6 JIT_FPR(6) +#define _D7 JIT_FPR(7) +#define _D8 JIT_FPR(8) +#define _D9 JIT_FPR(9) +#define _D10 JIT_FPR(10) +#define _D11 JIT_FPR(11) +#define _D12 JIT_FPR(12) +#define _D13 JIT_FPR(13) +#define _D14 JIT_FPR(14) +#define _D15 JIT_FPR(15) +#define _D16 JIT_FPR(16) +#define _D17 JIT_FPR(17) +#define _D18 JIT_FPR(18) +#define _D19 JIT_FPR(19) +#define _D20 
JIT_FPR(20) +#define _D21 JIT_FPR(21) +#define _D22 JIT_FPR(22) +#define _D23 JIT_FPR(23) +#define _D24 JIT_FPR(24) +#define _D25 JIT_FPR(25) +#define _D26 JIT_FPR(26) +#define _D27 JIT_FPR(27) +#define _D28 JIT_FPR(28) +#define _D29 JIT_FPR(29) +#define _D30 JIT_FPR(30) +#define _D31 JIT_FPR(31) + +#define JIT_R0 _X0 +#define JIT_R1 _X1 +#define JIT_R2 _X2 +#define JIT_R3 _X3 +#define JIT_R4 _X4 +#define JIT_R5 _X5 +#define JIT_R6 _X6 +#define JIT_R7 _X7 +#define JIT_R8 _X8 +#define JIT_R9 _X9 +#define JIT_R10 _X10 +#define JIT_R11 _X11 +#define JIT_R12 _X12 +#define JIT_R13 _X13 +#define JIT_R14 _X14 +#define JIT_R15 _X15 +#define JIT_TMP0 _X16 +#define JIT_TMP1 _X17 +// x18 is reserved by the platform. +#define JIT_V0 _X19 +#define JIT_V1 _X20 +#define JIT_V2 _X21 +#define JIT_V3 _X22 +#define JIT_V4 _X23 +#define JIT_V5 _X24 +#define JIT_V6 _X25 +#define JIT_V7 _X26 +#define JIT_V8 _X27 +#define JIT_V9 _X28 + +// x29 is frame pointer; x30 is link register. +#define JIT_PLATFORM_CALLEE_SAVE_GPRS _X29, _X30 + +// x31 is stack pointer. 
+#define JIT_LR _X30 +#define JIT_SP _X31 + +#define JIT_F0 _D0 +#define JIT_F1 _D1 +#define JIT_F2 _D2 +#define JIT_F3 _D3 +#define JIT_F4 _D4 +#define JIT_F5 _D5 +#define JIT_F6 _D6 +#define JIT_F7 _D7 +#define JIT_F8 _D16 +#define JIT_F9 _D17 +#define JIT_F10 _D18 +#define JIT_F11 _D19 +#define JIT_F12 _D20 +#define JIT_F13 _D21 +#define JIT_F14 _D22 +#define JIT_F15 _D23 +#define JIT_F16 _D24 +#define JIT_F17 _D25 +#define JIT_F18 _D26 +#define JIT_F19 _D27 +#define JIT_F20 _D28 +#define JIT_F21 _D29 +#define JIT_F22 _D30 +#define JIT_FTMP _D31 + +#define JIT_VF0 _D8 +#define JIT_VF1 _D9 +#define JIT_VF2 _D10 +#define JIT_VF3 _D11 +#define JIT_VF4 _D12 +#define JIT_VF5 _D13 +#define JIT_VF6 _D14 +#define JIT_VF7 _D15 + +#define _FP _X29 +#define _LR _X30 +#define _SP _X31 + +#define JIT_PLATFORM_CALLEE_SAVE_FPRS + + +#endif /* _jit_aarch64_h */ diff --git a/deps/lightening/lightening/arm-cpu.c b/deps/lightening/lightening/arm-cpu.c new file mode 100644 index 0000000..6618400 --- /dev/null +++ b/deps/lightening/lightening/arm-cpu.c @@ -0,0 +1,3116 @@ +/* + * Copyright (C) 2012-2017,2019-2020 Free Software Foundation, Inc. + * + * This file is part of GNU lightning. + * + * GNU lightning is free software; you can redistribute it and/or modify it + * under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation; either version 3, or (at your option) + * any later version. + * + * GNU lightning is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public + * License for more details. 
+ * + * Authors: + * Paulo Cesar Pereira de Andrade + */ + +#define _s20P(d) ((d) >= -(int)0x80000 && d <= 0x7ffff) +#define _s24P(d) ((d) >= -(int)0x800000 && d <= 0x7fffff) +#define _u3(v) ((v) & 0x7) +#define _u4(v) ((v) & 0xf) +#define _u5(v) ((v) & 0x1f) +#define _u8(v) ((v) & 0xff) +#define _u12(v) ((v) & 0xfff) +#define _u13(v) ((v) & 0x1fff) +#define _u16(v) ((v) & 0xffff) +#define _u24(v) ((v) & 0xffffff) + +#define ARM_CC_EQ 0x00000000 /* Z=1 */ +#define ARM_CC_NE 0x10000000 /* Z=0 */ +#define ARM_CC_HS 0x20000000 /* C=1 */ +#define ARM_CC_LO 0x30000000 /* C=0 */ +#define ARM_CC_MI 0x40000000 /* N=1 */ +#define ARM_CC_VS 0x60000000 /* V=1 */ +#define ARM_CC_VC 0x70000000 /* V=0 */ +#define ARM_CC_HI 0x80000000 /* C=1 && Z=0 */ +#define ARM_CC_LS 0x90000000 /* C=0 || Z=1 */ +#define ARM_CC_GE 0xa0000000 /* N=V */ +#define ARM_CC_LT 0xb0000000 /* N!=V */ +#define ARM_CC_GT 0xc0000000 /* Z=0 && N=V */ +#define ARM_CC_LE 0xd0000000 /* Z=1 || N!=V */ +#define ARM_CC_AL 0xe0000000 /* always */ +#define ARM_CC_NV 0xf0000000 /* reserved */ +#define THUMB_MOV 0x4600 +#define THUMB_MOVI 0x2000 +#define THUMB2_MOVI 0xf0400000 +#define THUMB2_MOVWI 0xf2400000 +#define THUMB2_MOVTI 0xf2c00000 +#define THUMB_MVN 0x43c0 +#define THUMB2_MVN 0xea600000 +#define THUMB2_MVNI 0xf0600000 +#define ARM_S 0x00100000 /* set flags */ +#define THUMB_ADD 0x1800 +#define THUMB_ADDX 0x4400 +#define THUMB2_ADD 0xeb000000 +#define THUMB_ADDI3 0x1c00 +#define THUMB_ADDI8 0x3000 +#define THUMB2_ADDI 0xf1000000 +#define THUMB2_ADDWI 0xf2000000 +#define THUMB_ADC 0x4140 +#define THUMB2_ADC 0xeb400000 +#define THUMB2_ADCI 0xf1400000 +#define THUMB_SUB 0x1a00 +#define THUMB2_SUB 0xeba00000 +#define THUMB_SUBI3 0x1e00 +#define THUMB_SUBI8 0x3800 +#define THUMB2_SUBI 0xf1a00000 +#define THUMB2_SUBWI 0xf2a00000 +#define THUMB_SBC 0x4180 +#define THUMB2_SBC 0xeb600000 +#define THUMB2_SBCI 0xf1600000 +#define THUMB_RSBI 0x4240 +#define THUMB2_RSBI 0xf1c00000 +#define THUMB_MUL 0x4340 +#define 
THUMB2_MUL 0xfb00f000 +#define THUMB2_UMULL 0xfba00000 +#define THUMB2_SMULL 0xfb800000 +#define THUMB_MLS 0xfb000010 +#define THUMB2_SDIV 0xfb90f0f0 +#define THUMB2_UDIV 0xfbb0f0f0 +#define THUMB_AND 0x4000 +#define THUMB2_AND 0xea000000 +#define THUMB2_ANDI 0xf0000000 +#define THUMB2_BIC 0xea200000 +#define THUMB2_BICI 0xf0200000 +#define THUMB_ORR 0x4300 +#define THUMB2_ORR 0xea400000 +#define THUMB2_ORRI 0xf0400000 +#define THUMB_EOR 0x4040 +#define THUMB2_EOR 0xea800000 +#define THUMB2_EORI 0xf0800000 +#define THUMB_REV 0xba00 +#define THUMB2_REV 0xfa90f080 +#define THUMB_SXTB 0xb240 +#define THUMB2_SXTB 0xfa40f080 +#define THUMB_UXTB 0xb2c0 +#define THUMB2_UXTB 0xfa50f080 +#define THUMB_SXTH 0xb200 +#define THUMB2_SXTH 0xfa00f080 +#define THUMB_UXTH 0xb280 +#define THUMB2_UXTH 0xfa10f080 +#define ARM_LSL 0x00000000 +#define THUMB_LSL 0x4080 +#define THUMB2_LSL 0xfa00f000 +#define THUMB_LSLI 0x0000 +#define THUMB2_LSLI 0xea4f0000 +#define ARM_LSR 0x00000020 +#define THUMB_LSR 0x40c0 +#define THUMB2_LSR 0xfa20f000 +#define THUMB_LSRI 0x0800 +#define THUMB2_LSRI 0xea4f0010 +#define ARM_ASR 0x00000040 +#define THUMB_ASR 0x4100 +#define THUMB2_ASR 0xfa40f000 +#define THUMB_ASRI 0x1000 +#define THUMB2_ASRI 0xea4f0020 +#define THUMB_CMP 0x4280 +#define THUMB_CMPX 0x4500 +#define THUMB2_CMP 0xebb00000 +#define THUMB_CMPI 0x2800 +#define THUMB2_CMPI 0xf1b00000 +#define THUMB2_CMN 0xeb100000 +#define THUMB2_CMNI 0xf1100000 +#define THUMB_TST 0x4200 +#define THUMB2_TST 0xea100000 +#define THUMB2_TSTI 0xf0100000 +#define THUMB_BLX 0x4780 +#define THUMB_BX 0x4700 +#define THUMB_CC_B 0xd000 +#define THUMB_B 0xe000 +#define THUMB2_CC_B 0xf0008000 +#define THUMB2_B 0xf0009000 +#define THUMB2_BLI 0xf000d000 +#define THUMB2_BLXI 0xf000c000 +#define THUMB2_P 0x00000400 +#define THUMB2_U 0x00000200 +#define THUMB_LDRSB 0x5600 +#define THUMB2_LDRSB 0xf9100000 +#define THUMB2_LDRSBI 0xf9100c00 +#define THUMB2_LDRSBWI 0xf9900000 +#define THUMB_LDRB 0x5c00 +#define THUMB2_LDRB 
0xf8100000 +#define THUMB_LDRBI 0x7800 +#define THUMB2_LDRBI 0xf8100c00 +#define THUMB2_LDRBWI 0xf8900000 +#define THUMB_LDRSH 0x5e00 +#define THUMB2_LDRSH 0xf9300000 +#define THUMB2_LDRSHI 0xf9300c00 +#define THUMB2_LDRSHWI 0xf9b00000 +#define THUMB_LDRH 0x5a00 +#define THUMB2_LDRH 0xf8300000 +#define THUMB_LDRHI 0x8800 +#define THUMB2_LDRHI 0xf8300c00 +#define THUMB2_LDRHWI 0xf8b00000 +#define THUMB_LDR 0x5800 +#define THUMB2_LDR 0xf8500000 +#define THUMB2_LDRP 0xf85f0000 +#define THUMB_LDRI 0x6800 +#define THUMB_LDRISP 0x9800 +#define THUMB2_LDRI 0xf8500c00 +#define THUMB2_LDRWI 0xf8d00000 +#define THUMB_STRB 0x5400 +#define THUMB2_STRB 0xf8000000 +#define THUMB_STRBI 0x7000 +#define THUMB2_STRBI 0xf8000c00 +#define THUMB2_STRBWI 0xf8800000 +#define THUMB_STRH 0x5200 +#define THUMB2_STRH 0xf8200000 +#define THUMB_STRHI 0x8000 +#define THUMB2_STRHI 0xf8200c00 +#define THUMB2_STRHWI 0xf8a00000 +#define THUMB_STR 0x5000 +#define THUMB2_STR 0xf8400000 +#define THUMB_STRI 0x6000 +#define THUMB2_STRWI 0xf8c00000 +#define THUMB_STRISP 0x9000 +#define THUMB2_STRI 0xf8400c00 +#define THUMB2_LDM_W 0x00200000 +#define THUMB2_PUSH 0xe92d0000 +#define THUMB_DMB 0xf3bf8f50 +#define THUMB_LDREX 0xe8500f00 +#define THUMB_STREX 0xe8400000 +#define THUMB_BRK 0xbe00 + +#define _NOREG (jit_gpr_regno(_PC)) + +#define JIT_RELOC_B JIT_RELOC_FLAG_0 + +static void +emit_wide_thumb(jit_state_t *_jit, uint32_t inst) +{ + emit_u16(_jit, inst >> 16); + emit_u16_with_pool(_jit, inst & 0xffff); +} + +static uint32_t +rotate_left(uint32_t v, uint32_t n) { + if (n == 0) { + return v; + } + ASSERT(n < 32); + return (v << n | v >> (32 - n)); +} + +static int +encode_arm_immediate(unsigned int v) +{ + unsigned int a, i; + + for (i = 0; i < 32; i += 2) + if ((a = rotate_left(v, i)) <= 0xff) + return (a | (i << 7)); + + return (-1); +} + +static int +encode_thumb_immediate(unsigned int v) +{ + int i; + unsigned int m; + unsigned int n; + /* 00000000 00000000 00000000 abcdefgh */ + if ((v & 0xff) == 
v) + return (v); + /* 00000000 abcdefgh 00000000 abcdefgh */ + if ((v & 0xff00ff) == v && ((v & 0xff0000) >> 16) == (v & 0xff)) + return ((v & 0xff) | (1 << 12)); + /* abcdefgh 00000000 abcdefgh 00000000 */ + if (((v & 0xffff0000) >> 16) == (v & 0xffff) && (v & 0xff) == 0) + return (((v & 0x0000ff00) >> 8) | (2 << 12)); + /* abcdefgh abcdefgh abcdefgh abcdefgh */ + if ( (v & 0xff) == ((v & 0xff00) >> 8) && + ((v & 0xff00) >> 8) == ((v & 0xff0000) >> 16) && + ((v & 0xff0000) << 8) == (v & 0xff000000)) + return ((v & 0xff) | (3 << 12)); + /* 1bcdefgh << 24 ... 1bcdefgh << 1 */ + for (i = 8, m = 0xff000000, n = 0x80000000; + i < 23; i++, m >>= 1, n >>= 1) { + if ((v & m) == v && (v & n)) { + v >>= 32 - i; + if (!(i & 1)) + v &= 0x7f; + i >>= 1; + return (((i & 7) << 12) | ((i & 8) << 23) | v); + } + } + return (-1); +} + +static int +encode_thumb_word_immediate(unsigned int v) +{ + if ((v & 0xfffff000) == 0) + return (((v & 0x800) << 15) | ((v & 0x700) << 4) | (v & 0xff)); + return (-1); +} + +static uint32_t +read_wide_thumb(uint32_t *loc) +{ + uint16_t *sloc = (uint16_t*)loc; + return (sloc[0] << 16) | sloc[1]; +} + +static void +write_wide_thumb(uint32_t *loc, uint32_t v) +{ + uint16_t *sloc = (uint16_t *)loc; + sloc[0] = v >> 16; + sloc[1] = v & 0xffff; +} + +static int +offset_in_jmp_range(int32_t offset, int flags) +{ + if (!(offset & 1) && flags | JIT_RELOC_B) + return 0; + else + return -0x1000000 <= offset && offset <= 0xffffff; +} + +static int32_t +decode_thumb_jump(uint32_t v) +{ + uint32_t s = (v >> 26) & 1; + uint32_t j1 = (v >> 13) & 1; + uint32_t j2 = (v >> 11) & 1; + uint32_t i1 = s ? j1 : !j1; + uint32_t i2 = s ? 
j2 : !j2; + uint32_t hi = (v >> 16) & 0x3ff; + uint32_t lo = v & 0x7ff; + + int32_t ret = s << 31; + ret >>= 8; + ret |= i1 << 22; + ret |= i2 << 21; + ret |= hi << 11; + ret |= lo; + return ret << 1; +} + +static const uint32_t thumb_jump_mask = 0xf800d000; + +static uint32_t +encode_thumb_jump(int32_t v) +{ + ASSERT(offset_in_jmp_range(v, 0)); + v >>= 1; + uint32_t s = !!(v & 0x800000); + uint32_t i1 = !!(v & 0x400000); + uint32_t i2 = !!(v & 0x200000); + uint32_t j1 = s ? i1 : !i1; + uint32_t j2 = s ? i2 : !i2; + uint32_t ret = (s<<26)|((v&0x1ff800)<<5)|(j1<<13)|(j2<<11)|(v&0x7ff); + ASSERT(decode_thumb_jump(ret) == v << 1); + ASSERT((ret & thumb_jump_mask) == 0); + return ret; +} + +static uint32_t +patch_thumb_jump(uint32_t inst, int32_t v) +{ + inst &= thumb_jump_mask; + if (!(v & 1)) { + ASSERT(inst == THUMB2_BLI || inst == THUMB2_BLXI); + v = (v + 2) & ~2; + inst = THUMB2_BLXI; + } + return inst | encode_thumb_jump(v); +} + +static int32_t +read_jmp_offset(uint32_t *loc) +{ + return decode_thumb_jump(read_wide_thumb(loc)); +} + +static void +patch_jmp_offset(uint32_t *loc, int32_t v) +{ + write_wide_thumb(loc, patch_thumb_jump(read_wide_thumb(loc), v)); +} + +static void +patch_veneer_jmp_offset(uint32_t *loc, int32_t v) +{ + ASSERT(!(v & 1)); + patch_jmp_offset(loc, v | 1); +} + +static jit_reloc_t +emit_thumb_jump(jit_state_t *_jit, uint32_t inst) +{ + while (1) { + uint8_t *pc_base = _jit->pc.uc + 4; + int32_t off = (uint8_t*)jit_address(_jit) - pc_base; + enum jit_reloc_kind kind = JIT_RELOC_JMP_WITH_VENEER; + if (inst == THUMB2_B) + kind |= JIT_RELOC_B; + jit_reloc_t ret = jit_reloc (_jit, kind, 0, _jit->pc.uc, pc_base, 0); + uint8_t thumb_jump_width = 24; + if (add_pending_literal(_jit, ret, thumb_jump_width - 1)) { + emit_wide_thumb(_jit, patch_thumb_jump(inst, off)); + return ret; + } + } +} + +static int +offset_in_jcc_range(int32_t v, int flags) +{ + if (!(v & 1)) + return 0; + else + return -0x100000 <= v && v <= 0xfffff; +} + +static int32_t 
+decode_thumb_cc_jump(uint32_t v) +{ + uint32_t s = (v >> 26) & 1; + uint32_t j1 = (v >> 13) & 1; + uint32_t j2 = (v >> 11) & 1; + uint32_t hi = (v >> 16) & 0x3f; + uint32_t lo = v & 0x7ff; + + int32_t ret = s << 31; + ret >>= 12; + ret |= j2 << 18; + ret |= j1 << 17; + ret |= hi << 11; + ret |= lo; + return ret << 1; +} + +static const uint32_t thumb_cc_jump_mask = 0xfbc0d000; + +static uint32_t +encode_thumb_cc_jump(int32_t v) +{ + ASSERT(offset_in_jcc_range(v, 0)); + v >>= 1; + uint32_t s = !!(v & 0x80000); + uint32_t j2 = !!(v & 0x40000); + uint32_t j1 = !!(v & 0x20000); + uint32_t hi = (v >> 11) & 0x3f; + uint32_t lo = v & 0x7ff; + uint32_t ret = (s<<26)|(hi << 16)|(j1<<13)|(j2<<11)|lo; + ASSERT(decode_thumb_cc_jump(ret) == v << 1); + ASSERT((ret & thumb_cc_jump_mask) == 0); + return ret; +} + +static uint32_t +patch_thumb_cc_jump(uint32_t inst, int32_t v) +{ + return (inst & thumb_cc_jump_mask) | encode_thumb_cc_jump(v); +} + +static int32_t +read_jcc_offset(uint32_t *loc) +{ + return decode_thumb_cc_jump(read_wide_thumb(loc)); +} + +static void +patch_jcc_offset(uint32_t *loc, int32_t v) +{ + write_wide_thumb(loc, patch_thumb_cc_jump(read_wide_thumb(loc), v)); +} + +static void +patch_veneer_jcc_offset(uint32_t *loc, int32_t v) +{ + ASSERT(!(v & 1)); + patch_jcc_offset(loc, v | 1); +} + +static jit_reloc_t +emit_thumb_cc_jump(jit_state_t *_jit, uint32_t inst) +{ + while (1) { + uint8_t *pc_base = _jit->pc.uc + 4; + int32_t off = (uint8_t*)jit_address(_jit) - pc_base; + jit_reloc_t ret = + jit_reloc (_jit, JIT_RELOC_JCC_WITH_VENEER, 0, _jit->pc.uc, pc_base, 0); + uint8_t thumb_cc_jump_width = 20; + if (add_pending_literal(_jit, ret, thumb_cc_jump_width - 1)) { + emit_wide_thumb(_jit, patch_thumb_cc_jump(inst, off)); + return ret; + } + } +} + +static void +torrr(jit_state_t *_jit, int o, int rn, int rd, int rm) +{ + ASSERT(!(o & 0xf0f0f)); + emit_wide_thumb(_jit, o|(_u4(rn)<<16)|(_u4(rd)<<8)|_u4(rm)); +} + +static void +torxr(jit_state_t *_jit, int o, int rn, 
int rt, int rm) +{ + ASSERT(!(o & 0xf0f0f)); + emit_wide_thumb(_jit, o|(_u4(rn)<<16)|(_u4(rt)<<12)|_u4(rm)); +} + +static void +torrrr(jit_state_t *_jit, int o, int rn, int rl, int rh, int rm) +{ + ASSERT(!(o & 0x000fff0f)); + emit_wide_thumb(_jit, o|(_u4(rn)<<16)|(_u4(rl)<<12)|(_u4(rh)<<8)|_u4(rm)); +} + +static void +torri(jit_state_t *_jit, int o, int rn, int rd, int im) +{ + ASSERT(!(o & 0x0c0f7fff)); + ASSERT(!(im & 0xfbff8f00)); + emit_wide_thumb(_jit, o|(_u4(rn)<<16)|(_u4(rd)<<8)|im); +} + +static void +torri8(jit_state_t *_jit, int o, int rn, int rt, int im) +{ + ASSERT(!(o & 0x000ff0ff)); + ASSERT(!(im & 0xffffff00)); + emit_wide_thumb(_jit, o|(_u4(rn)<<16)|(_u4(rt)<<12)|im); +} + +static void +torri12(jit_state_t *_jit, int o, int rn, int rt, int im) +{ + ASSERT(!(o & 0x000fffff)); + ASSERT(!(im & 0xfffff000)); + emit_wide_thumb(_jit, o|(_u4(rn)<<16)|(_u4(rt)<<12)|im); +} + +static void +tshift(jit_state_t *_jit, int o, int rd, int rm, int im) +{ + ASSERT(!(o & 0x7fcf)); + ASSERT(im >= 0 && im < 32); + emit_wide_thumb(_jit, o|((im&0x1c)<<10)|(_u4(rd)<<8)|((im&3)<<6)|_u4(rm)); +} + +static void +toriw(jit_state_t *_jit, int o, int rd, int im) +{ + ASSERT(!(im & 0xffff0000)); + emit_wide_thumb(_jit, o|((im&0xf000)<<4)|((im&0x800)<<15)|((im&0x700)<<4)|(_u4(rd)<<8)|(im&0xff)); +} + +static jit_reloc_t +tcb(jit_state_t *_jit, int cc) +{ + ASSERT(!(cc & 0xfffffff)); + ASSERT(cc != ARM_CC_AL && cc != ARM_CC_NV); + cc = ((uint32_t)cc) >> 6; + return emit_thumb_cc_jump(_jit, THUMB2_CC_B|cc); +} + +static jit_reloc_t +tb(jit_state_t *_jit, int o) +{ + ASSERT(!(o & 0x07ff2fff)); + return emit_thumb_jump(_jit, o); +} + +static void +T1_ORR(jit_state_t *_jit, int32_t rdn, int32_t rm) +{ + emit_u16_with_pool(_jit, THUMB_ORR|(_u3(rm)<<3)|_u3(rdn)); +} + +static void +T2_ORR(jit_state_t *_jit, int32_t rd, int32_t rn, int32_t rm) +{ + return torrr(_jit, THUMB2_ORR,rn,rd,rm); +} + +static void +T2_ORRI(jit_state_t *_jit, int32_t rd, int32_t rn, int32_t im) +{ + return 
torri(_jit, THUMB2_ORRI,rn,rd,im); +} + +static void +T1_EOR(jit_state_t *_jit, int32_t rdn, int32_t rm) +{ + emit_u16_with_pool(_jit, THUMB_EOR|(_u3(rm)<<3)|_u3(rdn)); +} + +static void +T2_EOR(jit_state_t *_jit, int32_t rd, int32_t rn, int32_t rm) +{ + return torrr(_jit, THUMB2_EOR,rn,rd,rm); +} + +static void +T2_EORI(jit_state_t *_jit, int32_t rd, int32_t rn, int32_t im) +{ + return torri(_jit, THUMB2_EORI,rn,rd,im); +} + +static void +T1_MOV(jit_state_t *_jit, int32_t rd, int32_t rm) +{ + emit_u16_with_pool(_jit, THUMB_MOV|((_u4(rd)&8)<<4)|(_u4(rm)<<3)|(rd&7)); +} + +static void +T1_MOVI(jit_state_t *_jit, int32_t rd, int32_t im) +{ + emit_u16_with_pool(_jit, THUMB_MOVI|(_u3(rd)<<8)|_u8(im)); +} + +static void +T2_MOVI(jit_state_t *_jit, int32_t rd, int32_t im) +{ + return torri(_jit, THUMB2_MOVI,_NOREG,rd,im); +} + +static void +T2_MOVWI(jit_state_t *_jit, int32_t rd, int32_t im) +{ + return toriw(_jit, THUMB2_MOVWI,rd,im); +} + +static void +T2_MOVTI(jit_state_t *_jit, int32_t rd, int32_t im) +{ + return toriw(_jit, THUMB2_MOVTI,rd,im); +} + +static void +T1_MVN(jit_state_t *_jit, int32_t rd, int32_t rm) +{ + emit_u16_with_pool(_jit, THUMB_MVN|(_u3(rm)<<3)|_u3(rd)); +} + +static void +T2_MVN(jit_state_t *_jit, int32_t rd, int32_t rm) +{ + return torrr(_jit, THUMB2_MVN,_NOREG,rd,rm); +} + +static void +T2_MVNI(jit_state_t *_jit, int32_t rd, int32_t im) +{ + return torri(_jit, THUMB2_MVNI,_NOREG,rd,im); +} + +static void +T1_NOT(jit_state_t *_jit, int32_t rd, int32_t rm) +{ + return T1_MVN(_jit, rd,rm); +} + +static void +T2_NOT(jit_state_t *_jit, int32_t rd, int32_t rm) +{ + return T2_MVN(_jit, rd,rm); +} + +static void +T1_NOP(jit_state_t *_jit) +{ + emit_u16_with_pool(_jit, 0xbf00); +} + +static void +T1_ADD(jit_state_t *_jit, int32_t rd, int32_t rn, int32_t rm) +{ + emit_u16_with_pool(_jit, THUMB_ADD|(_u3(rm)<<6)|(_u3(rn)<<3)|_u3(rd)); +} + +static void +T1_ADDX(jit_state_t *_jit, int32_t rdn, int32_t rm) +{ + emit_u16_with_pool(_jit, 
THUMB_ADDX|((_u4(rdn)&8)<<4)|(_u4(rm)<<3)|(rdn&7)); +} + +static void +T2_ADD(jit_state_t *_jit, int32_t rd, int32_t rn, int32_t rm) +{ + return torrr(_jit, THUMB2_ADD,rn,rd,rm); +} + +static void +T1_ADDI3(jit_state_t *_jit, int32_t rd, int32_t rn, int32_t im) +{ + emit_u16_with_pool(_jit, THUMB_ADDI3|(_u3(im)<<6)|(_u3(rn)<<3)|_u3(rd)); +} + +static void +T1_ADDI8(jit_state_t *_jit, int32_t rdn, int32_t im) +{ + emit_u16_with_pool(_jit, THUMB_ADDI8|(_u3(rdn)<<8)|_u8(im)); +} + +static void +T2_ADDI(jit_state_t *_jit, int32_t rd, int32_t rn, int32_t im) +{ + return torri(_jit, THUMB2_ADDI,rn,rd,im); +} + +static void +T2_ADDWI(jit_state_t *_jit, int32_t rd, int32_t rn, int32_t im) +{ + return torri(_jit, THUMB2_ADDWI,rn,rd,im); +} + +static void +T2_ADDS(jit_state_t *_jit, int32_t rd, int32_t rn, int32_t rm) +{ + return torrr(_jit, THUMB2_ADD|ARM_S,rn,rd,rm); +} + +static void +T2_ADDSI(jit_state_t *_jit, int32_t rd, int32_t rn, int32_t im) +{ + return torri(_jit, THUMB2_ADDI|ARM_S,rn,rd,im); +} + +static void +T1_ADC(jit_state_t *_jit, int32_t rdn, int32_t rm) +{ + emit_u16_with_pool(_jit, THUMB_ADC|(_u3(rm)<<3)|_u3(rdn)); +} + +static void +T2_ADCS(jit_state_t *_jit, int32_t rd, int32_t rn, int32_t rm) +{ + return torrr(_jit, THUMB2_ADC|ARM_S,rn,rd,rm); +} + +static void +T2_ADCSI(jit_state_t *_jit, int32_t rd, int32_t rn, int32_t im) +{ + return torri(_jit, THUMB2_ADCI|ARM_S,rn,rd,im); +} + +static void +T1_SUB(jit_state_t *_jit, int32_t rd, int32_t rn, int32_t rm) +{ + emit_u16_with_pool(_jit, THUMB_SUB|(_u3(rm)<<6)|(_u3(rn)<<3)|_u3(rd)); +} + +static void +T2_SUB(jit_state_t *_jit, int32_t rd, int32_t rn, int32_t rm) +{ + return torrr(_jit, THUMB2_SUB,rn,rd,rm); +} + +static void +T1_SUBI3(jit_state_t *_jit, int32_t rd, int32_t rn, int32_t im) +{ + emit_u16_with_pool(_jit, THUMB_SUBI3|(_u3(im)<<6)|(_u3(rn)<<3)|_u3(rd)); +} + +static void +T1_SUBI8(jit_state_t *_jit, int32_t rdn, int32_t im) +{ + emit_u16_with_pool(_jit, THUMB_SUBI8|(_u3(rdn)<<8)|_u8(im)); +} + 
+static void +T2_SUBI(jit_state_t *_jit, int32_t rd, int32_t rn, int32_t im) +{ + return torri(_jit, THUMB2_SUBI,rn,rd,im); +} + +static void +T2_SUBWI(jit_state_t *_jit, int32_t rd, int32_t rn, int32_t im) +{ + return torri(_jit, THUMB2_SUBWI,rn,rd,im); +} + +static void +T2_SUBS(jit_state_t *_jit, int32_t rd, int32_t rn, int32_t rm) +{ + return torrr(_jit, THUMB2_SUB|ARM_S,rn,rd,rm); +} + +static void +T2_SUBSI(jit_state_t *_jit, int32_t rd, int32_t rn, int32_t im) +{ + return torri(_jit, THUMB2_SUBI|ARM_S,rn,rd,im); +} + +static void +T1_SBC(jit_state_t *_jit, int32_t rdn, int32_t rm) +{ + emit_u16_with_pool(_jit, THUMB_SBC|(_u3(rm)<<3)|_u3(rdn)); +} + +static void +T2_SBCS(jit_state_t *_jit, int32_t rd, int32_t rn, int32_t rm) +{ + return torrr(_jit, THUMB2_SBC|ARM_S,rn,rd,rm); +} + +static void +T2_SBCSI(jit_state_t *_jit, int32_t rd, int32_t rn, int32_t im) +{ + return torri(_jit, THUMB2_SBCI|ARM_S,rn,rd,im); +} + +static void +T1_RSBI(jit_state_t *_jit, int32_t rd, int32_t rn) +{ + emit_u16_with_pool(_jit, THUMB_RSBI|(_u3(rn)<<3)|_u3(rd)); +} + +static void +T2_RSBI(jit_state_t *_jit, int32_t rd, int32_t rn, int32_t im) +{ + return torri(_jit, THUMB2_RSBI,rn,rd,im); +} + +static void +T1_MUL(jit_state_t *_jit, int32_t rdm, int32_t rn) +{ + emit_u16_with_pool(_jit, THUMB_MUL|(_u3(rn)<<3)|_u3(rdm)); +} + +static void +T2_MUL(jit_state_t *_jit, int32_t rd, int32_t rn, int32_t rm) +{ + return torrr(_jit, THUMB2_MUL,rn,rd,rm); +} + +static void +T2_SMULL(jit_state_t *_jit, int32_t rl, int32_t rh, int32_t rn, int32_t rm) +{ + return torrrr(_jit, THUMB2_SMULL,rn,rl,rh,rm); +} + +static void +T2_UMULL(jit_state_t *_jit, int32_t rl, int32_t rh, int32_t rn, int32_t rm) +{ + return torrrr(_jit, THUMB2_UMULL,rn,rl,rh,rm); +} + +static void +T2_SDIV(jit_state_t *_jit, int32_t rd, int32_t rn, int32_t rm) +{ + return torrr(_jit, THUMB2_SDIV,rn,rd,rm); +} + +static void +T2_UDIV(jit_state_t *_jit, int32_t rd, int32_t rn, int32_t rm) +{ + return torrr(_jit, 
THUMB2_UDIV,rn,rd,rm); +} + +static void +T1_MLS(jit_state_t *_jit, int32_t rd, int32_t rn, int32_t rm, int32_t ra) +{ + return torrrr(_jit, THUMB_MLS, rn, ra, rd, rm); +} + +static void +T1_AND(jit_state_t *_jit, int32_t rdn, int32_t rm) +{ + emit_u16_with_pool(_jit, THUMB_AND|(_u3(rm)<<3)|_u3(rdn)); +} + +static void +T2_AND(jit_state_t *_jit, int32_t rd, int32_t rn, int32_t rm) +{ + return torrr(_jit, THUMB2_AND,rn,rd,rm); +} + +static void +T2_ANDI(jit_state_t *_jit, int32_t rd, int32_t rn, int32_t im) +{ + return torri(_jit, THUMB2_ANDI,rn,rd,im); +} + +static void +T2_BICI(jit_state_t *_jit, int32_t rd, int32_t rn, int32_t im) +{ + return torri(_jit, THUMB2_BICI,rn,rd,im); +} + +static void +T1_REV(jit_state_t *_jit, int32_t rd, int32_t rm) +{ + emit_u16_with_pool(_jit, THUMB_REV|(_u3(rm)<<3)|_u3(rd)); +} + +static void +T2_REV(jit_state_t *_jit, int32_t rd, int32_t rm) +{ + return torrr(_jit, THUMB2_REV,rm,rd,rm); +} + +static void +T1_SXTB(jit_state_t *_jit, int32_t rd, int32_t rm) +{ + emit_u16_with_pool(_jit, THUMB_SXTB|(_u3(rm)<<3)|_u3(rd)); +} + +static void +T2_SXTB(jit_state_t *_jit, int32_t rd, int32_t rm) +{ + return torrr(_jit, THUMB2_SXTB,_NOREG,rd,rm); +} + +static void +T1_UXTB(jit_state_t *_jit, int32_t rd, int32_t rm) +{ + emit_u16_with_pool(_jit, THUMB_UXTB|(_u3(rm)<<3)|_u3(rd)); +} + +static void +T2_UXTB(jit_state_t *_jit, int32_t rd, int32_t rm) +{ + return torrr(_jit, THUMB2_UXTB,_NOREG,rd,rm); +} + +static void +T1_SXTH(jit_state_t *_jit, int32_t rd, int32_t rm) +{ + emit_u16_with_pool(_jit, THUMB_SXTH|(_u3(rm)<<3)|_u3(rd)); +} + +static void +T2_SXTH(jit_state_t *_jit, int32_t rd, int32_t rm) +{ + return torrr(_jit, THUMB2_SXTH,_NOREG,rd,rm); +} + +static void +T1_UXTH(jit_state_t *_jit, int32_t rd, int32_t rm) +{ + emit_u16_with_pool(_jit, THUMB_UXTH|(_u3(rm)<<3)|_u3(rd)); +} + +static void +T2_UXTH(jit_state_t *_jit, int32_t rd, int32_t rm) +{ + return torrr(_jit, THUMB2_UXTH,_NOREG,rd,rm); +} + +static void +T1_LSL(jit_state_t *_jit, 
int32_t rdn, int32_t rm) +{ + emit_u16_with_pool(_jit, THUMB_LSL|(_u3(rm)<<3)|_u3(rdn)); +} + +static void +T2_LSL(jit_state_t *_jit, int32_t rd, int32_t rn, int32_t rm) +{ + return torrr(_jit, THUMB2_LSL,rn,rd,rm); +} + +static void +T1_LSLI(jit_state_t *_jit, int32_t rd, int32_t rm, int32_t im) +{ + emit_u16_with_pool(_jit, THUMB_LSLI|(_u5(im)<<6)|(_u3(rm)<<3)|_u3(rd)); +} + +static void +T2_LSLI(jit_state_t *_jit, int32_t rd, int32_t rm, int32_t im) +{ + return tshift(_jit, THUMB2_LSLI,rd,rm,im); +} + +static void +T1_LSR(jit_state_t *_jit, int32_t rdn, int32_t rm) +{ + emit_u16_with_pool(_jit, THUMB_LSR|(_u3(rm)<<3)|_u3(rdn)); +} + +static void +T2_LSR(jit_state_t *_jit, int32_t rd, int32_t rn, int32_t rm) +{ + return torrr(_jit, THUMB2_LSR,rn,rd,rm); +} + +static void +T1_LSRI(jit_state_t *_jit, int32_t rd, int32_t rm, int32_t im) +{ + emit_u16_with_pool(_jit, THUMB_LSRI|(_u5(im)<<6)|(_u3(rm)<<3)|_u3(rd)); +} + +static void +T2_LSRI(jit_state_t *_jit, int32_t rd, int32_t rm, int32_t im) +{ + return tshift(_jit, THUMB2_LSRI,rd,rm,im); +} + +static void +T1_ASR(jit_state_t *_jit, int32_t rdn, int32_t rm) +{ + emit_u16_with_pool(_jit, THUMB_ASR|(_u3(rm)<<3)|_u3(rdn)); +} + +static void +T2_ASR(jit_state_t *_jit, int32_t rd, int32_t rn, int32_t rm) +{ + return torrr(_jit, THUMB2_ASR,rn,rd,rm); +} + +static void +T1_ASRI(jit_state_t *_jit, int32_t rd, int32_t rm, int32_t im) +{ + emit_u16_with_pool(_jit, THUMB_ASRI|(_u5(im)<<6)|(_u3(rm)<<3)|_u3(rd)); +} + +static void +T2_ASRI(jit_state_t *_jit, int32_t rd, int32_t rm, int32_t im) +{ + return tshift(_jit, THUMB2_ASRI,rd,rm,im); +} + +static void +T1_CMP(jit_state_t *_jit, int32_t rn, int32_t rm) +{ + emit_u16_with_pool(_jit, THUMB_CMP|(_u3(rm)<<3)|_u3(rn)); +} + +static void +T1_CMPX(jit_state_t *_jit, int32_t rn, int32_t rm) +{ + emit_u16_with_pool(_jit, THUMB_CMPX|((_u4(rn)&8)<<4)|(_u4(rm)<<3)|(rn&7)); +} + +static void +T2_CMP(jit_state_t *_jit, int32_t rn, int32_t rm) +{ + return torrr(_jit, 
THUMB2_CMP,rn,_NOREG,rm); +} + +static void +T1_CMPI(jit_state_t *_jit, int32_t rn, int32_t im) +{ + emit_u16_with_pool(_jit, THUMB_CMPI|(_u3(rn)<<8)|_u8(im)); +} + +static void +T2_CMPI(jit_state_t *_jit, int32_t rn, int32_t im) +{ + return torri(_jit, THUMB2_CMPI,rn,_NOREG,im); +} + +static void +T2_CMNI(jit_state_t *_jit, int32_t rn, int32_t im) +{ + return torri(_jit, THUMB2_CMNI,rn,_NOREG,im); +} + +static void +T1_TST(jit_state_t *_jit, int32_t rn, int32_t rm) +{ + emit_u16_with_pool(_jit, THUMB_TST|(_u3(rm)<<3)|_u3(rn)); +} + +static void +T2_TST(jit_state_t *_jit, int32_t rn, int32_t rm) +{ + return torrr(_jit, THUMB2_TST,rn,_NOREG,rm); +} + +static void +T2_TSTI(jit_state_t *_jit, int32_t rn, int32_t im) +{ + return torri(_jit, THUMB2_TSTI,rn,_NOREG,im); +} + +static void +T1_BLX(jit_state_t *_jit, int32_t r0) +{ + emit_u16_with_pool(_jit, THUMB_BLX|(_u4(r0)<<3)); +} + +static void +T1_BX(jit_state_t *_jit, int32_t r0) +{ + emit_u16_with_pool(_jit, THUMB_BX|(_u4(r0)<<3)); +} + +static jit_reloc_t +T2_CC_B(jit_state_t *_jit, uint32_t cc) +{ + return tcb(_jit, cc); +} + +static jit_reloc_t +T2_B(jit_state_t *_jit) +{ + return tb(_jit, THUMB2_B); +} + +static jit_reloc_t +T2_BLI(jit_state_t *_jit) +{ + return tb(_jit, THUMB2_BLI); +} + +enum dmb_option { DMB_ISH = 0xb }; +static void +T1_DMB(jit_state_t *_jit, enum dmb_option option) +{ + emit_wide_thumb(_jit, THUMB_DMB|_u4(option)); +} + +static void +T1_LDREX(jit_state_t *_jit, int32_t rt, int32_t rn, int8_t offset) +{ + emit_wide_thumb(_jit, THUMB_LDREX|(_u4(rn)<<16)|(_u4(rt)<<12)|_u8(offset)); +} + +static void +T1_STREX(jit_state_t *_jit, int32_t rd, int32_t rt, int32_t rn, int8_t offset) +{ + emit_wide_thumb + (_jit, THUMB_STREX|(_u4(rn)<<16)|(_u4(rt)<<12)|(_u4(rd)<<8)|_u8(offset)); +} + +static void +T1_LDRSB(jit_state_t *_jit, int32_t rt, int32_t rn, int32_t rm) +{ + emit_u16_with_pool(_jit, THUMB_LDRSB|(_u3(rm)<<6)|(_u3(rn)<<3)|_u3(rt)); +} + +static void +T2_LDRSB(jit_state_t *_jit, int32_t rt, 
int32_t rn, int32_t rm) +{ + return torxr(_jit, THUMB2_LDRSB,rn,rt,rm); +} + +static void +T2_LDRSBI(jit_state_t *_jit, int32_t rt, int32_t rn, int32_t im) +{ + return torri8(_jit, THUMB2_LDRSBI|THUMB2_U,rn,rt,im); +} + +static void +T2_LDRSBWI(jit_state_t *_jit, int32_t rt, int32_t rn, int32_t im) +{ + return torri12(_jit, THUMB2_LDRSBWI,rn,rt,im); +} + +static void +T2_LDRSBIN(jit_state_t *_jit, int32_t rt, int32_t rn, int32_t im) +{ + return torri8(_jit, THUMB2_LDRSBI,rn,rt,im); +} + +static void +T1_LDRB(jit_state_t *_jit, int32_t rt, int32_t rn, int32_t rm) +{ + emit_u16_with_pool(_jit, THUMB_LDRB|(_u3(rm)<<6)|(_u3(rn)<<3)|_u3(rt)); +} + +static void +T2_LDRB(jit_state_t *_jit, int32_t rt, int32_t rn, int32_t rm) +{ + return torxr(_jit, THUMB2_LDRB,rn,rt,rm); +} + +static void +T1_LDRBI(jit_state_t *_jit, int32_t rt, int32_t rn, int32_t im) +{ + emit_u16_with_pool(_jit, THUMB_LDRBI|(_u5(im)<<6)|(_u3(rn)<<3)|_u3(rt)); +} + +static void +T2_LDRBI(jit_state_t *_jit, int32_t rt, int32_t rn, int32_t im) +{ + return torri8(_jit, THUMB2_LDRBI|THUMB2_U,rn,rt,im); +} + +static void +T2_LDRBWI(jit_state_t *_jit, int32_t rt, int32_t rn, int32_t im) +{ + return torri12(_jit, THUMB2_LDRBWI,rn,rt,im); +} + +static void +T2_LDRBIN(jit_state_t *_jit, int32_t rt, int32_t rn, int32_t im) +{ + return torri8(_jit, THUMB2_LDRBI,rn,rt,im); +} + +static void +T1_LDRSH(jit_state_t *_jit, int32_t rt, int32_t rn, int32_t rm) +{ + emit_u16_with_pool(_jit, THUMB_LDRSH|(_u3(rm)<<6)|(_u3(rn)<<3)|_u3(rt)); +} + +static void +T2_LDRSH(jit_state_t *_jit, int32_t rt, int32_t rn, int32_t rm) +{ + return torxr(_jit, THUMB2_LDRSH,rn,rt,rm); +} + +static void +T2_LDRSHI(jit_state_t *_jit, int32_t rt, int32_t rn, int32_t im) +{ + return torri8(_jit, THUMB2_LDRSHI|THUMB2_U,rn,rt,im); +} + +static void +T2_LDRSHWI(jit_state_t *_jit, int32_t rt, int32_t rn, int32_t im) +{ + return torri12(_jit, THUMB2_LDRSHWI,rn,rt,im); +} + +static void +T2_LDRSHIN(jit_state_t *_jit, int32_t rt, int32_t rn, int32_t 
im) +{ + return torri8(_jit, THUMB2_LDRSHI,rn,rt,im); +} + +static void +T1_LDRH(jit_state_t *_jit, int32_t rt, int32_t rn, int32_t rm) +{ + emit_u16_with_pool(_jit, THUMB_LDRH|(_u3(rm)<<6)|(_u3(rn)<<3)|_u3(rt)); +} + +static void +T2_LDRH(jit_state_t *_jit, int32_t rt, int32_t rn, int32_t rm) +{ + return torxr(_jit, THUMB2_LDRH,rn,rt,rm); +} + +static void +T1_LDRHI(jit_state_t *_jit, int32_t rt, int32_t rn, int32_t im) +{ + emit_u16_with_pool(_jit, THUMB_LDRHI|(_u5(im)<<6)|(_u3(rn)<<3)|_u3(rt)); +} + +static void +T2_LDRHI(jit_state_t *_jit, int32_t rt, int32_t rn, int32_t im) +{ + return torri8(_jit, THUMB2_LDRHI|THUMB2_U,rn,rt,im); +} + +static void +T2_LDRHWI(jit_state_t *_jit, int32_t rt, int32_t rn, int32_t im) +{ + return torri12(_jit, THUMB2_LDRHWI,rn,rt,im); +} + +static void +T2_LDRHIN(jit_state_t *_jit, int32_t rt, int32_t rn, int32_t im) +{ + return torri8(_jit, THUMB2_LDRHI,rn,rt,im); +} + +static void +T1_LDR(jit_state_t *_jit, int32_t rt, int32_t rn, int32_t rm) +{ + emit_u16_with_pool(_jit, THUMB_LDR|(_u3(rm)<<6)|(_u3(rn)<<3)|_u3(rt)); +} + +static void +T2_LDR(jit_state_t *_jit, int32_t rt, int32_t rn, int32_t rm) +{ + return torxr(_jit, THUMB2_LDR,rn,rt,rm); +} + +static void +T1_LDRI(jit_state_t *_jit, int32_t rt, int32_t rn, int32_t im) +{ + emit_u16_with_pool(_jit, THUMB_LDRI|(_u5(im)<<6)|(_u3(rn)<<3)|_u3(rt)); +} + +static void +T1_LDRISP(jit_state_t *_jit, int32_t rt, int32_t im) +{ + emit_u16_with_pool(_jit, THUMB_LDRISP|(_u3(rt)<<8)|_u8(im)); +} + +static void +T2_LDRI(jit_state_t *_jit, int32_t rt, int32_t rn, int32_t im) +{ + return torri8(_jit, THUMB2_LDRI|THUMB2_U,rn,rt,im); +} + +static void +T2_LDRWI(jit_state_t *_jit, int32_t rt, int32_t rn, int32_t im) +{ + return torri12(_jit, THUMB2_LDRWI,rn,rt,im); +} + +static void +T2_LDRIN(jit_state_t *_jit, int32_t rt, int32_t rn, int32_t im) +{ + return torri8(_jit, THUMB2_LDRI,rn,rt,im); +} + +static void +T1_STRB(jit_state_t *_jit, int32_t rt, int32_t rn, int32_t rm) +{ + 
emit_u16_with_pool(_jit, THUMB_STRB|(_u3(rm)<<6)|(_u3(rn)<<3)|_u3(rt)); +} + +static void +T2_STRB(jit_state_t *_jit, int32_t rt, int32_t rn, int32_t rm) +{ + return torxr(_jit, THUMB2_STRB,rn,rt,rm); +} + +static void +T1_STRBI(jit_state_t *_jit, int32_t rt, int32_t rn, int32_t im) +{ + emit_u16_with_pool(_jit, THUMB_STRBI | (_u5(im) << 6) | (_u3(rn) << 3) | _u3(rt)); +} + +static void +T2_STRBI(jit_state_t *_jit, int32_t rt, int32_t rn, int32_t im) +{ + return torri8(_jit, THUMB2_STRBI|THUMB2_U,rn,rt,im); +} + +static void +T2_STRBWI(jit_state_t *_jit, int32_t rt, int32_t rn, int32_t im) +{ + return torri12(_jit, THUMB2_STRBWI,rn,rt,im); +} + +static void +T2_STRBIN(jit_state_t *_jit, int32_t rt, int32_t rn, int32_t im) +{ + return torri8(_jit, THUMB2_STRBI,rn,rt,im); +} + +static void +T1_STRH(jit_state_t *_jit, int32_t rt, int32_t rn, int32_t rm) +{ + emit_u16_with_pool(_jit, THUMB_STRH|(_u3(rm)<<6)|(_u3(rn)<<3)|_u3(rt)); +} + +static void +T2_STRH(jit_state_t *_jit, int32_t rt, int32_t rn, int32_t rm) +{ + return torxr(_jit, THUMB2_STRH,rn,rt,rm); +} + +static void +T1_STRHI(jit_state_t *_jit, int32_t rt, int32_t rn, int32_t im) +{ + emit_u16_with_pool(_jit, THUMB_STRHI|(_u5(im)<<6)|(_u3(rn)<<3)|_u3(rt)); +} + +static void +T2_STRHI(jit_state_t *_jit, int32_t rt, int32_t rn, int32_t im) +{ + return torri8(_jit, THUMB2_STRHI|THUMB2_U,rn,rt,im); +} + +static void +T2_STRHWI(jit_state_t *_jit, int32_t rt, int32_t rn, int32_t im) +{ + return torri12(_jit, THUMB2_STRHWI,rn,rt,im); +} + +static void +T2_STRHIN(jit_state_t *_jit, int32_t rt, int32_t rn, int32_t im) +{ + return torri8(_jit, THUMB2_STRHI,rn,rt,im); +} + +static void +T1_STR(jit_state_t *_jit, int32_t rt, int32_t rn, int32_t rm) +{ + emit_u16_with_pool(_jit, THUMB_STR|(_u3(rm)<<6)|(_u3(rn)<<3)|_u3(rt)); +} + +static void +T2_STR(jit_state_t *_jit, int32_t rt, int32_t rn, int32_t rm) +{ + return torxr(_jit, THUMB2_STR,rn,rt,rm); +} + +static void +T1_STRI(jit_state_t *_jit, int32_t rt, int32_t rn, 
int32_t im) +{ + emit_u16_with_pool(_jit, THUMB_STRI|(_u5(im)<<6)|(_u3(rn)<<3)|_u3(rt)); +} + +static void +T1_STRISP(jit_state_t *_jit, int32_t rt, int32_t im) +{ + emit_u16_with_pool(_jit, THUMB_STRISP|(_u3(rt)<<8)|(_u8(im))); +} + +static void +T2_STRI(jit_state_t *_jit, int32_t rt, int32_t rn, int32_t im) +{ + return torri8(_jit, THUMB2_STRI|THUMB2_U,rn,rt,im); +} + +static void +T2_STRWI(jit_state_t *_jit, int32_t rt, int32_t rn, int32_t im) +{ + return torri12(_jit, THUMB2_STRWI,rn,rt,im); +} + +static void +T2_STRIN(jit_state_t *_jit, int32_t rt, int32_t rn, int32_t im) +{ + return torri8(_jit, THUMB2_STRI,rn,rt,im); +} + +static void +T1_BRK(jit_state_t *_jit) +{ + emit_u16_with_pool(_jit, THUMB_BRK); +} + +static void +nop(jit_state_t *_jit, int32_t i0) +{ + for (; i0 > 0; i0 -= 2) + T1_NOP(_jit); + + ASSERT(i0 == 0); +} + +static void +movr(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + if (r0 != r1) { + T1_MOV(_jit, r0, r1); + } +} + +enum preserve_flags { PRESERVE_FLAGS, FLAGS_UNIMPORTANT }; + +static void +_movi(jit_state_t *_jit, int32_t r0, jit_word_t i0, enum preserve_flags flags) +{ + int i; + + if (flags == PRESERVE_FLAGS && r0 < 8 && !(i0 & 0xffffff80)) + T1_MOVI(_jit, r0, i0); + else if (r0 < 8 && !(i0 & 0xffffff80)) + T1_MOVI(_jit, r0, i0); + else if ((i = encode_thumb_immediate(i0)) != -1) + T2_MOVI(_jit, r0, i); + else if ((i = encode_thumb_immediate(~i0)) != -1) + T2_MVNI(_jit, r0, i); + else { + T2_MOVWI(_jit, r0, (uint16_t)i0); + if (i0 & 0xffff0000) + T2_MOVTI(_jit, r0, (uint16_t)((unsigned)i0 >> 16)); + } +} + +static void +movi(jit_state_t *_jit, int32_t r0, jit_word_t i0) +{ + return _movi(_jit, r0, i0, FLAGS_UNIMPORTANT); +} + +static int +offset_in_load_from_pool_range(int32_t offset) +{ + return -0xfff <= offset && offset <= 0xfff; +} + +static int32_t +decode_load_from_pool_offset(uint32_t inst) +{ + int32_t ret = inst & 0xfff; + return ((inst >> 23) & 1) ? 
ret : -ret; +} + +static uint32_t +encode_load_from_pool_offset(int32_t off) +{ + ASSERT(offset_in_load_from_pool_range(off)); + uint32_t u = off >= 0; + uint32_t ret = ((u ? off : -off) & 0xfff) | (u << 23); + ASSERT(decode_load_from_pool_offset(ret) == off); + return ret; +} + +static uint32_t +patch_load_from_pool(uint32_t inst, int32_t off) +{ + uint32_t load_from_pool_mask = THUMB2_LDRP | (0xf << 12); + return (inst & load_from_pool_mask) | encode_load_from_pool_offset(off); +} + +static int32_t +read_load_from_pool_offset(uint32_t *loc) +{ + return decode_load_from_pool_offset(read_wide_thumb(loc)); +} + +static void +patch_load_from_pool_offset(uint32_t *loc, int32_t v) +{ + write_wide_thumb(loc, patch_load_from_pool(read_wide_thumb(loc), v)); +} + +static jit_reloc_t +emit_load_from_pool(jit_state_t *_jit, uint32_t inst) +{ + while (1) { + uint8_t *pc_base = (uint8_t *)((_jit->pc.w + 4) & ~3); + uint8_t rsh = 0; + int32_t off = (_jit->pc.uc - pc_base) >> rsh; + jit_reloc_t ret = + jit_reloc (_jit, JIT_RELOC_LOAD_FROM_POOL, 0, _jit->pc.uc, pc_base, rsh); + uint8_t load_from_pool_width = 12; + if (add_pending_literal(_jit, ret, load_from_pool_width)) { + emit_wide_thumb(_jit, patch_load_from_pool(inst, off)); + return ret; + } + } +} + +static jit_reloc_t +movi_from_pool(jit_state_t *_jit, int32_t Rt) +{ + return emit_load_from_pool(_jit, THUMB2_LDRP | (_u4(Rt) << 12)); +} + +static jit_reloc_t +mov_addr(jit_state_t *_jit, int32_t r0) +{ + return movi_from_pool(_jit, r0); +} + +static void +comr(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + if ((r0|r1) < 8) + T1_NOT(_jit, r0, r1); + else + T2_NOT(_jit, r0, r1); +} + +static void +negr(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + if ((r0|r1) < 8) + T1_RSBI(_jit, r0, r1); + else + T2_RSBI(_jit, r0, r1, 0); +} + +static void +addr(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2) +{ + if ((r0|r1|r2) < 8) + T1_ADD(_jit, r0, r1, r2); + else if (r0 == r1 || r0 == r2) + T1_ADDX(_jit, r0, r0 == r1 ? 
r2 : r1); + else + T2_ADD(_jit, r0, r1, r2); +} + +static void +addi(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0) +{ + int i; + + if ((r0|r1) < 8 && !(i0 & ~7)) + T1_ADDI3(_jit, r0, r1, i0); + else if ((r0|r1) < 8 && !(-i0 & ~7)) + T1_SUBI3(_jit, r0, r1, -i0); + else if (r0 < 8 && r0 == r1 && !(i0 & ~0xff)) + T1_ADDI8(_jit, r0, i0); + else if (r0 < 8 && r0 == r1 && !(-i0 & ~0xff)) + T1_SUBI8(_jit, r0, -i0); + else if ((i = encode_thumb_immediate(i0)) != -1) + T2_ADDI(_jit, r0, r1, i); + else if ((i = encode_thumb_immediate(-i0)) != -1) + T2_SUBI(_jit, r0, r1, i); + else if ((i = encode_thumb_word_immediate(i0)) != -1) + T2_ADDWI(_jit, r0, r1, i); + else if ((i = encode_thumb_word_immediate(-i0)) != -1) + T2_SUBWI(_jit, r0, r1, i); + else { + jit_gpr_t reg = get_temp_gpr(_jit); + movi(_jit, jit_gpr_regno(reg), i0); + T2_ADD(_jit, r0, r1, jit_gpr_regno(reg)); + unget_temp_gpr(_jit); + } +} + +static void +addcr(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2) +{ + /* thumb auto set carry if not inside IT block */ + if ((r0|r1|r2) < 8) + T1_ADD(_jit, r0, r1, r2); + else + T2_ADDS(_jit, r0, r1, r2); +} + +static void +addci(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0) +{ + int i; + + if ((r0|r1) < 8 && !(i0 & ~7)) + T1_ADDI3(_jit, r0, r1, i0); + else if ((r0|r1) < 8 && !(-i0 & ~7)) + T1_SUBI3(_jit, r0, r1, -i0); + else if (r0 < 8 && r0 == r1 && !(i0 & ~0xff)) + T1_ADDI8(_jit, r0, i0); + else if (r0 < 8 && r0 == r1 && !(-i0 & ~0xff)) + T1_SUBI8(_jit, r0, -i0); + else if ((i = encode_thumb_immediate(i0)) != -1) + T2_ADDSI(_jit, r0, r1, i); + else if ((i = encode_thumb_immediate(-i0)) != -1) + T2_SUBSI(_jit, r0, r1, i); + else { + jit_gpr_t reg = get_temp_gpr(_jit); + movi(_jit, jit_gpr_regno(reg), i0); + T2_ADDS(_jit, r0, r1, jit_gpr_regno(reg)); + unget_temp_gpr(_jit); + } +} + +static void +addxr(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2) +{ + /* keep setting carry because don't know last ADC */ + + /* thumb auto set carry if 
not inside IT block */
+  if ((r0|r1|r2) < 8 && (r0 == r1 || r0 == r2))
+    T1_ADC(_jit, r0, r0 == r1 ? r2 : r1);
+  else
+    T2_ADCS(_jit, r0, r1, r2);
+}
+
+/* Emit r0 = r1 + i0 + carry-in with a flag-setting add-with-carry.
+
+   ADC computes Rn + imm + C while SBC computes Rn + ~imm + C, so the
+   SBC immediate equivalent to adding i0 is the bitwise complement ~i0.
+   (Using the negation -i0 would produce a result that is one too
+   small.)  When neither immediate form is encodable, the constant is
+   loaded with _movi(..., PRESERVE_FLAGS) ahead of the ADCS; r0 doubles
+   as the scratch register unless it aliases r1, in which case a
+   temporary register is borrowed.  */
+static void
+addxi(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0)
+{
+  int i;
+  if ((i = encode_thumb_immediate(i0)) != -1)
+    T2_ADCSI(_jit, r0, r1, i);
+  else if ((i = encode_thumb_immediate(~i0)) != -1)
+    T2_SBCSI(_jit, r0, r1, i);
+  else if (r0 != r1) {
+    _movi(_jit, r0, i0, PRESERVE_FLAGS);
+    T2_ADCS(_jit, r0, r1, r0);
+  } else {
+    jit_gpr_t reg = get_temp_gpr(_jit);
+    _movi(_jit, jit_gpr_regno(reg), i0, PRESERVE_FLAGS);
+    T2_ADCS(_jit, r0, r1, jit_gpr_regno(reg));
+    unget_temp_gpr(_jit);
+  }
+}
+
+/* Emit r0 = r1 - r2: the T1 form when all three register numbers are
+   below 8, the T2 form otherwise.  */
+static void
+subr(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+{
+  if ((r0|r1|r2) < 8)
+    T1_SUB(_jit, r0, r1, r2);
+  else
+    T2_SUB(_jit, r0, r1, r2);
+}
+
+/* Emit r0 = r1 - i0.  Candidate encodings are tried in order: T1 3-bit
+   and 8-bit immediates, T2 modified immediates, T2 word immediates --
+   each both as a subtract of i0 and as an add of -i0.  If none fits,
+   i0 is loaded into a temporary register and a register subtract is
+   emitted.  */
+static void
+subi(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0)
+{
+  int i;
+
+  if ((r0|r1) < 8 && !(i0 & ~7))
+    T1_SUBI3(_jit, r0, r1, i0);
+  else if ((r0|r1) < 8 && !(-i0 & ~7))
+    T1_ADDI3(_jit, r0, r1, -i0);
+  else if (r0 < 8 && r0 == r1 && !(i0 & ~0xff))
+    T1_SUBI8(_jit, r0, i0);
+  else if (r0 < 8 && r0 == r1 && !(-i0 & ~0xff))
+    T1_ADDI8(_jit, r0, -i0);
+  else if ((i = encode_thumb_immediate(i0)) != -1)
+    T2_SUBI(_jit, r0, r1, i);
+  else if ((i = encode_thumb_immediate(-i0)) != -1)
+    T2_ADDI(_jit, r0, r1, i);
+  else if ((i = encode_thumb_word_immediate(i0)) != -1)
+    T2_SUBWI(_jit, r0, r1, i);
+  else if ((i = encode_thumb_word_immediate(-i0)) != -1)
+    T2_ADDWI(_jit, r0, r1, i);
+  else {
+    jit_gpr_t reg = get_temp_gpr(_jit);
+    movi(_jit, jit_gpr_regno(reg), i0);
+    T2_SUB(_jit, r0, r1, jit_gpr_regno(reg));
+    unget_temp_gpr(_jit);
+  }
+}
+
+/* Emit r0 = r1 - r2 so that the carry flag is updated for a following
+   subtract-with-carry.  */
+static void
+subcr(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+{
+  /* thumb auto set carry if not inside IT block */
+  if ((r0|r1|r2) < 8)
+    T1_SUB(_jit, r0, r1, r2);
+  else
+    T2_SUBS(_jit, r0, r1, r2);
+}
+
+static void
+subci(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t 
i0)
+{
+  int i;
+
+  if ((r0|r1) < 8 && !(i0 & ~7))
+    T1_SUBI3(_jit, r0, r1, i0);
+  else if ((r0|r1) < 8 && !(-i0 & ~7))
+    T1_ADDI3(_jit, r0, r1, -i0);
+  else if (r0 < 8 && r0 == r1 && !(i0 & ~0xff))
+    T1_SUBI8(_jit, r0, i0);
+  else if (r0 < 8 && r0 == r1 && !(-i0 & ~0xff))
+    T1_ADDI8(_jit, r0, -i0);
+  else if ((i = encode_thumb_immediate(i0)) != -1)
+    T2_SUBSI(_jit, r0, r1, i);
+  else if ((i = encode_thumb_immediate(-i0)) != -1)
+    T2_ADDSI(_jit, r0, r1, i);
+  else {
+    jit_gpr_t reg = get_temp_gpr(_jit);
+    movi(_jit, jit_gpr_regno(reg), i0);
+    T2_SUBS(_jit, r0, r1, jit_gpr_regno(reg));
+    unget_temp_gpr(_jit);
+  }
+}
+
+/* Emit r0 = r1 - r2 - borrow with a flag-setting subtract-with-carry.
+   The T1 form is two-operand, so it is only used when r0 == r1 and all
+   register numbers are below 8.  */
+static void
+subxr(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+{
+  /* keep setting carry because don't know last SBC */
+
+  /* thumb auto set carry if not inside IT block */
+  if ((r0|r1|r2) < 8 && r0 == r1)
+    T1_SBC(_jit, r0, r2);
+  else
+    T2_SBCS(_jit, r0, r1, r2);
+}
+
+/* Emit r0 = r1 - i0 - borrow with a flag-setting subtract-with-carry.
+
+   The T2_* emitters take Thumb-2 modified immediates, so the constant
+   is encoded with encode_thumb_immediate, as in addxi.  SBC computes
+   Rn + ~imm + C and ADC computes Rn + imm + C, so the ADC immediate
+   equivalent to subtracting i0 is the bitwise complement ~i0.  (Using
+   the negation -i0 would produce a result that is one too large.)
+   When neither immediate form is encodable, the constant is loaded
+   with _movi(..., PRESERVE_FLAGS) ahead of the SBCS; r0 doubles as the
+   scratch register unless it aliases r1.  */
+static void
+subxi(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0)
+{
+  int i;
+  if ((i = encode_thumb_immediate(i0)) != -1)
+    T2_SBCSI(_jit, r0, r1, i);
+  else if ((i = encode_thumb_immediate(~i0)) != -1)
+    T2_ADCSI(_jit, r0, r1, i);
+  else if (r0 != r1) {
+    _movi(_jit, r0, i0, PRESERVE_FLAGS);
+    T2_SBCS(_jit, r0, r1, r0);
+  } else {
+    jit_gpr_t reg = get_temp_gpr(_jit);
+    _movi(_jit, jit_gpr_regno(reg), i0, PRESERVE_FLAGS);
+    T2_SBCS(_jit, r0, r1, jit_gpr_regno(reg));
+    unget_temp_gpr(_jit);
+  }
+}
+
+/* Emit r0 = r1 * r2.  The T1 multiply is two-operand, so it is used
+   only when r0 aliases one source and both registers involved are
+   below 8.  */
+static void
+mulr(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+{
+  if (r0 == r2 && (r0|r1) < 8)
+    T1_MUL(_jit, r0, r1);
+  else if (r0 == r1 && (r0|r2) < 8)
+    T1_MUL(_jit, r0, r2);
+  else
+    T2_MUL(_jit, r0, r1, r2);
+}
+
+/* Emit r0 = r1 * i0 by loading i0 into a temporary register and
+   delegating to mulr.  */
+static void
+muli(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0)
+{
+  jit_gpr_t reg = get_temp_gpr(_jit);
+  movi(_jit, jit_gpr_regno(reg), i0);
+  mulr(_jit, r0, r1, jit_gpr_regno(reg));
+  unget_temp_gpr(_jit);
+}
+
+static void
+iqmulr(jit_state_t *_jit, int32_t r0, int32_t r1,
+       int32_t r2, int32_t r3, jit_bool_t sign)
+{
+  if 
(r2 == r3) { + jit_gpr_t reg = get_temp_gpr(_jit); + movr(_jit, jit_gpr_regno(reg), r2); + if (sign) + T2_SMULL(_jit, r0, r1, jit_gpr_regno(reg), r2); + else + T2_UMULL(_jit, r0, r1, jit_gpr_regno(reg), r2); + unget_temp_gpr(_jit); + } else if (r0 != r2 && r1 != r2) { + if (sign) + T2_SMULL(_jit, r0, r1, r2, r3); + else + T2_UMULL(_jit, r0, r1, r2, r3); + } else { + if (sign) + T2_SMULL(_jit, r0, r1, r3, r2); + else + T2_UMULL(_jit, r0, r1, r3, r2); + } +} + +static void +iqmuli(jit_state_t *_jit, int32_t r0, int32_t r1, + int32_t r2, jit_word_t i0, jit_bool_t sign) +{ + jit_gpr_t reg = get_temp_gpr(_jit); + movi(_jit, jit_gpr_regno(reg), i0); + iqmulr(_jit, r0, r1, r2, jit_gpr_regno(reg), sign); + unget_temp_gpr(_jit); +} + +static void +qmulr(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2, int32_t r3) +{ + return iqmulr(_jit, r0,r1,r2,r3,1); +} + +static void +qmulr_u(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2, int32_t r3) +{ + return iqmulr(_jit, r0,r1,r2,r3,0); +} + +static void +qmuli(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2, int32_t i0) +{ + return iqmuli(_jit, r0,r1,r2,i0,1); +} + +static void +qmuli_u(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2, int32_t i0) +{ + return iqmuli(_jit, r0,r1,r2,i0,0); +} + +static void +divr(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2) +{ + T2_SDIV(_jit, r0, r1, r2); +} + +static void +divi(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0) +{ + jit_gpr_t reg = get_temp_gpr(_jit); + movi(_jit, jit_gpr_regno(reg), i0); + divr(_jit, r0, r1, jit_gpr_regno(reg)); + unget_temp_gpr(_jit); +} + +static void +divr_u(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2) +{ + T2_UDIV(_jit, r0, r1, r2); +} + +static void +divi_u(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0) +{ + jit_gpr_t reg = get_temp_gpr(_jit); + movi(_jit, jit_gpr_regno(reg), i0); + divr_u(_jit, r0, r1, jit_gpr_regno(reg)); + unget_temp_gpr(_jit); +} + +static void +iqdivr(jit_state_t *_jit, 
int32_t r0, int32_t r1, + int32_t r2, int32_t r3, jit_bool_t sign) +{ + int need_tmp = r0 == r2 || r0 == r3; + if (need_tmp) { + int32_t tmp = jit_gpr_regno(get_temp_gpr(_jit)); + if (r0 == r2) { + movr(_jit, tmp, r2); + r2 = tmp; + } + if (r0 == r3) { + if (r2 != r3) + movr(_jit, tmp, r3); + r3 = tmp; + } + } + if (sign) + divr(_jit, r0, r2, r3); + else + divr_u(_jit, r0, r2, r3); + T1_MLS(_jit, r1, r3, r0, r2); + if (need_tmp) + unget_temp_gpr(_jit); +} + +static void +iqdivi(jit_state_t *_jit, int32_t r0, int32_t r1, + int32_t r2, jit_word_t i0, jit_bool_t sign) +{ + jit_gpr_t reg = get_temp_gpr(_jit); + movi(_jit, jit_gpr_regno(reg), i0); + iqdivr(_jit, r0, r1, r2, jit_gpr_regno(reg), sign); + unget_temp_gpr(_jit); +} + +static void +qdivr(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2, int32_t r3) +{ + return iqdivr(_jit, r0,r1,r2,r3,1); +} + +static void +qdivr_u(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2, int32_t r3) +{ + return iqdivr(_jit, r0,r1,r2,r3,0); +} + +static void +qdivi(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2, int32_t i0) +{ + return iqdivi(_jit, r0,r1,r2,i0,1); +} + +static void +qdivi_u(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2, int32_t i0) +{ + return iqdivi(_jit, r0,r1,r2,i0,0); +} + +static void +iremr(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2, jit_bool_t sign) +{ + return iqdivr(_jit, r0, r0, r1, r2, sign); +} + +static void +remr(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2) +{ + return iremr(_jit, r0, r1, r2, 1); +} + +static void +remi(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0) +{ + jit_gpr_t reg = get_temp_gpr(_jit); + movi(_jit, jit_gpr_regno(reg), i0); + remr(_jit, r0, r1, jit_gpr_regno(reg)); + unget_temp_gpr(_jit); +} + +static void +remr_u(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2) +{ + return iremr(_jit, r0, r1, r2, 0); +} + +static void +remi_u(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0) +{ + jit_gpr_t reg = 
get_temp_gpr(_jit); + movi(_jit, jit_gpr_regno(reg), i0); + remr_u(_jit, r0, r1,jit_gpr_regno(reg)); + unget_temp_gpr(_jit); +} + +static void +andr(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2) +{ + if ((r0|r1|r2) < 8 && (r0 == r1 || r0 == r2)) + T1_AND(_jit, r0, r0 == r1 ? r2 : r1); + else + T2_AND(_jit, r0, r1, r2); +} + +static void +andi(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0) +{ + int i; + + if ((i = encode_thumb_immediate(i0)) != -1) + T2_ANDI(_jit, r0, r1, i); + else if ((i = encode_thumb_immediate(~i0)) != -1) + T2_BICI(_jit, r0, r1, i); + else if (r0 != r1) { + movi(_jit, r0, i0); + T2_AND(_jit, r0, r1, r0); + } else { + jit_gpr_t reg = get_temp_gpr(_jit); + movi(_jit, jit_gpr_regno(reg), i0); + T2_AND(_jit, r0, r1, jit_gpr_regno(reg)); + unget_temp_gpr(_jit); + } +} + +static void +orr(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2) +{ + if ((r0|r1|r2) < 8 && (r0 == r1 || r0 == r2)) + T1_ORR(_jit, r0, r0 == r1 ? r2 : r1); + else + T2_ORR(_jit, r0, r1, r2); +} + +static void +ori(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0) +{ + int i; + + if ((i = encode_thumb_immediate(i0)) != -1) + T2_ORRI(_jit, r0, r1, i); + else if (r0 != r1) { + movi(_jit, r0, i0); + T2_ORR(_jit, r0, r1, r0); + } else { + jit_gpr_t reg = get_temp_gpr(_jit); + movi(_jit, jit_gpr_regno(reg), i0); + T2_ORR(_jit, r0, r1, jit_gpr_regno(reg)); + unget_temp_gpr(_jit); + } +} + +static void +xorr(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2) +{ + if ((r0|r1|r2) < 8 && (r0 == r1 || r0 == r2)) + T1_EOR(_jit, r0, r0 == r1 ? 
r2 : r1); + else + T2_EOR(_jit, r0, r1, r2); +} + +static void +xori(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0) +{ + int i; + + if ((i = encode_thumb_immediate(i0)) != -1) + T2_EORI(_jit, r0, r1, i); + else if (r0 != r1) { + movi(_jit, r0, i0); + T2_EOR(_jit, r0, r1, r0); + } else { + jit_gpr_t reg = get_temp_gpr(_jit); + movi(_jit, jit_gpr_regno(reg), i0); + T2_EOR(_jit, r0, r1, jit_gpr_regno(reg)); + unget_temp_gpr(_jit); + } +} + +static void +lshr(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2) +{ + if ((r0|r1|r2) < 8 && r0 == r1) + T1_LSL(_jit, r0, r2); + else + T2_LSL(_jit, r0, r1, r2); +} + +static void +lshi(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0) +{ + ASSERT(i0 >= 0 && i0 <= 31); + if (i0 == 0) + movr(_jit, r0, r1); + else { + if ((r0|r1) < 8) + T1_LSLI(_jit, r0, r1, i0); + else + T2_LSLI(_jit, r0, r1, i0); + } +} + +static void +rshr(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2) +{ + if ((r0|r1|r2) < 8 && r0 == r1) + T1_ASR(_jit, r0, r2); + else + T2_ASR(_jit, r0, r1, r2); +} + +static void +rshi(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0) +{ + ASSERT(i0 >= 0 && i0 <= 31); + if (i0 == 0) + movr(_jit, r0, r1); + else { + if ((r0|r1) < 8) + T1_ASRI(_jit, r0, r1, i0); + else + T2_ASRI(_jit, r0, r1, i0); + } +} + +static void +rshr_u(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2) +{ + if ((r0|r1|r2) < 8 && r0 == r1) + T1_LSR(_jit, r0, r2); + else + T2_LSR(_jit, r0, r1, r2); +} + +static void +rshi_u(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0) +{ + ASSERT(i0 >= 0 && i0 <= 31); + if (i0 == 0) + movr(_jit, r0, r1); + else { + if ((r0|r1) < 8) + T1_LSRI(_jit, r0, r1, i0); + else + T2_LSRI(_jit, r0, r1, i0); + } +} + +static void +jmpr(jit_state_t *_jit, int32_t r0) +{ + T1_BX(_jit, r0); +} + +static jit_reloc_t +jmp(jit_state_t *_jit) +{ + return T2_B(_jit); +} + +static void +jmpi(jit_state_t *_jit, jit_word_t i0) +{ + return jit_patch_there(_jit, jmp(_jit), (void*)i0); +} + 
+/* Compare r0 with r1 and emit a conditional branch on CC; returns the
+   relocation produced by T2_CC_B.  The compare uses T1_CMP when both
+   register numbers are below 8, T1_CMPX when both have bit 3 set, and
+   T2_CMP otherwise.  */
+static jit_reloc_t
+bccr(jit_state_t *_jit, int cc, int32_t r0, int32_t r1)
+{
+  if ((r0|r1) < 8)
+    T1_CMP(_jit, r0, r1);
+  else if ((r0&r1) & 8)
+    T1_CMPX(_jit, r0, r1);
+  else
+    T2_CMP(_jit, r0, r1);
+  return T2_CC_B(_jit, cc);
+}
+
+/* Compare r0 with the constant i1 and emit a conditional branch on CC;
+   returns the relocation produced by T2_CC_B.
+
+   T1_CMPI has a 3-bit register field and an 8-bit immediate, so it
+   serves every register below 8 (r7 included) with i1 in 0..255.
+   Otherwise a T2 compare (or compare-negative with -i1) with a
+   modified immediate is tried, and as a last resort i1 is loaded into
+   a temporary register for a register compare.  */
+static jit_reloc_t
+bcci(jit_state_t *_jit, int cc, int32_t r0, jit_word_t i1)
+{
+  int i;
+  if (r0 < 8 && !(i1 & 0xffffff00))
+    T1_CMPI(_jit, r0, i1);
+  else if ((i = encode_thumb_immediate(i1)) != -1)
+    T2_CMPI(_jit, r0, i);
+  else if ((i = encode_thumb_immediate(-i1)) != -1)
+    T2_CMNI(_jit, r0, i);
+  else {
+    jit_gpr_t reg = get_temp_gpr(_jit);
+    movi(_jit, jit_gpr_regno(reg), i1);
+    T2_CMP(_jit, r0, jit_gpr_regno(reg));
+    unget_temp_gpr(_jit);
+  }
+  return T2_CC_B(_jit, cc);
+}
+
+/* Compare-and-branch wrappers: each one forwards to bccr (register
+   operand) or bcci (immediate operand) with a fixed ARM_CC_*
+   condition code.  */
+static jit_reloc_t
+bltr(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  return bccr(_jit, ARM_CC_LT, r0, r1);
+}
+
+static jit_reloc_t
+blti(jit_state_t *_jit, int32_t r0, int32_t i1)
+{
+  return bcci(_jit, ARM_CC_LT, r0, i1);
+}
+
+static jit_reloc_t
+bltr_u(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  return bccr(_jit, ARM_CC_LO, r0, r1);
+}
+
+static jit_reloc_t
+blti_u(jit_state_t *_jit, int32_t r0, int32_t i1)
+{
+  return bcci(_jit, ARM_CC_LO, r0, i1);
+}
+
+static jit_reloc_t
+bler(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  return bccr(_jit, ARM_CC_LE, r0, r1);
+}
+
+static jit_reloc_t
+blei(jit_state_t *_jit, int32_t r0, int32_t i1)
+{
+  return bcci(_jit, ARM_CC_LE, r0, i1);
+}
+
+static jit_reloc_t
+bler_u(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  return bccr(_jit, ARM_CC_LS, r0, r1);
+}
+
+static jit_reloc_t
+blei_u(jit_state_t *_jit, int32_t r0, int32_t i1)
+{
+  return bcci(_jit, ARM_CC_LS, r0, i1);
+}
+
+static jit_reloc_t
+beqr(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  return bccr(_jit, ARM_CC_EQ, r0, r1);
+}
+
+static jit_reloc_t
+beqi(jit_state_t *_jit, int32_t r0, int32_t i1)
+{
+  return bcci(_jit, ARM_CC_EQ, r0, i1);
+}
+
+static jit_reloc_t
+bger(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  return bccr(_jit, 
ARM_CC_GE, r0, r1); +} + +static jit_reloc_t +bgei(jit_state_t *_jit, int32_t r0, int32_t i1) +{ + return bcci(_jit, ARM_CC_GE, r0, i1); +} + +static jit_reloc_t +bger_u(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + return bccr(_jit, ARM_CC_HS, r0, r1); +} + +static jit_reloc_t +bgei_u(jit_state_t *_jit, int32_t r0, int32_t i1) +{ + return bcci(_jit, ARM_CC_HS, r0, i1); +} + +static jit_reloc_t +bgtr(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + return bccr(_jit, ARM_CC_GT, r0, r1); +} + +static jit_reloc_t +bgti(jit_state_t *_jit, int32_t r0, int32_t i1) +{ + return bcci(_jit, ARM_CC_GT, r0, i1); +} + +static jit_reloc_t +bgtr_u(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + return bccr(_jit, ARM_CC_HI, r0, r1); +} + +static jit_reloc_t +bgti_u(jit_state_t *_jit, int32_t r0, int32_t i1) +{ + return bcci(_jit, ARM_CC_HI, r0, i1); +} + +static jit_reloc_t +bner(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + return bccr(_jit, ARM_CC_NE, r0, r1); +} + +static jit_reloc_t +bnei(jit_state_t *_jit, int32_t r0, int32_t i1) +{ + return bcci(_jit, ARM_CC_NE, r0, i1); +} + +static jit_reloc_t +baddr(jit_state_t *_jit, int cc, int32_t r0, int32_t r1) +{ + if ((r0|r1) < 8) + T1_ADD(_jit, r0, r0, r1); + else + T2_ADDS(_jit, r0, r0, r1); + return T2_CC_B(_jit, cc); +} + +static jit_reloc_t +baddi(jit_state_t *_jit, int cc, int32_t r0, int i1) +{ + int i; + if (r0 < 8 && !(i1 & ~7)) + T1_ADDI3(_jit, r0, r0, i1); + else if (r0 < 8 && !(-i1 & ~7)) + T1_SUBI3(_jit, r0, r0, -i1); + else if (r0 < 8 && !(i1 & ~0xff)) + T1_ADDI8(_jit, r0, i1); + else if (r0 < 8 && !(-i1 & ~0xff)) + T1_SUBI8(_jit, r0, -i1); + else if ((i = encode_thumb_immediate(i1)) != -1) + T2_ADDSI(_jit, r0, r0, i); + else if ((i = encode_thumb_immediate(-i1)) != -1) + T2_SUBSI(_jit, r0, r0, i); + else { + jit_gpr_t reg = get_temp_gpr(_jit); + movi(_jit, jit_gpr_regno(reg), i1); + T2_ADDS(_jit, r0, r0, jit_gpr_regno(reg)); + unget_temp_gpr(_jit); + } + return T2_CC_B(_jit, cc); +} + +static jit_reloc_t 
+boaddr(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  return baddr(_jit, ARM_CC_VS, r0, r1);
+}
+
+/* Add-and-branch wrappers: each one forwards to baddr (register
+   operand) or baddi (immediate operand) with a fixed ARM_CC_*
+   condition code.  */
+static jit_reloc_t
+boaddi(jit_state_t *_jit, int32_t r0, int32_t i1)
+{
+  return baddi(_jit, ARM_CC_VS, r0, i1);
+}
+
+static jit_reloc_t
+boaddr_u(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  return baddr(_jit, ARM_CC_HS, r0, r1);
+}
+
+static jit_reloc_t
+boaddi_u(jit_state_t *_jit, int32_t r0, int32_t i1)
+{
+  return baddi(_jit, ARM_CC_HS, r0, i1);
+}
+
+static jit_reloc_t
+bxaddr(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  return baddr(_jit, ARM_CC_VC, r0, r1);
+}
+
+static jit_reloc_t
+bxaddi(jit_state_t *_jit, int32_t r0, int32_t i1)
+{
+  return baddi(_jit, ARM_CC_VC, r0, i1);
+}
+
+static jit_reloc_t
+bxaddr_u(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  return baddr(_jit, ARM_CC_LO, r0, r1);
+}
+
+static jit_reloc_t
+bxaddi_u(jit_state_t *_jit, int32_t r0, int32_t i1)
+{
+  return baddi(_jit, ARM_CC_LO, r0, i1);
+}
+
+/* Emit r0 = r0 - r1 with a flag-setting subtract, then a conditional
+   branch on CC; returns the relocation produced by T2_CC_B.  */
+static jit_reloc_t
+bsubr(jit_state_t *_jit, int cc, int32_t r0, int32_t r1)
+{
+  if ((r0|r1) < 8)
+    T1_SUB(_jit, r0, r0, r1);
+  else
+    T2_SUBS(_jit, r0, r0, r1);
+  return T2_CC_B(_jit, cc);
+}
+
+/* Emit r0 = r0 - i1 with a flag-setting subtract, then a conditional
+   branch on CC; returns the relocation produced by T2_CC_B.
+
+   Each immediate form is tried both as "subtract i1" and as
+   "add -i1".  When only -i1 is encodable as a T2 modified immediate
+   the instruction must be an ADDS of that encoding; a SUBS of -i1
+   would add i1 instead of subtracting it.  If no immediate form fits,
+   i1 is loaded into a temporary register for a register SUBS.  */
+static jit_reloc_t
+bsubi(jit_state_t *_jit, int cc, int32_t r0, int i1)
+{
+  int i;
+  if (r0 < 8 && !(i1 & ~7))
+    T1_SUBI3(_jit, r0, r0, i1);
+  else if (r0 < 8 && !(-i1 & ~7))
+    T1_ADDI3(_jit, r0, r0, -i1);
+  else if (r0 < 8 && !(i1 & ~0xff))
+    T1_SUBI8(_jit, r0, i1);
+  else if (r0 < 8 && !(-i1 & ~0xff))
+    T1_ADDI8(_jit, r0, -i1);
+  else if ((i = encode_thumb_immediate(i1)) != -1)
+    T2_SUBSI(_jit, r0, r0, i);
+  else if ((i = encode_thumb_immediate(-i1)) != -1)
+    T2_ADDSI(_jit, r0, r0, i);
+  else {
+    jit_gpr_t reg = get_temp_gpr(_jit);
+    movi(_jit, jit_gpr_regno(reg), i1);
+    T2_SUBS(_jit, r0, r0, jit_gpr_regno(reg));
+    unget_temp_gpr(_jit);
+  }
+  return T2_CC_B(_jit, cc);
+}
+
+/* Subtract-and-branch wrapper: forwards to bsubr with ARM_CC_VS.  */
+static jit_reloc_t
+bosubr(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  return bsubr(_jit, ARM_CC_VS, r0, r1);
+}
+
+static jit_reloc_t
+bosubi(jit_state_t *_jit, 
int32_t r0, int32_t i1) +{ + return bsubi(_jit, ARM_CC_VS, r0, i1); +} + +static jit_reloc_t +bosubr_u(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + return bsubr(_jit, ARM_CC_LO, r0, r1); +} + +static jit_reloc_t +bosubi_u(jit_state_t *_jit, int32_t r0, int32_t i1) +{ + return bsubi(_jit, ARM_CC_LO, r0, i1); +} + +static jit_reloc_t +bxsubr(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + return bsubr(_jit, ARM_CC_VC, r0, r1); +} + +static jit_reloc_t +bxsubi(jit_state_t *_jit, int32_t r0, int32_t i1) +{ + return bsubi(_jit, ARM_CC_VC, r0, i1); +} + +static jit_reloc_t +bxsubr_u(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + return bsubr(_jit, ARM_CC_HS, r0, r1); +} + +static jit_reloc_t +bxsubi_u(jit_state_t *_jit, int32_t r0, int32_t i1) +{ + return bsubi(_jit, ARM_CC_HS, r0, i1); +} + +static jit_reloc_t +bmxr(jit_state_t *_jit, int cc, int32_t r0, int32_t r1) +{ + if ((r0|r1) < 8) + T1_TST(_jit, r0, r1); + else + T2_TST(_jit, r0, r1); + return T2_CC_B(_jit, cc); +} + +static jit_reloc_t +bmxi(jit_state_t *_jit, int cc, int32_t r0, jit_word_t i1) +{ + int i; + if ((i = encode_thumb_immediate(i1)) != -1) + T2_TSTI(_jit, r0, i); + else { + jit_gpr_t reg = get_temp_gpr(_jit); + movi(_jit, jit_gpr_regno(reg), i1); + T2_TST(_jit, r0, jit_gpr_regno(reg)); + unget_temp_gpr(_jit); + } + return T2_CC_B(_jit, cc); +} + +static jit_reloc_t +bmsr(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + return bmxr(_jit, ARM_CC_NE, r0, r1); +} + +static jit_reloc_t +bmsi(jit_state_t *_jit, int32_t r0, int32_t i1) +{ + return bmxi(_jit, ARM_CC_NE, r0, i1); +} + +static jit_reloc_t +bmcr(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + return bmxr(_jit, ARM_CC_EQ, r0, r1); +} + +static jit_reloc_t +bmci(jit_state_t *_jit, int32_t r0, int32_t i1) +{ + return bmxi(_jit, ARM_CC_EQ, r0, i1); +} + +static void +ldr_c(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + T2_LDRSBI(_jit, r0, r1, 0); +} + +static void +ldi_c(jit_state_t *_jit, int32_t r0, jit_word_t i0) +{ + jit_gpr_t reg = 
get_temp_gpr(_jit); + movi(_jit, jit_gpr_regno(reg), i0); + T2_LDRSBI(_jit, r0, jit_gpr_regno(reg), 0); + unget_temp_gpr(_jit); +} + +static void +ldxr_c(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2) +{ + if ((r0|r1|r2) < 8) + T1_LDRSB(_jit, r0, r1, r2); + else + T2_LDRSB(_jit, r0, r1, r2); +} + +#define jit_ldrt_strt_p() 0 + +static void +ldxi_c(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0) +{ + + if (jit_ldrt_strt_p() && i0 >= 0 && i0 <= 255) + T2_LDRSBI(_jit, r0, r1, i0); + else if (i0 < 0 && i0 >= -255) + T2_LDRSBIN(_jit, r0, r1, -i0); + else if (i0 >= 0 && i0 <= 4095) + T2_LDRSBWI(_jit, r0, r1, i0); + else if (r0 != r1) { + movi(_jit, r0, i0); + if ((r0|r1) < 8) + T1_LDRSB(_jit, r0, r1, r0); + else + T2_LDRSB(_jit, r0, r1, r0); + } else { + jit_gpr_t reg = get_temp_gpr(_jit); + movi(_jit, jit_gpr_regno(reg), i0); + if ((r0|r1|jit_gpr_regno(reg)) < 8) + T1_LDRSB(_jit, r0, r1, jit_gpr_regno(reg)); + else + T2_LDRSB(_jit, r0, r1, jit_gpr_regno(reg)); + unget_temp_gpr(_jit); + } +} + +static void +ldr_uc(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + T2_LDRBI(_jit, r0, r1, 0); +} + +static void +ldi_uc(jit_state_t *_jit, int32_t r0, jit_word_t i0) +{ + jit_gpr_t reg = get_temp_gpr(_jit); + movi(_jit, jit_gpr_regno(reg), i0); + T2_LDRBI(_jit, r0, jit_gpr_regno(reg), 0); + unget_temp_gpr(_jit); +} + +static void +ldxr_uc(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2) +{ + if ((r0|r1|r2) < 8) + T1_LDRB(_jit, r0, r1, r2); + else + T2_LDRB(_jit, r0, r1, r2); +} + +static void +ldxi_uc(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0) +{ + if ((r0|r1) < 8 && i0 >= 0 && i0 < 0x20) + T1_LDRBI(_jit, r0, r1, i0); + else if (jit_ldrt_strt_p() && i0 >= 0 && i0 <= 255) + T2_LDRBI(_jit, r0, r1, i0); + else if (i0 < 0 && i0 >= -255) + T2_LDRBIN(_jit, r0, r1, -i0); + else if (i0 >= 0 && i0 <= 4095) + T2_LDRBWI(_jit, r0, r1, i0); + else if (r0 != r1) { + movi(_jit, r0, i0); + if ((r0|r1) < 8) + T1_LDRB(_jit, r0, r1, r0); + else + T2_LDRB(_jit, 
r0, r1, r0); + } else { + jit_gpr_t reg = get_temp_gpr(_jit); + movi(_jit, jit_gpr_regno(reg), i0); + if ((r0|r1|jit_gpr_regno(reg)) < 8) + T1_LDRB(_jit, r0, r1, jit_gpr_regno(reg)); + else + T2_LDRB(_jit, r0, r1, jit_gpr_regno(reg)); + unget_temp_gpr(_jit); + } +} + +static void +ldr_s(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + T2_LDRSHI(_jit, r0, r1, 0); +} + +static void +ldi_s(jit_state_t *_jit, int32_t r0, jit_word_t i0) +{ + jit_gpr_t reg = get_temp_gpr(_jit); + movi(_jit, jit_gpr_regno(reg), i0); + T2_LDRSHI(_jit, r0, jit_gpr_regno(reg), 0); + unget_temp_gpr(_jit); +} + +static void +ldxr_s(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2) +{ + if ((r0|r1|r2) < 8) + T1_LDRSH(_jit, r0, r1, r2); + else + T2_LDRSH(_jit, r0, r1, r2); +} + +static void +ldxi_s(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0) +{ + if (jit_ldrt_strt_p() && i0 >= 0 && i0 <= 255) + T2_LDRSHI(_jit, r0, r1, i0); + else if (i0 < 0 && i0 >= -255) + T2_LDRSHIN(_jit, r0, r1, -i0); + else if (i0 >= 0 && i0 <= 4095) + T2_LDRSHWI(_jit, r0, r1, i0); + else if (r0 != r1) { + movi(_jit, r0, i0); + if ((r0|r1) < 8) + T1_LDRSH(_jit, r0, r1, r0); + else + T2_LDRSH(_jit, r0, r1, r0); + } else { + jit_gpr_t reg = get_temp_gpr(_jit); + movi(_jit, jit_gpr_regno(reg), i0); + if ((r0|r1|jit_gpr_regno(reg)) < 8) + T1_LDRSH(_jit, r0, r1, jit_gpr_regno(reg)); + else + T2_LDRSH(_jit, r0, r1, jit_gpr_regno(reg)); + unget_temp_gpr(_jit); + } +} + +static void +ldr_us(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + T2_LDRHI(_jit, r0, r1, 0); +} + +static void +ldi_us(jit_state_t *_jit, int32_t r0, jit_word_t i0) +{ + jit_gpr_t reg = get_temp_gpr(_jit); + movi(_jit, jit_gpr_regno(reg), i0); + T2_LDRHI(_jit, r0, jit_gpr_regno(reg), 0); + unget_temp_gpr(_jit); +} + +static void +ldxr_us(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2) +{ + + if ((r0|r1|r2) < 8) + T1_LDRH(_jit, r0, r1, r2); + else + T2_LDRH(_jit, r0, r1, r2); +} + +static void +ldxi_us(jit_state_t *_jit, int32_t r0, int32_t 
r1, jit_word_t i0) +{ + if ((r0|r1) < 8 && i0 >= 0 && !(i0 & 1) && (i0 >> 1) < 0x20) + T1_LDRHI(_jit, r0, r1, i0 >> 1); + else if (jit_ldrt_strt_p() && i0 >= 0 && i0 <= 255) + T2_LDRHI(_jit, r0, r1, i0); + else if (i0 < 0 && i0 >= -255) + T2_LDRHIN(_jit, r0, r1, -i0); + else if (i0 >= 0 && i0 <= 4095) + T2_LDRHWI(_jit, r0, r1, i0); + else if (r0 != r1) { + movi(_jit, r0, i0); + if ((r0|r1) < 8) + T1_LDRH(_jit, r0, r1, r0); + else + T2_LDRH(_jit, r0, r1, r0); + } else { + jit_gpr_t reg = get_temp_gpr(_jit); + movi(_jit, jit_gpr_regno(reg), i0); + if ((r0|r1|jit_gpr_regno(reg)) < 8) + T1_LDRH(_jit, r0, r1, jit_gpr_regno(reg)); + else + T2_LDRH(_jit, r0, r1, jit_gpr_regno(reg)); + unget_temp_gpr(_jit); + } +} + +static void +ldr_i(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + T2_LDRI(_jit, r0, r1, 0); +} + +static void +ldi_i(jit_state_t *_jit, int32_t r0, jit_word_t i0) +{ + jit_gpr_t reg = get_temp_gpr(_jit); + movi(_jit, jit_gpr_regno(reg), i0); + T2_LDRI(_jit, r0, jit_gpr_regno(reg), 0); + unget_temp_gpr(_jit); +} + +static void +ldxr_i(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2) +{ + if ((r0|r1|r2) < 8) + T1_LDR(_jit, r0, r1, r2); + else + T2_LDR(_jit, r0, r1, r2); +} + +static void +ldxi_i(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0) +{ + if ((r0|r1) < 8 && i0 >= 0 && !(i0 & 3) && (i0 >> 2) < 0x20) + T1_LDRI(_jit, r0, r1, i0 >> 2); + else if (r1 == jit_gpr_regno(JIT_SP) && r0 < 8 && + i0 >= 0 && !(i0 & 3) && (i0 >> 2) <= 255) + T1_LDRISP(_jit, r0, i0 >> 2); + else if (jit_ldrt_strt_p() && i0 >= 0 && i0 <= 255) + T2_LDRI(_jit, r0, r1, i0); + else if (i0 < 0 && i0 >= -255) /* same lower bound as the sibling ldxi and stxi helpers */ + T2_LDRIN(_jit, r0, r1, -i0); + else if (i0 >= 0 && i0 <= 4095) + T2_LDRWI(_jit, r0, r1, i0); + else if (r0 != r1) { + movi(_jit, r0, i0); + if ((r0|r1) < 8) + T1_LDR(_jit, r0, r1, r0); + else + T2_LDR(_jit, r0, r1, r0); + } else { + jit_gpr_t reg = get_temp_gpr(_jit); + movi(_jit, jit_gpr_regno(reg), i0); + if ((r0|r1|jit_gpr_regno(reg)) < 8) + T1_LDR(_jit, r0, r1, 
jit_gpr_regno(reg)); + else + T2_LDR(_jit, r0, r1, jit_gpr_regno(reg)); + unget_temp_gpr(_jit); + } +} + +static void +str_c(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + T2_STRBI(_jit, r1, r0, 0); +} + +static void +sti_c(jit_state_t *_jit, jit_word_t i0, int32_t r0) +{ + jit_gpr_t reg = get_temp_gpr(_jit); + movi(_jit, jit_gpr_regno(reg), i0); + T2_STRBI(_jit, r0, jit_gpr_regno(reg), 0); + unget_temp_gpr(_jit); +} + +static void +stxr_c(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2) +{ + if ((r0|r1|r2) < 8) + T1_STRB(_jit, r2, r1, r0); + else + T2_STRB(_jit, r2, r1, r0); +} + +static void +stxi_c(jit_state_t *_jit, jit_word_t i0, int32_t r0, int32_t r1) +{ + if ((r0|r1) < 8 && i0 >= 0 && i0 < 0x20) + T1_STRBI(_jit, r1, r0, i0); + else if (jit_ldrt_strt_p() && i0 >= 0 && i0 <= 255) + T2_STRBI(_jit, r1, r0, i0); + else if (i0 < 0 && i0 >= -255) + T2_STRBIN(_jit, r1, r0, -i0); + else if (i0 >= 0 && i0 <= 4095) + T2_STRBWI(_jit, r1, r0, i0); + else { + jit_gpr_t reg = get_temp_gpr(_jit); + movi(_jit, jit_gpr_regno(reg), i0); + if ((r0|r1|jit_gpr_regno(reg)) < 8) + T1_STRB(_jit, r1, r0, jit_gpr_regno(reg)); + else + T2_STRB(_jit, r1, r0, jit_gpr_regno(reg)); + unget_temp_gpr(_jit); + } +} + +static void +str_s(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + T2_STRHI(_jit, r1, r0, 0); +} + +static void +sti_s(jit_state_t *_jit, jit_word_t i0, int32_t r0) +{ + jit_gpr_t reg = get_temp_gpr(_jit); + movi(_jit, jit_gpr_regno(reg), i0); + T2_STRHI(_jit, r0, jit_gpr_regno(reg), 0); + unget_temp_gpr(_jit); +} + +static void +stxr_s(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2) +{ + if ((r0|r1|r2) < 8) + T1_STRH(_jit, r2, r1, r0); + else + T2_STRH(_jit, r2, r1, r0); +} + +static void +stxi_s(jit_state_t *_jit, jit_word_t i0, int32_t r0, int32_t r1) +{ + if ((r0|r1) < 8 && i0 >= 0 && !(i0 & 1) && (i0 >> 1) < 0x20) + T1_STRHI(_jit, r1, r0, i0 >> 1); + else if (jit_ldrt_strt_p() && i0 >= 0 && i0 <= 255) + T2_STRHI(_jit, r1, r0, i0); + else if (i0 < 0 && i0 >= 
-255) + T2_STRHIN(_jit, r1, r0, -i0); + else if (i0 >= 0 && i0 <= 4095) + T2_STRHWI(_jit, r1, r0, i0); + else { + jit_gpr_t reg = get_temp_gpr(_jit); + movi(_jit, jit_gpr_regno(reg), i0); + if ((r0|r1|jit_gpr_regno(reg)) < 8) + T1_STRH(_jit, r1, r0, jit_gpr_regno(reg)); + else + T2_STRH(_jit, r1, r0, jit_gpr_regno(reg)); + unget_temp_gpr(_jit); + } +} + +static void +str_i(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + T2_STRI(_jit, r1, r0, 0); +} + +static void +sti_i(jit_state_t *_jit, jit_word_t i0, int32_t r0) +{ + jit_gpr_t reg = get_temp_gpr(_jit); + movi(_jit, jit_gpr_regno(reg), i0); + T2_STRI(_jit, r0, jit_gpr_regno(reg), 0); + unget_temp_gpr(_jit); +} + +static void +stxr_i(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2) +{ + if ((r0|r1|r2) < 8) + T1_STR(_jit, r2, r1, r0); + else + T2_STR(_jit, r2, r1, r0); +} + +static void +stxi_i(jit_state_t *_jit, jit_word_t i0, int32_t r0, int32_t r1) +{ + if ((r0|r1) < 8 && i0 >= 0 && !(i0 & 3) && (i0 >> 2) < 0x20) + T1_STRI(_jit, r1, r0, i0 >> 2); + else if (r0 == jit_gpr_regno(JIT_SP) && r1 < 8 && + i0 >= 0 && !(i0 & 3) && (i0 >> 2) <= 255) + T1_STRISP(_jit, r1, i0 >> 2); + else if (jit_ldrt_strt_p() && i0 >= 0 && i0 <= 255) + T2_STRI(_jit, r1, r0, i0); + else if (i0 < 0 && i0 >= -255) + T2_STRIN(_jit, r1, r0, -i0); + else if (i0 >= 0 && i0 <= 4095) + T2_STRWI(_jit, r1, r0, i0); + else { + jit_gpr_t reg = get_temp_gpr(_jit); + movi(_jit, jit_gpr_regno(reg), i0); + if ((r0|r1|jit_gpr_regno(reg)) < 8) + T1_STR(_jit, r1, r0, jit_gpr_regno(reg)); + else + T2_STR(_jit, r1, r0, jit_gpr_regno(reg)); + unget_temp_gpr(_jit); + } +} + +static void +bswapr_us(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + if ((r0|r1) < 8) + T1_REV(_jit, r0, r1); + else + T2_REV(_jit, r0, r1); + rshi_u(_jit, r0, r0, 16); +} + +/* inline glibc htonl (without register clobber) */ +static void +bswapr_ui(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + if ((r0|r1) < 8) + T1_REV(_jit, r0, r1); + else + T2_REV(_jit, r0, r1); +} + +static 
void +extr_c(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + + if ((r0|r1) < 8) + T1_SXTB(_jit, r0, r1); + else + T2_SXTB(_jit, r0, r1); +} + +static void +extr_uc(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + if ((r0|r1) < 8) + T1_UXTB(_jit, r0, r1); + else + T2_UXTB(_jit, r0, r1); +} + +static void +extr_s(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + if ((r0|r1) < 8) + T1_SXTH(_jit, r0, r1); + else + T2_SXTH(_jit, r0, r1); +} + +static void +extr_us(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + if ((r0|r1) < 8) + T1_UXTH(_jit, r0, r1); + else + T2_UXTH(_jit, r0, r1); +} + +static void +callr(jit_state_t *_jit, int32_t r0) +{ + T1_BLX(_jit, r0); +} + +static void +calli(jit_state_t *_jit, jit_word_t i0) +{ + jit_patch_there(_jit, T2_BLI(_jit), (void*)i0); +} + +static void +jmpi_with_link(jit_state_t *_jit, jit_word_t i0) +{ + jit_patch_there(_jit, T2_BLI(_jit), (void*)i0); +} + +static void +push_link_register(jit_state_t *_jit) +{ +} + +static void +pop_link_register(jit_state_t *_jit) +{ +} + +static void +ret(jit_state_t *_jit) +{ + T1_BX(_jit, jit_gpr_regno(_LR)); +} + +static void +reti(jit_state_t *_jit, int32_t i0) +{ + movi(_jit, jit_gpr_regno(_R0), i0); + ret(_jit); +} + +static void +retr(jit_state_t *_jit, int32_t r0) +{ + movr(_jit, jit_gpr_regno(_R0), r0); + ret(_jit); +} + +static void +retval_c(jit_state_t *_jit, int32_t r0) +{ + extr_c(_jit, r0, jit_gpr_regno(_R0)); +} + +static void +retval_uc(jit_state_t *_jit, int32_t r0) +{ + extr_uc(_jit, r0, jit_gpr_regno(_R0)); +} + +static void +retval_s(jit_state_t *_jit, int32_t r0) +{ + extr_s(_jit, r0, jit_gpr_regno(_R0)); +} + +static void +retval_us(jit_state_t *_jit, int32_t r0) +{ + extr_us(_jit, r0, jit_gpr_regno(_R0)); +} + +static void +retval_i(jit_state_t *_jit, int32_t r0) +{ + movr(_jit, r0, jit_gpr_regno(_R0)); +} + +static uint32_t* +jmp_without_veneer(jit_state_t *_jit) +{ + uint32_t *loc = _jit->pc.ui; + emit_u16(_jit, 0); + emit_u16(_jit, 0); + return loc; +} + +static void 
+patch_jmp_without_veneer(jit_state_t *_jit, uint32_t *loc) +{ + uint8_t *pc_base = ((uint8_t *)loc) + 4; + int32_t off = (uint8_t*)jit_address(_jit) - pc_base; + write_wide_thumb(loc, THUMB2_B | encode_thumb_jump(off)); +} + +struct veneer +{ + uint16_t ldr; + uint16_t br; + uint32_t addr; +}; + +static void +patch_veneer(uint32_t *loc, jit_pointer_t addr) +{ + struct veneer *v = (struct veneer*) loc; + v->addr = (uintptr_t) addr; +} + +static void +emit_veneer(jit_state_t *_jit, jit_pointer_t target) +{ + uint16_t thumb1_ldr = 0x4800; + int32_t tmp = jit_gpr_regno(JIT_TMP1); + ASSERT(tmp < 8); + // Loaded addr is 4 bytes after the LDR, which is aligned, so offset is 0. + emit_u16(_jit, thumb1_ldr | (tmp << 8)); + emit_u16(_jit, THUMB_BX|(_u4(tmp)<<3)); + emit_u32(_jit, (uint32_t) target); +} + +static void +ldr_atomic(jit_state_t *_jit, int32_t dst, int32_t loc) +{ + T1_DMB(_jit, DMB_ISH); + ldr_i(_jit, dst, loc); + T1_DMB(_jit, DMB_ISH); +} + +static void +str_atomic(jit_state_t *_jit, int32_t loc, int32_t val) +{ + T1_DMB(_jit, DMB_ISH); + str_i(_jit, loc, val); + T1_DMB(_jit, DMB_ISH); +} + +static void +swap_atomic(jit_state_t *_jit, int32_t dst, int32_t loc, int32_t val) +{ + int32_t result = jit_gpr_regno(get_temp_gpr(_jit)); + int32_t dst_or_tmp; + if (dst == val || dst == loc) + dst_or_tmp = jit_gpr_regno(get_temp_gpr(_jit)); + else + dst_or_tmp = dst; + + T1_DMB(_jit, DMB_ISH); + void *retry = jit_address(_jit); + T1_LDREX(_jit, dst_or_tmp, loc, 0); + T1_STREX(_jit, result, val, loc, 0); + jit_patch_there(_jit, bnei(_jit, result, 0), retry); + T1_DMB(_jit, DMB_ISH); + movr(_jit, dst, dst_or_tmp); + + if (dst == val || dst == loc) unget_temp_gpr(_jit); + unget_temp_gpr(_jit); +} + +static void +cas_atomic(jit_state_t *_jit, int32_t dst, int32_t loc, int32_t expected, + int32_t desired) +{ + int32_t dst_or_tmp; + if (dst == loc || dst == expected || dst == desired) + dst_or_tmp = jit_gpr_regno(get_temp_gpr(_jit)); + else + dst_or_tmp = dst; + T1_DMB(_jit, 
DMB_ISH); + void *retry = jit_address(_jit); + T1_LDREX(_jit, dst_or_tmp, loc, 0); + jit_reloc_t bad = bner(_jit, dst_or_tmp, expected); + int result = jit_gpr_regno(get_temp_gpr(_jit)); + T1_STREX(_jit, result, desired, loc, 0); + jit_patch_there(_jit, bnei(_jit, result, 0), retry); + unget_temp_gpr(_jit); + jit_patch_here(_jit, bad); + T1_DMB(_jit, DMB_ISH); + movr(_jit, dst, dst_or_tmp); + + if (dst == loc || dst == expected || dst == desired) + unget_temp_gpr(_jit); +} + +static void +breakpoint(jit_state_t *_jit) +{ + T1_BRK(_jit); +} diff --git a/deps/lightening/lightening/arm-vfp.c b/deps/lightening/lightening/arm-vfp.c new file mode 100644 index 0000000..208edc3 --- /dev/null +++ b/deps/lightening/lightening/arm-vfp.c @@ -0,0 +1,1168 @@ +/* + * Copyright (C) 2012-2017, 2019 Free Software Foundation, Inc. + * + * This file is part of GNU lightning. + * + * GNU lightning is free software; you can redistribute it and/or modify it + * under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation; either version 3, or (at your option) + * any later version. + * + * GNU lightning is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public + * License for more details. 
+ * + * Authors: + * Paulo Cesar Pereira de Andrade + */ + +#define ARM_V_F64 0x00000100 +#define ARM_VADD_F 0x0e300a00 +#define ARM_VSUB_F 0x0e300a40 +#define ARM_VMUL_F 0x0e200a00 +#define ARM_VDIV_F 0x0e800a00 +#define ARM_VABS_F 0x0eb00ac0 +#define ARM_VNEG_F 0x0eb10a40 +#define ARM_VSQRT_F 0x0eb10ac0 +#define ARM_VMOV_F 0x0eb00a40 +#define ARM_VMOV_A_S 0x0e100a10 /* vmov rn, sn */ +#define ARM_VMOV_S_A 0x0e000a10 /* vmov sn, rn */ +#define ARM_VMOV_D_AA 0x0c400b10 /* vmov dn, rn,rn */ +#define ARM_VCMP 0x0eb40a40 +#define ARM_VMRS 0x0ef10a10 +#define ARM_VCVT_2I 0x00040000 /* to integer */ +#define ARM_VCVT_2S 0x00010000 /* to signed */ +#define ARM_VCVT_RS 0x00000080 /* round to zero or signed */ +#define ARM_VCVT 0x0eb80a40 +#define ARM_VCVT_S32_F32 ARM_VCVT|ARM_VCVT_2I|ARM_VCVT_2S|ARM_VCVT_RS +#define ARM_VCVT_S32_F64 ARM_VCVT|ARM_VCVT_2I|ARM_VCVT_2S|ARM_VCVT_RS|ARM_V_F64 +#define ARM_VCVT_F32_S32 ARM_VCVT|ARM_VCVT_RS +#define ARM_VCVT_F64_S32 ARM_VCVT|ARM_VCVT_RS|ARM_V_F64 +#define ARM_VCVT_F 0x0eb70ac0 +#define ARM_VCVT_F32_F64 ARM_VCVT_F +#define ARM_VCVT_F64_F32 ARM_VCVT_F|ARM_V_F64 +#define ARM_P 0x00800000 /* positive offset */ +#define ARM_V_D 0x00400000 +#define ARM_V_N 0x00000080 +#define ARM_V_M 0x00000020 +#define ARM_V_I32 0x00200000 +#define ARM_VMOVI 0x02800010 +#define ARM_VMVNI 0x02800030 +#define ARM_VLDR 0x0d100a00 +#define ARM_VSTR 0x0d000a00 +#define ARM_VM 0x0c000a00 +#define ARM_VMOV_A_D 0x0e100b10 +#define ARM_VMOV_D_A 0x0e000b10 + +#define vfp_regno(rn) ((rn) >> 1) + +static void +vodi(jit_state_t *_jit, int oi, int r0) +{ + ASSERT(!(oi & 0x0000f000)); + ASSERT(!(r0 & 1)); + r0 >>= 1; + emit_wide_thumb(_jit, oi|(_u4(r0)<<12)); +} + +static void +vo_ss(jit_state_t *_jit, int o, int r0, int r1) +{ + ASSERT(!(o & 0xf000f00f)); + if (r0 & 1) o |= ARM_V_D; + if (r1 & 1) o |= ARM_V_M; + r0 >>= 1; r1 >>= 1; + emit_wide_thumb(_jit, ARM_CC_AL|o|(_u4(r0)<<12)|_u4(r1)); +} + +static void +vo_dd(jit_state_t *_jit, int o, int r0, int r1) +{ + 
ASSERT(!(o & 0xf000f00f)); + ASSERT(!(r0 & 1) && !(r1 & 1)); + r0 >>= 1; r1 >>= 1; + emit_wide_thumb(_jit, ARM_CC_AL|o|(_u4(r0)<<12)|_u4(r1)); +} + +static void +vors_(jit_state_t *_jit, int o, int r0, int r1) +{ + ASSERT(!(o & 0xf000f00f)); + if (r1 & 1) o |= ARM_V_N; + r1 >>= 1; + emit_wide_thumb(_jit, ARM_CC_AL|o|(_u4(r1)<<16)|(_u4(r0)<<12)); +} + +static void +vori_(jit_state_t *_jit, int o, int r0, int r1) +{ + ASSERT(!(o & 0xf000f00f)); + /* use same bit pattern, to set opc1... */ + if (r1 & 1) o |= ARM_V_I32; + r1 >>= 1; + emit_wide_thumb(_jit, ARM_CC_AL|o|(_u4(r1)<<16)|(_u4(r0)<<12)); +} + +static void +vorrd(jit_state_t *_jit, int o, int r0, int r1, int r2) +{ + ASSERT(!(o & 0xf00ff00f)); + ASSERT(!(r2 & 1)); + r2 >>= 1; + emit_wide_thumb(_jit, ARM_CC_AL|o|(_u4(r1)<<16)|(_u4(r0)<<12)|_u4(r2)); +} + +static void +vosss(jit_state_t *_jit, int o, int r0, int r1, int r2) +{ + ASSERT(!(o & 0xf00ff00f)); + if (r0 & 1) o |= ARM_V_D; + if (r1 & 1) o |= ARM_V_N; + if (r2 & 1) o |= ARM_V_M; + r0 >>= 1; r1 >>= 1; r2 >>= 1; + emit_wide_thumb(_jit, ARM_CC_AL|o|(_u4(r1)<<16)|(_u4(r0)<<12)|_u4(r2)); +} + +static void +voddd(jit_state_t *_jit, int o, int r0, int r1, int r2) +{ + ASSERT(!(o & 0xf00ff00f)); + ASSERT(!(r0 & 1) && !(r1 & 1) && !(r2 & 1)); + r0 >>= 1; r1 >>= 1; r2 >>= 1; + emit_wide_thumb(_jit, ARM_CC_AL|o|(_u4(r1)<<16)|(_u4(r0)<<12)|_u4(r2)); +} + +static void +vldst(jit_state_t *_jit, int o, int r0, int r1, int i0) +{ + /* i0 << 2 is byte offset */ + ASSERT(!(o & 0xf00ff0ff)); + if (r0 & 1) { + ASSERT(!(o & ARM_V_F64)); + o |= ARM_V_D; + } + r0 >>= 1; + emit_wide_thumb(_jit, ARM_CC_AL|o|(_u4(r1)<<16)|(_u4(r0)<<12)|_u8(i0)); +} + +static void +VADD_F32(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2) +{ + vosss(_jit,ARM_VADD_F,r0,r1,r2); +} + +static void +VADD_F64(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2) +{ + voddd(_jit,ARM_VADD_F|ARM_V_F64,r0,r1,r2); +} + +static void +VSUB_F32(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2) +{ + 
vosss(_jit,ARM_VSUB_F,r0,r1,r2); +} + +static void +VSUB_F64(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2) +{ + voddd(_jit,ARM_VSUB_F|ARM_V_F64,r0,r1,r2); +} + +static void +VMUL_F32(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2) +{ + vosss(_jit,ARM_VMUL_F,r0,r1,r2); +} + +static void +VMUL_F64(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2) +{ + voddd(_jit,ARM_VMUL_F|ARM_V_F64,r0,r1,r2); +} + +static void +VDIV_F32(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2) +{ + vosss(_jit,ARM_VDIV_F,r0,r1,r2); +} + +static void +VDIV_F64(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2) +{ + voddd(_jit,ARM_VDIV_F|ARM_V_F64,r0,r1,r2); +} + +static void +VABS_F32(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + vo_ss(_jit,ARM_VABS_F,r0,r1); +} + +static void +VABS_F64(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + vo_dd(_jit,ARM_VABS_F|ARM_V_F64,r0,r1); +} + +static void +VNEG_F32(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + vo_ss(_jit,ARM_VNEG_F,r0,r1); +} + +static void +VNEG_F64(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + vo_dd(_jit,ARM_VNEG_F|ARM_V_F64,r0,r1); +} + +static void +VSQRT_F32(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + vo_ss(_jit,ARM_VSQRT_F,r0,r1); +} + +static void +VSQRT_F64(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + vo_dd(_jit,ARM_VSQRT_F|ARM_V_F64,r0,r1); +} + +static void +VMOV_F32(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + vo_ss(_jit,ARM_VMOV_F,r0,r1); +} + +static void +VMOV_F64(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + vo_dd(_jit,ARM_VMOV_F|ARM_V_F64,r0,r1); +} + +static void +VMOV_D_AA(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2) +{ + vorrd(_jit,ARM_VMOV_D_AA,r1,r2,r0); +} + +static void +VMOV_S_A(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + vors_(_jit,ARM_VMOV_S_A,r1,r0); +} + +static void +VCMP_F32(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + vo_ss(_jit,ARM_VCMP,r0,r1); +} + +static void +VCMP_F64(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + 
vo_dd(_jit,ARM_VCMP|ARM_V_F64,r0,r1); +} + +static void +VMRS(jit_state_t *_jit) +{ + emit_wide_thumb(_jit, ARM_CC_AL|ARM_VMRS|(0xf<<12)); +} + +static void +VCVT_S32_F32(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + vo_ss(_jit,ARM_VCVT_S32_F32,r0,r1); +} + +static void +VCVT_S32_F64(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + vo_ss(_jit,ARM_VCVT_S32_F64,r0,r1); +} + +static void +VCVT_F32_S32(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + vo_ss(_jit,ARM_VCVT_F32_S32,r0,r1); +} + +static void +VCVT_F64_S32(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + vo_ss(_jit,ARM_VCVT_F64_S32,r0,r1); +} + +static void +VCVT_F32_F64(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + vo_ss(_jit,ARM_VCVT_F32_F64,r0,r1); +} + +static void +VCVT_F64_F32(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + vo_ss(_jit,ARM_VCVT_F64_F32,r0,r1); +} + +static void +VMOV_A_S32(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + vori_(_jit,ARM_VMOV_A_D,r0,r1); +} + +static void +VMOV_V_I32(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + vori_(_jit,ARM_VMOV_D_A,r1,r0); +} + +/* "oi" should be the result of encode_vfp_double */ +static void +VIMM(jit_state_t *_jit, int32_t oi, int32_t r0) +{ + vodi(_jit, oi,r0); +} + +/* index is multipled by four */ +static void +VLDRN_F32(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t i0) +{ + vldst(_jit,ARM_VLDR,r0,r1,i0); +} + +static void +VLDR_F32(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t i0) +{ + vldst(_jit,ARM_VLDR|ARM_P,r0,r1,i0); +} + +static void +VLDRN_F64(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t i0) +{ + vldst(_jit,ARM_VLDR|ARM_V_F64,r0,r1,i0); +} + +static void +VLDR_F64(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t i0) +{ + vldst(_jit,ARM_VLDR|ARM_V_F64|ARM_P,r0,r1,i0); +} + +static void +VSTRN_F32(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t i0) +{ + vldst(_jit,ARM_VSTR,r0,r1,i0); +} + +static void +VSTR_F32(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t i0) +{ + vldst(_jit,ARM_VSTR|ARM_P,r0,r1,i0); +} + 
+static void +VSTRN_F64(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t i0) +{ + vldst(_jit,ARM_VSTR|ARM_V_F64,r0,r1,i0); +} + +static void +VSTR_F64(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t i0) +{ + vldst(_jit,ARM_VSTR|ARM_V_F64|ARM_P,r0,r1,i0); +} + +static void +absr_f(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + VABS_F32(_jit, r0,r1); +} + +static void +absr_d(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + VABS_F64(_jit, r0,r1); +} + +static void +negr_f(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + VNEG_F32(_jit, r0,r1); +} + +static void +negr_d(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + VNEG_F64(_jit, r0,r1); +} + +static void +sqrtr_f(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + VSQRT_F32(_jit, r0,r1); +} + +static void +sqrtr_d(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + VSQRT_F64(_jit, r0,r1); +} + +static void +addr_f(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2) +{ + VADD_F32(_jit, r0,r1,r2); +} + +static void +addr_d(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2) +{ + VADD_F64(_jit, r0,r1,r2); +} + +static void +subr_f(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2) +{ + VSUB_F32(_jit, r0,r1,r2); +} + +static void +subr_d(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2) +{ + VSUB_F64(_jit, r0,r1,r2); +} + +static void +mulr_f(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2) +{ + VMUL_F32(_jit, r0,r1,r2); +} + +static void +mulr_d(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2) +{ + VMUL_F64(_jit, r0,r1,r2); +} + +static void +divr_f(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2) +{ + VDIV_F32(_jit, r0,r1,r2); +} + +static void +divr_d(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2) +{ + VDIV_F64(_jit, r0,r1,r2); +} + +static void +cmp_f(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + VCMP_F32(_jit, r0, r1); +} + +static void +cmp_d(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + VCMP_F64(_jit, r0, r1); +} + +static jit_reloc_t +vbcmp_x(jit_state_t *_jit, int cc) +{ 
+ VMRS(_jit); + return T2_CC_B(_jit, cc); +} + +static jit_reloc_t +vbcmp_f(jit_state_t *_jit, int cc, int32_t r0, int32_t r1) +{ + cmp_f(_jit, r0, r1); + return vbcmp_x(_jit, cc); +} + +static jit_reloc_t +vbcmp_d(jit_state_t *_jit, int cc, int32_t r0, int32_t r1) +{ + cmp_d(_jit, r0, r1); + return vbcmp_x(_jit, cc); +} + +static jit_reloc_t +vbncmp_x(jit_state_t *_jit, int cc) +{ + VMRS(_jit); + jit_reloc_t cont = T2_CC_B(_jit, cc); + jit_reloc_t ret = T2_B(_jit); + jit_patch_here(_jit, cont); + return ret; +} + +static jit_reloc_t +vbncmp_f(jit_state_t *_jit, int cc, int32_t r0, int32_t r1) +{ + cmp_f(_jit, r0, r1); + return vbncmp_x(_jit, cc); +} + +static jit_reloc_t +vbncmp_d(jit_state_t *_jit, int cc, int32_t r0, int32_t r1) +{ + cmp_d(_jit, r0, r1); + return vbncmp_x(_jit, cc); +} + +static jit_reloc_t +bltr_f(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + return vbcmp_f(_jit, ARM_CC_MI, r0, r1); +} + +static jit_reloc_t +bltr_d(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + return vbcmp_d(_jit, ARM_CC_MI, r0, r1); +} + +static jit_reloc_t +bler_f(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + return vbcmp_f(_jit, ARM_CC_LS, r0, r1); +} + +static jit_reloc_t +bler_d(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + return vbcmp_d(_jit, ARM_CC_LS, r0, r1); +} + +static jit_reloc_t +beqr_f(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + return vbcmp_f(_jit, ARM_CC_EQ, r0, r1); +} + +static jit_reloc_t +beqr_d(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + return vbcmp_d(_jit, ARM_CC_EQ, r0, r1); +} + +static jit_reloc_t +bger_f(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + return vbcmp_f(_jit, ARM_CC_GE, r0, r1); +} + +static jit_reloc_t +bger_d(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + return vbcmp_d(_jit, ARM_CC_GE, r0, r1); +} + +static jit_reloc_t +bgtr_f(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + return vbcmp_f(_jit, ARM_CC_GT, r0, r1); +} + +static jit_reloc_t +bgtr_d(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + return vbcmp_d(_jit, 
ARM_CC_GT, r0, r1); +} + +static jit_reloc_t +bner_f(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + return vbcmp_f(_jit, ARM_CC_NE, r0, r1); +} + +static jit_reloc_t +bner_d(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + return vbcmp_d(_jit, ARM_CC_NE, r0, r1); +} + +static jit_reloc_t +bunltr_f(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + return vbncmp_f(_jit, ARM_CC_GE, r0, r1); +} + +static jit_reloc_t +bunltr_d(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + return vbncmp_d(_jit, ARM_CC_GE, r0, r1); +} + +static jit_reloc_t +bunler_f(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + return vbncmp_f(_jit, ARM_CC_GT, r0, r1); +} + +static jit_reloc_t +bunler_d(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + return vbncmp_d(_jit, ARM_CC_GT, r0, r1); +} + +static jit_reloc_t +bungtr_f(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + return vbcmp_f(_jit, ARM_CC_HI, r0, r1); +} + +static jit_reloc_t +bungtr_d(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + return vbcmp_d(_jit, ARM_CC_HI, r0, r1); +} + +static jit_reloc_t +bordr_f(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + return vbcmp_f(_jit, ARM_CC_VC, r0, r1); +} + +static jit_reloc_t +bordr_d(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + return vbcmp_d(_jit, ARM_CC_VC, r0, r1); +} + +static jit_reloc_t +bunordr_f(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + return vbcmp_f(_jit, ARM_CC_VS, r0, r1); +} + +static jit_reloc_t +bunordr_d(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + return vbcmp_d(_jit, ARM_CC_VS, r0, r1); +} + +static jit_reloc_t +buneqr_x(jit_state_t *_jit) +{ + VMRS(_jit); + jit_reloc_t a = T2_CC_B(_jit, ARM_CC_VS); + jit_reloc_t b = T2_CC_B(_jit, ARM_CC_NE); + jit_patch_here(_jit, a); + jit_reloc_t ret = T2_B(_jit); + jit_patch_here(_jit, b); + return ret; +} + +static jit_reloc_t +buneqr_f(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + cmp_f(_jit, r0, r1); + return buneqr_x(_jit); +} + +static jit_reloc_t +buneqr_d(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + cmp_d(_jit, r0, r1); 
+ return buneqr_x(_jit); +} + +static jit_reloc_t +bunger_x(jit_state_t *_jit) +{ + VMRS(_jit); + jit_reloc_t a = T2_CC_B(_jit, ARM_CC_MI); + jit_reloc_t ret = T2_CC_B(_jit, ARM_CC_HS); + jit_patch_here(_jit, a); + return ret; +} + +static jit_reloc_t +bunger_f(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + cmp_f(_jit, r0, r1); + return bunger_x(_jit); +} + +static jit_reloc_t +bunger_d(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + cmp_d(_jit, r0, r1); + return bunger_x(_jit); +} + +static jit_reloc_t +bltgtr_x(jit_state_t *_jit) +{ + VMRS(_jit); + jit_reloc_t a = T2_CC_B(_jit, ARM_CC_VS); + jit_reloc_t b = T2_CC_B(_jit, ARM_CC_EQ); + jit_reloc_t ret = T2_B(_jit); + jit_patch_here(_jit, a); + jit_patch_here(_jit, b); + return ret; +} + +static jit_reloc_t +bltgtr_f(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + cmp_f(_jit, r0, r1); + return bltgtr_x(_jit); +} + +static jit_reloc_t +bltgtr_d(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + cmp_d(_jit, r0, r1); + return bltgtr_x(_jit); +} + +static void +ldr_f(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + VLDR_F32(_jit, r0,r1,0); +} + +static void +ldr_d(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + VLDR_F64(_jit, r0,r1,0); +} + +static void +str_f(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + VSTR_F32(_jit, r1,r0,0); +} + +static void +str_d(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + VSTR_F64(_jit, r1,r0,0); +} + +static void +movr_f(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + if (r0 != r1) + VMOV_F32(_jit, r0, r1); +} + +static void +movr_d(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + if (r0 != r1) + VMOV_F64(_jit, r0, r1); +} + +static int +encode_vfp_double(int mov, int inv, unsigned lo, unsigned hi) +{ + int code, mode, imm, mask; + + if (hi != lo) { + if (mov && !inv) { + /* (I64) + * aaaaaaaabbbbbbbbccccccccddddddddeeeeeeeeffffffffgggggggghhhhhhhh + */ + for (mode = 0, mask = 0xff; mode < 4; mask <<= 8, mode++) { + imm = lo & mask; + if (imm != mask && imm != 0) + goto fail; + imm = hi & 
mask; + if (imm != mask && imm != 0) + goto fail; + } + mode = 0xe20; + imm = (((hi & 0x80000000) >> 24) | ((hi & 0x00800000) >> 17) | + ((hi & 0x00008000) >> 10) | ((hi & 0x00000080) >> 3) | + ((lo & 0x80000000) >> 28) | ((lo & 0x00800000) >> 21) | + ((lo & 0x00008000) >> 14) | ((lo & 0x00000080) >> 7)); + goto success; + } + goto fail; + } + /* (I32) + * 00000000 00000000 00000000 abcdefgh + * 00000000 00000000 abcdefgh 00000000 + * 00000000 abcdefgh 00000000 00000000 + * abcdefgh 00000000 00000000 00000000 */ + for (mode = 0, mask = 0xff; mode < 4; mask <<= 8, mode++) { + if ((lo & mask) == lo) { + imm = lo >> (mode << 3); + mode <<= 9; + goto success; + } + } + /* (I16) + * 00000000 abcdefgh 00000000 abcdefgh + * abcdefgh 00000000 abcdefgh 00000000 */ + for (mode = 0, mask = 0xff; mode < 2; mask <<= 8, mode++) { + if ((lo & mask) && ((lo & (mask << 16)) >> 16) == (lo & mask)) { + imm = lo >> (mode << 3); + mode = 0x800 | (mode << 9); + goto success; + } + } + if (mov) { + /* (I32) + * 00000000 00000000 abcdefgh 11111111 + * 00000000 abcdefgh 11111111 11111111 */ + for (mode = 0, mask = 0xff; mode < 2; + mask = (mask << 8) | 0xff, mode++) { + if ((lo & mask) == mask && + !((lo & ~mask) >> 8) && + (imm = lo >> (8 + (mode << 8)))) { + mode = 0xc00 | (mode << 8); + goto success; + } + } + if (!inv) { + /* (F32) + * aBbbbbbc defgh000 00000000 00000000 + * from the ARM Architecture Reference Manual: + * In this entry, B = NOT(b). The bit pattern represents the + * floating-point number (-1)^s* 2^exp * mantissa, where + * S = UInt(a), + * exp = UInt(NOT(b):c:d)-3 and + * mantissa = (16+UInt(e:f:g:h))/16. */ + if ((lo & 0x7ffff) == 0 && + (((lo & 0x7e000000) == 0x3e000000) || + ((lo & 0x7e000000) == 0x40000000))) { + mode = 0xf00; + imm = ((lo >> 24) & 0x80) | ((lo >> 19) & 0x7f); + goto success; + } + } + } + +fail: + /* need another approach (load from memory, move from arm register, etc) */ + return -1; + +success: + code = inv ? 
ARM_VMVNI : ARM_VMOVI; + switch ((mode & 0xf00) >> 8) { + case 0x0: case 0x2: case 0x4: case 0x6: + case 0x8: case 0xa: + if (inv) mode |= 0x20; + if (!mov) mode |= 0x100; + break; + case 0x1: case 0x3: case 0x5: case 0x7: + /* should actually not reach here */ + ASSERT(!inv); + case 0x9: case 0xb: + ASSERT(!mov); + break; + case 0xc: case 0xd: + /* should actually not reach here */ + ASSERT(inv); + case 0xe: + ASSERT(mode & 0x20); + ASSERT(mov && !inv); + break; + default: + ASSERT(!(mode & 0x20)); + break; + } + imm = ((imm & 0x80) << 17) | ((imm & 0x70) << 12) | (imm & 0x0f); + code |= mode | imm; + + if (code & 0x1000000) + code |= 0xff000000; + else + code |= 0xef000000; + + return code; +} + +static void +movi_f(jit_state_t *_jit, int32_t r0, jit_float32_t i0) +{ + union { int32_t i; jit_float32_t f; } u = { .f = i0 }; + jit_gpr_t reg = get_temp_gpr(_jit); + movi(_jit, jit_gpr_regno(reg), u.i); + VMOV_S_A(_jit, r0, jit_gpr_regno(reg)); + unget_temp_gpr(_jit); +} + +static void +movi_d(jit_state_t *_jit, int32_t r0, jit_float64_t i0) +{ + union { int32_t i[2]; jit_float64_t d; } u = { .d = i0 }; + int32_t code; + if ((code = encode_vfp_double(1, 0, u.i[0], u.i[1])) != -1 || + (code = encode_vfp_double(1, 1, ~u.i[0], ~u.i[1])) != -1) + VIMM(_jit, code, r0); + else { + jit_gpr_t rg0 = get_temp_gpr(_jit); + jit_gpr_t rg1 = get_temp_gpr(_jit); + movi(_jit, jit_gpr_regno(rg0), u.i[0]); + movi(_jit, jit_gpr_regno(rg1), u.i[1]); + VMOV_D_AA(_jit, r0, jit_gpr_regno(rg0), jit_gpr_regno(rg1)); + unget_temp_gpr(_jit); + unget_temp_gpr(_jit); + } +} + +static void +extr_d_f(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + VCVT_F64_F32(_jit, r0, r1); +} + +static void +extr_f_d(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + VCVT_F32_F64(_jit, r0, r1); +} + +static void +extr_f(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + VMOV_V_I32(_jit, r0, r1); + VCVT_F32_S32(_jit, r0, r0); +} + +static void +extr_d(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + VMOV_V_I32(_jit, r0, 
r1); + VCVT_F64_S32(_jit, r0, r0); +} + +static void +truncr_f_i(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + jit_fpr_t reg = get_temp_fpr(_jit); + VCVT_S32_F32(_jit, jit_fpr_regno(reg), r1); + VMOV_A_S32(_jit, r0, jit_fpr_regno(reg)); + unget_temp_fpr(_jit); +} + +static void +truncr_d_i(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + jit_fpr_t reg = get_temp_fpr(_jit); + VCVT_S32_F64(_jit, jit_fpr_regno(reg), r1); + VMOV_A_S32(_jit, r0, jit_fpr_regno(reg)); + unget_temp_fpr(_jit); +} + +static void +ldi_f(jit_state_t *_jit, int32_t r0, jit_word_t i0) +{ + jit_gpr_t gpr = get_temp_gpr(_jit); + movi(_jit, jit_gpr_regno(gpr), i0); + VLDR_F32(_jit, r0, jit_gpr_regno(gpr), 0); + unget_temp_gpr(_jit); +} + +static void +ldi_d(jit_state_t *_jit, int32_t r0, jit_word_t i0) +{ + jit_gpr_t reg = get_temp_gpr(_jit); + movi(_jit, jit_gpr_regno(reg), i0); + VLDR_F64(_jit, r0, jit_gpr_regno(reg), 0); + unget_temp_gpr(_jit); +} + +static void +ldxr_f(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2) +{ + jit_gpr_t reg = get_temp_gpr(_jit); + addr(_jit, jit_gpr_regno(reg), r1, r2); + VLDR_F32(_jit, r0, jit_gpr_regno(reg), 0); + unget_temp_gpr(_jit); +} + +static void +ldxr_d(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2) +{ + jit_gpr_t reg = get_temp_gpr(_jit); + addr(_jit, jit_gpr_regno(reg), r1, r2); + VLDR_F64(_jit, r0, jit_gpr_regno(reg), 0); + unget_temp_gpr(_jit); +} + +static void +ldxi_f(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0) +{ + if (i0 >= 0) { + ASSERT(!(i0 & 3)); + if (i0 < 1024) + VLDR_F32(_jit, r0, r1, i0 >> 2); + else { + jit_gpr_t reg = get_temp_gpr(_jit); + addi(_jit, jit_gpr_regno(reg), r1, i0); + VLDR_F32(_jit, r0, jit_gpr_regno(reg), 0); + unget_temp_gpr(_jit); + } + } + else { + i0 = -i0; + ASSERT(!(i0 & 3)); + if (i0 < 1024) + VLDRN_F32(_jit, r0, r1, i0 >> 2); + else { + jit_gpr_t reg = get_temp_gpr(_jit); + subi(_jit, jit_gpr_regno(reg), r1, i0); + VLDR_F32(_jit, r0, jit_gpr_regno(reg), 0); + unget_temp_gpr(_jit); + } + } 
+} + +static void +ldxi_d(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0) +{ + if (i0 >= 0) { + ASSERT(!(i0 & 3)); + if (i0 < 1024) + VLDR_F64(_jit, r0, r1, i0 >> 2); + else { + jit_gpr_t reg = get_temp_gpr(_jit); + addi(_jit, jit_gpr_regno(reg), r1, i0); + VLDR_F64(_jit, r0, jit_gpr_regno(reg), 0); + unget_temp_gpr(_jit); + } + } + else { + i0 = -i0; + ASSERT(!(i0 & 3)); + if (i0 < 1024) + VLDRN_F64(_jit, r0, r1, i0 >> 2); + else { + jit_gpr_t reg = get_temp_gpr(_jit); + subi(_jit, jit_gpr_regno(reg), r1, i0); + VLDR_F64(_jit, r0, jit_gpr_regno(reg), 0); + unget_temp_gpr(_jit); + } + } +} + +static void +sti_f(jit_state_t *_jit, jit_word_t i0, int32_t r0) +{ + jit_gpr_t reg = get_temp_gpr(_jit); + movi(_jit, jit_gpr_regno(reg), i0); + VSTR_F32(_jit, r0, jit_gpr_regno(reg), 0); + unget_temp_gpr(_jit); +} + +static void +sti_d(jit_state_t *_jit, jit_word_t i0, int32_t r0) +{ + jit_gpr_t reg = get_temp_gpr(_jit); + movi(_jit, jit_gpr_regno(reg), i0); + VSTR_F64(_jit, r0, jit_gpr_regno(reg), 0); + unget_temp_gpr(_jit); +} + +static void +stxr_f(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2) +{ + jit_gpr_t reg = get_temp_gpr(_jit); + addr(_jit, jit_gpr_regno(reg), r0, r1); + VSTR_F32(_jit, r2, jit_gpr_regno(reg), 0); + unget_temp_gpr(_jit); +} + +static void +stxr_d(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2) +{ + jit_gpr_t reg = get_temp_gpr(_jit); + addr(_jit, jit_gpr_regno(reg), r0, r1); + VSTR_F64(_jit, r2, jit_gpr_regno(reg), 0); + unget_temp_gpr(_jit); +} + +static void +stxi_f(jit_state_t *_jit, jit_word_t i0, int32_t r0, int32_t r1) +{ + if (i0 >= 0) { + ASSERT(!(i0 & 3)); + if (i0 < 1024) + VSTR_F32(_jit, r1, r0, i0 >> 2); + else { + jit_gpr_t reg = get_temp_gpr(_jit); + addi(_jit, jit_gpr_regno(reg), r0, i0); + VSTR_F32(_jit, r1, jit_gpr_regno(reg), 0); + unget_temp_gpr(_jit); + } + } + else { + i0 = -i0; + ASSERT(!(i0 & 3)); + if (i0 < 1024) + VSTRN_F32(_jit, r1, r0, i0 >> 2); + else { + jit_gpr_t reg = get_temp_gpr(_jit); + 
subi(_jit, jit_gpr_regno(reg), r0, i0); + VSTR_F32(_jit, r1, jit_gpr_regno(reg), 0); + unget_temp_gpr(_jit); + } + } +} + +static void +stxi_d(jit_state_t *_jit, jit_word_t i0, int32_t r0, int32_t r1) +{ + if (i0 >= 0) { + ASSERT(!(i0 & 3)); + if (i0 < 0124) + VSTR_F64(_jit, r1, r0, i0 >> 2); + else { + jit_gpr_t reg = get_temp_gpr(_jit); + addi(_jit, jit_gpr_regno(reg), r0, i0); + VSTR_F64(_jit, r1, jit_gpr_regno(reg), 0); + unget_temp_gpr(_jit); + } + } + else { + i0 = -i0; + ASSERT(!(i0 & 3)); + if (i0 < 1024) + VSTRN_F64(_jit, r1, r0, i0 >> 2); + else { + jit_gpr_t reg = get_temp_gpr(_jit); + subi(_jit, jit_gpr_regno(reg), r0, i0); + VSTR_F64(_jit, r1, jit_gpr_regno(reg), 0); + unget_temp_gpr(_jit); + } + } +} + +static void +retr_d(jit_state_t *_jit, int32_t r) +{ + movr_d(_jit, jit_fpr_regno(_D0), r); + ret(_jit); +} + +static void +retr_f(jit_state_t *_jit, int32_t r) +{ + movr_f(_jit, jit_fpr_regno(_S0), r); + ret(_jit); +} + +static void +retval_f(jit_state_t *_jit, int32_t r0) +{ + movr_f(_jit, r0, jit_fpr_regno(_S0)); +} + +static void +retval_d(jit_state_t *_jit, int32_t r0) +{ + movr_d(_jit, r0, jit_fpr_regno(_D0)); +} diff --git a/deps/lightening/lightening/arm.c b/deps/lightening/lightening/arm.c new file mode 100644 index 0000000..f44f04d --- /dev/null +++ b/deps/lightening/lightening/arm.c @@ -0,0 +1,148 @@ +/* + * Copyright (C) 2012-2019 Free Software Foundation, Inc. + * + * This file is part of GNU lightning. + * + * GNU lightning is free software; you can redistribute it and/or modify it + * under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation; either version 3, or (at your option) + * any later version. + * + * GNU lightning is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public + * License for more details. 
+ * + * Authors: + * Paulo Cesar Pereira de Andrade + */ + +/* + * Types + */ +typedef union _jit_thumb_t { + int32_t i; + int16_t s[2]; +} jit_thumb_t; + +/* libgcc */ +extern void __clear_cache(void *, void *); + +#include "arm-cpu.c" +#include "arm-vfp.c" + +static const jit_gpr_t abi_gpr_args[] = { + _R0, _R1, _R2, _R3 +}; +static const int abi_gpr_arg_count = sizeof(abi_gpr_args) / sizeof(abi_gpr_args[0]); + +struct abi_arg_iterator +{ + const jit_operand_t *args; + size_t argc; + + size_t arg_idx; + size_t gpr_idx; + uint32_t vfp_used_registers; + size_t stack_size; + size_t stack_padding; +}; + +static size_t page_size; + +jit_bool_t +jit_get_cpu(void) +{ + page_size = sysconf(_SC_PAGE_SIZE); + // FIXME check version, thumb, hardware fp support + return 1; +} + +jit_bool_t +jit_init(jit_state_t *_jit) +{ + return 1; +} + +static size_t +jit_initial_frame_size (void) +{ + return 0; +} + +static void +reset_abi_arg_iterator(struct abi_arg_iterator *iter, size_t argc, + const jit_operand_t *args) +{ + memset(iter, 0, sizeof *iter); + iter->argc = argc; + iter->args = args; +} + +static void +next_abi_arg(struct abi_arg_iterator *iter, jit_operand_t *arg) +{ + ASSERT(iter->arg_idx < iter->argc); + enum jit_operand_abi abi = iter->args[iter->arg_idx].abi; + iter->arg_idx++; + if (is_gpr_arg(abi) && iter->gpr_idx < abi_gpr_arg_count) { + *arg = jit_operand_gpr (abi, abi_gpr_args[iter->gpr_idx++]); + return; + } + if (is_fpr_arg(abi)) { + // The ARM VFP ABI passes floating-point arguments in d0-d7 + // (s0-s15), and allows for "back-filling". Say you have a + // function: + // + // void f(float a, double b, float c); + // + // A gets allocated to s0, then b to d1 (which aliases s2+s3), then + // c to s1. + uint32_t width = abi == JIT_OPERAND_ABI_FLOAT ? 
/*
 * Turn a raw code address into a callable function pointer by setting
 * bit 0 of the address, which marks the target as Thumb code.
 */
static void*
bless_function_pointer(void *ptr)
{
  uintptr_t bits = (uintptr_t) ptr;

  // Low bit set == Thumb mode entry point.
  bits |= (uintptr_t) 1;

  return (void *) bits;
}
+ * + * Authors: + * Paulo Cesar Pereira de Andrade + */ + +#ifndef _jit_arm_h +#define _jit_arm_h + + +#define JIT_NEEDS_LITERAL_POOL 1 + +#define _R0 JIT_GPR(0) +#define _R1 JIT_GPR(1) +#define _R2 JIT_GPR(2) +#define _R3 JIT_GPR(3) +#define _R4 JIT_GPR(4) +#define _R5 JIT_GPR(5) +#define _R6 JIT_GPR(6) +#define _R7 JIT_GPR(7) +#define _R8 JIT_GPR(8) +#define _R9 JIT_GPR(9) +#define _R10 JIT_GPR(10) +#define _R11 JIT_GPR(11) +#define _R12 JIT_GPR(12) +#define _R13 JIT_GPR(13) +#define _R14 JIT_GPR(14) +#define _R15 JIT_GPR(15) + +#define _D0 JIT_FPR(0) +#define _D1 JIT_FPR(2) +#define _D2 JIT_FPR(4) +#define _D3 JIT_FPR(6) +#define _D4 JIT_FPR(8) +#define _D5 JIT_FPR(10) +#define _D6 JIT_FPR(12) +#define _D7 JIT_FPR(14) +#define _D8 JIT_FPR(16) +#define _D9 JIT_FPR(18) +#define _D10 JIT_FPR(20) +#define _D11 JIT_FPR(22) +#define _D12 JIT_FPR(24) +#define _D13 JIT_FPR(26) +#define _D14 JIT_FPR(28) +#define _D15 JIT_FPR(30) + +#define _S0 JIT_FPR(0) +#define _S1 JIT_FPR(1) +#define _S2 JIT_FPR(2) +#define _S3 JIT_FPR(3) +#define _S4 JIT_FPR(4) +#define _S5 JIT_FPR(5) +#define _S6 JIT_FPR(6) +#define _S7 JIT_FPR(7) +#define _S8 JIT_FPR(8) +#define _S9 JIT_FPR(9) +#define _S10 JIT_FPR(10) +#define _S11 JIT_FPR(11) +#define _S12 JIT_FPR(12) +#define _S13 JIT_FPR(13) +#define _S14 JIT_FPR(14) +#define _S15 JIT_FPR(15) +#define _S16 JIT_FPR(16) +#define _S17 JIT_FPR(17) +#define _S18 JIT_FPR(18) +#define _S19 JIT_FPR(19) +#define _S20 JIT_FPR(20) +#define _S21 JIT_FPR(21) +#define _S22 JIT_FPR(22) +#define _S23 JIT_FPR(23) +#define _S24 JIT_FPR(24) +#define _S25 JIT_FPR(25) +#define _S26 JIT_FPR(26) +#define _S27 JIT_FPR(27) +#define _S28 JIT_FPR(28) +#define _S29 JIT_FPR(29) +#define _S30 JIT_FPR(30) +#define _S31 JIT_FPR(31) + +#define JIT_R0 _R0 +#define JIT_R1 _R1 +#define JIT_R2 _R2 +#define JIT_R3 _R3 +#define JIT_TMP0 _R12 + +#define JIT_V0 _R4 +#define JIT_V1 _R5 +#define JIT_V2 _R6 +#define JIT_TMP1 _R7 +#define JIT_V3 _R8 +#define JIT_V4 _R9 +#define JIT_V5 
_R10 +#define JIT_V6 _R11 + +#define JIT_LR _R14 +#define JIT_SP _R13 +#define _LR _R14 +#define _PC _R15 + +#define JIT_F0 _D0 +#define JIT_F1 _D1 +#define JIT_F2 _D2 +#define JIT_F3 _D3 +#define JIT_F4 _D4 +#define JIT_F5 _D5 +#define JIT_F6 _D6 +#define JIT_F7 _D7 + +#define JIT_VF0 _D8 +#define JIT_VF1 _D9 +#define JIT_VF2 _D10 +#define JIT_VF3 _D11 +#define JIT_VF4 _D12 +#define JIT_VF5 _D13 +#define JIT_VF6 _D14 +#define JIT_FTMP _D15 + +#define JIT_PLATFORM_CALLEE_SAVE_GPRS _LR, JIT_TMP1 +#define JIT_PLATFORM_CALLEE_SAVE_FPRS JIT_FTMP + + +#endif /* _jit_arm_h */ diff --git a/deps/lightening/lightening/endian.h b/deps/lightening/lightening/endian.h new file mode 100644 index 0000000..3b34a15 --- /dev/null +++ b/deps/lightening/lightening/endian.h @@ -0,0 +1,95 @@ +/* + * Copyright (C) 2012-2019 Free Software Foundation, Inc. + * + * This file is part of GNU lightning. + * + * GNU lightning is free software; you can redistribute it and/or modify it + * under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation; either version 3, or (at your option) + * any later version. + * + * GNU lightning is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public + * License for more details. 
+ * + * Authors: + * Paulo Cesar Pereira de Andrade + * Andy Wingo + */ + +#ifndef _jit_endian_h +#define _jit_endian_h + +#include <unistd.h> +#include <stdlib.h> +#include <stdint.h> +#include <string.h> +#include <stddef.h> + +#ifndef __WORDSIZE +# if defined(WORDSIZE) /* ppc darwin */ +# define __WORDSIZE WORDSIZE +# elif defined(__SIZEOF_POINTER__) /* ppc aix */ +# define __WORDSIZE (__SIZEOF_POINTER__ << 3) +# elif defined(_MIPS_SZPTR) /* mips irix */ +# if _MIPS_SZPTR == 32 +# define __WORDSIZE 32 +# else +# define __WORDSIZE 64 +# endif +# else /* From FreeBSD 9.1 stdint.h */ +# if defined(UINTPTR_MAX) && defined(UINT64_MAX) && \ + (UINTPTR_MAX == UINT64_MAX) +# define __WORDSIZE 64 +# else +# define __WORDSIZE 32 +# endif +# endif +#endif +#ifndef __LITTLE_ENDIAN +# if defined(LITTLE_ENDIAN) /* ppc darwin */ +# define __LITTLE_ENDIAN LITTLE_ENDIAN +# elif defined(__ORDER_LITTLE_ENDIAN__) /* ppc aix */ +# define __LITTLE_ENDIAN __ORDER_LITTLE_ENDIAN__ +# else +# define __LITTLE_ENDIAN 1234 +# endif +#endif +#ifndef __BIG_ENDIAN +# if defined(BIG_ENDIAN) /* ppc darwin */ +# define __BIG_ENDIAN BIG_ENDIAN +# elif defined(__ORDER_BIG_ENDIAN__) /* ppc aix */ +# define __BIG_ENDIAN __ORDER_BIG_ENDIAN__ +# else +# define __BIG_ENDIAN 4321 +# endif +#endif +#ifndef __BYTE_ORDER +# if defined(BYTE_ORDER) /* ppc darwin */ +# define __BYTE_ORDER BYTE_ORDER +# elif defined(__BYTE_ORDER__) /* ppc aix */ +# define __BYTE_ORDER __BYTE_ORDER__ +# elif defined(__i386__) /* 32 bit x86 solaris */ +# define __BYTE_ORDER __LITTLE_ENDIAN +# elif defined(__x86_64__) /* 64 bit x86 solaris */ +# define __BYTE_ORDER __LITTLE_ENDIAN +# elif defined(__MIPSEB) /* mips irix */ +# define __BYTE_ORDER __BIG_ENDIAN +# else +# error cannot figure __BYTE_ORDER +# endif +#endif + +#if __WORDSIZE == 32 +#define CHOOSE_32_64(x, y) x +#elif __WORDSIZE == 64 +#define CHOOSE_32_64(x, y) y +#else +#error unhandled __WORDSIZE +#endif + +#define WHEN_64(x) CHOOSE_32_64(/**/, x) + + +#endif /* 
/*
 * A code address that can be accessed at several widths.  Being a
 * union, all members share the same storage: the pointer members view
 * the address as a pointer to 8-, 16-, 32- or 64-bit units, and w/uw
 * expose it as a signed/unsigned integer.
 */
union jit_pc
{
  uint8_t *uc;   // byte-granular view
  uint16_t *us;  // 16-bit view
  uint32_t *ui;  // 32-bit view
  uint64_t *ul;  // 64-bit view
  intptr_t w;    // address as a signed integer
  uintptr_t uw;  // address as an unsigned integer
};
frame_size; // Used to know when to align stack. +#ifdef JIT_NEEDS_LITERAL_POOL + struct jit_literal_pool *pool; +#endif + void* (*alloc)(size_t); + void (*free)(void*); +}; + +static jit_bool_t jit_get_cpu(void); +static jit_bool_t jit_init(jit_state_t *); +static void jit_flush(void *fptr, void *tptr); +static void jit_try_shorten(jit_state_t *_jit, jit_reloc_t reloc, + jit_pointer_t addr); +static void* bless_function_pointer(void *ptr); + +struct abi_arg_iterator; + +#ifdef JIT_NEEDS_LITERAL_POOL +static struct jit_literal_pool* alloc_literal_pool(jit_state_t *_jit, + size_t capacity); +static void reset_literal_pool(jit_state_t *_jit, + struct jit_literal_pool *pool); +static jit_bool_t add_pending_literal(jit_state_t *_jit, jit_reloc_t src, + uint8_t max_offset_bits); +static void remove_pending_literal(jit_state_t *_jit, jit_reloc_t src); +static void patch_pending_literal(jit_state_t *_jit, jit_reloc_t src, + uintptr_t value); +enum guard_pool { GUARD_NEEDED, NO_GUARD_NEEDED }; +static void emit_literal_pool(jit_state_t *_jit, enum guard_pool guard); + +static int32_t read_jmp_offset(uint32_t *loc); +static int offset_in_jmp_range(ptrdiff_t offset, int flags); +static void patch_jmp_offset(uint32_t *loc, ptrdiff_t offset); +static void patch_veneer_jmp_offset(uint32_t *loc, ptrdiff_t offset); +static int32_t read_jcc_offset(uint32_t *loc); +static int offset_in_jcc_range(ptrdiff_t offset, int flags); +static void patch_jcc_offset(uint32_t *loc, ptrdiff_t offset); +static void patch_veneer_jcc_offset(uint32_t *loc, ptrdiff_t offset); +static void patch_veneer(uint32_t *loc, jit_pointer_t addr); +static int32_t read_load_from_pool_offset(uint32_t *loc); +#endif + +#ifdef JIT_USE_IMMEDIATE_RELOC +static void patch_immediate_reloc(uint32_t *loc, jit_pointer_t addr); +#endif + +static jit_bool_t is_fpr_arg(enum jit_operand_abi arg); +static jit_bool_t is_gpr_arg(enum jit_operand_abi arg); +#if JIT_ASYMMETRIC_STACK +static void reset_call_arg_iterator(struct 
abi_arg_iterator *iter, size_t argc, + const jit_operand_t *args); + +static void reset_load_arg_iterator(struct abi_arg_iterator *iter, size_t argc, + const jit_operand_t *args); +#else +static void reset_abi_arg_iterator(struct abi_arg_iterator *iter, size_t argc, + const jit_operand_t *args); +#endif +static void next_abi_arg(struct abi_arg_iterator *iter, + jit_operand_t *arg); + +jit_bool_t +init_jit(void) +{ + return jit_get_cpu (); +} + +jit_state_t * +jit_new_state(void* (*alloc_fn)(size_t), void (*free_fn)(void*)) +{ + if (!alloc_fn) alloc_fn = malloc; + if (!free_fn) free_fn = free; + + jit_state_t *_jit = alloc_fn (sizeof (*_jit)); + if (!_jit) + abort (); + + memset(_jit, 0, sizeof (*_jit)); + _jit->alloc = alloc_fn; + _jit->free = free_fn; + + if (!jit_init (_jit)) { +#ifdef JIT_NEEDS_LITERAL_POOL + free_fn (_jit->pool); +#endif + free_fn (_jit); + return NULL; + } + +#ifdef JIT_NEEDS_LITERAL_POOL + _jit->pool = alloc_literal_pool(_jit, 0); +#endif + + return _jit; +} + +void +jit_destroy_state(jit_state_t *_jit) +{ +#ifdef JIT_NEEDS_LITERAL_POOL + _jit->free (_jit->pool); +#endif + _jit->free (_jit); +} + +jit_pointer_t +jit_address(jit_state_t *_jit) +{ + ASSERT (_jit->start); + jit_pointer_t ret = _jit->pc.uc; + return _jit->emitting_data ? 
/*
 * Return 1 when x has exactly one bit set (x is a power of two),
 * 0 otherwise.  Zero is not a power of two.
 */
static int
is_power_of_two (unsigned x)
{
  if (x == 0)
    return 0;

  // Clearing the lowest set bit leaves nothing iff only one bit was set.
  return (x & (x - 1)) == 0;
}
jit_gpr_t +get_temp_gpr(jit_state_t *_jit) +{ + switch(_jit->temp_gpr_saved++) + { + case 0: + return JIT_TMP0; +#ifdef JIT_TMP1 + case 1: + return JIT_TMP1; +#endif +#ifdef JIT_TMP2 + case 2: + return JIT_TMP2; +#endif +#ifdef JIT_TMP3 + case 3: + return JIT_TMP3; +#endif +#ifdef JIT_TMP4 + case 4: + return JIT_TMP4; +#endif +#ifdef JIT_TMP5 + case 5: + return JIT_TMP5; +#endif + default: + abort(); + } +} + +static jit_fpr_t +get_temp_fpr(jit_state_t *_jit) +{ + switch(_jit->temp_fpr_saved++) + { + case 0: + return JIT_FTMP; + default: + abort(); + } +} + +static void +unget_temp_fpr(jit_state_t *_jit) +{ + ASSERT(_jit->temp_fpr_saved); + _jit->temp_fpr_saved--; +} + +static void +unget_temp_gpr(jit_state_t *_jit) +{ + ASSERT(_jit->temp_gpr_saved); + _jit->temp_gpr_saved--; +} + +static inline void emit_u8(jit_state_t *_jit, uint8_t u8) { + if (UNLIKELY(_jit->pc.uc + 1 > _jit->limit)) { + _jit->overflow = 1; + } else { + *_jit->pc.uc = u8; + } + + _jit->pc.uc++; +} + +static inline void emit_u16(jit_state_t *_jit, uint16_t u16) { + if (UNLIKELY(_jit->pc.us + 1 > (uint16_t*)_jit->limit)) { + _jit->overflow = 1; + } else { + *_jit->pc.us = u16; + } + + _jit->pc.us++; +} + +static inline void emit_u32(jit_state_t *_jit, uint32_t u32) { + if (UNLIKELY(_jit->pc.ui + 1 > (uint32_t*)_jit->limit)) { + _jit->overflow = 1; + } else { + *_jit->pc.ui = u32; + } + + _jit->pc.ui++; +} + +#ifdef JIT_NEEDS_LITERAL_POOL +static inline void emit_u16_with_pool(jit_state_t *_jit, uint16_t u16) { + emit_u16(_jit, u16); + if (UNLIKELY(_jit->pc.uc >= _jit->start + _jit->pool->deadline)) + emit_literal_pool(_jit, GUARD_NEEDED); +} + +static inline void emit_u32_with_pool(jit_state_t *_jit, uint32_t u32) { + emit_u32(_jit, u32); + if (UNLIKELY(_jit->pc.uc >= _jit->start + _jit->pool->deadline)) + emit_literal_pool(_jit, GUARD_NEEDED); +} +#endif + +static inline void emit_u64(jit_state_t *_jit, uint64_t u64) { + if (UNLIKELY(_jit->pc.ul + 1 > (uint64_t*)_jit->limit)) { + _jit->overflow = 
1; + } else { + *_jit->pc.ul = u64; + } + + _jit->pc.ul++; +} + +static inline void emit_uintptr(jit_state_t *_jit, uintptr_t u) { + if (sizeof(u) == 4) + emit_u32 (_jit, u); + else + emit_u64 (_jit, u); +} + +static inline jit_reloc_t +jit_reloc(jit_state_t *_jit, enum jit_reloc_kind kind, + uint8_t inst_start_offset, uint8_t *loc, uint8_t *pc_base, + uint8_t rsh) +{ + jit_reloc_t ret; + + ASSERT(rsh < __WORDSIZE); + ASSERT(pc_base >= (loc - inst_start_offset)); + ASSERT(pc_base - (loc - inst_start_offset) < 256); + + ret.kind = kind; + ret.inst_start_offset = inst_start_offset; + ret.pc_base_offset = pc_base - (loc - inst_start_offset); + ret.rsh = rsh; + ret.offset = loc - _jit->start; + + return ret; +} + +static inline jit_reloc_t +emit_abs_reloc (jit_state_t *_jit, uint8_t inst_start) +{ + uint8_t *loc = _jit->pc.uc; + emit_uintptr (_jit, 0); + return jit_reloc(_jit, JIT_RELOC_ABSOLUTE, inst_start, loc, _jit->pc.uc, 0); +} + +void +jit_patch_here(jit_state_t *_jit, jit_reloc_t reloc) +{ + jit_patch_there (_jit, reloc, jit_address (_jit)); +} + +void +jit_patch_there(jit_state_t* _jit, jit_reloc_t reloc, jit_pointer_t addr) +{ + if (_jit->overflow) + return; + union jit_pc loc; + uint8_t *end; + loc.uc = _jit->start + reloc.offset; + uint8_t *pc_base = loc.uc - reloc.inst_start_offset + reloc.pc_base_offset; + ptrdiff_t diff = (uint8_t*)addr - pc_base; + ASSERT((diff & ((1 << reloc.rsh) - 1)) == 0); + diff >>= reloc.rsh; +#ifdef JIT_NEEDS_LITERAL_POOL + int flags = reloc.kind & ~JIT_RELOC_MASK; +#endif + + switch (reloc.kind & JIT_RELOC_MASK) + { + case JIT_RELOC_ABSOLUTE: + if (sizeof(diff) == 4) + *loc.ui = (uintptr_t)addr; + else + *loc.ul = (uintptr_t)addr; + end = loc.uc + sizeof(diff); + break; + case JIT_RELOC_REL8: + ASSERT (INT8_MIN <= diff && diff <= INT8_MAX); + *loc.uc = diff; + end = loc.uc + 1; + break; + case JIT_RELOC_REL16: + ASSERT (INT16_MIN <= diff && diff <= INT16_MAX); + *loc.us = diff; + end = loc.uc + 2; + break; +#ifdef 
JIT_NEEDS_LITERAL_POOL + case JIT_RELOC_JMP_WITH_VENEER: { + int32_t voff = read_jmp_offset(loc.ui); + uint8_t *target = pc_base + (voff << reloc.rsh); + if (target == loc.uc) { + // PC still in range to reify direct branch. + if (offset_in_jmp_range(diff, flags)) { + // Target also in range: reify direct branch. + patch_jmp_offset(loc.ui, diff); + remove_pending_literal(_jit, reloc); + } else { + // Target out of range; branch to veneer. + patch_pending_literal(_jit, reloc, (uintptr_t) addr); + } + } else { + // Already emitted a veneer. In this case, patch the veneer + // directly. + patch_veneer((uint32_t *) target, addr); + } + return; + } + case JIT_RELOC_JCC_WITH_VENEER: { + int32_t voff = read_jcc_offset(loc.ui); + uint8_t *target = pc_base + (voff << reloc.rsh); + if (target == loc.uc) { + if (offset_in_jcc_range(diff, flags)) { + patch_jcc_offset(loc.ui, diff); + remove_pending_literal(_jit, reloc); + } else { + patch_pending_literal(_jit, reloc, (uintptr_t) addr); + } + } else { + patch_veneer((uint32_t *) target, addr); + } + return; + } + case JIT_RELOC_LOAD_FROM_POOL: { + int32_t voff = read_load_from_pool_offset(loc.ui); + uint8_t *target = pc_base + (voff << reloc.rsh); + if (target == loc.uc) { + patch_pending_literal(_jit, reloc, (uintptr_t) addr); + } else { + *(uintptr_t *) target = (uintptr_t) addr; + } + return; + } +#endif +#ifdef JIT_USE_IMMEDIATE_RELOC + case JIT_RELOC_IMMEDIATE: { + patch_immediate_reloc(loc.ui, addr); + return; + } +#endif + case JIT_RELOC_REL32: + ASSERT (INT32_MIN <= diff && diff <= INT32_MAX); + *loc.ui = diff; + end = loc.uc + 4; + break; + case JIT_RELOC_REL64: + *loc.ul = diff; + end = loc.uc + 8; + break; + default: + abort (); + } + + if (end == _jit->pc.uc) + jit_try_shorten (_jit, reloc, addr); +} + +void +jit_begin_data(jit_state_t *j, size_t max_size_or_zero) +{ +#ifdef JIT_NEEDS_LITERAL_POOL + if (j->pool->size) { + uint8_t *deadline = j->start + j->pool->deadline; + // Emit a literal pool now if the data 
might overwrite the deadline. + // Emitting data won't add entries to the pool. + if (max_size_or_zero == 0 || j->pc.uc + max_size_or_zero >= deadline) + emit_literal_pool(j, NO_GUARD_NEEDED); + } +#endif + + ASSERT(!j->emitting_data); + j->emitting_data = 1; +} + +void +jit_end_data(jit_state_t *j) +{ + ASSERT(j->emitting_data); + j->emitting_data = 0; +} + +void +jit_emit_u8(jit_state_t *j, uint8_t u8) +{ + ASSERT(j->emitting_data); + emit_u8(j, u8); +} + +void +jit_emit_u16(jit_state_t *j, uint16_t u16) +{ + ASSERT(j->emitting_data); + emit_u16(j, u16); +} + +void +jit_emit_u32(jit_state_t *j, uint32_t u32) +{ + ASSERT(j->emitting_data); + emit_u32(j, u32); +} + +void +jit_emit_u64(jit_state_t *j, uint64_t u64) +{ + ASSERT(j->emitting_data); + emit_u64(j, u64); +} + +jit_reloc_t +jit_emit_addr(jit_state_t *j) +{ + ASSERT(j->emitting_data); + uint8_t inst_start = 0; + return emit_abs_reloc(j, inst_start); +} + +#if defined(__i386__) || defined(__x86_64__) +# include "x86.c" +#elif defined(__mips__) +# include "mips.c" +#elif defined(__arm__) +# include "arm.c" +#elif defined(__ppc__) || defined(__powerpc__) +# include "ppc.c" +#elif defined(__aarch64__) +# include "aarch64.c" +#elif defined(__s390__) || defined(__s390x__) +# include "s390.c" +#endif + +#define JIT_IMPL_0(stem, ret) \ + ret jit_##stem (jit_state_t* _jit) \ + { \ + return stem(_jit); \ + } +#define JIT_IMPL_1(stem, ret, ta) \ + ret jit_##stem (jit_state_t* _jit, jit_##ta##_t a) \ + { \ + return stem(_jit, unwrap_##ta(a)); \ + } +#define JIT_IMPL_2(stem, ret, ta, tb) \ + ret jit_##stem (jit_state_t* _jit, jit_##ta##_t a, jit_##tb##_t b) \ + { \ + return stem(_jit, unwrap_##ta(a), unwrap_##tb(b)); \ + } +#define JIT_IMPL_3(stem, ret, ta, tb, tc) \ + ret jit_##stem (jit_state_t* _jit, jit_##ta##_t a, jit_##tb##_t b, jit_##tc##_t c) \ + { \ + return stem(_jit, unwrap_##ta(a), unwrap_##tb(b), unwrap_##tc(c)); \ + } +#define JIT_IMPL_4(stem, ret, ta, tb, tc, td) \ + ret jit_##stem (jit_state_t* _jit, 
jit_##ta##_t a, jit_##tb##_t b, jit_##tc##_t c, jit_##td##_t d) \ + { \ + return stem(_jit, unwrap_##ta(a), unwrap_##tb(b), unwrap_##tc(c), unwrap_##td(d)); \ + } + +#define JIT_IMPL_RFF__(stem) JIT_IMPL_2(stem, jit_reloc_t, fpr, fpr) +#define JIT_IMPL_RGG__(stem) JIT_IMPL_2(stem, jit_reloc_t, gpr, gpr) +#define JIT_IMPL_RG___(stem) JIT_IMPL_1(stem, jit_reloc_t, gpr) +#define JIT_IMPL_RGi__(stem) JIT_IMPL_2(stem, jit_reloc_t, gpr, imm) +#define JIT_IMPL_RGu__(stem) JIT_IMPL_2(stem, jit_reloc_t, gpr, uimm) +#define JIT_IMPL_R____(stem) JIT_IMPL_0(stem, jit_reloc_t) +#define JIT_IMPL__FFF_(stem) JIT_IMPL_3(stem, void, fpr, fpr, fpr) +#define JIT_IMPL__FF__(stem) JIT_IMPL_2(stem, void, fpr, fpr) +#define JIT_IMPL__FGG_(stem) JIT_IMPL_3(stem, void, fpr, gpr, gpr) +#define JIT_IMPL__FG__(stem) JIT_IMPL_2(stem, void, fpr, gpr) +#define JIT_IMPL__FGo_(stem) JIT_IMPL_3(stem, void, fpr, gpr, off) +#define JIT_IMPL__F___(stem) JIT_IMPL_1(stem, void, fpr) +#define JIT_IMPL__Fd__(stem) JIT_IMPL_2(stem, void, fpr, float64) +#define JIT_IMPL__Ff__(stem) JIT_IMPL_2(stem, void, fpr, float32) +#define JIT_IMPL__Fp__(stem) JIT_IMPL_2(stem, void, fpr, pointer) +#define JIT_IMPL__GF__(stem) JIT_IMPL_2(stem, void, gpr, fpr) +#define JIT_IMPL__GGF_(stem) JIT_IMPL_3(stem, void, gpr, gpr, fpr) +#define JIT_IMPL__GGGG(stem) JIT_IMPL_4(stem, void, gpr, gpr, gpr, gpr) +#define JIT_IMPL__GGG_(stem) JIT_IMPL_3(stem, void, gpr, gpr, gpr) +#define JIT_IMPL__GGGi(stem) JIT_IMPL_4(stem, void, gpr, gpr, gpr, imm) +#define JIT_IMPL__GGGu(stem) JIT_IMPL_4(stem, void, gpr, gpr, gpr, uimm) +#define JIT_IMPL__GG__(stem) JIT_IMPL_2(stem, void, gpr, gpr) +#define JIT_IMPL__GGi_(stem) JIT_IMPL_3(stem, void, gpr, gpr, imm) +#define JIT_IMPL__GGo_(stem) JIT_IMPL_3(stem, void, gpr, gpr, off) +#define JIT_IMPL__GGu_(stem) JIT_IMPL_3(stem, void, gpr, gpr, uimm) +#define JIT_IMPL__G___(stem) JIT_IMPL_1(stem, void, gpr) +#define JIT_IMPL__Gi__(stem) JIT_IMPL_2(stem, void, gpr, imm) +#define JIT_IMPL__Gp__(stem) 
JIT_IMPL_2(stem, void, gpr, pointer) +#define JIT_IMPL______(stem) JIT_IMPL_0(stem, void) +#define JIT_IMPL__i___(stem) JIT_IMPL_1(stem, void, imm) +#define JIT_IMPL__oGF_(stem) JIT_IMPL_3(stem, void, off, gpr, fpr) +#define JIT_IMPL__oGG_(stem) JIT_IMPL_3(stem, void, off, gpr, gpr) +#define JIT_IMPL__pF__(stem) JIT_IMPL_2(stem, void, pointer, fpr) +#define JIT_IMPL__pG__(stem) JIT_IMPL_2(stem, void, pointer, gpr) +#define JIT_IMPL__p___(stem) JIT_IMPL_1(stem, void, pointer) +#define JIT_IMPL__GGGo(stem) JIT_IMPL_4(stem, void, gpr, gpr, gpr, off) +#define JIT_IMPL__oGGG(stem) JIT_IMPL_4(stem, void, off, gpr, gpr, gpr) + +#define unwrap_gpr(r) jit_gpr_regno(r) +#define unwrap_fpr(r) jit_fpr_regno(r) +#define unwrap_imm(i) i +#define unwrap_uimm(u) u +#define unwrap_off(o) o +#define unwrap_pointer(p) ((uintptr_t) p) +#define unwrap_float32(f) f +#define unwrap_float64(d) d + +#define IMPL_INSTRUCTION(kind, stem) JIT_IMPL_##kind(stem) +FOR_EACH_INSTRUCTION(IMPL_INSTRUCTION) +#ifdef JIT_PASS_DOUBLES_IN_GPR_PAIRS +/* internal use only */ +static void jit_movr_d_ww(jit_state_t *_jit, jit_fpr_t f0, jit_gpr_t r0, jit_gpr_t r1); +static void jit_movr_ww_d(jit_state_t *_jit, jit_gpr_t r0, jit_gpr_t r1, jit_fpr_t f0); +static void jit_ldxi_ww(jit_state_t *_jit, jit_gpr_t r0, jit_gpr_t r1, + jit_gpr_t r2, jit_off_t o0); +static void jit_stxi_ww(jit_state_t *_jit, jit_off_t o0, jit_gpr_t r0, + jit_gpr_t r1, jit_gpr_t r2); +IMPL_INSTRUCTION(_FGG_, movr_d_ww) +IMPL_INSTRUCTION(_GGF_, movr_ww_d) +IMPL_INSTRUCTION(_GGGo, ldxi_ww) +IMPL_INSTRUCTION(_oGGG, stxi_ww) +#endif +#ifdef JIT_PASS_FLOATS_IN_GPRS +static void jit_movr_f_w(jit_state_t *_jit, jit_fpr_t f0, jit_gpr_t r0); +static void jit_movr_w_f(jit_state_t *_jit, jit_gpr_t r0, jit_fpr_t f0); +static void jit_ldxi_w(jit_state_t *_jit, jit_gpr_t r0, jit_gpr_t r1, + jit_off_t o0); +static void jit_stxi_w(jit_state_t *_jit, jit_off_t o0, jit_gpr_t r0, + jit_gpr_t r1); +IMPL_INSTRUCTION(_FG__, movr_f_w) +IMPL_INSTRUCTION(_GF__, 
movr_w_f) +IMPL_INSTRUCTION(_GGo_, ldxi_w) +IMPL_INSTRUCTION(_oGG_, stxi_w) +#endif +#undef IMPL_INSTRUCTION + +void +jit_align(jit_state_t *_jit, unsigned align) +{ + ASSERT (is_power_of_two (align)); + uintptr_t here = _jit->pc.w; + uintptr_t there = (here + align - 1) & ~(align - 1); + if (there - here) + nop(_jit, there - here); +} + +static jit_bool_t +is_fpr_arg(enum jit_operand_abi arg) +{ + switch (arg) + { + case JIT_OPERAND_ABI_UINT8: + case JIT_OPERAND_ABI_INT8: + case JIT_OPERAND_ABI_UINT16: + case JIT_OPERAND_ABI_INT16: + case JIT_OPERAND_ABI_UINT32: + case JIT_OPERAND_ABI_INT32: + case JIT_OPERAND_ABI_UINT64: + case JIT_OPERAND_ABI_INT64: + case JIT_OPERAND_ABI_POINTER: + return 0; + case JIT_OPERAND_ABI_FLOAT: + case JIT_OPERAND_ABI_DOUBLE: + return 1; + default: + abort(); + } +} + +static jit_bool_t +is_gpr_arg(enum jit_operand_abi arg) +{ + return !is_fpr_arg(arg); +} + +static void +abi_imm_to_gpr(jit_state_t *_jit, enum jit_operand_abi abi, jit_gpr_t dst, + jit_imm_t imm) +{ + switch (abi) { + case JIT_OPERAND_ABI_UINT8: + ASSERT(0 <= imm); + ASSERT(imm <= UINT8_MAX); + break; + case JIT_OPERAND_ABI_INT8: + ASSERT(INT8_MIN <= imm); + ASSERT(imm <= INT8_MAX); + break; + case JIT_OPERAND_ABI_UINT16: + ASSERT(0 <= imm); + ASSERT(imm <= UINT16_MAX); + break; + case JIT_OPERAND_ABI_INT16: + ASSERT(INT16_MIN <= imm); + ASSERT(imm <= INT16_MAX); + break; +#if __WORDSIZE > 32 + case JIT_OPERAND_ABI_UINT32: + ASSERT(0 <= imm); + ASSERT(imm <= UINT32_MAX); + break; + case JIT_OPERAND_ABI_INT32: + ASSERT(INT32_MIN <= imm); + ASSERT(imm <= INT32_MAX); + break; + case JIT_OPERAND_ABI_UINT64: + case JIT_OPERAND_ABI_INT64: + break; +#else + case JIT_OPERAND_ABI_UINT32: + case JIT_OPERAND_ABI_INT32: + break; +#endif + case JIT_OPERAND_ABI_POINTER: + break; + default: + abort(); + } + jit_movi (_jit, dst, imm); +} + +static void +abi_gpr_to_mem_walign(jit_state_t *_jit, enum jit_operand_abi abi, + jit_gpr_t base, ptrdiff_t offset, jit_gpr_t src) +{ + // 
Invariant: GPR memory destination operand sizes are rounded up to words. + // True for ARM, AArch64, IA32, and X86-64. Some ABIs expect to be able to + // load operands from the stack via a full-word read, so we need to make sure + // we don't leave garbage in the high bytes of (for example) the stack slot + // for a uint8_t arg. + switch (abi) { + case JIT_OPERAND_ABI_UINT8: + case JIT_OPERAND_ABI_INT8: + jit_stxi(_jit, offset, base, src); + break; + case JIT_OPERAND_ABI_UINT16: + case JIT_OPERAND_ABI_INT16: + jit_stxi(_jit, offset, base, src); + break; + case JIT_OPERAND_ABI_UINT32: + case JIT_OPERAND_ABI_INT32: +#if __WORDSIZE == 32 + case JIT_OPERAND_ABI_POINTER: +#endif + jit_stxi(_jit, offset, base, src); + break; +#if __WORDSIZE == 64 + case JIT_OPERAND_ABI_UINT64: + case JIT_OPERAND_ABI_INT64: + case JIT_OPERAND_ABI_POINTER: + jit_stxi_l(_jit, offset, base, src); + break; +#endif +#if JIT_PASS_FLOATS_IN_GPRS + case JIT_OPERAND_ABI_FLOAT: + jit_stxi_w(_jit, offset, base, src); + break; +#endif + default: + abort(); + } +} + +static void +abi_gpr_to_mem_nalign(jit_state_t *_jit, enum jit_operand_abi abi, + jit_gpr_t base, ptrdiff_t offset, jit_gpr_t src) +{ + switch (abi) { + case JIT_OPERAND_ABI_UINT8: + case JIT_OPERAND_ABI_INT8: + jit_stxi_c(_jit, offset, base, src); + break; + case JIT_OPERAND_ABI_UINT16: + case JIT_OPERAND_ABI_INT16: + jit_stxi_s(_jit, offset, base, src); + break; +#if __WORDSIZE == 32 + case JIT_OPERAND_ABI_UINT32: + case JIT_OPERAND_ABI_POINTER: +#endif + case JIT_OPERAND_ABI_INT32: + jit_stxi_i(_jit, offset, base, src); + break; +#if __WORDSIZE == 64 + case JIT_OPERAND_ABI_UINT32: + jit_stxi_i(_jit, offset, base, src); + break; + case JIT_OPERAND_ABI_UINT64: + case JIT_OPERAND_ABI_POINTER: + case JIT_OPERAND_ABI_INT64: + jit_stxi_l(_jit, offset, base, src); + break; +#endif +#if JIT_PASS_FLOATS_IN_GPRS + case JIT_OPERAND_ABI_FLOAT: + jit_stxi_w(_jit, offset, base, src); + break; +#endif + default: + abort(); + } +} + +static void 
+abi_gpr_to_mem(jit_state_t *_jit, enum jit_operand_abi abi, + jit_gpr_t base, ptrdiff_t offset, jit_gpr_t src) +{ + if (JIT_CALL_STACK_ALIGN_WORD && _jit->preparing_call) + abi_gpr_to_mem_walign(_jit, abi, base, offset, src); + else + abi_gpr_to_mem_nalign(_jit, abi, base, offset, src); +} + +static void +abi_fpr_to_mem(jit_state_t *_jit, enum jit_operand_abi abi, + jit_gpr_t base, ptrdiff_t offset, jit_fpr_t src) +{ + switch (abi) { + case JIT_OPERAND_ABI_FLOAT: + jit_stxi_f(_jit, offset, base, src); + break; + case JIT_OPERAND_ABI_DOUBLE: + jit_stxi_d(_jit, offset, base, src); + break; + default: + abort(); + } +} + +static void +abi_mem_to_gpr(jit_state_t *_jit, enum jit_operand_abi abi, + jit_gpr_t dst, jit_gpr_t base, ptrdiff_t offset) +{ + switch (abi) { + case JIT_OPERAND_ABI_UINT8: + jit_ldxi_uc(_jit, dst, base, offset); + break; + case JIT_OPERAND_ABI_INT8: + jit_ldxi_c(_jit, dst, base, offset); + break; + case JIT_OPERAND_ABI_UINT16: + jit_ldxi_us(_jit, dst, base, offset); + break; + case JIT_OPERAND_ABI_INT16: + jit_ldxi_s(_jit, dst, base, offset); + break; +#if __WORDSIZE == 32 + case JIT_OPERAND_ABI_UINT32: + case JIT_OPERAND_ABI_POINTER: +#endif + case JIT_OPERAND_ABI_INT32: + jit_ldxi_i(_jit, dst, base, offset); + break; +#if __WORDSIZE == 64 + case JIT_OPERAND_ABI_UINT32: + jit_ldxi_ui(_jit, dst, base, offset); + break; + case JIT_OPERAND_ABI_UINT64: + case JIT_OPERAND_ABI_POINTER: + case JIT_OPERAND_ABI_INT64: + jit_ldxi_l(_jit, dst, base, offset); + break; +#endif +#if JIT_PASS_FLOATS_IN_GPRS + case JIT_OPERAND_ABI_FLOAT: + jit_ldxi_w(_jit, dst, base, offset); + break; +#endif + default: + abort(); + } +} + +static void +abi_mem_to_fpr(jit_state_t *_jit, enum jit_operand_abi abi, + jit_fpr_t dst, jit_gpr_t base, ptrdiff_t offset) +{ + switch (abi) { + case JIT_OPERAND_ABI_FLOAT: + jit_ldxi_f(_jit, dst, base, offset); + break; + case JIT_OPERAND_ABI_DOUBLE: + jit_ldxi_d(_jit, dst, base, offset); + break; + default: + abort(); + } +} + +static void 
+abi_imm_to_mem(jit_state_t *_jit, enum jit_operand_abi abi, jit_gpr_t base, + ptrdiff_t offset, jit_imm_t imm) +{ + ASSERT(!is_fpr_arg(abi)); + + jit_gpr_t tmp = get_temp_gpr(_jit); + abi_imm_to_gpr(_jit, abi, tmp, imm); + abi_gpr_to_mem(_jit, abi, base, offset, tmp); + unget_temp_gpr(_jit); +} + +static void +abi_mem_to_mem(jit_state_t *_jit, enum jit_operand_abi abi, jit_gpr_t base, + ptrdiff_t offset, jit_gpr_t src_base, ptrdiff_t src_offset) +{ + if (is_gpr_arg (abi)) { + jit_gpr_t tmp = get_temp_gpr(_jit); + abi_mem_to_gpr(_jit, abi, tmp, src_base, src_offset); + abi_gpr_to_mem(_jit, abi, base, offset, tmp); + unget_temp_gpr(_jit); + } else { + jit_fpr_t tmp = get_temp_fpr(_jit); + abi_mem_to_fpr(_jit, abi, tmp, src_base, src_offset); + abi_fpr_to_mem(_jit, abi, base, offset, tmp); + unget_temp_fpr(_jit); + } +} + +#define MOVE_KIND(a, b) ((((int) a) << 4) | ((int) b)) + +#define MOVE_KIND_ENUM(a, b) \ + MOVE_##a##_TO_##b = MOVE_KIND(JIT_OPERAND_KIND_##a, JIT_OPERAND_KIND_##b) +enum move_kind { + MOVE_KIND_ENUM(IMM, GPR), + MOVE_KIND_ENUM(GPR, GPR), + MOVE_KIND_ENUM(MEM, GPR), + MOVE_KIND_ENUM(FPR, FPR), + MOVE_KIND_ENUM(MEM, FPR), + MOVE_KIND_ENUM(IMM, MEM), + MOVE_KIND_ENUM(GPR, MEM), + MOVE_KIND_ENUM(FPR, MEM), + MOVE_KIND_ENUM(MEM, MEM), +#if JIT_PASS_DOUBLES_IN_GPR_PAIRS + MOVE_KIND_ENUM(FPR, GPR_PAIR), + MOVE_KIND_ENUM(GPR_PAIR, FPR), + MOVE_KIND_ENUM(MEM, GPR_PAIR), + MOVE_KIND_ENUM(GPR_PAIR, MEM), + /* needed to make sure nobody overwrites anything */ + MOVE_KIND_ENUM(GPR, GPR_PAIR), + MOVE_KIND_ENUM(GPR_PAIR, GPR), +#endif +#if JIT_PASS_FLOATS_IN_GPRS + MOVE_KIND_ENUM(FPR, GPR), + MOVE_KIND_ENUM(GPR, FPR), +#endif +}; +#undef MOVE_KIND_ENUM + +static void +move_operand(jit_state_t *_jit, jit_operand_t dst, jit_operand_t src) +{ + switch (MOVE_KIND (src.kind, dst.kind)) { + case MOVE_IMM_TO_GPR: + return abi_imm_to_gpr(_jit, src.abi, dst.loc.gpr.gpr, src.loc.imm); + + case MOVE_GPR_TO_GPR: + return jit_movr(_jit, dst.loc.gpr.gpr, src.loc.gpr.gpr); + + 
case MOVE_MEM_TO_GPR: + return abi_mem_to_gpr(_jit, src.abi, dst.loc.gpr.gpr, src.loc.mem.base, + src.loc.mem.offset); + + case MOVE_FPR_TO_FPR: + ASSERT(src.abi == dst.abi); + if (src.abi == JIT_OPERAND_ABI_DOUBLE) + return jit_movr_d(_jit, dst.loc.fpr.fpr, src.loc.fpr.fpr); + else + return jit_movr_f(_jit, dst.loc.fpr.fpr, src.loc.fpr.fpr); + + case MOVE_MEM_TO_FPR: + return abi_mem_to_fpr(_jit, src.abi, dst.loc.fpr.fpr, src.loc.mem.base, + src.loc.mem.offset); + + case MOVE_IMM_TO_MEM: + return abi_imm_to_mem(_jit, src.abi, dst.loc.mem.base, dst.loc.mem.offset, + src.loc.imm); + + case MOVE_GPR_TO_MEM: + return abi_gpr_to_mem(_jit, src.abi, dst.loc.mem.base, dst.loc.mem.offset, + src.loc.gpr.gpr); + + case MOVE_FPR_TO_MEM: + return abi_fpr_to_mem(_jit, src.abi, dst.loc.mem.base, dst.loc.mem.offset, + src.loc.fpr.fpr); + + case MOVE_MEM_TO_MEM: + return abi_mem_to_mem(_jit, src.abi, dst.loc.mem.base, dst.loc.mem.offset, + src.loc.mem.base, src.loc.mem.offset); + +#if JIT_PASS_DOUBLES_IN_GPR_PAIRS + case MOVE_GPR_PAIR_TO_FPR: + ASSERT(dst.abi == JIT_OPERAND_ABI_DOUBLE); + return jit_movr_d_ww(_jit, dst.loc.fpr.fpr, src.loc.gpr_pair.l, src.loc.gpr_pair.h); + + case MOVE_FPR_TO_GPR_PAIR: + ASSERT(src.abi == JIT_OPERAND_ABI_DOUBLE); + return jit_movr_ww_d(_jit, dst.loc.gpr_pair.l, dst.loc.gpr_pair.h, src.loc.fpr.fpr); + + case MOVE_MEM_TO_GPR_PAIR: + ASSERT(src.abi == JIT_OPERAND_ABI_DOUBLE); + return jit_ldxi_ww(_jit, dst.loc.gpr_pair.l, dst.loc.gpr_pair.h, src.loc.mem.base, + src.loc.mem.offset); + + case MOVE_GPR_PAIR_TO_MEM: + ASSERT(dst.abi == JIT_OPERAND_ABI_DOUBLE); + return jit_stxi_ww(_jit, dst.loc.mem.offset, dst.loc.mem.base, + src.loc.gpr_pair.l, src.loc.gpr_pair.h); +#endif + +#if JIT_PASS_FLOATS_IN_GPRS + case MOVE_GPR_TO_FPR: + return jit_movr_f_w(_jit, dst.loc.fpr.fpr, src.loc.gpr.gpr); + + case MOVE_FPR_TO_GPR: + return jit_movr_w_f(_jit, dst.loc.gpr.gpr, src.loc.fpr.fpr); +#endif + + default: + abort(); + } +} + +// A direct transliteration of 
"Tilting at windmills with Coq: formal +// verification of a compilation algorithm for parallel moves" by +// Laurence Rideau, Bernard Paul Serpette, and Xavier Leroy: +// https://xavierleroy.org/publi/parallel-move.pdf + +enum move_status { TO_MOVE, BEING_MOVED, MOVED }; + +static inline int +already_in_place(jit_operand_t src, jit_operand_t dst) +{ + switch (MOVE_KIND(src.kind, dst.kind)) { + case MOVE_GPR_TO_GPR: + return jit_same_gprs (src.loc.gpr.gpr, dst.loc.gpr.gpr); + case MOVE_FPR_TO_FPR: + return jit_same_fprs (src.loc.fpr.fpr, dst.loc.fpr.fpr); + case MOVE_MEM_TO_MEM: + return jit_same_gprs (src.loc.mem.base, dst.loc.mem.base) && + src.loc.mem.offset == dst.loc.mem.offset; + default: + return 0; + } +} + +static inline int +write_would_clobber(jit_operand_t src, jit_operand_t dst) +{ + if (already_in_place (src, dst)) + return 1; + + if (MOVE_KIND(src.kind, dst.kind) == MOVE_MEM_TO_GPR) + return jit_same_gprs(src.loc.mem.base, dst.loc.gpr.gpr); + +#if JIT_PASS_DOUBLES_IN_GPR_PAIRS + if (MOVE_KIND(src.kind, dst.kind) == MOVE_GPR_PAIR_TO_GPR) + return jit_same_gprs(src.loc.gpr_pair.h, dst.loc.gpr.gpr) + || jit_same_gprs(src.loc.gpr_pair.l, dst.loc.gpr.gpr); + + if (MOVE_KIND(src.kind, dst.kind) == MOVE_GPR_PAIR_TO_MEM) + return jit_same_gprs(src.loc.gpr_pair.h, dst.loc.mem.base) + || jit_same_gprs(src.loc.gpr_pair.l, dst.loc.mem.base); +#endif + +#if JIT_PASS_FLOATS_IN_GPRS + if (MOVE_KIND(src.kind, dst.kind) == MOVE_FPR_TO_GPR) + return jit_same_gprs(src.loc.fpr.gpr, dst.loc.gpr.gpr); +#endif + + + return 0; +} + +static inline ptrdiff_t +operand_addend(jit_operand_t op) +{ + switch (op.kind) { + case JIT_OPERAND_KIND_GPR: + return op.loc.gpr.addend; + case JIT_OPERAND_KIND_MEM: + return op.loc.mem.addend; + default: + abort(); + } +} + +static void +move_one(jit_state_t *_jit, jit_operand_t *dst, jit_operand_t *src, + size_t argc, enum move_status *status, size_t i) +{ + int tmp_gpr = 0, tmp_fpr = 0; + + if (already_in_place(src[i], dst[i])) + return; + 
+ status[i] = BEING_MOVED; + for (size_t j = 0; j < argc; j++) { + if (write_would_clobber(src[j], dst[i])) { + switch (status[j]) { + case TO_MOVE: + move_one(_jit, dst, src, argc, status, j); + break; + case BEING_MOVED: { + jit_operand_t tmp; + if (is_fpr_arg ((enum jit_operand_abi)src[j].kind)) { + tmp_fpr = 1; + tmp = jit_operand_fpr(src[j].abi, get_temp_fpr(_jit)); + } else { + tmp_gpr = 1; + /* Preserve addend, if any, from source operand, to be applied + at the end. */ + tmp = jit_operand_gpr_with_addend(src[j].abi, get_temp_gpr(_jit), + operand_addend(src[j])); + } + move_operand (_jit, tmp, src[j]); + src[j] = tmp; + break; + } + case MOVED: + break; + default: + abort (); + } + } + } + + move_operand (_jit, dst[i], src[i]); + status[i] = MOVED; + if (tmp_gpr) + unget_temp_gpr(_jit); + else if (tmp_fpr) + unget_temp_fpr(_jit); +} + +static void +apply_addend(jit_state_t *_jit, jit_operand_t dst, jit_operand_t src) +{ + switch (MOVE_KIND(src.kind, dst.kind)) { + case MOVE_GPR_TO_GPR: + case MOVE_MEM_TO_GPR: + if (operand_addend(src)) + jit_addi(_jit, dst.loc.gpr.gpr, dst.loc.gpr.gpr, operand_addend(src)); + break; + case MOVE_GPR_TO_MEM: + case MOVE_MEM_TO_MEM: + if (operand_addend(src)) { + jit_gpr_t tmp = get_temp_gpr(_jit); + abi_mem_to_gpr(_jit, dst.abi, tmp, dst.loc.mem.base, dst.loc.mem.offset); + jit_addi(_jit, tmp, tmp, operand_addend(src)); + abi_gpr_to_mem(_jit, dst.abi, dst.loc.mem.base, dst.loc.mem.offset, tmp); + unget_temp_gpr(_jit); + } + break; + default: + break; + } +} + +/* Preconditions: No dest operand is IMM. No dest operand aliases + another dest operand. No dest MEM operand uses a base register which + is used as a dest GPR. No dst operand has an addend. The registers + returned by get_temp_gpr and get_temp_fpr do not appear in source or + dest args. */ +void +jit_move_operands(jit_state_t *_jit, jit_operand_t *dst, jit_operand_t *src, + size_t argc) +{ + // Check preconditions, except the condition about tmp registers. 
+ { + uint64_t src_gprs = 0; + uint64_t dst_gprs = 0; + uint64_t dst_fprs = 0; + uint64_t dst_mem_base_gprs = 0; + for (size_t i = 0; i < argc; i++) { + switch (src[i].kind) { + case JIT_OPERAND_KIND_GPR: + src_gprs |= 1ULL << jit_gpr_regno(src[i].loc.gpr.gpr); + break; +#if JIT_PASS_DOUBLES_IN_GPR_PAIRS + case JIT_OPERAND_KIND_GPR_PAIR: { + uint64_t bit0 = 1ULL << jit_gpr_regno(dst[i].loc.gpr_pair.l); + uint64_t bit1 = 1ULL << jit_gpr_regno(dst[i].loc.gpr_pair.h); + src_gprs |= bit0 | bit1; + break; + } +#endif + case JIT_OPERAND_KIND_FPR: + case JIT_OPERAND_KIND_IMM: + case JIT_OPERAND_KIND_MEM: + break; + default: + abort(); + } + switch (dst[i].kind) { + case JIT_OPERAND_KIND_GPR: { + ASSERT(dst[i].loc.gpr.addend == 0); + uint64_t bit = 1ULL << jit_gpr_regno(dst[i].loc.gpr.gpr); + ASSERT((dst_gprs & bit) == 0); + dst_gprs |= bit; + break; + } + case JIT_OPERAND_KIND_FPR: { +#if JIT_PASS_FLOATS_IN_GPRS + if(src[i].kind == JIT_OPERAND_KIND_GPR) { + dst[i].loc.fpr.gpr = src[i].loc.gpr.gpr; + } +#endif + uint64_t bit = 1ULL << jit_fpr_regno(dst[i].loc.fpr.fpr); + ASSERT((dst_fprs & bit) == 0); + dst_fprs |= bit; + break; + } + case JIT_OPERAND_KIND_MEM: { + ASSERT(dst[i].loc.mem.addend == 0); + uint64_t bit = 1ULL << jit_gpr_regno(dst[i].loc.mem.base); + dst_mem_base_gprs |= bit; + break; + } +#if JIT_PASS_DOUBLES_IN_GPR_PAIRS + case JIT_OPERAND_KIND_GPR_PAIR: { + uint64_t bit0 = 1ULL << jit_gpr_regno(dst[i].loc.gpr_pair.l); + uint64_t bit1 = 1ULL << jit_gpr_regno(dst[i].loc.gpr_pair.h); + dst_gprs |= bit0 | bit1; + break; + } +#endif + case JIT_OPERAND_KIND_IMM: + default: + abort(); + break; + } + } + ASSERT(((src_gprs | dst_gprs) & dst_mem_base_gprs) == 0); + } + + enum move_status status[argc]; + for (size_t i = 0; i < argc; i++) + status[i] = TO_MOVE; + for (size_t i = 0; i < argc; i++) + if (status[i] == TO_MOVE) + move_one(_jit, dst, src, argc, status, i); + + // Apply addends at the end. 
We could do it earlier in some cases but + // at least at the end we know that an in-place increment of one + // operand won't alias another. + for (size_t i = 0; i < argc; i++) + apply_addend(_jit, dst[i], src[i]); +} + +size_t +jit_align_stack(jit_state_t *_jit, size_t expand) +{ + size_t new_size = _jit->frame_size + expand; + // Align stack to double-word boundaries. This isn't really a + // principle but it does work for Aarch32, AArch64 and x86-64. + size_t alignment = jit_stack_alignment (); + size_t aligned_size = (new_size + alignment - 1) & ~(alignment - 1); + size_t diff = aligned_size - _jit->frame_size; + if (diff) + jit_subi (_jit, JIT_SP, JIT_SP, diff); + _jit->frame_size = aligned_size; + return diff; +} + +void +jit_shrink_stack(jit_state_t *_jit, size_t diff) +{ + if (diff) + jit_addi (_jit, JIT_SP, JIT_SP, diff); + _jit->frame_size -= diff; +} + +static const jit_gpr_t platform_callee_save_gprs[] = { + JIT_PLATFORM_CALLEE_SAVE_GPRS +}; + +static const jit_fpr_t platform_callee_save_fprs[] = { + JIT_PLATFORM_CALLEE_SAVE_FPRS +}; + +static const jit_gpr_t user_callee_save_gprs[] = { + JIT_V0, JIT_V1, JIT_V2 +#ifdef JIT_V3 + , JIT_V3 +#endif +#ifdef JIT_V4 + , JIT_V4 +#endif +#ifdef JIT_V5 + , JIT_V5 +#endif +#ifdef JIT_V6 + , JIT_V6 +#endif +#ifdef JIT_V7 + , JIT_V7 +#endif +#ifdef JIT_V8 + , JIT_V8 +#endif +#ifdef JIT_V9 + , JIT_V9 +#endif + }; + +static const jit_fpr_t user_callee_save_fprs[] = { +#ifdef JIT_VF0 + JIT_VF0 +#endif +#ifdef JIT_VF1 + , JIT_VF1 +#endif +#ifdef JIT_VF2 + , JIT_VF2 +#endif +#ifdef JIT_VF3 + , JIT_VF3 +#endif +#ifdef JIT_VF4 + , JIT_VF4 +#endif +#ifdef JIT_VF5 + , JIT_VF5 +#endif +#ifdef JIT_VF6 + , JIT_VF6 +#endif +#ifdef JIT_VF7 + , JIT_VF7 +#endif +}; + +#define ARRAY_SIZE(X) (sizeof (X)/sizeof ((X)[0])) +static const size_t pv_count = ARRAY_SIZE(platform_callee_save_gprs); +static const size_t pf_count = ARRAY_SIZE(platform_callee_save_fprs); +static const size_t v_count = ARRAY_SIZE(user_callee_save_gprs); +static 
const size_t vf_count = ARRAY_SIZE(user_callee_save_fprs); + +size_t +jit_enter_jit_abi(jit_state_t *_jit, size_t v, size_t vf, size_t frame_size) +{ + (void)frame_size; + + ASSERT(v <= v_count); + ASSERT(vf <= vf_count); + + _jit->frame_size = jit_initial_frame_size(); + +#if JIT_NEEDS_PROLOG + jit_prolog(_jit); +#endif + + size_t reserved = + jit_align_stack(_jit, (pv_count + pf_count + v) * (__WORDSIZE / 8) + vf * 8); + + size_t offset = 0; + for (size_t i = 0; i < vf; i++, offset += 8) + jit_stxi_d(_jit, offset, JIT_SP, user_callee_save_fprs[i]); + for (size_t i = 0; i < v; i++, offset += __WORDSIZE / 8) + jit_stxi(_jit, offset, JIT_SP, user_callee_save_gprs[i]); + for (size_t i = 0; i < pf_count; i++, offset += __WORDSIZE / 8) + jit_stxi_d(_jit, offset, JIT_SP, platform_callee_save_fprs[i]); + for (size_t i = 0; i < pv_count; i++, offset += __WORDSIZE / 8) + jit_stxi(_jit, offset, JIT_SP, platform_callee_save_gprs[i]); + ASSERT(offset <= reserved); + + return reserved; +} + +void +jit_leave_jit_abi(jit_state_t *_jit, size_t v, size_t vf, size_t frame_size) +{ + ASSERT(v <= v_count); + ASSERT(vf <= vf_count); + ASSERT((pv_count + v) * (__WORDSIZE / 8) + vf * 8 <= frame_size); + + size_t offset = 0; + for (size_t i = 0; i < vf; i++, offset += 8) + jit_ldxi_d(_jit, user_callee_save_fprs[i], JIT_SP, offset); + for (size_t i = 0; i < v; i++, offset += __WORDSIZE / 8) + jit_ldxi(_jit, user_callee_save_gprs[i], JIT_SP, offset); + for (size_t i = 0; i < pf_count; i++, offset += __WORDSIZE / 8) + jit_ldxi_d(_jit, platform_callee_save_fprs[i], JIT_SP, offset); + for (size_t i = 0; i < pv_count; i++, offset += __WORDSIZE / 8) + jit_ldxi(_jit, platform_callee_save_gprs[i], JIT_SP, offset); + ASSERT(offset <= frame_size); + + jit_shrink_stack(_jit, frame_size); + _jit->frame_size -= jit_initial_frame_size(); + +#if JIT_NEEDS_PROLOG + jit_epilog(_jit); +#endif +} + +// Precondition: stack is already aligned. 
+static size_t +prepare_call_args(jit_state_t *_jit, size_t argc, jit_operand_t args[]) +{ + _jit->preparing_call = 1; + jit_operand_t dst[argc]; + struct abi_arg_iterator iter; + + // Compute shuffle destinations and space for spilled arguments. +#if JIT_ASYMMETRIC_STACK + reset_call_arg_iterator(&iter, argc, args); +#else + reset_abi_arg_iterator(&iter, argc, args); +#endif + for (size_t i = 0; i < argc; i++) + next_abi_arg(&iter, &dst[i]); + + // Reserve space for spilled arguments and ensure stack alignment. + size_t stack_size = jit_align_stack(_jit, iter.stack_size); + + // Fix up SP-relative operands. + for (size_t i = 0; i < argc; i++) { + switch(args[i].kind) { + case JIT_OPERAND_KIND_GPR: + if (jit_same_gprs (args[i].loc.gpr.gpr, JIT_SP)) + args[i].loc.gpr.addend += stack_size; + break; + case JIT_OPERAND_KIND_MEM: + if (jit_same_gprs (args[i].loc.mem.base, JIT_SP)) + args[i].loc.mem.offset += stack_size; + break; + +#if JIT_PASS_FLOATS_IN_GPRS + case JIT_OPERAND_KIND_FPR: + if (dst[i].kind == JIT_OPERAND_KIND_GPR) { + args[i].loc.fpr.gpr = dst[i].loc.gpr.gpr; + break; + } +#endif + break; + + default: + break; + } + } + + jit_move_operands(_jit, dst, args, argc); + + _jit->preparing_call = 0; + return stack_size; +} + +void +jit_calli(jit_state_t *_jit, jit_pointer_t f, size_t argc, jit_operand_t args[]) +{ + size_t stack_bytes = prepare_call_args(_jit, argc, args); + + calli(_jit, (jit_word_t)f); + + jit_shrink_stack(_jit, stack_bytes); +} + +void +jit_callr(jit_state_t *_jit, jit_gpr_t f, size_t argc, jit_operand_t args[]) +{ + size_t stack_bytes = prepare_call_args(_jit, argc, args); + + callr(_jit, jit_gpr_regno(f)); + + jit_shrink_stack(_jit, stack_bytes); +} + +void +jit_locate_args(jit_state_t *_jit, size_t argc, jit_operand_t args[]) +{ + struct abi_arg_iterator iter; + +#if JIT_ASYMMETRIC_STACK + reset_load_arg_iterator(&iter, argc, args); +#else + reset_abi_arg_iterator(&iter, argc, args); +#endif + iter.stack_size += _jit->frame_size; + for 
(size_t i = 0; i < argc; i++) + next_abi_arg(&iter, &args[i]); +} + +/* Precondition: args are distinct locations of type GPR or FPR. All + addends of arg operands are zero. No GPR arg is SP. */ +void +jit_load_args(jit_state_t *_jit, size_t argc, jit_operand_t args[]) +{ + jit_operand_t src[argc]; + + memcpy(src, args, sizeof(src[0]) * argc); + + jit_locate_args(_jit, argc, src); + jit_move_operands(_jit, args, src, argc); +} + +#ifdef JIT_NEEDS_LITERAL_POOL +static uint32_t +literal_pool_byte_size(struct jit_literal_pool *pool) +{ + // Check arch header for actual values for these literals, or if applicable, + // see default values defined in lightening.h + return JIT_EXTRA_SPACE + JIT_JMP_MAX_SIZE + 7 + pool->size * JIT_LITERAL_MAX_SIZE; +} + +static void +reset_literal_pool(jit_state_t *_jit, struct jit_literal_pool *pool) +{ + pool->deadline = _jit->limit - _jit->start; + memset(pool->entries, 0, sizeof(pool->entries[0]) * pool->size); + pool->size = 0; +} + +#define INITIAL_LITERAL_POOL_CAPACITY 12 +static struct jit_literal_pool* +alloc_literal_pool(jit_state_t *_jit, size_t capacity) +{ + if (capacity == 0) capacity = INITIAL_LITERAL_POOL_CAPACITY; + + struct jit_literal_pool *ret = + _jit->alloc (sizeof (struct jit_literal_pool) + + sizeof (struct jit_literal_pool_entry) * capacity); + ASSERT (ret); + ret->capacity = capacity; + ret->size = 0; + reset_literal_pool(_jit, ret); + return ret; +} + +static void +grow_literal_pool(jit_state_t *_jit) +{ + struct jit_literal_pool *new_pool = + alloc_literal_pool(_jit, _jit->pool->capacity * 2); + + for (size_t i = 0; i < _jit->pool->size; i++) + new_pool->entries[new_pool->size++] = _jit->pool->entries[i]; + new_pool->deadline = _jit->pool->deadline; + + _jit->free (_jit->pool); + _jit->pool = new_pool; +} + +static jit_bool_t +add_literal_pool_entry(jit_state_t *_jit, struct jit_literal_pool_entry entry, + uint32_t max_offset) +{ + if (_jit->overflow) + return 1; + + if (max_offset <= 
literal_pool_byte_size(_jit->pool)) { + emit_literal_pool(_jit, GUARD_NEEDED); + return 0; + } + + if (_jit->pool->size == _jit->pool->capacity) + grow_literal_pool (_jit); + + uint32_t loc_offset = _jit->pc.uc - _jit->start; + uint32_t inst_offset = loc_offset - entry.reloc.inst_start_offset; + uint32_t pc_base_offset = inst_offset + entry.reloc.pc_base_offset; + uint32_t deadline = + pc_base_offset + (max_offset - literal_pool_byte_size(_jit->pool)); + if (deadline < _jit->pool->deadline) + _jit->pool->deadline = deadline; + + _jit->pool->entries[_jit->pool->size++] = entry; + + return 1; +} + +static jit_bool_t +add_pending_literal(jit_state_t *_jit, jit_reloc_t src, + uint8_t max_offset_bits) +{ + struct jit_literal_pool_entry entry = { src, 0 }; + uint32_t max_inst_size = JIT_INST_MAX_SIZE; + uint32_t max_offset = (1 << (max_offset_bits + src.rsh)) - max_inst_size; + return add_literal_pool_entry(_jit, entry, max_offset); +} + +static void +remove_pending_literal(jit_state_t *_jit, jit_reloc_t src) +{ + for (size_t i = _jit->pool->size; i--; ) { + if (_jit->pool->entries[i].reloc.offset == src.offset) { + for (size_t j = i + 1; j < _jit->pool->size; j++) + _jit->pool->entries[j-1] = _jit->pool->entries[j]; + _jit->pool->size--; + return; + } + } + abort(); +} + +static void +patch_pending_literal(jit_state_t *_jit, jit_reloc_t src, uintptr_t value) +{ + for (size_t i = _jit->pool->size; i--; ) { + if (_jit->pool->entries[i].reloc.offset == src.offset) { + ASSERT(_jit->pool->entries[i].value == 0); + _jit->pool->entries[i].value = value; + return; + } + } + abort(); +} + +static void +emit_literal_pool(jit_state_t *_jit, enum guard_pool guard) +{ + if (_jit->overflow) + return; + + if (!_jit->pool->size) + return; + + uint32_t *patch_loc = NULL; + if (guard == GUARD_NEEDED) + patch_loc = jmp_without_veneer(_jit); + + // FIXME: Could de-duplicate constants. + for (size_t i = 0; i < _jit->pool->size; i++) { + // Align to word boundary without emitting pool. 
/* Write every queued literal-pool entry at the current emission point,
   patch the instruction that refers to each one, and reset the pool.
   With GUARD == GUARD_NEEDED a jump is emitted first and patched at the
   end (presumably so execution skips the pool data -- confirm against
   jmp_without_veneer).  Does nothing when the pool is empty, and stops
   early once the code buffer has overflowed.  */
static void
emit_literal_pool(jit_state_t *_jit, enum guard_pool guard)
{
  if (_jit->overflow)
    return;

  if (!_jit->pool->size)
    return;

  uint32_t *patch_loc = NULL;
  if (guard == GUARD_NEEDED)
    patch_loc = jmp_without_veneer(_jit);

  // FIXME: Could de-duplicate constants.
  for (size_t i = 0; i < _jit->pool->size; i++) {
    // Align to word boundary without emitting pool.
    if (_jit->pc.w & 1) emit_u8(_jit, 0);
    if (_jit->pc.w & 2) emit_u16(_jit, 0);
    if (sizeof(uintptr_t) > 4 && (_jit->pc.w & 4))
      emit_u32(_jit, 0);
    ASSERT((_jit->pc.w & (sizeof(uintptr_t) - 1)) == 0);
    struct jit_literal_pool_entry *entry = &_jit->pool->entries[i];
    // Address of the referring instruction's relocated field, and the
    // base its offset is measured from.
    uint8_t *loc = _jit->start + entry->reloc.offset;
    uint8_t *pc_base =
      loc - entry->reloc.inst_start_offset + entry->reloc.pc_base_offset;
    // Distance from that base to the slot about to be written, scaled
    // down by the relocation's shift.
    ptrdiff_t diff = _jit->pc.uc - pc_base;
    diff >>= entry->reloc.rsh;

    // Checked before patching, after the padding above may have set it.
    if (_jit->overflow)
      return;

    switch (entry->reloc.kind & JIT_RELOC_MASK) {
    case JIT_RELOC_JMP_WITH_VENEER:
      patch_veneer_jmp_offset((uint32_t*) loc, diff);
      emit_veneer(_jit, (void*) entry->value);
      break;
    case JIT_RELOC_JCC_WITH_VENEER:
      patch_veneer_jcc_offset((uint32_t*) loc, diff);
      emit_veneer(_jit, (void*) entry->value);
      break;
    case JIT_RELOC_LOAD_FROM_POOL:
      patch_load_from_pool_offset((uint32_t*) loc, diff);
      emit_uintptr(_jit, entry->value);
      break;
    default:
      abort();
    }
  }

  if (_jit->overflow)
    return;

  if (guard == GUARD_NEEDED)
    patch_jmp_without_veneer(_jit, patch_loc);

  reset_literal_pool(_jit, _jit->pool);
}
#endif
/* One 32-bit MIPS instruction word, viewed through the R, I and J
   instruction formats.  The bit-fields are declared in opposite order
   for the two byte orders so that `op' always lands in the six most
   significant bits of `w'.  */
typedef union {
#if __BYTE_ORDER == __LITTLE_ENDIAN
  struct {
    uint32_t funct:6;
    uint32_t shamt:5;
    uint32_t rd:5;
    uint32_t rt:5;
    uint32_t rs:5;
    uint32_t op:6;
  } R;

  struct {
    int32_t i0:16;
    uint32_t rt:5;
    uint32_t rs:5;
    uint32_t op:6;
  } I;

  struct {
    uint32_t addr:26;
    uint32_t op:6;
  } J;
#else
  struct {
    uint32_t op:6;
    uint32_t rs:5;
    uint32_t rt:5;
    uint32_t rd:5;
    uint32_t shamt:5;
    uint32_t funct:6;
  } R;

  struct {
    uint32_t op:6;
    uint32_t rs:5;
    uint32_t rt:5;
    int32_t i0:16;
  } I;

  struct {
    uint32_t op:6;
    uint32_t addr:26;
  } J;
#endif
  uint32_t w;
} instr_t;

/* Range predicates for immediates.  The lower bound of the signed
   16-bit range is -32768.  */
#define can_sign_extend_short_p(im)	((im) >= -32768 && (im) <= 32767)
#define can_zero_extend_short_p(im)	((im) >= 0 && (im) <= 65535)
#if __WORDSIZE == 32
#define can_sign_extend_int_p(im)	1
#define can_zero_extend_int_p(im)	1
#else
#define can_sign_extend_int_p(im)					\
  (((im) >= 0 && (im) <= 0x7fffffffL) ||				\
   ((im) < 0 && (im) >= -0x80000000L))
#define can_zero_extend_int_p(im)	((im) >= 0 && (im) <= 0xffffffff)
#endif

/* Signed-range checks for 16-, 18- and 26-bit fields.  */
#define simm16_p(i0) ((i0) <= 0x7fff && (i0) >= -0x8000)
#define simm18_p(i0) ((i0) <= 0x1ffff && (i0) >= -0x20000)
#define simm26_p(i0) ((i0) <= 0x1ffffff && (i0) >= -0x2000000)

/* Unsigned-range checks: no bit set outside the low 16/18/26 bits.  */
#define uimm16_p(i0) (!((i0) & ~0xffff))
#define uimm18_p(i0) (!((i0) & ~0x3ffff))
#define uimm26_p(i0) (!((i0) & ~0x3ffffff))

/* A 6-bit opcode and a 5-bit register number, respectively.  */
#define op_p(op) (!((op) & ~0x3f))
#define reg_p(r) (!((r) & ~0x1f))

#define em_wp(jit, inst) emit_u32_with_pool(jit, inst)

/* Encode an R-format instruction from its six fields.  Each field is
   asserted to fit its bit width.  */
static uint32_t
Rtype(int32_t op, int32_t rs, int32_t rt, int32_t rd, int32_t shamt,
      int32_t funct)
{
  instr_t i;
  assert(op_p(op));
  assert(reg_p(rs));
  assert(reg_p(rt));
  assert(reg_p(rd));
  assert(!(shamt & ~0x1f));
  assert(!(funct & ~0x3f));
  i.R.op = op;
  i.R.rs = rs;
  i.R.rt = rt;
  i.R.rd = rd;
  i.R.shamt = shamt;
  i.R.funct = funct;
  return i.w;
}
int32_t rs, int32_t rt, int32_t i0) +{ + instr_t i; + assert(op_p(op)); + assert(reg_p(rs)); + assert(reg_p(rt)); + assert(simm16_p(i0) || uimm16_p(i0)); + i.I.op = op; + i.I.rs = rs; + i.I.rt = rt; + i.I.i0 = i0; + return i.w; +} + +static uint32_t +Jtype(int32_t op, int32_t addr) +{ + instr_t i; + assert(op_p(op)); + assert(simm26_p(addr) || uimm26_p(addr)); + i.J.op = op; + i.J.addr = addr; + return i.w; +} + +/* + * FIXME + */ +#define jit_mips2_p() 0x00 +#define OP_SPECIAL 0x00 +#define OP_REGIMM 0x01 +#define OP_J 0x02 +#define OP_SRL 0x02 +#define OP_JAL 0x03 +#define OP_SRA 0x03 +#define OP_BEQ 0x04 +#define OP_BNE 0x05 +#define OP_BLEZ 0x06 +#define OP_BGTZ 0x07 +#define OP_ADDI 0x08 +#define OP_ADDIU 0x09 +#define OP_SLTI 0x0a +#define OP_SLTIU 0x0b +#define OP_ANDI 0x0c +#define OP_ORI 0x0d +#define OP_XORI 0x0e +#define OP_LUI 0x0f +#define OP_COP0 0x10 +#define OP_COP1 0x11 +#define OP_COP2 0x12 +#define OP_COP1X 0x13 +#define OP_BEQL 0x14 +#define OP_BNEL 0x15 +#define OP_BLEZL 0x16 +#define OP_BGTZL 0x17 +#define OP_DADDI 0x18 +#define OP_DADDIU 0x19 +#define OP_LDL 0x1a +#define OP_LDR 0x1b +#define OP_SPECIAL2 0x1c +#define OP_JALX 0x1d +#define OP_SPECIAL3 0x1f +#define OP_LB 0x20 +#define OP_LH 0x21 +#define OP_LWL 0x22 +#define OP_LW 0x23 +#define OP_LBU 0x24 +#define OP_LHU 0x25 +#define OP_LWR 0x26 +#define OP_LWU 0x27 +#define OP_SB 0x28 +#define OP_SH 0x29 +#define OP_SWL 0x2a +#define OP_SW 0x2b +#define OP_SWR 0x2e +#define OP_CACHE 0x2f +#define OP_LL 0x30 +#define OP_LWC1 0x31 +#define OP_LWC2 0x32 +#define OP_PREF 0x33 +#define OP_LLD 0x34 +#define OP_LDC1 0x35 +#define OP_LDC2 0x36 +#define OP_LD 0x37 +#define OP_SC 0x38 +#define OP_SCD 0x3c +#define OP_SDC1 0x3d +#define OP_SDC2 0x3e +#define OP_SWC1 0x39 +#define OP_SWC2 0x3a +#define OP_SD 0x3f +#define OP_MF 0x00 +#define OP_MFH 0x03 +#define OP_DMF 0x01 +#define OP_CF 0x02 +#define OP_MFH 0x03 +#define OP_MT 0x04 +#define OP_MTH 0x07 +#define OP_DMT 0x05 +#define OP_CT 0x06 
+#define OP_MTH 0x07 +#define OP_BC 0x08 +#define OP_WRPGPR 0x0e +#define OP_BGZAL 0x11 +#define OP_MFMC0 0x11 +#define OP_BCF 0x00 +#define OP_BLTZ 0x00 +#define OP_BCT 0x01 +#define OP_BGEZ 0x01 +#define OP_BCFL 0x02 +#define OP_BLTZL 0x02 +#define OP_BCTL 0x03 +#define OP_BGEZL 0x03 +#define OP_TGEI 0x08 +#define OP_TGEIU 0x09 +#define OP_TLTI 0x0a +#define OP_TLTIU 0x0b +#define OP_TEQI 0x0c +#define OP_TNEI 0x0e +#define OP_BLTZAL 0x10 +#define OP_BGEZAL 0x11 +#define OP_BLTZALL 0x12 +#define OP_BGEZALL 0x13 +#define OP_SYNCI 0x1f +#define OP_WSBH 0x02 +#define OP_DBSH 0x02 +#define OP_DSHD 0x05 +#define OP_SEB 0x10 +#define OP_SEH 0x18 +#define OP_MADD 0x00 +#define OP_SLL 0x00 +#define OP_EXT 0x00 +#define OP_DEXTM 0x01 +#define OP_MADDU 0x01 +#define OP_MOVFT 0x01 +#define OP_TLBR 0x01 +#define OP_MUL 0x02 +#define OP_DEXTU 0x02 +#define OP_TLBWI 0x02 +#define OP_DEXT 0x03 +#define OP_SLLV 0x04 +#define OP_INS 0x04 +#define OP_MSUB 0x04 +#define OP_DINSM 0x05 +#define OP_MSUBU 0x05 +#define OP_SRLV 0x06 +#define OP_DINSU 0x06 +#define OP_TLBWR 0x06 +#define OP_SRAV 0x07 +#define OP_DINS 0x07 +#define OP_JR 0x08 +#define OP_TLBP 0x08 +#define OP_JALR 0x09 +#define OP_MOVZ 0x0a +#define OP_MOVN 0x0b +#define OP_SYSCALL 0x0c +#define OP_BREAK 0x0d +#define OP_PREFX 0x0f +#define OP_SYNC 0x0f +#define OP_MFHI 0x10 +#define OP_MTHI 0x11 +#define OP_MFLO 0x12 +#define OP_MTLO 0x13 +#define OP_DSLLV 0x14 +#define OP_DSRLV 0x16 +#define OP_DSRAV 0x17 +#define OP_MULT 0x18 +#define OP_ERET 0x18 +#define OP_MULTU 0x19 +#define OP_DIV 0x1a +#define OP_DIVU 0x1b +#define OP_DMULT 0x1c +#define OP_DMULTU 0x1d +#define OP_DDIV 0x1e +#define OP_DDIVU 0x1f +#define OP_DERET 0x1f +#define OP_ADD 0x20 +#define OP_CLZ 0x20 +#define OP_BSHFL 0x20 +#define OP_ADDU 0x21 +#define OP_CLO 0x21 +#define OP_SUB 0x22 +#define OP_SUBU 0x23 +#define OP_AND 0x24 +#define OP_DCLZ 0x24 +#define OP_DBSHFL 0x24 +#define OP_OR 0x25 +#define OP_DCLO 0x25 +#define OP_XOR 0x26 +#define OP_NOR 
0x27
#define OP_SLT 0x2a
#define OP_SLTU 0x2b
/*
 * PC-relative instructions (MIPS Release 6): major opcode PCREL is
 * 0b111011.  The value previously used here, 0x2b, is the encoding of
 * OP_SW, so _AUIPC() would have assembled a store-word opcode.
 */
#define OP_PCREL 0x3b
#define OP_DADD 0x2c
#define OP_DADDU 0x2d
#define OP_DSUB 0x2e
#define OP_DSUBU 0x2f
#define OP_TGE 0x30
#define OP_TGEU 0x31
#define OP_TLT 0x32
#define OP_TLTU 0x33
#define OP_TEQ 0x34
#define OP_TNE 0x36
#define OP_DSLL 0x38
#define OP_DSRL 0x3a
#define OP_DSRA 0x3b
#define OP_DSLL32 0x3c
#define OP_DSRL32 0x3e
/*
 * AUIPC selector, placed by _AUIPC() in the 5-bit rt slot of Itype():
 * 0b11110.  The previous value 0x3e does not fit in five bits.
 */
#define OP_AUIPC 0x1e
#define OP_DSRA32 0x3f
#define OP_SDBBP 0x3f

/*
 * Instruction constructors.  Each macro expands to a call of Rtype(),
 * Itype() or Jtype() and yields one encoded instruction word.  Only the
 * instructions needed by this backend are provided; this is not a
 * complete MIPS assembler.
 *
 * Parameter naming: rd/rs/rt are register numbers, sa is a shift
 * amount, i0 is a 16-bit immediate.
 */
/* The argument of _NOP() is accepted but ignored. */
#define _NOP(i0) Rtype(OP_SPECIAL, 0, 0, 0, 0, OP_SLL)
#define _LUI(rt, i0) Itype(OP_LUI, 0, rt, i0)
#define _ADDU(rd, rs, rt) Rtype(OP_SPECIAL, rs, rt, rd, 00, OP_ADDU)
#define _DADDU(rd, rs, rt) Rtype(OP_SPECIAL, rs, rt, rd, 00, OP_DADDU)
#define _ADDIU(rt, rs, i0) Itype(OP_ADDIU, rs, rt, i0)
#define _DADDIU(rt, rs, i0) Itype(OP_DADDIU, rs, rt, i0)
#define _SUBU(rd, rs, rt) Rtype(OP_SPECIAL, rs, rt, rd, 00, OP_SUBU)
#define _DSUBU(rd, rs, rt) Rtype(OP_SPECIAL, rs, rt, rd, 00, OP_DSUBU)
#define _MULT(rs, rt) Rtype(OP_SPECIAL, rs, rt, 00, 00, OP_MULT)
#define _MULTU(rs, rt) Rtype(OP_SPECIAL, rs, rt, 00, 00, OP_MULTU)
#define _DMULT(rs, rt) Rtype(OP_SPECIAL, rs, rt, 00, 00, OP_DMULT)
#define _DMULTU(rs, rt) Rtype(OP_SPECIAL, rs, rt, 00, 00, OP_DMULTU)
#define _DIV(rs, rt) Rtype(OP_SPECIAL, rs, rt, 00, 00, OP_DIV)
#define _DIVU(rs, rt) Rtype(OP_SPECIAL, rs, rt, 00, 00, OP_DIVU)
#define _DDIV(rs, rt) Rtype(OP_SPECIAL, rs, rt, 00, 00, OP_DDIV)
#define _DDIVU(rs, rt) Rtype(OP_SPECIAL, rs, rt, 00, 00, OP_DDIVU)
#define _SLLV(rd, rt, rs) Rtype(OP_SPECIAL, rs, rt, rd, 00, OP_SLLV)
#define _SLL(rd, rt, sa) Rtype(OP_SPECIAL, 00, rt, rd, sa, OP_SLL)
#define _DSLLV(rd, rt, rs) Rtype(OP_SPECIAL, rs, rt, rd, 00, OP_DSLLV)
#define _DSLL(rd, rt, sa) Rtype(OP_SPECIAL, 00, rt, rd, sa, OP_DSLL)
#define _DSLL32(rd, rt, sa) Rtype(OP_SPECIAL, 00, rt, rd, sa, OP_DSLL32)
#define _SRAV(rd, rt, rs) Rtype(OP_SPECIAL, rs, rt, rd,
00, OP_SRAV) +#define _SRA(rd, rt, sa) Rtype(OP_SPECIAL, 00, rt, rd, sa, OP_SRA) +#define _SRLV(rd, rt, rs) Rtype(OP_SPECIAL, rs, rt, rd, 00, OP_SRLV) +#define _SRL(rd, rt, sa) Rtype(OP_SPECIAL, 00, rt, rd, sa, OP_SRL) +#define _DSRAV(rd, rt, rs) Rtype(OP_SPECIAL, rs, rt, rd, 00, OP_DSRAV) +#define _DSRA(rd, rt, sa) Rtype(OP_SPECIAL, 00, rt, rd, sa, OP_DSRA) +#define _DSRA32(rd, rt, sa) Rtype(OP_SPECIAL, 00, rt, rd, sa, OP_DSRA32) +#define _DSRLV(rd, rt, rs) Rtype(OP_SPECIAL, rs, rt, rd, 00, OP_DSRLV) +#define _DSRL(rd, rt, sa) Rtype(OP_SPECIAL, 00, rt, rd, sa, OP_DSRL) +#define _DSRL32(rd, rt, sa) Rtype(OP_SPECIAL, 00, rt, rd, sa, OP_DSRL32) +#define _INS(rt, rs, pos, size) Rtype(OP_SPECIAL3, rs, rt, \ + pos + size - 1, pos, 0x04) +#define _DINS(rt, rs, pos, size) Rtype(OP_SPECIAL3, rs, rt, \ + pos + size - 1, pos, 0x07) +#define _ROTR(rd, rs, sa) Rtype(OP_SPECIAL, 01, rt, rd, sa, OP_SRL) +#define _DROTR(rd, rs, sa) Rtype(OP_SPECIAL, 01, rt, rd, sa, OP_DSRL) +#define _MFHI(rd) Rtype(OP_SPECIAL, 00, 00, rd, 00, OP_MFHI) +#define _MFLO(rd) Rtype(OP_SPECIAL, 00, 00, rd, 00, OP_MFLO) +#define _MTHI(rs) Rtype(OP_SPECIAL, rs, 00, 00, 00, OP_MTHI) +#define _MTLO(rs) Rtype(OP_SPECIAL, rs, 00, 00, 00, OP_MTLO) +#define _AND(rd, rs, rt) Rtype(OP_SPECIAL, rs, rt, rd, 00, OP_AND) +#define _ANDI(rt, rs, i0) Itype(OP_ANDI, rs, rt, i0) +#define _OR(rd, rs, rt) Rtype(OP_SPECIAL, rs, rt, rd, 00, OP_OR) +#define _ORI(rt, rs, i0) Itype(OP_ORI, rs, rt, i0) +#define _XOR(rd, rs, rt) Rtype(OP_SPECIAL, rs, rt, rd, 0, OP_XOR) +#define _XORI(rt, rs, i0) Itype(OP_XORI, rs, rt, i0) +#define _LB(rt, of, bs) Itype(OP_LB, bs, rt, of) +#define _LBU(rt, of, bs) Itype(OP_LBU, bs, rt, of) +#define _LH(rt, of, bs) Itype(OP_LH, bs, rt, of) +#define _LHU(rt, of, bs) Itype(OP_LHU, bs, rt, of) +#define _LW(rt, of, bs) Itype(OP_LW, bs, rt, of) +#define _LWU(rt, of, bs) Itype(OP_LWU, bs, rt, of) +#define _LD(rt, of, bs) Itype(OP_LD, bs, rt, of) +#define _SB(rt, of, bs) Itype(OP_SB, bs, rt, of) +#define 
_SH(rt, of, bs) Itype(OP_SH, bs, rt, of) +#define _SW(rt, of, bs) Itype(OP_SW, bs, rt, of) +#define _SD(rt, of, bs) Itype(OP_SD, bs, rt, of) +#define _WSBH(rd, rt) Rtype(OP_SPECIAL3, 00, rt, rd, OP_WSBH, OP_BSHFL) +#define _SEB(rd, rt) Rtype(OP_SPECIAL3, 00, rt, rd, OP_SEB, OP_BSHFL) +#define _SEH(rd, rt) Rtype(OP_SPECIAL3, 00, rt, rd, OP_SEH, OP_BSHFL) +#define _SLT(rd, rs, rt) Rtype(OP_SPECIAL, rs, rt, rd, 00, OP_SLT) +#define _SLTU(rd, rs, rt) Rtype(OP_SPECIAL, rs, rt, rd, 00, OP_SLTU) +#define _SLTI(rt, rs, i0) Itype(OP_SLTI, rs, rt, i0) +#define _SLTIU(rt, rs, i0) Itype(OP_SLTIU, rs, rt, i0) +#define _BLTZ(rs, of) Itype(OP_REGIMM, rs, OP_BLTZ, of) +#define _BLEZ(rs, of) Itype(OP_BLEZ, rs, 00, of) +#define _BEQ(rs, rt, of) Itype(OP_BEQ, rs, rt, of) +#define _BGEZ(rs, of) Itype(OP_REGIMM, rs, OP_BGEZ, of) +#define _BGTZ(rs, of) Itype(OP_BGTZ, rs, 00, of) +#define _BNE(rs, rt, of) Itype(OP_BNE, rs, rt, of) +#define _JALR(rd, rs) Rtype(OP_SPECIAL, rs, 00, rd, 00, OP_JALR) +#define _JR(rs) Rtype(OP_SPECIAL, rs, 00, 00, 00, OP_JR) +#define _J(t) Jtype(OP_J, t) +#define _MOVZ(rd, rs, rt) Rtype(OP_SPECIAL, rs, rt, rd, 00, OP_MOVZ) + +#define _SDBBP() Rtype(OP_SPECIAL2, 0, 0, 0, 0, OP_SDBBP) +#define _AUIPC(rs, im) Itype(OP_PCREL, rs, OP_AUIPC, im) +#define _SYNC(st) Rtype(OP_SPECIAL, 0, 0, 0, st, OP_SYNC) +/* + * note: these use pre-release 6 formats, should probably eventually add + * in some detection + */ +#define _LL(rt, of, bs) Itype(OP_LL, bs, rt, of) +#define _LLD(rt, of, bs) Itype(OP_LLD, bs, rt, of) +#define _SC(rt, of, bs) Itype(OP_SC, bs, rt, of) +#define _SCD(rt, of, bs) Itype(OP_SCD, bs, rt, of) +#define _MOVR(rt, rd) _ORI(rt, rd, rn(_ZERO)) + +#if __WORDSIZE == 64 +#define _WADDR(rd, rs, rt) _DADDU(rd, rs, rt) +#define _WADDIU(rd, rs, i0) _DADDIU(rd, rs, i0) +#define _WSUBR(rd, rs, rt) _DSUBU(rd, rs, rt) +#define _WMULT(rs, rt) _DMULT(rs, rt) +#define _WMULTU(rs, rt) _DMULTU(rs, rt) +#define _WDIV(rs, rt) _DDIV(rs, rt) +#define _WDIVU(rs, rt) _DDIVU(rs, 
rt) +#define _WSLLV(rd, rt, rs) _DSLLV(rd, rt, rs) +#define _WSRAV(rd, rt, rs) _DSRAV(rd, rt, rs) +#define _WSRLV(rd, rt, rs) _DSRLV(rd, rt, rs) +#define _WLD(rt, of, bs) _LD(rt, of, bs) +#define _WLL(rt, of, bs) _LLD(rt, of, bs) +#define _WSC(rt, of, bs) _SCD(rt, of, bs) +#else +#define _WADDR(rd, rs, rt) _ADDU(rd, rs, rt) +#define _WADDIU(rd, rs, i0) _ADDIU(rd, rs, i0) +#define _WSUBR(rd, rs, rt) _SUBU(rd, rs, rt) +#define _WMULT(rs, rt) _MULT(rs, rt) +#define _WMULTU(rs, rt) _MULTU(rs, rt) +#define _WDIV(rs, rt) _DIV(rs, rt) +#define _WDIVU(rs, rt) _DIVU(rs, rt) +#define _WSLLV(rd, rt, rs) _SLLV(rd, rt, rs) +#define _WSRAV(rd, rt, rs) _SRAV(rd, rt, rs) +#define _WSRLV(rd, rt, rs) _SRLV(rd, rt, rs) +#define _WLD(rt, of, bs) _LW(rt, of, bs) +#define _WLL(rt, of, bs) _LL(rt, of, bs) +#define _WSC(rt, of, bs) _SC(rt, of, bs) +#endif + +static void addr(jit_state_t * _jit, int32_t r0, int32_t r1, int32_t r2); +static void addi(jit_state_t * _jit, int32_t r0, int32_t r1, + jit_word_t i0); +static void addcr(jit_state_t * _jit, int32_t r0, int32_t r1, int32_t r2); +static void addci(jit_state_t * _jit, int32_t r0, int32_t r1, + jit_word_t i0); +static void addxr(jit_state_t * _jit, int32_t r0, int32_t r1, int32_t r2); +static void addxi(jit_state_t * _jit, int32_t r0, int32_t r1, + jit_word_t i0); + +static void subr(jit_state_t * _jit, int32_t r0, int32_t r1, int32_t r2); +static void subi(jit_state_t * _jit, int32_t r0, int32_t r1, + jit_word_t i0); +static void subcr(jit_state_t * _jit, int32_t r0, int32_t r1, int32_t r2); +static void subci(jit_state_t * _jit, int32_t r0, int32_t r1, + jit_word_t i0); +static void subxr(jit_state_t * _jit, int32_t r0, int32_t r1, int32_t r2); +static void subxi(jit_state_t * _jit, int32_t r0, int32_t r1, + jit_word_t i0); + +static void mulr(jit_state_t * _jit, int32_t r0, int32_t r1, int32_t r2); +static void muli(jit_state_t * _jit, int32_t r0, int32_t r1, + jit_word_t i0); + +static void divr(jit_state_t * _jit, int32_t r0, 
int32_t r1, int32_t r2); +static void divi(jit_state_t * _jit, int32_t r0, int32_t r1, + jit_word_t i0); +static void divr_u(jit_state_t * _jit, int32_t r0, int32_t r1, int32_t r2); +static void divi_u(jit_state_t * _jit, int32_t r0, int32_t r1, + jit_word_t i0); + +static void remr(jit_state_t * _jit, int32_t r0, int32_t r1, int32_t r2); +static void remi(jit_state_t * _jit, int32_t r0, int32_t r1, + jit_word_t i0); +static void remr_u(jit_state_t * _jit, int32_t r0, int32_t r1, int32_t r2); +static void remi_u(jit_state_t * _jit, int32_t r0, int32_t r1, + jit_word_t i0); + +static void andr(jit_state_t * _jit, int32_t r0, int32_t r1, int32_t r2); +static void andi(jit_state_t * _jit, int32_t r0, int32_t r1, + jit_word_t i0); +static void orr(jit_state_t * _jit, int32_t r0, int32_t r1, int32_t r2); +static void ori(jit_state_t * _jit, int32_t r0, int32_t r1, jit_word_t i0); +static void xorr(jit_state_t * _jit, int32_t r0, int32_t r1, int32_t r2); +static void xori(jit_state_t * _jit, int32_t r0, int32_t r1, + jit_word_t i0); +static void lshr(jit_state_t * _jit, int32_t r0, int32_t r1, int32_t r2); +static void lshi(jit_state_t * _jit, int32_t r0, int32_t r1, + jit_word_t i0); +static void rshr(jit_state_t * _jit, int32_t r0, int32_t r1, int32_t r2); +static void rshi(jit_state_t * _jit, int32_t r0, int32_t r1, + jit_word_t i0); +static void rshr_u(jit_state_t * _jit, int32_t r0, int32_t r1, int32_t r2); +static void rshi_u(jit_state_t * _jit, int32_t r0, int32_t r1, + jit_word_t i0); + +static void qmulr(jit_state_t * _jit, int32_t r0, int32_t r1, + int32_t r2, int32_t r3); +static void qmulr_u(jit_state_t * _jit, int32_t r0, int32_t r1, + int32_t r2, int32_t r3); +static void qmuli(jit_state_t * _jit, int32_t r0, int32_t r1, + int32_t r2, jit_word_t i0); +static void qmuli_u(jit_state_t * _jit, int32_t r0, int32_t r1, + int32_t r2, jit_word_t i0); + +static void qdivr(jit_state_t * _jit, int32_t r0, int32_t r1, + int32_t r2, int32_t r3); +static void 
qdivr_u(jit_state_t * _jit, int32_t r0, int32_t r1, + int32_t r2, int32_t r3); +static void qdivi(jit_state_t * _jit, int32_t r0, int32_t r1, + int32_t r2, jit_word_t i0); +static void qdivi_u(jit_state_t * _jit, int32_t r0, int32_t r1, + int32_t r2, jit_word_t i0); + +static void negr(jit_state_t * _jit, int32_t r0, int32_t r1); +static void comr(jit_state_t * _jit, int32_t r0, int32_t r1); + +static void movr(jit_state_t * _jit, int32_t r0, int32_t r1); +static void movi(jit_state_t * _jit, int32_t r0, jit_word_t i0); + +static jit_reloc_t mov_addr(jit_state_t * _jit, int32_t r0); +static jit_reloc_t movi_from_immediate(jit_state_t * _jit, int32_t r0); +static void emit_immediate_reloc(jit_state_t * _jit, int32_t r0, + jit_bool_t in_veneer); + +static void extr_c(jit_state_t * _jit, int32_t r0, int32_t r1); +static void extr_uc(jit_state_t * _jit, int32_t r0, int32_t r1); +static void extr_s(jit_state_t * _jit, int32_t r0, int32_t r1); +static void extr_us(jit_state_t * _jit, int32_t r0, int32_t r1); + +#if __WORDSIZE == 64 +static void extr_i(jit_state_t * _jit, int32_t r0, int32_t r1); +static void extr_ui(jit_state_t * _jit, int32_t r0, int32_t r1); +#endif + +static jit_reloc_t bltr(jit_state_t * _jit, int32_t r0, int32_t r1); +static jit_reloc_t blti(jit_state_t * _jit, int32_t r0, jit_word_t i1); +static jit_reloc_t bltr_u(jit_state_t * _jit, int32_t r0, int32_t r1); +static jit_reloc_t blti_u(jit_state_t * _jit, int32_t r0, jit_word_t i1); +static jit_reloc_t bler(jit_state_t * _jit, int32_t r0, int32_t r1); +static jit_reloc_t blei(jit_state_t * _jit, int32_t r0, jit_word_t i1); +static jit_reloc_t bler_u(jit_state_t * _jit, int32_t r0, int32_t r1); +static jit_reloc_t blei_u(jit_state_t * _jit, int32_t r0, jit_word_t i1); +static jit_reloc_t beqr(jit_state_t * _jit, int32_t r0, int32_t r1); +static jit_reloc_t beqi(jit_state_t * _jit, int32_t r0, jit_word_t i1); +static jit_reloc_t bger(jit_state_t * _jit, int32_t r0, int32_t r1); +static jit_reloc_t 
bgei(jit_state_t * _jit, int32_t r0, jit_word_t i1); +static jit_reloc_t bger_u(jit_state_t * _jit, int32_t r0, int32_t r1); +static jit_reloc_t bgei_u(jit_state_t * _jit, int32_t r0, jit_word_t i1); +static jit_reloc_t bgtr(jit_state_t * _jit, int32_t r0, int32_t r1); +static jit_reloc_t bgti(jit_state_t * _jit, int32_t r0, jit_word_t i1); +static jit_reloc_t bgtr_u(jit_state_t * _jit, int32_t r0, int32_t r1); +static jit_reloc_t bgti_u(jit_state_t * _jit, int32_t r0, jit_word_t i1); +static jit_reloc_t bner(jit_state_t * _jit, int32_t r0, int32_t r1); +static jit_reloc_t bnei(jit_state_t * _jit, int32_t r0, jit_word_t i1); + +static jit_reloc_t bmsr(jit_state_t * _jit, int32_t r0, int32_t r1); +static jit_reloc_t bmsi(jit_state_t * _jit, int32_t r0, jit_word_t i1); +static jit_reloc_t bmcr(jit_state_t * _jit, int32_t r0, int32_t r1); +static jit_reloc_t bmci(jit_state_t * _jit, int32_t r0, jit_word_t i1); +static jit_reloc_t boaddr(jit_state_t * _jit, int32_t r0, int32_t r1); +static jit_reloc_t boaddi(jit_state_t * _jit, int32_t r0, jit_word_t i1); +static jit_reloc_t boaddr_u(jit_state_t * _jit, int32_t r0, int32_t r1); +static jit_reloc_t boaddi_u(jit_state_t * _jit, int32_t r0, jit_word_t i1); +static jit_reloc_t bxaddr(jit_state_t * _jit, int32_t r0, int32_t r1); +static jit_reloc_t bxaddi(jit_state_t * _jit, int32_t r0, jit_word_t i1); +static jit_reloc_t bxaddr_u(jit_state_t * _jit, int32_t r0, int32_t r1); +static jit_reloc_t bxaddi_u(jit_state_t * _jit, int32_t r0, jit_word_t i1); +static jit_reloc_t bosubr(jit_state_t * _jit, int32_t r0, int32_t r1); +static jit_reloc_t bosubi(jit_state_t * _jit, int32_t r0, jit_word_t i1); +static jit_reloc_t bosubr_u(jit_state_t * _jit, int32_t r0, int32_t r1); +static jit_reloc_t bosubi_u(jit_state_t * _jit, int32_t r0, jit_word_t i1); +static jit_reloc_t bxsubr(jit_state_t * _jit, int32_t r0, int32_t r1); +static jit_reloc_t bxsubi(jit_state_t * _jit, int32_t r0, jit_word_t i1); +static jit_reloc_t 
bxsubr_u(jit_state_t * _jit, int32_t r0, int32_t r1); +static jit_reloc_t bxsubi_u(jit_state_t * _jit, int32_t r0, jit_word_t i1); + +static void str_c(jit_state_t * _jit, int32_t r0, int32_t r1); +static void str_s(jit_state_t * _jit, int32_t r0, int32_t r1); +static void str_i(jit_state_t * _jit, int32_t r0, int32_t r1); +#if __WORDSIZE == 64 +static void str_l(jit_state_t * _jit, int32_t r0, int32_t r1); +#endif + +static void sti_c(jit_state_t * _jit, jit_word_t i0, int32_t r0); +static void sti_s(jit_state_t * _jit, jit_word_t i0, int32_t r0); +static void sti_i(jit_state_t * _jit, jit_word_t i0, int32_t r0); +#if __WORDSIZE == 64 +static void sti_l(jit_state_t * _jit, jit_word_t i0, int32_t r0); +#endif + +static void stxr_c(jit_state_t * _jit, int32_t r0, int32_t r1, int32_t r2); +static void stxr_s(jit_state_t * _jit, int32_t r0, int32_t r1, int32_t r2); +static void stxr_i(jit_state_t * _jit, int32_t r0, int32_t r1, int32_t r2); +#if __WORDSIZE == 64 +static void stxr_l(jit_state_t * _jit, int32_t r0, int32_t r1, int32_t r2); +#endif + +static void stxi_c(jit_state_t * _jit, jit_word_t i0, int32_t r0, + int32_t r1); +static void stxi_s(jit_state_t * _jit, jit_word_t i0, int32_t r0, + int32_t r1); +static void stxi_i(jit_state_t * _jit, jit_word_t i0, int32_t r0, + int32_t r1); +#if __WORDSIZE == 64 +static void stxi_l(jit_state_t * _jit, jit_word_t i0, int32_t r0, + int32_t r1); +#endif + +static void ldr_c(jit_state_t * _jit, int32_t r0, int32_t r1); +static void ldr_uc(jit_state_t * _jit, int32_t r0, int32_t r1); +static void ldr_s(jit_state_t * _jit, int32_t r0, int32_t r1); +static void ldr_us(jit_state_t * _jit, int32_t r0, int32_t r1); +static void ldr_i(jit_state_t * _jit, int32_t r0, int32_t r1); +#if __WORDSIZE == 64 +static void ldr_ui(jit_state_t * _jit, int32_t r0, int32_t r1); +static void ldr_l(jit_state_t * _jit, int32_t r0, int32_t r1); +#endif + +static void ldi_c(jit_state_t * _jit, int32_t r0, jit_word_t i0); +static void 
ldi_uc(jit_state_t * _jit, int32_t r0, jit_word_t i0); +static void ldi_s(jit_state_t * _jit, int32_t r0, jit_word_t i0); +static void ldi_us(jit_state_t * _jit, int32_t r0, jit_word_t i0); +static void ldi_i(jit_state_t * _jit, int32_t r0, jit_word_t i0); +#if __WORDSIZE == 64 +static void ldi_ui(jit_state_t * _jit, int32_t r0, jit_word_t i0); +static void ldi_l(jit_state_t * _jit, int32_t r0, jit_word_t i0); +#endif + +static void ldxr_c(jit_state_t * _jit, int32_t r0, int32_t r1, int32_t r2); +static void ldxr_uc(jit_state_t * _jit, int32_t r0, int32_t r1, + int32_t r2); +static void ldxr_s(jit_state_t * _jit, int32_t r0, int32_t r1, int32_t r2); +static void ldxr_us(jit_state_t * _jit, int32_t r0, int32_t r1, + int32_t r2); +static void ldxr_i(jit_state_t * _jit, int32_t r0, int32_t r1, int32_t r2); +#if __WORDSIZE == 64 +static void ldxr_ui(jit_state_t * _jit, int32_t r0, int32_t r1, + int32_t r2); +static void ldxr_l(jit_state_t * _jit, int32_t r0, int32_t r1, int32_t r2); +#endif + +static void ldxi_c(jit_state_t * _jit, int32_t r0, int32_t r1, + jit_word_t i0); +static void ldxi_uc(jit_state_t * _jit, int32_t r0, int32_t r1, + jit_word_t i0); +static void ldxi_us(jit_state_t * _jit, int32_t r0, int32_t r1, + jit_word_t i0); +static void ldxi_s(jit_state_t * _jit, int32_t r0, int32_t r1, + jit_word_t i0); +static void ldxi_i(jit_state_t * _jit, int32_t r0, int32_t r1, + jit_word_t i0); +#if __WORDSIZE == 64 +static void ldxi_ui(jit_state_t * _jit, int32_t r0, int32_t r1, + jit_word_t i0); +static void ldxi_l(jit_state_t * _jit, int32_t r0, int32_t r1, + jit_word_t i0); +#endif + +static void ret(jit_state_t * _jit); +static void retr(jit_state_t * _jit, int32_t r0); +static void reti(jit_state_t * _jit, jit_word_t i0); +static void retval_c(jit_state_t * _jit, int32_t r0); +static void retval_uc(jit_state_t * _jit, int32_t r0); +static void retval_s(jit_state_t * _jit, int32_t r0); +static void retval_us(jit_state_t * _jit, int32_t r0); +static void 
retval_i(jit_state_t * _jit, int32_t r0); +#if __WORDSIZE == 64 +static void retval_ui(jit_state_t * _jit, int32_t r0); +static void retval_l(jit_state_t * _jit, int32_t r0); +#endif + +static uint32_t patch_jump(uint32_t inst, int32_t offset); +static jit_reloc_t emit_jump(jit_state_t * _jit, uint32_t inst, uint32_t delay_slot); + +static void callr(jit_state_t * _jit, int32_t r0); +static void calli(jit_state_t * _jit, jit_word_t i0); +static void jmpi_with_link(jit_state_t * _jit, jit_word_t i0); +static void pop_link_register(jit_state_t * _jit); +static void push_link_register(jit_state_t * _jit); +static void jmpr(jit_state_t * _jit, int32_t r0); +static void jmpi(jit_state_t * _jit, jit_word_t i0); +static jit_reloc_t jmp(jit_state_t * _jit); + +static void ldr_atomic(jit_state_t * _jit, int32_t dst, int32_t loc); +static void str_atomic(jit_state_t * _jit, int32_t loc, int32_t val); +static void swap_atomic(jit_state_t * _jit, int32_t dst, int32_t loc, + int32_t val); +static void cas_atomic(jit_state_t * _jit, int32_t dst, int32_t loc, + int32_t expected, int32_t desired); + +static void bswapr_us(jit_state_t * _jit, int32_t r0, int32_t r1); +static void bswapr_ui(jit_state_t * _jit, int32_t r0, int32_t r1); +#if __WORDSIZE == 64 +static void bswapr_ul(jit_state_t * _jit, int32_t r0, int32_t r1); +#endif + +static void nop(jit_state_t * _jit, int32_t i0); +static void breakpoint(jit_state_t * _jit); + +#define rn(x) jit_gpr_regno(x) + +static void +addr(jit_state_t * _jit, int32_t r0, int32_t r1, int32_t r2) +{ + em_wp(_jit, _WADDR(r0, r1, r2)); +} + +static void +addi(jit_state_t * _jit, int32_t r0, int32_t r1, jit_word_t i0) +{ + if (i0 == 0) + movr(_jit, r0, r1); + else { + jit_gpr_t t0 = get_temp_gpr(_jit); + movi(_jit, rn(t0), i0); + em_wp(_jit, _WADDR(r0, r1, rn(t0))); + unget_temp_gpr(_jit); + } +} + +static void +addcr(jit_state_t * _jit, int32_t r0, int32_t r1, int32_t r2) +{ + if (r0 == r1) { + jit_gpr_t t0 = get_temp_gpr(_jit); + em_wp(_jit, 
_WADDR(rn(t0), r1, r2)); + em_wp(_jit, _SLTU(rn(JIT_CARRY), rn(t0), r1)); + movr(_jit, r0, rn(t0)); + unget_temp_gpr(_jit); + } else { + em_wp(_jit, _WADDR(r0, r1, r2)); + em_wp(_jit, _SLTU(rn(JIT_CARRY), r0, r1)); + } +} + +static void +addci(jit_state_t * _jit, int32_t r0, int32_t r1, jit_word_t i0) +{ + jit_gpr_t t0 = get_temp_gpr(_jit); + if (r0 == r1) { + if (can_sign_extend_short_p(i0)) + em_wp(_jit, _WADDIU(rn(t0), r1, i0)); + else { + movi(_jit, rn(t0), i0); + em_wp(_jit, _WADDR(rn(t0), r1, rn(t0))); + } + em_wp(_jit, _SLTU(rn(JIT_CARRY), rn(t0), r1)); + movr(_jit, r0, rn(t0)); + } else { + if (can_sign_extend_short_p(i0)) + em_wp(_jit, _WADDIU(r0, r1, i0)); + else { + movi(_jit, rn(t0), i0); + em_wp(_jit, _WADDR(r0, r1, rn(t0))); + } + em_wp(_jit, _SLTU(rn(JIT_CARRY), r0, r1)); + } + + unget_temp_gpr(_jit); +} + +static void +addxr(jit_state_t * _jit, int32_t r0, int32_t r1, int32_t r2) +{ + jit_gpr_t t0 = get_temp_gpr(_jit); + movr(_jit, rn(t0), rn(JIT_CARRY)); + addcr(_jit, r0, r1, r2); + addcr(_jit, r0, r0, rn(t0)); + unget_temp_gpr(_jit); +} + +static void +addxi(jit_state_t * _jit, int32_t r0, int32_t r1, jit_word_t i0) +{ + jit_gpr_t t0 = get_temp_gpr(_jit); + movr(_jit, rn(t0), rn(JIT_CARRY)); + addci(_jit, r0, r1, i0); + addcr(_jit, r0, r0, rn(t0)); + unget_temp_gpr(_jit); +} + +static void +subr(jit_state_t * _jit, int32_t r0, int32_t r1, int32_t r2) +{ + em_wp(_jit, _WSUBR(r0, r1, r2)); +} + +static void +subi(jit_state_t * _jit, int32_t r0, int32_t r1, jit_word_t i0) +{ + if (i0 == 0) + movr(_jit, r0, r1); + else if (can_sign_extend_short_p(i0) && (i0 & 0xffff) != 0x8000) + em_wp(_jit, _WADDIU(r0, r1, -i0)); + else { + jit_gpr_t t0 = get_temp_gpr(_jit); + movi(_jit, rn(t0), i0); + em_wp(_jit, _WSUBR(r0, r1, rn(t0))); + unget_temp_gpr(_jit); + } +} + +static void +subcr(jit_state_t * _jit, int32_t r0, int32_t r1, int32_t r2) +{ + if (r0 == r1) { + jit_gpr_t t0 = get_temp_gpr(_jit); + em_wp(_jit, _WSUBR(rn(t0), r1, r2)); + em_wp(_jit, 
_SLTU(rn(JIT_CARRY), r1, rn(t0))); + movr(_jit, r0, rn(t0)); + unget_temp_gpr(_jit); + } else { + em_wp(_jit, _WSUBR(r0, r1, r2)); + em_wp(_jit, _SLTU(rn(JIT_CARRY), r1, r0)); + } +} + +static void +subci(jit_state_t * _jit, int32_t r0, int32_t r1, jit_word_t i0) +{ + jit_gpr_t t0 = get_temp_gpr(_jit); + if (r0 == r1) { + if (can_sign_extend_short_p(i0) && (i0 & 0xffff) != 0x8000) + em_wp(_jit, _WADDIU(rn(t0), r1, -i0)); + else { + movi(_jit, rn(t0), i0); + em_wp(_jit, _WSUBR(rn(t0), r1, rn(t0))); + } + em_wp(_jit, _SLTU(rn(JIT_CARRY), r1, rn(t0))); + movr(_jit, r0, rn(t0)); + } else { + if (can_sign_extend_short_p(i0) && (i0 & 0xffff) != 0x8000) + em_wp(_jit, _WADDIU(r0, r1, -i0)); + else { + movi(_jit, rn(t0), i0); + em_wp(_jit, _WSUBR(r0, r1, rn(t0))); + } + em_wp(_jit, _SLTU(rn(JIT_CARRY), r1, r0)); + } + unget_temp_gpr(_jit); +} + +static void +subxr(jit_state_t * _jit, int32_t r0, int32_t r1, int32_t r2) +{ + jit_gpr_t t0 = get_temp_gpr(_jit); + movr(_jit, rn(t0), rn(JIT_CARRY)); + subcr(_jit, r0, r1, r2); + subcr(_jit, r0, r0, rn(t0)); + unget_temp_gpr(_jit); +} + +static void +subxi(jit_state_t * _jit, int32_t r0, int32_t r1, jit_word_t i0) +{ + jit_gpr_t t0 = get_temp_gpr(_jit); + movr(_jit, rn(t0), rn(JIT_CARRY)); + subci(_jit, r0, r1, i0); + subcr(_jit, r0, r0, rn(t0)); + unget_temp_gpr(_jit); +} + +static void +mulr(jit_state_t * _jit, int32_t r0, int32_t r1, int32_t r2) +{ + em_wp(_jit, _WMULTU(r1, r2)); + em_wp(_jit, _MFLO(r0)); +} + +static void +muli(jit_state_t * _jit, int32_t r0, int32_t r1, jit_word_t i0) +{ + jit_gpr_t t0 = get_temp_gpr(_jit); + movi(_jit, rn(t0), i0); + mulr(_jit, r0, r1, rn(t0)); + unget_temp_gpr(_jit); +} + +static void +divr(jit_state_t * _jit, int32_t r0, int32_t r1, int32_t r2) +{ + em_wp(_jit, _WDIV(r1, r2)); + em_wp(_jit, _MFLO(r0)); +} + +static void +divi(jit_state_t * _jit, int32_t r0, int32_t r1, jit_word_t i0) +{ + jit_gpr_t t0 = get_temp_gpr(_jit); + movi(_jit, rn(t0), i0); + divr(_jit, r0, r1, rn(t0)); + 
unget_temp_gpr(_jit); +} + +static void +divr_u(jit_state_t * _jit, int32_t r0, int32_t r1, int32_t r2) +{ + em_wp(_jit, _WDIVU(r1, r2)); + em_wp(_jit, _MFLO(r0)); +} + +static void +divi_u(jit_state_t * _jit, int32_t r0, int32_t r1, jit_word_t i0) +{ + jit_gpr_t t0 = get_temp_gpr(_jit); + movi(_jit, rn(t0), i0); + divr_u(_jit, r0, r1, rn(t0)); + unget_temp_gpr(_jit); +} + +static void +remr(jit_state_t * _jit, int32_t r0, int32_t r1, int32_t r2) +{ + em_wp(_jit, _WDIV(r1, r2)); + em_wp(_jit, _MFHI(r0)); +} + +static void +remi(jit_state_t * _jit, int32_t r0, int32_t r1, jit_word_t i0) +{ + jit_gpr_t t0 = get_temp_gpr(_jit); + movi(_jit, rn(t0), i0); + remr(_jit, r0, r1, rn(t0)); + unget_temp_gpr(_jit); +} + +static void +remr_u(jit_state_t * _jit, int32_t r0, int32_t r1, int32_t r2) +{ + em_wp(_jit, _WDIVU(r1, r2)); + em_wp(_jit, _MFHI(r0)); +} + +static void +remi_u(jit_state_t * _jit, int32_t r0, int32_t r1, jit_word_t i0) +{ + jit_gpr_t t0 = get_temp_gpr(_jit); + movi(_jit, rn(t0), i0); + remr_u(_jit, r0, r1, rn(t0)); + unget_temp_gpr(_jit); +} + +static void +lshr(jit_state_t * _jit, int32_t r0, int32_t r1, int32_t r2) +{ + em_wp(_jit, _WSLLV(r0, r1, r2)); +} + +static void +rshr(jit_state_t * _jit, int32_t r0, int32_t r1, int32_t r2) +{ + em_wp(_jit, _WSRAV(r0, r1, r2)); +} + +static void +rshr_u(jit_state_t * _jit, int32_t r0, int32_t r1, int32_t r2) +{ + em_wp(_jit, _WSRLV(r0, r1, r2)); +} + +#if __WORDSIZE == 64 +static void +lshi(jit_state_t * _jit, int32_t r0, int32_t r1, jit_word_t i0) +{ + assert(i0 >= 0 && i0 <= 63); + if (i0 < 32) + em_wp(_jit, _DSLL(r0, r1, i0)); + else + em_wp(_jit, _DSLL32(r0, r1, i0 - 32)); +} + +static void +rshi(jit_state_t * _jit, int32_t r0, int32_t r1, jit_word_t i0) +{ + assert(i0 >= 0 && i0 <= 63); + if (i0 < 32) + em_wp(_jit, _DSRA(r0, r1, i0)); + else + em_wp(_jit, _DSRA32(r0, r1, i0 - 32)); +} + +static void +rshi_u(jit_state_t * _jit, int32_t r0, int32_t r1, jit_word_t i0) +{ + assert(i0 >= 0 && i0 <= 63); + if (i0 < 
32) + em_wp(_jit, _DSRL(r0, r1, i0)); + else + em_wp(_jit, _DSRL32(r0, r1, i0 - 32)); +} +#else +static void +lshi(jit_state_t * _jit, int32_t r0, int32_t r1, jit_word_t i0) +{ + em_wp(_jit, _SLL(r0, r1, i0)); +} + +static void +rshi(jit_state_t * _jit, int32_t r0, int32_t r1, jit_word_t i0) +{ + em_wp(_jit, _SRA(r0, r1, i0)); +} + +static void +rshi_u(jit_state_t * _jit, int32_t r0, int32_t r1, jit_word_t i0) +{ + em_wp(_jit, _SRL(r0, r1, i0)); +} +#endif + +static void +iqmulr(jit_state_t * _jit, int32_t r0, int32_t r1, + int32_t r2, int32_t r3, jit_bool_t sign) +{ + if (sign) + em_wp(_jit, _WMULT(r2, r3)); + else + em_wp(_jit, _WMULTU(r2, r3)); + + em_wp(_jit, _MFLO(r0)); + em_wp(_jit, _MFHI(r1)); +} + +static void +iqmuli(jit_state_t * _jit, int32_t r0, int32_t r1, + int32_t r2, jit_word_t i0, jit_bool_t sign) +{ + jit_gpr_t t0 = get_temp_gpr(_jit); + movi(_jit, rn(t0), i0); + iqmulr(_jit, r0, r1, r2, rn(t0), sign); + unget_temp_gpr(_jit); +} + +static void +qmulr(jit_state_t * _jit, int32_t r0, int32_t r1, int32_t r2, int32_t r3) +{ + iqmulr(_jit, r0, r1, r2, r3, 1); +} + +static void +qmulr_u(jit_state_t * _jit, int32_t r0, int32_t r1, int32_t r2, int32_t r3) +{ + iqmulr(_jit, r0, r1, r2, r3, 0); +} + +static void +qmuli(jit_state_t * _jit, int32_t r0, int32_t r1, int32_t r2, + jit_word_t i0) +{ + iqmuli(_jit, r0, r1, r2, i0, 1); +} + +static void +qmuli_u(jit_state_t * _jit, int32_t r0, int32_t r1, int32_t r2, + jit_word_t i0) +{ + iqmuli(_jit, r0, r1, r2, i0, 0); +} + +static void +iqdivr(jit_state_t * _jit, int32_t r0, int32_t r1, + int32_t r2, int32_t r3, jit_bool_t sign) +{ + if (sign) + em_wp(_jit, _WDIV(r2, r3)); + else + em_wp(_jit, _WDIVU(r2, r3)); + + em_wp(_jit, _MFLO(r0)); + em_wp(_jit, _MFHI(r1)); +} + +static void +iqdivi(jit_state_t * _jit, int32_t r0, int32_t r1, + int32_t r2, jit_word_t i0, jit_bool_t sign) +{ + jit_gpr_t t0 = get_temp_gpr(_jit); + movi(_jit, rn(t0), i0); + iqdivr(_jit, r0, r1, r2, rn(t0), sign); + unget_temp_gpr(_jit); +} + 
+static void +qdivr(jit_state_t * _jit, int32_t r0, int32_t r1, int32_t r2, int32_t r3) +{ + iqdivr(_jit, r0, r1, r2, r3, 1); +} + +static void +qdivr_u(jit_state_t * _jit, int32_t r0, int32_t r1, int32_t r2, int32_t r3) +{ + iqdivr(_jit, r0, r1, r2, r3, 0); +} + +static void +qdivi(jit_state_t * _jit, int32_t r0, int32_t r1, int32_t r2, + jit_word_t i0) +{ + iqdivi(_jit, r0, r1, r2, i0, 1); +} + +static void +qdivi_u(jit_state_t * _jit, int32_t r0, int32_t r1, int32_t r2, + jit_word_t i0) +{ + iqdivi(_jit, r0, r1, r2, i0, 0); +} + +static void +negr(jit_state_t * _jit, int32_t r0, int32_t r1) +{ + subr(_jit, r0, rn(_ZERO), r1); +} + +static void +comr(jit_state_t * _jit, int32_t r0, int32_t r1) +{ + xori(_jit, r0, r1, -1); +} + +static void +andr(jit_state_t * _jit, int32_t r0, int32_t r1, int32_t r2) +{ + em_wp(_jit, _AND(r0, r1, r2)); +} + +static void +andi(jit_state_t * _jit, int32_t r0, int32_t r1, jit_word_t i0) +{ + if (can_zero_extend_short_p(i0)) + em_wp(_jit, _ANDI(r0, r1, i0)); + else { + jit_gpr_t t0 = get_temp_gpr(_jit); + movi(_jit, rn(t0), i0); + em_wp(_jit, _AND(r0, r1, rn(t0))); + unget_temp_gpr(_jit); + } +} + +static void +orr(jit_state_t * _jit, int32_t r0, int32_t r1, int32_t r2) +{ + em_wp(_jit, _OR(r0, r1, r2)); +} + +static void +ori(jit_state_t * _jit, int32_t r0, int32_t r1, jit_word_t i0) +{ + if (can_zero_extend_short_p(i0)) + em_wp(_jit, _ORI(r0, r1, i0)); + else { + jit_gpr_t t0 = get_temp_gpr(_jit); + movi(_jit, rn(t0), i0); + orr(_jit, r0, r1, rn(t0)); + unget_temp_gpr(_jit); + } +} + +static void +xorr(jit_state_t * _jit, int32_t r0, int32_t r1, int32_t r2) +{ + em_wp(_jit, _XOR(r0, r1, r2)); +} + +static void +xori(jit_state_t * _jit, int32_t r0, int32_t r1, jit_word_t i0) +{ + if (can_zero_extend_short_p(i0)) + em_wp(_jit, _XORI(r0, r1, i0 & 0xffff)); + else { + jit_gpr_t t0 = get_temp_gpr(_jit); + movi(_jit, rn(t0), i0); + xorr(_jit, r0, r1, rn(t0)); + unget_temp_gpr(_jit); + } +} + +static void +movr(jit_state_t * _jit, int32_t 
r0, int32_t r1) +{ + if (r0 != r1) + em_wp(_jit, _OR(r0, r1, rn(_ZERO))); +} + +static void +movi(jit_state_t * _jit, int32_t r0, jit_word_t i0) +{ + if (i0 == 0) + em_wp(_jit, _OR(r0, rn(_ZERO), rn(_ZERO))); + else if (can_sign_extend_short_p(i0)) + em_wp(_jit, _WADDIU(r0, rn(_ZERO), i0)); + else if (can_zero_extend_short_p(i0)) + em_wp(_jit, _ORI(r0, rn(_ZERO), i0)); + else { + if (can_sign_extend_int_p(i0)) + em_wp(_jit, _LUI(r0, i0 >> 16)); + else if (can_zero_extend_int_p(i0)) { + if (i0 & 0xffff0000) { + em_wp(_jit, _ORI(r0, rn(_ZERO), (i0 >> 16) & 0xffff)); + lshi(_jit, r0, r0, 16); + } + } +#if __WORDSIZE == 64 + else { + movi(_jit, r0, (jit_uword_t) i0 >> 32); + if (i0 & 0xffff0000) { + lshi(_jit, r0, r0, 16); + em_wp(_jit, _ORI(r0, r0, (i0 >> 16) & 0xffff)); + lshi(_jit, r0, r0, 16); + } else + lshi(_jit, r0, r0, 32); + } +#endif + if (i0 & 0xffff) + em_wp(_jit, _ORI(r0, r0, i0 & 0xffff)); + } +} + +typedef struct { +#if __WORDSIZE == 64 + instr_t lui; + instr_t ori2; + instr_t dsl1; + instr_t ori1; + instr_t dsl0; +#else + instr_t lui; +#endif + instr_t ori0; +} immediate_t; + +/* + * TODO: does this work for both BE and LE? + */ +static void +patch_immediate_reloc(uint32_t * loc, jit_pointer_t addr) +{ + immediate_t *i = (immediate_t *) loc; + jit_word_t a = (jit_word_t) addr; +#if __WORDSIZE == 64 + i->lui.I.i0 = a >> 48; + i->ori2.I.i0 = a >> 32; + i->ori1.I.i0 = a >> 16; +#else + i->lui.I.i0 = a >> 16; +#endif + i->ori0.I.i0 = a & 0xffff; +} + +static void +emit_immediate_reloc(jit_state_t * _jit, int32_t r0, jit_bool_t in_veneer) +{ + void (*emit)(jit_state_t * _jit, uint32_t u32) = + in_veneer ? 
emit_u32 : emit_u32_with_pool; + +#if __WORDSIZE == 64 + emit(_jit, _LUI(r0, 0)); + emit(_jit, _ORI(r0, r0, 0)); + emit(_jit, _DSLL(r0, r0, 16)); + emit(_jit, _ORI(r0, r0, 0)); + emit(_jit, _DSLL(r0, r0, 16)); +#else + emit(_jit, _LUI(r0, 0)); +#endif + emit(_jit, _ORI(r0, r0, 0)); +} + +static jit_reloc_t +movi_from_immediate(jit_state_t * _jit, int32_t r0) +{ + uint8_t *pc_base = _jit->pc.uc; + jit_reloc_t w = + jit_reloc(_jit, JIT_RELOC_IMMEDIATE, 0, _jit->pc.uc, pc_base, 0); + emit_immediate_reloc(_jit, r0, 0); + + return w; +} + +static jit_reloc_t +mov_addr(jit_state_t * _jit, int32_t r0) +{ + return movi_from_immediate(_jit, r0); +} + +static void +ldr_c(jit_state_t * _jit, int32_t r0, int32_t r1) +{ + em_wp(_jit, _LB(r0, 0, r1)); +} + +static void +ldr_uc(jit_state_t * _jit, int32_t r0, int32_t r1) +{ + em_wp(_jit, _LBU(r0, 0, r1)); +} + +static void +ldr_s(jit_state_t * _jit, int32_t r0, int32_t r1) +{ + em_wp(_jit, _LH(r0, 0, r1)); +} + +static void +ldr_us(jit_state_t * _jit, int32_t r0, int32_t r1) +{ + em_wp(_jit, _LHU(r0, 0, r1)); +} + +static void +ldr_i(jit_state_t * _jit, int32_t r0, int32_t r1) +{ + em_wp(_jit, _LW(r0, 0, r1)); +} + +#if __WORDSIZE == 64 +static void +ldr_ui(jit_state_t * _jit, int32_t r0, int32_t r1) +{ + em_wp(_jit, _LWU(r0, 0, r1)); +} + +static void +ldr_l(jit_state_t * _jit, int32_t r0, int32_t r1) +{ + em_wp(_jit, _LD(r0, 0, r1)); +} +#endif + +static void +ldi_c(jit_state_t * _jit, int32_t r0, jit_word_t i0) +{ + if (can_sign_extend_short_p(i0)) + em_wp(_jit, _LB(r0, i0, rn(_ZERO))); + else { + jit_gpr_t t0 = get_temp_gpr(_jit); + movi(_jit, rn(t0), i0); + ldr_c(_jit, r0, rn(t0)); + unget_temp_gpr(_jit); + } +} + +static void +ldi_uc(jit_state_t * _jit, int32_t r0, jit_word_t i0) +{ + if (can_sign_extend_short_p(i0)) + em_wp(_jit, _LBU(r0, i0, rn(_ZERO))); + else { + jit_gpr_t t0 = get_temp_gpr(_jit); + movi(_jit, rn(t0), i0); + ldr_uc(_jit, r0, rn(t0)); + unget_temp_gpr(_jit); + } +} + +static void +ldi_s(jit_state_t * 
_jit, int32_t r0, jit_word_t i0) +{ + if (can_sign_extend_short_p(i0)) + em_wp(_jit, _LH(r0, i0, rn(_ZERO))); + else { + jit_gpr_t t0 = get_temp_gpr(_jit); + movi(_jit, rn(t0), i0); + ldr_s(_jit, r0, rn(t0)); + unget_temp_gpr(_jit); + } +} + +static void +ldi_us(jit_state_t * _jit, int32_t r0, jit_word_t i0) +{ + if (can_sign_extend_short_p(i0)) + em_wp(_jit, _LHU(r0, i0, rn(_ZERO))); + else { + jit_gpr_t t0 = get_temp_gpr(_jit); + movi(_jit, rn(t0), i0); + ldr_us(_jit, r0, rn(t0)); + unget_temp_gpr(_jit); + } +} + +static void +ldi_i(jit_state_t * _jit, int32_t r0, jit_word_t i0) +{ + if (can_sign_extend_short_p(i0)) + em_wp(_jit, _LW(r0, i0, rn(_ZERO))); + else { + jit_gpr_t t0 = get_temp_gpr(_jit); + movi(_jit, rn(t0), i0); + ldr_i(_jit, r0, rn(t0)); + unget_temp_gpr(_jit); + } +} + +#if __WORDSIZE == 64 +static void +ldi_ui(jit_state_t * _jit, int32_t r0, jit_word_t i0) +{ + if (can_sign_extend_short_p(i0)) + em_wp(_jit, _LWU(r0, i0, rn(_ZERO))); + else { + jit_gpr_t t0 = get_temp_gpr(_jit); + movi(_jit, rn(t0), i0); + ldr_ui(_jit, r0, rn(t0)); + unget_temp_gpr(_jit); + } +} + +static void +ldi_l(jit_state_t * _jit, int32_t r0, jit_word_t i0) +{ + if (can_sign_extend_short_p(i0)) + em_wp(_jit, _LD(r0, i0, rn(_ZERO))); + else { + jit_gpr_t t0 = get_temp_gpr(_jit); + movi(_jit, rn(t0), i0); + ldr_l(_jit, r0, rn(t0)); + unget_temp_gpr(_jit); + } +} +#endif + +static void +ldxr_c(jit_state_t * _jit, int32_t r0, int32_t r1, int32_t r2) +{ + jit_gpr_t t0 = get_temp_gpr(_jit); + em_wp(_jit, _WADDR(rn(t0), r1, r2)); + ldr_c(_jit, r0, rn(t0)); + unget_temp_gpr(_jit); +} + +static void +ldxi_c(jit_state_t * _jit, int32_t r0, int32_t r1, jit_word_t i0) +{ + if (can_sign_extend_short_p(i0)) + em_wp(_jit, _LB(r0, i0, r1)); + else { + jit_gpr_t t0 = get_temp_gpr(_jit); + addi(_jit, rn(t0), r1, i0); + ldr_c(_jit, r0, rn(t0)); + unget_temp_gpr(_jit); + } +} + +static void +ldxr_uc(jit_state_t * _jit, int32_t r0, int32_t r1, int32_t r2) +{ + jit_gpr_t t0 = get_temp_gpr(_jit); + 
em_wp(_jit, _WADDR(rn(t0), r1, r2)); + ldr_uc(_jit, r0, rn(t0)); + unget_temp_gpr(_jit); +} + +static void +ldxi_uc(jit_state_t * _jit, int32_t r0, int32_t r1, jit_word_t i0) +{ + if (can_sign_extend_short_p(i0)) + em_wp(_jit, _LBU(r0, i0, r1)); + else { + jit_gpr_t t0 = get_temp_gpr(_jit); + addi(_jit, rn(t0), r1, i0); + ldr_uc(_jit, r0, rn(t0)); + unget_temp_gpr(_jit); + } +} + +static void +ldxr_s(jit_state_t * _jit, int32_t r0, int32_t r1, int32_t r2) +{ + jit_gpr_t t0 = get_temp_gpr(_jit); + em_wp(_jit, _WADDR(rn(t0), r1, r2)); + ldr_s(_jit, r0, rn(t0)); + unget_temp_gpr(_jit); +} + +static void +ldxi_s(jit_state_t * _jit, int32_t r0, int32_t r1, jit_word_t i0) +{ + if (can_sign_extend_short_p(i0)) + em_wp(_jit, _LH(r0, i0, r1)); + else { + jit_gpr_t t0 = get_temp_gpr(_jit); + addi(_jit, rn(t0), r1, i0); + ldr_s(_jit, r0, rn(t0)); + unget_temp_gpr(_jit); + } +} + +static void +ldxr_us(jit_state_t * _jit, int32_t r0, int32_t r1, int32_t r2) +{ + jit_gpr_t t0 = get_temp_gpr(_jit); + em_wp(_jit, _WADDR(rn(t0), r1, r2)); + ldr_us(_jit, r0, rn(t0)); + unget_temp_gpr(_jit); +} + +static void +ldxi_us(jit_state_t * _jit, int32_t r0, int32_t r1, jit_word_t i0) +{ + if (can_sign_extend_short_p(i0)) + em_wp(_jit, _LHU(r0, i0, r1)); + else { + jit_gpr_t t0 = get_temp_gpr(_jit); + addi(_jit, rn(t0), r1, i0); + ldr_us(_jit, r0, rn(t0)); + unget_temp_gpr(_jit); + } +} + +static void +ldxr_i(jit_state_t * _jit, int32_t r0, int32_t r1, int32_t r2) +{ + jit_gpr_t t0 = get_temp_gpr(_jit); + em_wp(_jit, _WADDR(rn(t0), r1, r2)); + ldr_i(_jit, r0, rn(t0)); + unget_temp_gpr(_jit); +} + +static void +ldxi_i(jit_state_t * _jit, int32_t r0, int32_t r1, jit_word_t i0) +{ + if (can_sign_extend_short_p(i0)) + em_wp(_jit, _LW(r0, i0, r1)); + else { + jit_gpr_t t0 = get_temp_gpr(_jit); + addi(_jit, rn(t0), r1, i0); + ldr_i(_jit, r0, rn(t0)); + unget_temp_gpr(_jit); + } +} + +#if __WORDSIZE == 64 +static void +ldxr_ui(jit_state_t * _jit, int32_t r0, int32_t r1, int32_t r2) +{ + jit_gpr_t t0 
= get_temp_gpr(_jit); + em_wp(_jit, _WADDR(rn(t0), r1, r2)); + ldr_ui(_jit, r0, rn(t0)); + unget_temp_gpr(_jit); +} + +static void +ldxi_ui(jit_state_t * _jit, int32_t r0, int32_t r1, jit_word_t i0) +{ + if (can_sign_extend_short_p(i0)) + em_wp(_jit, _LWU(r0, i0, r1)); + else { + jit_gpr_t t0 = get_temp_gpr(_jit); + addi(_jit, rn(t0), r1, i0); + ldr_ui(_jit, r0, rn(t0)); + unget_temp_gpr(_jit); + } +} + +static void +ldxr_l(jit_state_t * _jit, int32_t r0, int32_t r1, int32_t r2) +{ + jit_gpr_t t0 = get_temp_gpr(_jit); + em_wp(_jit, _WADDR(rn(t0), r1, r2)); + ldr_l(_jit, r0, rn(t0)); + unget_temp_gpr(_jit); +} + +static void +ldxi_l(jit_state_t * _jit, int32_t r0, int32_t r1, jit_word_t i0) +{ + if (can_sign_extend_short_p(i0)) + em_wp(_jit, _LD(r0, i0, r1)); + else { + jit_gpr_t t0 = get_temp_gpr(_jit); + addi(_jit, rn(t0), r1, i0); + ldr_l(_jit, r0, rn(t0)); + unget_temp_gpr(_jit); + } +} +#endif + +static void +str_c(jit_state_t * _jit, int32_t r0, int32_t r1) +{ + em_wp(_jit, _SB(r1, 0, r0)); +} + +static void +str_s(jit_state_t * _jit, int32_t r0, int32_t r1) +{ + em_wp(_jit, _SH(r1, 0, r0)); +} + +static void +str_i(jit_state_t * _jit, int32_t r0, int32_t r1) +{ + em_wp(_jit, _SW(r1, 0, r0)); +} + +#if __WORDSIZE == 64 +static void +str_l(jit_state_t * _jit, int32_t r0, int32_t r1) +{ + em_wp(_jit, _SD(r1, 0, r0)); +} +#endif + +static void +sti_c(jit_state_t * _jit, jit_word_t i0, int32_t r0) +{ + if (can_sign_extend_short_p(i0)) + em_wp(_jit, _SB(r0, i0, rn(_ZERO))); + else { + jit_gpr_t t0 = get_temp_gpr(_jit); + movi(_jit, rn(t0), i0); + str_c(_jit, rn(t0), r0); + unget_temp_gpr(_jit); + } +} + +static void +sti_s(jit_state_t * _jit, jit_word_t i0, int32_t r0) +{ + if (can_sign_extend_short_p(i0)) + em_wp(_jit, _SH(r0, i0, rn(_ZERO))); + else { + jit_gpr_t t0 = get_temp_gpr(_jit); + movi(_jit, rn(t0), i0); + str_s(_jit, rn(t0), r0); + unget_temp_gpr(_jit); + } +} + +static void +sti_i(jit_state_t * _jit, jit_word_t i0, int32_t r0) +{ + if 
(can_sign_extend_short_p(i0)) + em_wp(_jit, _SW(r0, i0, rn(_ZERO))); + else { + jit_gpr_t t0 = get_temp_gpr(_jit); + movi(_jit, rn(t0), i0); + str_i(_jit, rn(t0), r0); + unget_temp_gpr(_jit); + } +} + +#if __WORDSIZE == 64 +static void +sti_l(jit_state_t * _jit, jit_word_t i0, int32_t r0) +{ + if (can_sign_extend_short_p(i0)) + em_wp(_jit, _SD(r0, i0, rn(_ZERO))); + else { + jit_gpr_t t0 = get_temp_gpr(_jit); + movi(_jit, rn(t0), i0); + str_l(_jit, rn(t0), r0); + unget_temp_gpr(_jit); + } +} +#endif + +static void +stxr_c(jit_state_t * _jit, int32_t r0, int32_t r1, int32_t r2) +{ + jit_gpr_t t0 = get_temp_gpr(_jit); + em_wp(_jit, _WADDR(rn(t0), r0, r1)); + str_c(_jit, rn(t0), r2); + unget_temp_gpr(_jit); +} + +static void +stxi_c(jit_state_t * _jit, jit_word_t i0, int32_t r0, int32_t r1) +{ + if (can_sign_extend_short_p(i0)) + em_wp(_jit, _SB(r1, i0, r0)); + else { + jit_gpr_t t0 = get_temp_gpr(_jit); + addi(_jit, rn(t0), r0, i0); + str_c(_jit, rn(t0), r1); + unget_temp_gpr(_jit); + } +} + +static void +stxr_s(jit_state_t * _jit, int32_t r0, int32_t r1, int32_t r2) +{ + jit_gpr_t t0 = get_temp_gpr(_jit); + em_wp(_jit, _WADDR(rn(t0), r0, r1)); + str_s(_jit, rn(t0), r2); + unget_temp_gpr(_jit); +} + +static void +stxi_s(jit_state_t * _jit, jit_word_t i0, int32_t r0, int32_t r1) +{ + if (can_sign_extend_short_p(i0)) + em_wp(_jit, _SH(r1, i0, r0)); + else { + jit_gpr_t t0 = get_temp_gpr(_jit); + addi(_jit, rn(t0), r0, i0); + str_s(_jit, rn(t0), r1); + unget_temp_gpr(_jit); + } +} + +static void +stxr_i(jit_state_t * _jit, int32_t r0, int32_t r1, int32_t r2) +{ + jit_gpr_t t0 = get_temp_gpr(_jit); + em_wp(_jit, _WADDR(rn(t0), r0, r1)); + str_i(_jit, rn(t0), r2); + unget_temp_gpr(_jit); +} + +static void +stxi_i(jit_state_t * _jit, jit_word_t i0, int32_t r0, int32_t r1) +{ + if (can_sign_extend_short_p(i0)) + em_wp(_jit, _SW(r1, i0, r0)); + else { + jit_gpr_t t0 = get_temp_gpr(_jit); + addi(_jit, rn(t0), r0, i0); + str_i(_jit, rn(t0), r1); + unget_temp_gpr(_jit); + } +} 
+ +#if __WORDSIZE == 64 +/* Store r2 through str_l at the address r0 + r1, which is formed in a temporary. */ static void +stxr_l(jit_state_t * _jit, int32_t r0, int32_t r1, int32_t r2) +{ + jit_gpr_t t0 = get_temp_gpr(_jit); + em_wp(_jit, _WADDR(rn(t0), r0, r1)); + str_l(_jit, rn(t0), r2); + unget_temp_gpr(_jit); +} + /* + * Store r1 at r0 + i0: a single SD when i0 passes can_sign_extend_short_p, + * otherwise the address is first computed into a temporary with addi. + */ +static void +stxi_l(jit_state_t * _jit, jit_word_t i0, int32_t r0, int32_t r1) +{ + if (can_sign_extend_short_p(i0)) + em_wp(_jit, _SD(r1, i0, r0)); + else { + jit_gpr_t t0 = get_temp_gpr(_jit); + addi(_jit, rn(t0), r0, i0); + str_l(_jit, rn(t0), r1); + unget_temp_gpr(_jit); + } +} +#endif + /* + * r0 = ((r1 & 0xff) << 8) | ((r1 >> 8) & 0xff), i.e. the two low bytes of + * r1 exchanged. r1 is read into the temporary before r0 is written, so + * r0 and r1 may be the same register. + */ +static void +bswapr_us(jit_state_t * _jit, int32_t r0, int32_t r1) +{ + jit_gpr_t t0 = get_temp_gpr(_jit); + rshi(_jit, rn(t0), r1, 8); + andi(_jit, r0, r1, 0xff); + andi(_jit, rn(t0), rn(t0), 0xff); + lshi(_jit, r0, r0, 8); + orr(_jit, r0, r0, rn(t0)); + unget_temp_gpr(_jit); +} + /* + * Reverse the four low bytes of r1 into r0. With bN = (r1 >> 8*N) & 0xff + * the result is (b0 << 24) | (b1 << 16) | (b2 << 8) | b3. All three + * shifted copies of r1 are taken before r0 is first written. + */ +static void +bswapr_ui(jit_state_t * _jit, int32_t r0, int32_t r1) +{ + jit_gpr_t t0 = get_temp_gpr(_jit); + jit_gpr_t t1 = get_temp_gpr(_jit); + jit_gpr_t t2 = get_temp_gpr(_jit); + rshi(_jit, rn(t0), r1, 24); + rshi(_jit, rn(t1), r1, 16); + rshi(_jit, rn(t2), r1, 8); + andi(_jit, rn(t0), rn(t0), 0xff); /* t0 = b3 */ + andi(_jit, rn(t1), rn(t1), 0xff); /* t1 = b2 */ + andi(_jit, rn(t2), rn(t2), 0xff); /* t2 = b1 */ + andi(_jit, r0, r1, 0xff); /* r0 = b0 */ + lshi(_jit, r0, r0, 24); + lshi(_jit, rn(t1), rn(t1), 8); + orr(_jit, r0, r0, rn(t0)); + lshi(_jit, rn(t2), rn(t2), 16); + orr(_jit, r0, r0, rn(t1)); + orr(_jit, r0, r0, rn(t2)); + unget_temp_gpr(_jit); + unget_temp_gpr(_jit); + unget_temp_gpr(_jit); +} + +#if __WORDSIZE == 64 +/* Reverse all eight bytes of r1 into r0: each 32-bit half is reversed with bswapr_ui and the halves are exchanged. */ static void +bswapr_ul(jit_state_t * _jit, int32_t r0, int32_t r1) +{ + jit_gpr_t t0 = get_temp_gpr(_jit); + rshi_u(_jit, rn(t0), r1, 32); /* save the upper half before r0 may overwrite r1 */ + bswapr_ui(_jit, r0, r1); + bswapr_ui(_jit, rn(t0), rn(t0)); + lshi(_jit, r0, r0, 32); /* reversed low half becomes the new upper half */ + orr(_jit, r0, r0, rn(t0)); + unget_temp_gpr(_jit); +} +#endif + /* r0 = r1 & 0xff. */ +static void +extr_uc(jit_state_t * _jit, int32_t r0, int32_t r1) +{ + em_wp(_jit, _ANDI(r0, r1, 0xff)); +} + +static void +extr_us(jit_state_t * _jit, int32_t r0, int32_t r1) +{ + em_wp(_jit, _ANDI(r0, 
r1, 0xffff)); +} + +static void +extr_c(jit_state_t * _jit, int32_t r0, int32_t r1) +{ + if (jit_mips2_p()) + em_wp(_jit, _SEB(r0, r1)); + else { + lshi(_jit, r0, r1, __WORDSIZE - 8); + rshi(_jit, r0, r0, __WORDSIZE - 8); + } +} + +static void +extr_s(jit_state_t * _jit, int32_t r0, int32_t r1) +{ + if (jit_mips2_p()) + em_wp(_jit, _SEH(r0, r1)); + else { + lshi(_jit, r0, r1, __WORDSIZE - 16); + rshi(_jit, r0, r0, __WORDSIZE - 16); + } +} + +#if __WORDSIZE == 64 +static void +extr_i(jit_state_t * _jit, int32_t r0, int32_t r1) +{ + em_wp(_jit, _SLL(r0, r1, 0)); +} + +static void +extr_ui(jit_state_t * _jit, int32_t r0, int32_t r1) +{ + lshi(_jit, r0, r1, 32); + rshi_u(_jit, r0, r0, 32); +} +#endif + +static jit_reloc_t +bltr(jit_state_t * _jit, int32_t r0, int32_t r1) +{ + jit_gpr_t t0 = get_temp_gpr(_jit); + em_wp(_jit, _SLT(rn(t0), r0, r1)); + jit_reloc_t w = emit_jump(_jit, _BNE(rn(t0), rn(_ZERO), 0), _NOP(1)); + unget_temp_gpr(_jit); + + return (w); +} + +static jit_reloc_t +bltr_u(jit_state_t * _jit, int32_t r0, int32_t r1) +{ + jit_gpr_t t0 = get_temp_gpr(_jit); + em_wp(_jit, _SLTU(rn(t0), r0, r1)); + jit_reloc_t w = emit_jump(_jit, _BNE(rn(t0), rn(_ZERO), 0), _NOP(1)); + unget_temp_gpr(_jit); + + return (w); +} + +static jit_reloc_t +blti(jit_state_t * _jit, int32_t r0, jit_word_t i0) +{ + jit_gpr_t t0 = get_temp_gpr(_jit); + movi(_jit, rn(t0), i0); + jit_reloc_t w = bltr(_jit, r0, rn(t0)); + unget_temp_gpr(_jit); + + return (w); +} + +static jit_reloc_t +blti_u(jit_state_t * _jit, int32_t r0, jit_word_t i0) +{ + jit_gpr_t t0 = get_temp_gpr(_jit); + movi(_jit, rn(t0), i0); + jit_reloc_t w = bltr_u(_jit, r0, rn(t0)); + unget_temp_gpr(_jit); + + return (w); +} + +static jit_reloc_t +bler(jit_state_t * _jit, int32_t r0, int32_t r1) +{ + return bger(_jit, r1, r0); +} + +static jit_reloc_t +bler_u(jit_state_t * _jit, int32_t r0, int32_t r1) +{ + return bger_u(_jit, r1, r0); +} + +static jit_reloc_t +blei(jit_state_t * _jit, int32_t r0, jit_word_t i0) +{ + 
jit_gpr_t t0 = get_temp_gpr(_jit); + movi(_jit, rn(t0), i0); + jit_reloc_t w = bler(_jit, r0, rn(t0)); + unget_temp_gpr(_jit); + + return (w); +} + +static jit_reloc_t +blei_u(jit_state_t * _jit, int32_t r0, jit_word_t i0) +{ + jit_gpr_t t0 = get_temp_gpr(_jit); + movi(_jit, rn(t0), i0); + jit_reloc_t w = bler_u(_jit, r0, rn(t0)); + unget_temp_gpr(_jit); + + return (w); +} + +static jit_reloc_t +beqr(jit_state_t * _jit, int32_t r0, int32_t r1) +{ + jit_reloc_t w = emit_jump(_jit, _BEQ(r0, r1, 0), _NOP(1)); + return w; +} + +static jit_reloc_t +beqi(jit_state_t * _jit, int32_t r0, jit_word_t i0) +{ + jit_gpr_t t0 = get_temp_gpr(_jit); + movi(_jit, rn(t0), i0); + jit_reloc_t w = beqr(_jit, r0, rn(t0)); + unget_temp_gpr(_jit); + + return (w); +} + +static jit_reloc_t +bger(jit_state_t * _jit, int32_t r0, int32_t r1) +{ + jit_gpr_t t0 = get_temp_gpr(_jit); + em_wp(_jit, _SLT(rn(t0), r0, r1)); + jit_reloc_t w = beqr(_jit, rn(t0), rn(_ZERO)); + unget_temp_gpr(_jit); + + return (w); +} + +static jit_reloc_t +bger_u(jit_state_t * _jit, int32_t r0, int32_t r1) +{ + jit_gpr_t t0 = get_temp_gpr(_jit); + em_wp(_jit, _SLTU(rn(t0), r0, r1)); + jit_reloc_t w = beqr(_jit, rn(t0), rn(_ZERO)); + unget_temp_gpr(_jit); + + return (w); +} + +static jit_reloc_t +bgei(jit_state_t * _jit, int32_t r0, jit_word_t i0) +{ + jit_gpr_t t0 = get_temp_gpr(_jit); + movi(_jit, rn(t0), i0); + jit_reloc_t w = bger(_jit, r0, rn(t0)); + unget_temp_gpr(_jit); + + return (w); +} + +static jit_reloc_t +bgei_u(jit_state_t * _jit, int32_t r0, jit_word_t i0) +{ + jit_gpr_t t0 = get_temp_gpr(_jit); + movi(_jit, rn(t0), i0); + jit_reloc_t w = bger_u(_jit, r0, rn(t0)); + unget_temp_gpr(_jit); + + return (w); +} + +static jit_reloc_t +bgtr(jit_state_t * _jit, int32_t r0, int32_t r1) +{ + return bltr(_jit, r1, r0); +} + +static jit_reloc_t +bgtr_u(jit_state_t * _jit, int32_t r0, int32_t r1) +{ + return bltr_u(_jit, r1, r0); +} + +static jit_reloc_t +bgti(jit_state_t * _jit, int32_t r0, jit_word_t i0) +{ + 
jit_gpr_t t0 = get_temp_gpr(_jit); + movi(_jit, rn(t0), i0); + jit_reloc_t w = bgtr(_jit, r0, rn(t0)); + unget_temp_gpr(_jit); + + return (w); +} + +static jit_reloc_t +bgti_u(jit_state_t * _jit, int32_t r0, jit_word_t i0) +{ + jit_gpr_t t0 = get_temp_gpr(_jit); + movi(_jit, rn(t0), i0); + jit_reloc_t w = bgtr_u(_jit, r0, rn(t0)); + unget_temp_gpr(_jit); + + return (w); +} + +static jit_reloc_t +bner(jit_state_t * _jit, int32_t r0, int32_t r1) +{ + jit_reloc_t w = emit_jump(_jit, _BNE(r0, r1, 0), _NOP(1)); + + return (w); +} + +static jit_reloc_t +bnei(jit_state_t * _jit, int32_t r0, jit_word_t i0) +{ + jit_gpr_t t0 = get_temp_gpr(_jit); + movi(_jit, rn(t0), i0); + jit_reloc_t w = bner(_jit, r0, rn(t0)); + unget_temp_gpr(_jit); + + return (w); +} + +static int32_t +read_jmp_offset(uint32_t * loc) +{ + instr_t *i = (instr_t *) loc; + return i->I.i0 + 1; +} + +static int32_t +read_jcc_offset(uint32_t * loc) +{ + return read_jmp_offset(loc); +} + +static void +patch_jmp_offset(uint32_t * loc, ptrdiff_t offset) +{ + assert(simm16_p(offset - 1)); + instr_t *i = (instr_t *) loc; + i->I.i0 = offset - 1; +} + +static void +patch_jcc_offset(uint32_t * loc, ptrdiff_t offset) +{ + patch_jmp_offset(loc, offset); +} + +static void +patch_veneer_jmp_offset(uint32_t * loc, ptrdiff_t offset) +{ + patch_jmp_offset(loc, offset); +} + +static void +patch_veneer(uint32_t * loc, jit_pointer_t addr) +{ + patch_immediate_reloc(loc, addr); +} + +static void +emit_veneer(jit_state_t * _jit, jit_pointer_t target) +{ + jit_pointer_t veneer = jit_address(_jit); + emit_immediate_reloc(_jit, rn(_AT), 1); + + /* in some rare cases we can run into overflow in emit_immediate_reloc, + * and since patch_veneer uses patch_immediate_reloc it assumes all + * instructions are available */ + if (!jit_has_overflow(_jit)) + patch_veneer(veneer, target); + + emit_u32(_jit, _JR(rn(_AT))); + /* branch delay slot */ + emit_u32(_jit, _NOP(1)); +} + +static void +patch_veneer_jcc_offset(uint32_t * loc, ptrdiff_t 
offset) +{ + patch_jcc_offset(loc, offset); +} + /* Return non-zero when offset - 1 satisfies simm16_p; flags is ignored. */ +static int +offset_in_jmp_range(ptrdiff_t offset, int flags) +{ + (void) flags; + return simm16_p(offset - 1); +} + /* Conditional jumps use the same range check as unconditional ones. */ +static int +offset_in_jcc_range(ptrdiff_t offset, int flags) +{ + return offset_in_jmp_range(offset, flags); +} + /* Return inst with its I-type immediate field replaced by offset - 1. */ +static uint32_t +patch_jump(uint32_t inst, int32_t offset) +{ + instr_t i; + i.w = inst; + i.I.i0 = offset - 1; + return i.w; +} + /* + * Record a JIT_RELOC_JMP_WITH_VENEER relocation at the current pc, then emit + * inst followed by delay_slot and return the relocation. The attempt is + * repeated until add_pending_literal accepts the relocation. + */ +static jit_reloc_t +emit_jump(jit_state_t * _jit, uint32_t inst, uint32_t delay_slot) +{ + while (1) { + uint8_t *pc_base = _jit->pc.uc; + int32_t off = ((uint8_t *)jit_address(_jit)) - pc_base; + jit_reloc_t w = + jit_reloc(_jit, JIT_RELOC_JMP_WITH_VENEER, 0, _jit->pc.uc, + pc_base, + 2); + uint8_t jump_width = 16; + + if (add_pending_literal(_jit, w, jump_width - 1)) { + emit_u32(_jit, patch_jump(inst, off >> 2)); + em_wp(_jit, delay_slot); + return w; + } + } +} + /* Jump to the address held in r0; a NOP is emitted right after the JR. */ +static void +jmpr(jit_state_t * _jit, int32_t r0) +{ + emit_u32(_jit, _JR(r0)); + em_wp(_jit, _NOP(1)); +} + /* Jump to i0 while setting the link register: identical to calli. */ +static void +jmpi_with_link(jit_state_t * _jit, jit_word_t i0) +{ + calli(_jit, i0); +} + /* + * Jump to the absolute address i0. J encodes only the low 28 bits of the + * target and takes every higher bit from the address of the instruction + * that follows it, so J is used only when all of those higher bits agree + * with i0; otherwise i0 is loaded into a temporary and reached via jmpr. + */ +static void +jmpi(jit_state_t * _jit, jit_word_t i0) +{ + /* + * Build the region mask at full word width. A bare 0xf0000000 is a + * 32-bit constant, so on 64-bit targets bits 32..63 would never be + * compared and J could be picked for a target in a different region. + */ + const jit_uword_t region = ~(jit_uword_t) 0x0fffffff; + const jit_uword_t target = (jit_uword_t) i0; + const jit_uword_t slot = (jit_uword_t) _jit->pc.w + sizeof(int32_t); + + if ((slot & region) == (target & region)) { + emit_u32(_jit, _J((target & ~region) >> 2)); + em_wp(_jit, _NOP(1)); + } else { + jit_gpr_t t0 = get_temp_gpr(_jit); + movi(_jit, rn(t0), i0); + jmpr(_jit, rn(t0)); + unget_temp_gpr(_jit); + } +} + /* No-op on this backend. */ +static void +pop_link_register(jit_state_t * _jit) +{ + (void) _jit; /* unused */ +} + /* No-op on this backend. */ +static void +push_link_register(jit_state_t * _jit) +{ + (void) _jit; /* unused */ +} + /* Emit an unconditional PC-relative jump and return its relocation. */ +static jit_reloc_t +jmp(jit_state_t * _jit) +{ + /* + * BEQ works as unconditional jump in this case, J and the like aren't + * PC relative + */ + jit_reloc_t w = emit_jump(_jit, _BEQ(rn(_ZERO), rn(_ZERO), 0), _NOP(1)); + + return (w); +} + +static jit_reloc_t +boaddr(jit_state_t * _jit, int32_t r0, int32_t r1) +{ + /* + * t1 = r0 + r1; overflow = r1 < 0 ? 
r0 < t1 : t1 < r0 + */ + jit_gpr_t t0 = get_temp_gpr(_jit); + jit_gpr_t t1 = get_temp_gpr(_jit); + jit_gpr_t t2 = get_temp_gpr(_jit); + + em_wp(_jit, _SLT(rn(t0), r1, rn(_ZERO))); /* t0 = r1 < 0 */ + + addr(_jit, rn(t1), r0, r1); /* t1 = r0 + r1 */ + + em_wp(_jit, _SLT(rn(t2), rn(t1), r0)); /* t2 = t1 < r0 */ + em_wp(_jit, _SLT(rn(t1), r0, rn(t1))); /* t1 = r0 < t1 */ + em_wp(_jit, _MOVZ(rn(t1), rn(t2), rn(t0))); /* r1 >= 0: use t2, as in the formula above */ + /* Branch when t1 != 0; the add into r0 is passed as emit_jump's delay_slot. */ + jit_reloc_t w = emit_jump(_jit, _BNE(rn(_ZERO), rn(t1), 0), _WADDR(r0, r0, r1)); + + unget_temp_gpr(_jit); + unget_temp_gpr(_jit); + unget_temp_gpr(_jit); + + return (w); +} + /* Immediate form of boaddr: i0 is loaded into a temporary and boaddr does the rest. */ +static jit_reloc_t +boaddi(jit_state_t * _jit, int32_t r0, jit_word_t i0) +{ + jit_gpr_t t0 = get_temp_gpr(_jit); + movi(_jit, rn(t0), i0); + jit_reloc_t w = boaddr(_jit, r0, rn(t0)); + unget_temp_gpr(_jit); + + return (w); +} + /* + * Unsigned add with branch on carry: the sum r0 + r1 is formed in a + * temporary and the branch is taken when the sum compares below r0 + * (SLTU). The move of the sum into r0 is passed as emit_jump's delay_slot. + */ +static jit_reloc_t +boaddr_u(jit_state_t * _jit, int32_t r0, int32_t r1) +{ + jit_gpr_t t0 = get_temp_gpr(_jit); + jit_gpr_t t1 = get_temp_gpr(_jit); + + addr(_jit, rn(t0), r0, r1); /* t0 = r0 + r1 */ + + em_wp(_jit, _SLTU(rn(t1), rn(t0), r0)); /* t1 = t0 < r0 (unsigned) */ + jit_reloc_t w = emit_jump(_jit, _BNE(rn(_ZERO), rn(t1), 0), _MOVR(r0, rn(t0))); + + unget_temp_gpr(_jit); + unget_temp_gpr(_jit); + + return (w); +} + /* Immediate form of boaddr_u: i0 is loaded into a temporary and boaddr_u does the rest. */ +static jit_reloc_t +boaddi_u(jit_state_t * _jit, int32_t r0, jit_word_t i0) +{ + jit_gpr_t t0 = get_temp_gpr(_jit); + movi(_jit, rn(t0), i0); + jit_reloc_t w = boaddr_u(_jit, r0, rn(t0)); + unget_temp_gpr(_jit); + + return (w); +} + +static jit_reloc_t +bxaddr(jit_state_t * _jit, int32_t r0, int32_t r1) +{ + /* + * t1 = r0 + r1; overflow = r1 < 0 ? 
r0 < t1 : t1 < r0 + */ + jit_gpr_t t0 = get_temp_gpr(_jit); + jit_gpr_t t1 = get_temp_gpr(_jit); + jit_gpr_t t2 = get_temp_gpr(_jit); + + addr(_jit, rn(t0), r0, r1); + em_wp(_jit, _SLTI(rn(t1), r1, 0)); + em_wp(_jit, _SLT(rn(t2), rn(t0), r0)); + jit_reloc_t w = emit_jump(_jit, _BEQ(rn(t1), rn(t2), 0), _MOVR(r0, rn(t0))); + + unget_temp_gpr(_jit); + unget_temp_gpr(_jit); + unget_temp_gpr(_jit); + + return (w); +} + +static jit_reloc_t +bxaddi(jit_state_t * _jit, int32_t r0, jit_word_t i0) +{ + jit_gpr_t t0 = get_temp_gpr(_jit); + movi(_jit, rn(t0), i0); + jit_reloc_t w = bxaddr(_jit, r0, rn(t0)); + unget_temp_gpr(_jit); + + return (w); +} + +static jit_reloc_t +bxaddr_u(jit_state_t * _jit, int32_t r0, int32_t r1) +{ + jit_gpr_t t0 = get_temp_gpr(_jit); + jit_gpr_t t1 = get_temp_gpr(_jit); + + addr(_jit, rn(t0), r0, r1); + + em_wp(_jit, _SLTU(rn(t1), rn(t0), r0)); + jit_reloc_t w = emit_jump(_jit, _BEQ(rn(_ZERO), rn(t1), 0), _MOVR(r0, rn(t0))); + + unget_temp_gpr(_jit); + unget_temp_gpr(_jit); + + return (w); +} + +static jit_reloc_t +bxaddi_u(jit_state_t * _jit, int32_t r0, jit_word_t i0) +{ + jit_gpr_t t0 = get_temp_gpr(_jit); + movi(_jit, rn(t0), i0); + jit_reloc_t w = bxaddr_u(_jit, r0, rn(t0)); + unget_temp_gpr(_jit); + + return (w); +} + +static jit_reloc_t +bosubr(jit_state_t * _jit, int32_t r0, int32_t r1) +{ + /* + * t1 = r0 - r1; overflow = 0 < r1 ? 
r0 < t1 : t1 < r0 + */ + jit_gpr_t t0 = get_temp_gpr(_jit); + jit_gpr_t t1 = get_temp_gpr(_jit); + jit_gpr_t t2 = get_temp_gpr(_jit); + + subr(_jit, rn(t0), r0, r1); + + em_wp(_jit, _SLTI(rn(t1), r1, 0)); + em_wp(_jit, _SLT(rn(t2), r0, rn(t0))); + jit_reloc_t w = emit_jump(_jit, _BNE(rn(t1), rn(t2), 0), _MOVR(r0, rn(t0))); + + unget_temp_gpr(_jit); + unget_temp_gpr(_jit); + unget_temp_gpr(_jit); + + return (w); +} + +static jit_reloc_t +bosubi(jit_state_t * _jit, int32_t r0, jit_word_t i0) +{ + jit_gpr_t t0 = get_temp_gpr(_jit); + movi(_jit, rn(t0), i0); + jit_reloc_t w = bosubr(_jit, r0, rn(t0)); + unget_temp_gpr(_jit); + + return (w); +} + +static jit_reloc_t +bosubr_u(jit_state_t * _jit, int32_t r0, int32_t r1) +{ + jit_gpr_t t0 = get_temp_gpr(_jit); + jit_gpr_t t1 = get_temp_gpr(_jit); + + subr(_jit, rn(t0), r0, r1); + + em_wp(_jit, _SLTU(rn(t1), r0, rn(t0))); + jit_reloc_t w = emit_jump(_jit, _BNE(rn(_ZERO), rn(t1), 0), _MOVR(r0, rn(t0))); + + unget_temp_gpr(_jit); + unget_temp_gpr(_jit); + + return (w); +} + +static jit_reloc_t +bosubi_u(jit_state_t * _jit, int32_t r0, jit_word_t i0) +{ + jit_gpr_t t0 = get_temp_gpr(_jit); + movi(_jit, rn(t0), i0); + jit_reloc_t w = bosubr_u(_jit, r0, rn(t0)); + unget_temp_gpr(_jit); + + return (w); +} + +static jit_reloc_t +bxsubr(jit_state_t * _jit, int32_t r0, int32_t r1) +{ + jit_gpr_t t0 = get_temp_gpr(_jit); + jit_gpr_t t1 = get_temp_gpr(_jit); + jit_gpr_t t2 = get_temp_gpr(_jit); + + subr(_jit, rn(t0), r0, r1); + + em_wp(_jit, _SLTI(rn(t1), r1, 0)); + em_wp(_jit, _SLT(rn(t2), r0, rn(t0))); + jit_reloc_t w = emit_jump(_jit, _BEQ(rn(t1), rn(t2), 0), _MOVR(r0, rn(t0))); + + unget_temp_gpr(_jit); + unget_temp_gpr(_jit); + unget_temp_gpr(_jit); + + return (w); +} + +static jit_reloc_t +bxsubi(jit_state_t * _jit, int32_t r0, jit_word_t i0) +{ + jit_gpr_t t0 = get_temp_gpr(_jit); + movi(_jit, rn(t0), i0); + jit_reloc_t w = bxsubr(_jit, r0, rn(t0)); + unget_temp_gpr(_jit); + + return (w); +} + +static jit_reloc_t 
+bxsubr_u(jit_state_t * _jit, int32_t r0, int32_t r1) +{ + jit_gpr_t t0 = get_temp_gpr(_jit); + jit_gpr_t t1 = get_temp_gpr(_jit); + + subr(_jit, rn(t0), r0, r1); + + em_wp(_jit, _SLTU(rn(t1), r0, rn(t0))); + jit_reloc_t w = emit_jump(_jit, _BEQ(rn(_ZERO), rn(t1), 0), _MOVR(r0, rn(t0))); + + unget_temp_gpr(_jit); + unget_temp_gpr(_jit); + + return (w); +} + +static jit_reloc_t +bxsubi_u(jit_state_t * _jit, int32_t r0, jit_word_t i0) +{ + jit_gpr_t t0 = get_temp_gpr(_jit); + movi(_jit, rn(t0), i0); + jit_reloc_t w = bxsubr_u(_jit, r0, rn(t0)); + unget_temp_gpr(_jit); + + return (w); +} + +static jit_reloc_t +bmsr(jit_state_t * _jit, int32_t r0, int32_t r1) +{ + jit_gpr_t t0 = get_temp_gpr(_jit); + andr(_jit, rn(t0), r0, r1); + jit_reloc_t w = bner(_jit, rn(t0), rn(_ZERO)); + unget_temp_gpr(_jit); + + return (w); +} + +static jit_reloc_t +bmsi(jit_state_t * _jit, int32_t r0, jit_word_t i0) +{ + jit_gpr_t t0 = get_temp_gpr(_jit); + andi(_jit, rn(t0), r0, i0); + jit_reloc_t w = bner(_jit, rn(t0), rn(_ZERO)); + unget_temp_gpr(_jit); + + return (w); +} + +static jit_reloc_t +bmcr(jit_state_t * _jit, int32_t r0, int32_t r1) +{ + jit_gpr_t t0 = get_temp_gpr(_jit); + andr(_jit, rn(t0), r0, r1); + jit_reloc_t w = beqr(_jit, rn(t0), rn(_ZERO)); + unget_temp_gpr(_jit); + + return (w); +} + +static jit_reloc_t +bmci(jit_state_t * _jit, int32_t r0, jit_word_t i0) +{ + jit_gpr_t t0 = get_temp_gpr(_jit); + andi(_jit, rn(t0), r0, i0); + jit_reloc_t w = beqr(_jit, rn(t0), rn(_ZERO)); + unget_temp_gpr(_jit); + + return (w); +} + +static void +callr(jit_state_t * _jit, int32_t r0) +{ + if (r0 != rn(_T9)) + movr(_jit, rn(_T9), r0); + + emit_u32(_jit, _JALR(rn(_RA), rn(_T9))); + em_wp(_jit, _NOP(1)); +} + +static void +calli(jit_state_t * _jit, jit_word_t i0) +{ + movi(_jit, rn(_T9), i0); + emit_u32(_jit, _JALR(rn(_RA), rn(_T9))); + em_wp(_jit, _NOP(1)); +} + +static void +ret(jit_state_t * _jit) +{ + emit_u32(_jit, _JR(rn(_RA))); + em_wp(_jit, _NOP(1)); +} + +static void 
+retr(jit_state_t * _jit, int32_t r0) +{ + if (r0 != rn(_V0)) + movr(_jit, rn(_V0), r0); + + ret(_jit); +} + +static void +reti(jit_state_t * _jit, jit_word_t i0) +{ + movi(_jit, rn(_V0), i0); + ret(_jit); +} + +static void +retval_c(jit_state_t * _jit, int32_t r0) +{ + extr_c(_jit, r0, rn(_V0)); +} + +static void +retval_uc(jit_state_t * _jit, int32_t r0) +{ + extr_uc(_jit, r0, rn(_V0)); +} + +static void +retval_s(jit_state_t * _jit, int32_t r0) +{ + extr_s(_jit, r0, rn(_V0)); +} + +static void +retval_us(jit_state_t * _jit, int32_t r0) +{ + extr_us(_jit, r0, rn(_V0)); +} + +static void +retval_i(jit_state_t * _jit, int32_t r0) +{ +#if __WORDSIZE == 64 + extr_i(_jit, r0, rn(_V0)); +#else + if (r0 != rn(_V0)) + movr(_jit, r0, rn(_V0)); +#endif +} + +#if __WORDSIZE == 64 +static void +retval_ui(jit_state_t * _jit, int32_t r0) +{ + extr_ui(_jit, r0, rn(_V0)); +} + +static void +retval_l(jit_state_t * _jit, int32_t r0) +{ + if (r0 != rn(_V0)) + movr(_jit, r0, rn(_V0)); +} +#endif + +static void +ldr_atomic(jit_state_t * _jit, int32_t dst, int32_t loc) +{ + em_wp(_jit, _SYNC(0x00)); +#if __WORDSIZE == 64 + ldr_l(_jit, dst, loc); +#else + ldr_i(_jit, dst, loc); +#endif + em_wp(_jit, _SYNC(0x00)); +} + +static void +str_atomic(jit_state_t * _jit, int32_t loc, int32_t val) +{ + em_wp(_jit, _SYNC(0x00)); +#if __WORDSIZE == 64 + str_l(_jit, loc, val); +#else + str_i(_jit, loc, val); +#endif + em_wp(_jit, _SYNC(0x00)); +} + +static void +swap_atomic(jit_state_t * _jit, int32_t dst, int32_t loc, int32_t val) +{ + jit_gpr_t t0 = get_temp_gpr(_jit); + jit_gpr_t t1 = loc == dst ? get_temp_gpr(_jit) : JIT_GPR(loc); + jit_gpr_t t2 = val == dst ? 
get_temp_gpr(_jit) : JIT_GPR(val); + + movr(_jit, rn(t1), loc); + movr(_jit, rn(t2), val); + em_wp(_jit, _SYNC(0x00)); + + void *retry = jit_address(_jit); /* start of the load-linked/store-conditional loop */ + movr(_jit, rn(t0), rn(t2)); /* reloaded every iteration: t0 is overwritten by _WSC's result */ + em_wp(_jit, _WLL(dst, 0, rn(t1))); + em_wp(_jit, _WSC(rn(t0), 0, rn(t1))); + jit_patch_there(_jit, beqr(_jit, rn(t0), rn(_ZERO)), retry); /* loop while t0 == 0 */ + + em_wp(_jit, _SYNC(0x00)); + + if (val == dst) unget_temp_gpr(_jit); + if (loc == dst) unget_temp_gpr(_jit); + unget_temp_gpr(_jit); +} + /* + * Compare-and-swap on the word at loc. The current value is load-linked + * into a temporary; if it differs from expected the store is skipped, + * otherwise desired is store-conditionally written and the sequence is + * repeated from the load while the store's result register is zero. + * dst receives the loaded value on both paths. The whole sequence is + * bracketed by SYNC instructions. + */ +static void +cas_atomic(jit_state_t * _jit, int32_t dst, int32_t loc, int32_t expected, + int32_t desired) +{ + jit_gpr_t t0 = get_temp_gpr(_jit); + jit_gpr_t t1 = get_temp_gpr(_jit); + + em_wp(_jit, _SYNC(0x00)); + void *retry = jit_address(_jit); + + movr(_jit, rn(t1), desired); /* reloaded every iteration: t1 is overwritten by _WSC's result */ + em_wp(_jit, _WLL(rn(t0), 0, loc)); + jit_reloc_t fail = bner(_jit, rn(t0), expected); /* value differs: skip the store */ + em_wp(_jit, _WSC(rn(t1), 0, loc)); + + jit_patch_there(_jit, beqr(_jit, rn(t1), rn(_ZERO)), retry); /* loop while t1 == 0 */ + jit_patch_here(_jit, fail); + em_wp(_jit, _SYNC(0x00)); + + movr(_jit, dst, rn(t0)); + + unget_temp_gpr(_jit); + unget_temp_gpr(_jit); +} + /* + * Emit one _NOP for every 4 in i0. i0 must be a non-negative multiple of + * 4, otherwise the assertion after the loop fires. + */ +static void +nop(jit_state_t * _jit, int32_t i0) +{ + for (; i0 > 0; i0 -= 4) + em_wp(_jit, _NOP()); + + assert(i0 == 0); +} + /* Emit a single SDBBP instruction. */ +static void +breakpoint(jit_state_t * _jit) +{ + /* + * interesting, Linux on qemu-system-mips64el 6.1.0 crashes when + * executing a breakpoint? + */ + em_wp(_jit, _SDBBP()); +} diff --git a/deps/lightening/lightening/mips-fpu.c b/deps/lightening/lightening/mips-fpu.c new file mode 100644 index 0000000..01c24cb --- /dev/null +++ b/deps/lightening/lightening/mips-fpu.c @@ -0,0 +1,1015 @@ +/* + * Copyright (C) 2012-2017 Free Software Foundation, Inc. + * + * This file is part of GNU lightning. + * + * GNU lightning is free software; you can redistribute it and/or modify it + * under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation; either version 3, or (at your option) + * any later version. 
+ * + * GNU lightning is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public + * License for more details. + * + * Authors: + * Paulo Cesar Pereira de Andrade + */ +#define FMT_S 0x10 /* float32 */ +#define FMT_D 0x11 /* float64 */ +#define FMT_W 0x14 /* int32 */ +#define FMT_L 0x15 /* int64 */ +#define FMT_P 0x16 /* 2 x float32 */ +#define FMT_S_PU 0x20 +#define FMT_S_PL 0x26 +#define FUN_ADD 0x00 +#define FUN_LWXC1 0x00 +#define FUN_SUB 0x01 +#define FUN_LDXC1 0x01 +#define FUN_MUL 0x02 +#define FUN_DIV 0x03 +#define FUN_SQRT 0x04 +#define FUN_ABS 0x05 +#define FUN_LUXC1 0x05 +#define FUN_MOV 0x06 +#define FUN_NEG 0x07 +#define FUN_SWXC1 0x08 +#define FUN_ROUND_L 0x08 +#define FUN_TRUNC_L 0x09 +#define FUN_SDXC1 0x09 +#define FUN_CEIL_L 0x0a +#define FUN_FLOOR_L 0x0b +#define FUN_ROUND_W 0x0c +#define FUN_TRUNC_W 0x0d +#define FUN_SUXC1 0x0d +#define FUN_CEIL_W 0x0e +#define FUN_FLOOR_W 0x0f +#define FUN_RECIP 0x15 +#define FUN_RSQRT 0x16 +#define FUN_ALNV_P 0x1e +#define FUN_CVT_S 0x20 +#define FUN_CVT_D 0x21 +#define FUN_CVT_W 0x24 +#define FUN_CVT_L 0x25 +#define FUN_PLL 0x2c +#define FUN_PLU 0x2d +#define FUN_PUL 0x2e +#define FUN_PUU 0x2f +#define FUN_MADD_S (0x20 | FMT_S) +#define FUN_MADD_D (0x20 | FMT_D) +#define FUN_MADD_P (0x20 | FMT_P) +#define FUN_MSUB_S (0x28 | FMT_S) +#define FUN_MSUB_D (0x28 | FMT_D) +#define FUN_MSUB_P (0x28 | FMT_P) +#define FUN_NMADD_S (0x30 | FMT_S) +#define FUN_NMADD_D (0x30 | FMT_D) +#define FUN_NMADD_P (0x30 | FMT_P) +#define FUN_NMSUB_S (0x38 | FMT_S) +#define FUN_NMSUB_D (0x38 | FMT_D) +#define FUN_NMSUB_P (0x38 | FMT_P) +#define COND_F 0x30 +#define COND_UN 0x31 +#define COND_EQ 0x32 +#define COND_UEQ 0x33 +#define COND_OLT 0x34 +#define COND_ULT 0x35 +#define COND_OLE 0x36 +#define COND_ULE 0x37 +#define COND_SF 0x38 +#define COND_NGLE 0x39 +#define COND_SEQ 0x3a 
+#define COND_NGL 0x3b +#define COND_LT 0x3c +#define COND_NGE 0x3d +#define COND_LE 0x3e +#define COND_UGT 0x3f + +#define _ADD_S(fd, fs, ft) Rtype(OP_COP1, FMT_S, ft, fs, fd, FUN_ADD) +#define _ADD_D(fd, fs, ft) Rtype(OP_COP1, FMT_D, ft, fs, fd, FUN_ADD) +#define _SUB_S(fd, fs, ft) Rtype(OP_COP1, FMT_S, ft, fs, fd, FUN_SUB) +#define _SUB_D(fd, fs, ft) Rtype(OP_COP1, FMT_D, ft, fs, fd, FUN_SUB) +#define _MUL_S(fd, fs, ft) Rtype(OP_COP1, FMT_S, ft, fs, fd, FUN_MUL) +#define _MUL_D(fd, fs, ft) Rtype(OP_COP1, FMT_D, ft, fs, fd, FUN_MUL) +#define _DIV_S(fd, fs, ft) Rtype(OP_COP1, FMT_S, ft, fs, fd, FUN_DIV) +#define _DIV_D(fd, fs, ft) Rtype(OP_COP1, FMT_D, ft, fs, fd, FUN_DIV) +#define _ABS_S(fd, fs) Rtype(OP_COP1, FMT_S, 00, fs, fd, FUN_ABS) +#define _ABS_D(fd, fs) Rtype(OP_COP1, FMT_D, 00, fs, fd, FUN_ABS) +#define _NEG_S(fd, fs) Rtype(OP_COP1, FMT_S, 00, fs, fd, FUN_NEG) +#define _NEG_D(fd, fs) Rtype(OP_COP1, FMT_D, 00, fs, fd, FUN_NEG) +#define _SQRT_S(fd, fs) Rtype(OP_COP1, FMT_S, 00, fs, fd, FUN_SQRT) +#define _SQRT_D(fd, fs) Rtype(OP_COP1, FMT_D, 00, fs, fd, FUN_SQRT) +#define _MFC1(rt, fs) Rtype(OP_COP1, OP_MF, rt, fs, 00, 00) +#define _MFHC1(rt, fs) Rtype(OP_COP1, OP_MFH, rt, fs, 00, 00) +#define _MTC1(rt, fs) Rtype(OP_COP1, OP_MT, rt, fs, 00, 00) +#define _MTHC1(rt, fs) Rtype(OP_COP1, OP_MTH, rt, fs, 00, 00) +#define _DMFC1(rt, fs) Rtype(OP_COP1, OP_DMF, rt, fs, 00, 00) +#define _DMTC1(rt, fs) Rtype(OP_COP1, OP_DMT, rt, fs, 00, 00) +#define _CVT_D_S(fd, fs) Rtype(OP_COP1, FMT_S, 0, fs, fd, FUN_CVT_D) +#define _CVT_D_W(fd, fs) Rtype(OP_COP1, FMT_W, 0, fs, fd, FUN_CVT_D) +#define _CVT_D_L(fd, fs) Rtype(OP_COP1, FMT_L, 0, fs, fd, FUN_CVT_D) +#define _CVT_L_S(fd, fs) Rtype(OP_COP1, FMT_S, 0, fs, fd, FUN_CVT_L) +#define _CVT_L_D(fd, fs) Rtype(OP_COP1, FMT_D, 0, fs, fd, FUN_CVT_L) +#define _CVT_P_S(fd, fs) Rtype(OP_COP1, FMT_S, 0, fs, fd, FUN_CVT_P) +#define _CVT_S_D(fd, fs) Rtype(OP_COP1, FMT_D, 0, fs, fd, FUN_CVT_S) +#define _CVT_S_W(fd, fs) Rtype(OP_COP1, 
FMT_W, 0, fs, fd, FUN_CVT_S) +#define _CVT_S_L(fd, fs) Rtype(OP_COP1, FMT_L, 0, fs, fd, FUN_CVT_S) +#define _CVT_S_PL(fd, fs) Rtype(OP_COP1, FMT_P, 0, fs, fd, FUN_CVT_S_PL) +#define _CVT_S_PU(fd, fs) Rtype(OP_COP1, FMT_P, 0, fs, fd, FUN_CVT_S_PU) +#define _CVT_W_S(fd, fs) Rtype(OP_COP1, FMT_S, 0, fs, fd, FUN_CVT_W) +#define _CVT_W_D(fd, fs) Rtype(OP_COP1, FMT_D, 0, fs, fd, FUN_CVT_W) +#define _TRUNC_L_S(fd, fs) Rtype(OP_COP1, FMT_S, 0, fs, fd, FUN_TRUNC_L) +#define _TRUNC_L_D(fd, fs) Rtype(OP_COP1, FMT_D, 0, fs, fd, FUN_TRUNC_L) +#define _TRUNC_W_S(fd, fs) Rtype(OP_COP1, FMT_S, 0, fs, fd, FUN_TRUNC_W) +#define _TRUNC_W_D(fd, fs) Rtype(OP_COP1, FMT_D, 0, fs, fd, FUN_TRUNC_W) +#define _LWC1(rt, of, rb) Itype(OP_LWC1, rb, rt, of) +#define _SWC1(rt, of, rb) Itype(OP_SWC1, rb, rt, of) +#define _LDC1(rt, of, rb) Itype(OP_LDC1, rb, rt, of) +#define _SDC1(rt, of, rb) Itype(OP_SDC1, rb, rt, of) +#define _MOV_S(fd, fs) Rtype(OP_COP1, FMT_S, 0, fs, fd, FUN_MOV) +#define _MOV_D(fd, fs) Rtype(OP_COP1, FMT_D, 0, fs, fd, FUN_MOV) +#define _BC1F(im) Itype(OP_COP1, OP_BC, OP_BCF, im) +#define _BC1T(im) Itype(OP_COP1, OP_BC, OP_BCT, im) +#define _C_F_S(fs, ft) Rtype(OP_COP1, FMT_S, ft, fs, 0, COND_F) +#define _C_F_D(fs, ft) Rtype(OP_COP1, FMT_D, ft, fs, 0, COND_F) +#define _C_F_P(fs, ft) Rtype(OP_COP1, FMT_P, ft, fs, 0, COND_F) +#define _C_UN_S(fs, ft) Rtype(OP_COP1, FMT_S, ft, fs, 0, COND_UN) +#define _C_UN_D(fs, ft) Rtype(OP_COP1, FMT_D, ft, fs, 0, COND_UN) +#define _C_UN_P(fs, ft) Rtype(OP_COP1, FMT_P, ft, fs, 0, COND_UN) +#define _C_EQ_S(fs, ft) Rtype(OP_COP1, FMT_S, ft, fs, 0, COND_EQ) +#define _C_EQ_D(fs, ft) Rtype(OP_COP1, FMT_D, ft, fs, 0, COND_EQ) +#define _C_EQ_P(fs, ft) Rtype(OP_COP1, FMT_P, ft, fs, 0, COND_EQ) +#define _C_UEQ_S(fs, ft) Rtype(OP_COP1, FMT_S, ft, fs, 0, COND_UEQ) +#define _C_UEQ_D(fs, ft) Rtype(OP_COP1, FMT_D, ft, fs, 0, COND_UEQ) +#define _C_UEQ_P(fs, ft) Rtype(OP_COP1, FMT_P, ft, fs, 0, COND_UEQ) +#define _C_OLT_S(fs, ft) Rtype(OP_COP1, FMT_S, ft, fs, 0, 
COND_OLT) +#define _C_OLT_D(fs, ft) Rtype(OP_COP1, FMT_D, ft, fs, 0, COND_OLT) +#define _C_OLT_P(fs, ft) Rtype(OP_COP1, FMT_P, ft, fs, 0, COND_OLT) +#define _C_ULT_S(fs, ft) Rtype(OP_COP1, FMT_S, ft, fs, 0, COND_ULT) +#define _C_ULT_D(fs, ft) Rtype(OP_COP1, FMT_D, ft, fs, 0, COND_ULT) +#define _C_ULT_P(fs, ft) Rtype(OP_COP1, FMT_P, ft, fs, 0, COND_ULT) +#define _C_OLE_S(fs, ft) Rtype(OP_COP1, FMT_S, ft, fs, 0, COND_OLE) +#define _C_OLE_D(fs, ft) Rtype(OP_COP1, FMT_D, ft, fs, 0, COND_OLE) +#define _C_OLE_P(fs, ft) Rtype(OP_COP1, FMT_P, ft, fs, 0, COND_OLE) +#define _C_ULE_S(fs, ft) Rtype(OP_COP1, FMT_S, ft, fs, 0, COND_ULE) +#define _C_ULE_D(fs, ft) Rtype(OP_COP1, FMT_D, ft, fs, 0, COND_ULE) +#define _C_ULE_P(fs, ft) Rtype(OP_COP1, FMT_D, ft, fs, 0, COND_ULE) +#define _C_SF_S(fs, ft) Rtype(OP_COP1, FMT_S, ft, fs, 0, COND_SF) +#define _C_SF_D(fs, ft) Rtype(OP_COP1, FMT_D, ft, fs, 0, COND_SF) +#define _C_SF_P(fs, ft) Rtype(OP_COP1, FMT_P, ft, fs, 0, COND_SF) +#define _C_NGLE_S(fs, ft) Rtype(OP_COP1, FMT_S, ft, fs, 0, COND_NGLE) +#define _C_NGLE_D(fs, ft) Rtype(OP_COP1, FMT_D, ft, fs, 0, COND_NGLE) +#define _C_NGLE_P(fs, ft) Rtype(OP_COP1, FMT_P, ft, fs, 0, COND_NGLE) +#define _C_SEQ_S(fs, ft) Rtype(OP_COP1, FMT_S, ft, fs, 0, COND_SEQ) +#define _C_SEQ_D(fs, ft) Rtype(OP_COP1, FMT_D, ft, fs, 0, COND_SEQ) +#define _C_SEQ_P(fs, ft) Rtype(OP_COP1, FMT_P, ft, fs, 0, COND_SEQ) +#define _C_NGL_S(fs, ft) Rtype(OP_COP1, FMT_S, ft, fs, 0, COND_NGL) +#define _C_NGL_D(fs, ft) Rtype(OP_COP1, FMT_D, ft, fs, 0, COND_NGL) +#define _C_NGL_P(fs, ft) Rtype(OP_COP1, FMT_P, ft, fs, 0, COND_NGL) +#define _C_NLT_S(fs, ft) Rtype(OP_COP1, FMT_S, ft, fs, 0, COND_NLT) +#define _C_NLT_D(fs, ft) Rtype(OP_COP1, FMT_D, ft, fs, 0, COND_NLT) +#define _C_NLT_P(fs, ft) Rtype(OP_COP1, FMT_P, ft, fs, 0, COND_NLT) +#define _C_NGE_S(fs, ft) Rtype(OP_COP1, FMT_S, ft, fs, 0, COND_NGE) +#define _C_NGE_D(fs, ft) Rtype(OP_COP1, FMT_D, ft, fs, 0, COND_NGE) +#define _C_NGE_P(fs, ft) Rtype(OP_COP1, FMT_P, ft, fs, 
0, COND_NGE) +#define _C_NLE_S(fs, ft) Rtype(OP_COP1, FMT_S, ft, fs, 0, COND_NLE) +#define _C_NLE_D(fs, ft) Rtype(OP_COP1, FMT_D, ft, fs, 0, COND_NLE) +#define _C_NLE_P(fs, ft) Rtype(OP_COP1, FMT_P, ft, fs, 0, COND_NLE) +#define _C_UGT_S(fs, ft) Rtype(OP_COP1, FMT_S, ft, fs, 0, COND_UGT) +#define _C_UGT_D(fs, ft) Rtype(OP_COP1, FMT_D, ft, fs, 0, COND_UGT) +#define _C_UGT_P(fs, ft) Rtype(OP_COP1, FMT_P, ft, fs, 0, COND_UGT) + +#define fn(x) jit_fpr_regno(x) + +static void addr_f(jit_state_t * _jit, int32_t r0, int32_t r1, int32_t r2); +static void addr_d(jit_state_t * _jit, int32_t r0, int32_t r1, int32_t r2); +static void subr_f(jit_state_t * _jit, int32_t r0, int32_t r1, int32_t r2); +static void subr_d(jit_state_t * _jit, int32_t r0, int32_t r1, int32_t r2); +static void mulr_f(jit_state_t * _jit, int32_t r0, int32_t r1, int32_t r2); +static void mulr_d(jit_state_t * _jit, int32_t r0, int32_t r1, int32_t r2); +static void divr_f(jit_state_t * _jit, int32_t r0, int32_t r1, int32_t r2); +static void divr_d(jit_state_t * _jit, int32_t r0, int32_t r1, int32_t r2); + +static void sqrtr_f(jit_state_t * _jit, int32_t r0, int32_t r1); +static void sqrtr_d(jit_state_t * _jit, int32_t r0, int32_t r1); +static void negr_f(jit_state_t * _jit, int32_t r0, int32_t r1); +static void negr_d(jit_state_t * _jit, int32_t r0, int32_t r1); +static void absr_f(jit_state_t * _jit, int32_t r0, int32_t r1); +static void absr_d(jit_state_t * _jit, int32_t r0, int32_t r1); + +static void movr_f(jit_state_t * _jit, int32_t r0, int32_t r1); +static void movr_d(jit_state_t * _jit, int32_t r0, int32_t r1); +#if JIT_PASS_DOUBLES_IN_GPR_PAIRS +static void movr_d_ww(jit_state_t * _jit, int32_t r0, int32_t r1, + int32_t r2); +static void movr_ww_d(jit_state_t * _jit, int32_t r0, int32_t r1, + int32_t r2); +static void stxi_ww(jit_state_t * _jit, jit_word_t o0, int32_t r0, + int32_t r1, int32_t r2); +static void ldxi_ww(jit_state_t * _jit, int32_t r0, int32_t r1, + int32_t r2, jit_word_t o0); 
+#endif +#if JIT_PASS_FLOATS_IN_GPRS +static void movr_f_w(jit_state_t * _jit, int32_t r0, int32_t r1); +static void movr_w_f(jit_state_t * _jit, int32_t r0, int32_t r1); +static void stxi_w(jit_state_t * _jit, jit_word_t o0, int32_t r0, + int32_t r1); +static void ldxi_w(jit_state_t * _jit, int32_t r0, int32_t r1, + jit_word_t o0); +#endif + +static void retval_f(jit_state_t * _jit, int32_t r0); +static void retval_d(jit_state_t * _jit, int32_t r0); +static void retr_f(jit_state_t * _jit, int32_t u); +static void retr_d(jit_state_t * _jit, int32_t u); + +static void ldr_f(jit_state_t * _jit, int32_t r0, int32_t r1); +static void ldr_d(jit_state_t * _jit, int32_t r0, int32_t r1); +static void ldi_f(jit_state_t * _jit, int32_t r0, jit_word_t i0); +static void ldxr_f(jit_state_t * _jit, int32_t r0, int32_t r1, int32_t r2); +static void ldxi_f(jit_state_t * _jit, int32_t r0, int32_t r1, + jit_word_t i0); +static void ldi_d(jit_state_t * _jit, int32_t r0, jit_word_t i0); +static void ldxr_d(jit_state_t * _jit, int32_t r0, int32_t r1, int32_t r2); +static void ldxi_d(jit_state_t * _jit, int32_t r0, int32_t r1, + jit_word_t i0); + +static void str_f(jit_state_t * _jit, int32_t r0, int32_t r1); +static void str_d(jit_state_t * _jit, int32_t r0, int32_t r1); +static void sti_f(jit_state_t * _jit, jit_word_t i0, int32_t r0); +static void stxr_f(jit_state_t * _jit, int32_t r0, int32_t r1, int32_t r2); +static void stxi_f(jit_state_t * _jit, jit_word_t i0, int32_t r0, + int32_t r1); +static void sti_d(jit_state_t * _jit, jit_word_t i0, int32_t r0); +static void stxr_d(jit_state_t * _jit, int32_t r0, int32_t r1, int32_t r2); +static void stxi_d(jit_state_t * _jit, jit_word_t i0, int32_t r0, + int32_t r1); + +static jit_reloc_t bltr_f(jit_state_t * _jit, int32_t r0, int32_t r1); +static jit_reloc_t bler_f(jit_state_t * _jit, int32_t r0, int32_t r1); +static jit_reloc_t beqr_f(jit_state_t * _jit, int32_t r0, int32_t r1); +static jit_reloc_t bger_f(jit_state_t * _jit, int32_t r0, 
int32_t r1); +static jit_reloc_t bgtr_f(jit_state_t * _jit, int32_t r0, int32_t r1); +static jit_reloc_t bner_f(jit_state_t * _jit, int32_t r0, int32_t r1); +static jit_reloc_t bunltr_f(jit_state_t * _jit, int32_t r0, int32_t r1); +static jit_reloc_t bunler_f(jit_state_t * _jit, int32_t r0, int32_t r1); +static jit_reloc_t buneqr_f(jit_state_t * _jit, int32_t r0, int32_t r1); +static jit_reloc_t bunger_f(jit_state_t * _jit, int32_t r0, int32_t r1); +static jit_reloc_t bungtr_f(jit_state_t * _jit, int32_t r0, int32_t r1); +static jit_reloc_t bltgtr_f(jit_state_t * _jit, int32_t r0, int32_t r1); +static jit_reloc_t bordr_f(jit_state_t * _jit, int32_t r0, int32_t r1); +static jit_reloc_t bunordr_f(jit_state_t * _jit, int32_t r0, int32_t r1); +static jit_reloc_t bltr_d(jit_state_t * _jit, int32_t r0, int32_t r1); +static jit_reloc_t bler_d(jit_state_t * _jit, int32_t r0, int32_t r1); +static jit_reloc_t beqr_d(jit_state_t * _jit, int32_t r0, int32_t r1); +static jit_reloc_t bger_d(jit_state_t * _jit, int32_t r0, int32_t r1); +static jit_reloc_t bgtr_d(jit_state_t * _jit, int32_t r0, int32_t r1); +static jit_reloc_t bner_d(jit_state_t * _jit, int32_t r0, int32_t r1); +static jit_reloc_t bunltr_d(jit_state_t * _jit, int32_t r0, int32_t r1); +static jit_reloc_t bunler_d(jit_state_t * _jit, int32_t r0, int32_t r1); +static jit_reloc_t buneqr_d(jit_state_t * _jit, int32_t r0, int32_t r1); +static jit_reloc_t bunger_d(jit_state_t * _jit, int32_t r0, int32_t r1); +static jit_reloc_t bungtr_d(jit_state_t * _jit, int32_t r0, int32_t r1); +static jit_reloc_t bltgtr_d(jit_state_t * _jit, int32_t r0, int32_t r1); +static jit_reloc_t bordr_d(jit_state_t * _jit, int32_t r0, int32_t r1); +static jit_reloc_t bunordr_d(jit_state_t * _jit, int32_t r0, int32_t r1); + +static void truncr_f_i(jit_state_t * _jit, int32_t r0, int32_t i0); +static void truncr_d_i(jit_state_t * _jit, int32_t r0, int32_t i0); +#if __WORDSIZE == 64 +static void truncr_f_l(jit_state_t * _jit, int32_t r0, int32_t 
r1); +static void truncr_d_l(jit_state_t * _jit, int32_t r0, int32_t r1); +#endif + +static void extr_f(jit_state_t * _jit, int32_t r0, int32_t r1); +static void extr_d(jit_state_t * _jit, int32_t r0, int32_t r1); +static void extr_f_d(jit_state_t * _jit, int32_t r0, int32_t r1); +static void extr_d_f(jit_state_t * _jit, int32_t t0, int32_t r1); + +static void movi_f(jit_state_t * _jit, int32_t r0, jit_float32_t i0); +static void movi_d(jit_state_t * _jit, int32_t r0, jit_float64_t i0); + +static void +addr_f(jit_state_t * _jit, int32_t r0, int32_t r1, int32_t r2) +{ + em_wp(_jit, _ADD_S(r0, r1, r2)); +} + +static void +addr_d(jit_state_t * _jit, int32_t r0, int32_t r1, int32_t r2) +{ + em_wp(_jit, _ADD_D(r0, r1, r2)); +} + +static void +subr_f(jit_state_t * _jit, int32_t r0, int32_t r1, int32_t r2) +{ + em_wp(_jit, _SUB_S(r0, r1, r2)); +} + +static void +subr_d(jit_state_t * _jit, int32_t r0, int32_t r1, int32_t r2) +{ + em_wp(_jit, _SUB_D(r0, r1, r2)); +} + +static void +mulr_f(jit_state_t * _jit, int32_t r0, int32_t r1, int32_t r2) +{ + em_wp(_jit, _MUL_S(r0, r1, r2)); +} + +static void +mulr_d(jit_state_t * _jit, int32_t r0, int32_t r1, int32_t r2) +{ + em_wp(_jit, _MUL_D(r0, r1, r2)); +} + +static void +divr_f(jit_state_t * _jit, int32_t r0, int32_t r1, int32_t r2) +{ + em_wp(_jit, _DIV_S(r0, r1, r2)); +} + +static void +divr_d(jit_state_t * _jit, int32_t r0, int32_t r1, int32_t r2) +{ + em_wp(_jit, _DIV_D(r0, r1, r2)); +} + +static void +sqrtr_f(jit_state_t * _jit, int32_t r0, int32_t r1) +{ + em_wp(_jit, _SQRT_S(r0, r1)); +} + +static void +sqrtr_d(jit_state_t * _jit, int32_t r0, int32_t r1) +{ + em_wp(_jit, _SQRT_D(r0, r1)); +} + +static void +negr_f(jit_state_t * _jit, int32_t r0, int32_t r1) +{ + em_wp(_jit, _NEG_S(r0, r1)); +} + +static void +negr_d(jit_state_t * _jit, int32_t r0, int32_t r1) +{ + em_wp(_jit, _NEG_D(r0, r1)); +} + +static void +absr_f(jit_state_t * _jit, int32_t r0, int32_t r1) +{ + em_wp(_jit, _ABS_S(r0, r1)); +} + +static void 
+absr_d(jit_state_t * _jit, int32_t r0, int32_t r1) +{ + em_wp(_jit, _ABS_D(r0, r1)); +} + +static void +extr_f(jit_state_t * _jit, int32_t r0, int32_t r1) +{ + jit_fpr_t t0 = get_temp_fpr(_jit); +#if __WORDSIZE == 32 + em_wp(_jit, _MTC1(r1, fn(t0))); + em_wp(_jit, _CVT_S_W(r0, fn(t0))); +#else + em_wp(_jit, _DMTC1(r1, fn(t0))); + em_wp(_jit, _CVT_S_L(r0, fn(t0))); +#endif + unget_temp_fpr(_jit); +} + +static void +truncr_f_i(jit_state_t * _jit, int32_t r0, int32_t r1) +{ + jit_fpr_t t0 = get_temp_fpr(_jit); + em_wp(_jit, _TRUNC_W_S(fn(t0), r1)); + em_wp(_jit, _MFC1(r0, fn(t0))); + unget_temp_fpr(_jit); +} + +#if __WORDSIZE == 64 +static void +truncr_f_l(jit_state_t * _jit, int32_t r0, int32_t r1) +{ + jit_fpr_t t0 = get_temp_fpr(_jit); + em_wp(_jit, _TRUNC_L_S(fn(t0), r1)); + em_wp(_jit, _DMFC1(r0, fn(t0))); + unget_temp_fpr(_jit); +} +#endif + +static void +ldi_f(jit_state_t * _jit, int32_t r0, jit_word_t i0) +{ + if (can_sign_extend_short_p(i0)) + em_wp(_jit, _LWC1(r0, i0, rn(_ZERO))); + else { + jit_gpr_t t0 = get_temp_gpr(_jit); + movi(_jit, rn(t0), i0); + ldr_f(_jit, r0, rn(t0)); + unget_temp_gpr(_jit); + } +} + +static void +ldxr_f(jit_state_t * _jit, int32_t r0, int32_t r1, int32_t r2) +{ + jit_gpr_t t0 = get_temp_gpr(_jit); + addr(_jit, rn(t0), r1, r2); + ldr_f(_jit, r0, rn(t0)); + unget_temp_gpr(_jit); +} + +static void +ldxi_f(jit_state_t * _jit, int32_t r0, int32_t r1, jit_word_t i0) +{ + if (can_sign_extend_short_p(i0)) + em_wp(_jit, _LWC1(r0, i0, r1)); + else { + jit_gpr_t t0 = get_temp_gpr(_jit); + addi(_jit, rn(t0), r1, i0); + ldr_f(_jit, r0, rn(t0)); + unget_temp_gpr(_jit); + } +} + +static void +str_f(jit_state_t * _jit, int32_t r0, int32_t r1) +{ + em_wp(_jit, _SWC1(r1, 0, r0)); +} + +static void +sti_f(jit_state_t * _jit, jit_word_t i0, int32_t r0) +{ + if (can_sign_extend_short_p(i0)) + em_wp(_jit, _SWC1(r0, i0, rn(_ZERO))); + else { + jit_gpr_t t0 = get_temp_gpr(_jit); + movi(_jit, rn(t0), i0); + str_f(_jit, rn(t0), r0); + 
unget_temp_gpr(_jit); + } +} + +static void +stxr_f(jit_state_t * _jit, int32_t r0, int32_t r1, int32_t r2) +{ + jit_gpr_t t0 = get_temp_gpr(_jit); + addr(_jit, rn(t0), r0, r1); + str_f(_jit, rn(t0), r2); + unget_temp_gpr(_jit); +} + +static void +stxi_f(jit_state_t * _jit, jit_word_t i0, int32_t r0, int32_t r1) +{ + if (can_sign_extend_short_p(i0)) + em_wp(_jit, _SWC1(r1, i0, r0)); + else { + jit_gpr_t t0 = get_temp_gpr(_jit); + addi(_jit, rn(t0), r0, i0); + str_f(_jit, rn(t0), r1); + unget_temp_gpr(_jit); + } +} + +static void +movr_f(jit_state_t * _jit, int32_t r0, int32_t r1) +{ + if (r0 != r1) + em_wp(_jit, _MOV_S(r0, r1)); +} + +static void +movi_f(jit_state_t * _jit, int32_t r0, jit_float32_t i0) +{ + union { + int32_t i; + jit_float32_t f; + } u = {.f = i0}; + jit_gpr_t t0 = get_temp_gpr(_jit); + movi(_jit, rn(t0), u.i); + em_wp(_jit, _MTC1(rn(t0), r0)); + unget_temp_gpr(_jit); +} + +static void +extr_f_d(jit_state_t * _jit, int32_t r0, int32_t r1) +{ + em_wp(_jit, _CVT_D_S(r0, r1)); +} + +static void +extr_d_f(jit_state_t * _jit, int32_t r0, int32_t r1) +{ + em_wp(_jit, _CVT_S_D(r0, r1)); +} + +static void +extr_d(jit_state_t * _jit, int32_t r0, int32_t r1) +{ + jit_fpr_t t0 = get_temp_fpr(_jit); +#if __WORDSIZE == 32 + em_wp(_jit, _MTC1(r1, fn(t0))); + em_wp(_jit, _CVT_D_W(r0, fn(t0))); +#else + em_wp(_jit, _DMTC1(r1, fn(t0))); + em_wp(_jit, _CVT_D_L(r0, fn(t0))); +#endif + unget_temp_fpr(_jit); +} + +static void +truncr_d_i(jit_state_t * _jit, int32_t r0, int32_t r1) +{ + jit_fpr_t t0 = get_temp_fpr(_jit); + em_wp(_jit, _TRUNC_W_D(fn(t0), r1)); + em_wp(_jit, _MFC1(r0, fn(t0))); + unget_temp_fpr(_jit); +} + +#if __WORDSIZE == 64 +static void +truncr_d_l(jit_state_t * _jit, int32_t r0, int32_t r1) +{ + jit_fpr_t t0 = get_temp_fpr(_jit); + em_wp(_jit, _TRUNC_L_D(fn(t0), r1)); + em_wp(_jit, _DMFC1(r0, fn(t0))); + unget_temp_fpr(_jit); +} +#endif + +static void +ldr_f(jit_state_t * _jit, int32_t r0, int32_t r1) +{ + em_wp(_jit, _LWC1(r0, 0, r1)); +} + +static 
void +ldr_d(jit_state_t * _jit, int32_t r0, int32_t r1) +{ + em_wp(_jit, _LDC1(r0, 0, r1)); +} + +static void +ldi_d(jit_state_t * _jit, int32_t r0, jit_word_t i0) +{ + if (can_sign_extend_short_p(i0)) + em_wp(_jit, _LDC1(r0, i0, rn(_ZERO))); + else { + jit_gpr_t t0 = get_temp_gpr(_jit); + movi(_jit, rn(t0), i0); + em_wp(_jit, _LDC1(r0, 0, rn(t0))); + unget_temp_gpr(_jit); + } +} + +static void +ldxr_d(jit_state_t * _jit, int32_t r0, int32_t r1, int32_t r2) +{ + jit_gpr_t t0 = get_temp_gpr(_jit); + addr(_jit, rn(t0), r1, r2); + ldr_d(_jit, r0, rn(t0)); + unget_temp_gpr(_jit); +} + +static void +ldxi_d(jit_state_t * _jit, int32_t r0, int32_t r1, jit_word_t i0) +{ + if (can_sign_extend_short_p(i0)) + em_wp(_jit, _LDC1(r0, i0, r1)); + else { + jit_gpr_t t0 = get_temp_gpr(_jit); + addi(_jit, rn(t0), r1, i0); + ldr_d(_jit, r0, rn(t0)); + unget_temp_gpr(_jit); + } +} + +static void +str_d(jit_state_t * _jit, int32_t r0, int32_t r1) +{ + em_wp(_jit, _SDC1(r1, 0, r0)); +} + +static void +sti_d(jit_state_t * _jit, jit_word_t i0, int32_t r0) +{ + if (can_sign_extend_short_p(i0)) + em_wp(_jit, _SDC1(r0, i0, rn(_ZERO))); + else { + jit_gpr_t t0 = get_temp_gpr(_jit); + movi(_jit, rn(t0), i0); + str_d(_jit, rn(t0), r0); + unget_temp_gpr(_jit); + } +} + +static void +stxr_d(jit_state_t * _jit, int32_t r0, int32_t r1, int32_t r2) +{ + jit_gpr_t t0 = get_temp_gpr(_jit); + addr(_jit, rn(t0), r0, r1); + str_d(_jit, rn(t0), r2); + unget_temp_gpr(_jit); +} + +static void +stxi_d(jit_state_t * _jit, jit_word_t i0, int32_t r0, int32_t r1) +{ + if (can_sign_extend_short_p(i0)) + em_wp(_jit, _SDC1(r1, i0, r0)); + else { + jit_gpr_t t0 = get_temp_gpr(_jit); + addi(_jit, rn(t0), r0, i0); + str_d(_jit, rn(t0), r1); + unget_temp_gpr(_jit); + } +} + +static void +movr_d(jit_state_t * _jit, int32_t r0, int32_t r1) +{ + if (r0 != r1) + em_wp(_jit, _MOV_D(r0, r1)); +} + +#if JIT_PASS_DOUBLES_IN_GPR_PAIRS +static void +movr_d_ww(jit_state_t * _jit, int32_t r0, int32_t r1, int32_t r2) +{ + assert(r1 
== r2 - 1); + em_wp(_jit, _MTC1(r1, r0)); + em_wp(_jit, _MTHC1(r2, r0)); +} + +static void +movr_ww_d(jit_state_t * _jit, int32_t r0, int32_t r1, int32_t r2) +{ + assert(r0 == r1 - 1); + em_wp(_jit, _MFC1(r0, r2)); + em_wp(_jit, _MFHC1(r1, r2)); +} + +static void +stxi_ww(jit_state_t * _jit, jit_word_t o0, int32_t r0, int32_t r1, + int32_t r2) +{ + jit_fpr_t t0 = get_temp_fpr(_jit); + movr_d_ww(_jit, fn(t0), r1, r2); + stxi_d(_jit, o0, r0, fn(t0)); + unget_temp_fpr(_jit); +} + +static void +ldxi_ww(jit_state_t * _jit, int32_t r0, int32_t r1, int32_t r2, + jit_word_t o0) +{ + jit_fpr_t t0 = get_temp_fpr(_jit); + ldxi_d(_jit, fn(t0), r2, o0); + movr_ww_d(_jit, r0, r1, fn(t0)); + unget_temp_fpr(_jit); +} +#endif + +#if JIT_PASS_FLOATS_IN_GPRS +static void +movr_f_w(jit_state_t * _jit, int32_t r0, int32_t r1) +{ + em_wp(_jit, _MTC1(r1, r0)); +} + +static void +movr_w_f(jit_state_t * _jit, int32_t r0, int32_t r1) +{ + em_wp(_jit, _MFC1(r0, r1)); +} + +static void +stxi_w(jit_state_t * _jit, jit_word_t o0, int32_t r0, int32_t r1) +{ + jit_fpr_t t0 = get_temp_fpr(_jit); + movr_f_w(_jit, fn(t0), r1); + stxi_f(_jit, o0, r0, fn(t0)); + unget_temp_fpr(_jit); +} + +static void +ldxi_w(jit_state_t * _jit, int32_t r0, int32_t r1, jit_word_t o0) +{ + jit_fpr_t t0 = get_temp_fpr(_jit); + ldxi_f(_jit, fn(t0), r1, o0); + movr_w_f(_jit, r0, fn(t0)); + unget_temp_fpr(_jit); +} +#endif + +static void +movi_d(jit_state_t * _jit, int32_t r0, jit_float64_t i0) +{ + union { + int64_t l; + struct { + int32_t l; + int32_t h; + } i; + jit_float64_t d; + } data = {.d = i0}; + + jit_gpr_t t0 = get_temp_gpr(_jit); +#if __WORDSIZE == 64 + movi(_jit, rn(t0), data.l); + em_wp(_jit, _DMTC1(rn(t0), r0)); +#else + if (data.i.l) { + movi(_jit, rn(t0), data.i.l); + em_wp(_jit, _MTC1(rn(t0), r0)); + } else + em_wp(_jit, _MTC1(rn(_ZERO), r0)); + + if (data.i.h) { + movi(_jit, rn(t0), data.i.h); + em_wp(_jit, _MTHC1(rn(t0), r0)); + } else + em_wp(_jit, _MTHC1(rn(_ZERO), r0)); +#endif + 
unget_temp_gpr(_jit); +} + +static void +retval_f(jit_state_t * _jit, int32_t r0) +{ + if (fn(_F0) != r0) + movr_f(_jit, r0, fn(_F0)); +} + +static void +retval_d(jit_state_t * _jit, int32_t r0) +{ + if (fn(_F0) != r0) + movr_d(_jit, r0, fn(_F0)); +} + +static void +retr_f(jit_state_t * _jit, int32_t r0) +{ + if (fn(_F0) != r0) + movr_f(_jit, fn(_F0), r0); + + ret(_jit); +} + +static void +retr_d(jit_state_t * _jit, int32_t r0) +{ + if (fn(_F0) != r0) + movr_d(_jit, fn(_F0), r0); + + ret(_jit); +} + +static jit_reloc_t +bct(jit_state_t * _jit) +{ + return emit_jump(_jit, _BC1T(0), _NOP(1)); +} + +static jit_reloc_t +bcf(jit_state_t * _jit) +{ + return emit_jump(_jit, _BC1F(0), _NOP(1)); +} + +static jit_reloc_t +bltr_f(jit_state_t * _jit, int32_t r1, int32_t r2) +{ + em_wp(_jit, _C_OLT_S(r1, r2)); + jit_reloc_t w = bct(_jit); + return (w); +} + +static jit_reloc_t +bler_f(jit_state_t * _jit, int32_t r1, int32_t r2) +{ + em_wp(_jit, _C_OLE_S(r1, r2)); + jit_reloc_t w = bct(_jit); + return (w); +} + +static jit_reloc_t +beqr_f(jit_state_t * _jit, int32_t r1, int32_t r2) +{ + em_wp(_jit, _C_EQ_S(r1, r2)); + jit_reloc_t w = bct(_jit); + return (w); +} + +static jit_reloc_t +bger_f(jit_state_t * _jit, int32_t r1, int32_t r2) +{ + em_wp(_jit, _C_ULT_S(r1, r2)); + jit_reloc_t w = bcf(_jit); + return (w); +} + +static jit_reloc_t +bgtr_f(jit_state_t * _jit, int32_t r1, int32_t r2) +{ + em_wp(_jit, _C_ULE_S(r1, r2)); + jit_reloc_t w = bcf(_jit); + return (w); +} + +static jit_reloc_t +bner_f(jit_state_t * _jit, int32_t r1, int32_t r2) +{ + em_wp(_jit, _C_EQ_S(r1, r2)); + jit_reloc_t w = bcf(_jit); + return (w); +} + +static jit_reloc_t +bunltr_f(jit_state_t * _jit, int32_t r1, int32_t r2) +{ + em_wp(_jit, _C_ULT_S(r1, r2)); + jit_reloc_t w = bct(_jit);; + return (w); +} + +static jit_reloc_t +bunler_f(jit_state_t * _jit, int32_t r1, int32_t r2) +{ + em_wp(_jit, _C_ULE_S(r1, r2)); + jit_reloc_t w = bct(_jit); + return (w); +} + +static jit_reloc_t +buneqr_f(jit_state_t * 
_jit, int32_t r1, int32_t r2) +{ + em_wp(_jit, _C_UEQ_S(r1, r2)); + jit_reloc_t w = bct(_jit); + return (w); +} + +static jit_reloc_t +bunger_f(jit_state_t * _jit, int32_t r1, int32_t r2) +{ + em_wp(_jit, _C_OLT_S(r1, r2)); + jit_reloc_t w = bcf(_jit); + return (w); +} + +static jit_reloc_t +bungtr_f(jit_state_t * _jit, int32_t r1, int32_t r2) +{ + em_wp(_jit, _C_OLE_S(r1, r2)); + jit_reloc_t w = bcf(_jit); + return (w); +} + +static jit_reloc_t +bltgtr_f(jit_state_t * _jit, int32_t r1, int32_t r2) +{ + em_wp(_jit, _C_UEQ_S(r1, r2)); + jit_reloc_t w = bcf(_jit); + return (w); +} + +static jit_reloc_t +bordr_f(jit_state_t * _jit, int32_t r1, int32_t r2) +{ + em_wp(_jit, _C_UN_S(r1, r2)); + jit_reloc_t w = bcf(_jit); + return (w); +} + +static jit_reloc_t +bunordr_f(jit_state_t * _jit, int32_t r1, int32_t r2) +{ + em_wp(_jit, _C_UN_S(r1, r2)); + jit_reloc_t w = bct(_jit); + return (w); +} + +static jit_reloc_t +bltr_d(jit_state_t * _jit, int32_t r1, int32_t r2) +{ + em_wp(_jit, _C_OLT_D(r1, r2)); + jit_reloc_t w = bct(_jit); + return (w); +} + +static jit_reloc_t +bler_d(jit_state_t * _jit, int32_t r1, int32_t r2) +{ + em_wp(_jit, _C_OLE_D(r1, r2)); + jit_reloc_t w = bct(_jit); + return (w); +} + +static jit_reloc_t +beqr_d(jit_state_t * _jit, int32_t r1, int32_t r2) +{ + em_wp(_jit, _C_EQ_D(r1, r2)); + jit_reloc_t w = bct(_jit); + return (w); +} + +static jit_reloc_t +bger_d(jit_state_t * _jit, int32_t r1, int32_t r2) +{ + em_wp(_jit, _C_ULT_D(r1, r2)); + jit_reloc_t w = bcf(_jit); + return (w); +} + +static jit_reloc_t +bgtr_d(jit_state_t * _jit, int32_t r1, int32_t r2) +{ + em_wp(_jit, _C_ULE_D(r1, r2)); + jit_reloc_t w = bcf(_jit); + return (w); +} + +static jit_reloc_t +bner_d(jit_state_t * _jit, int32_t r1, int32_t r2) +{ + em_wp(_jit, _C_EQ_D(r1, r2)); + jit_reloc_t w = bcf(_jit); + return (w); +} + +static jit_reloc_t +bunltr_d(jit_state_t * _jit, int32_t r1, int32_t r2) +{ + em_wp(_jit, _C_ULT_D(r1, r2)); + jit_reloc_t w = bct(_jit); + return (w); +} + 
+static jit_reloc_t +bunler_d(jit_state_t * _jit, int32_t r1, int32_t r2) +{ + em_wp(_jit, _C_ULE_D(r1, r2)); + jit_reloc_t w = bct(_jit); + return (w); +} + +static jit_reloc_t +buneqr_d(jit_state_t * _jit, int32_t r1, int32_t r2) +{ + em_wp(_jit, _C_UEQ_D(r1, r2)); + jit_reloc_t w = bct(_jit); + return (w); +} + +static jit_reloc_t +bunger_d(jit_state_t * _jit, int32_t r1, int32_t r2) +{ + em_wp(_jit, _C_OLT_D(r1, r2)); + jit_reloc_t w = bcf(_jit); + return (w); +} + +static jit_reloc_t +bungtr_d(jit_state_t * _jit, int32_t r1, int32_t r2) +{ + em_wp(_jit, _C_OLE_D(r1, r2)); + jit_reloc_t w = bcf(_jit); + return (w); +} + +static jit_reloc_t +bltgtr_d(jit_state_t * _jit, int32_t r1, int32_t r2) +{ + em_wp(_jit, _C_UEQ_D(r1, r2)); + jit_reloc_t w = bcf(_jit); + return (w); +} + +static jit_reloc_t +bordr_d(jit_state_t * _jit, int32_t r1, int32_t r2) +{ + em_wp(_jit, _C_UN_D(r1, r2)); + jit_reloc_t w = bcf(_jit); + return (w); +} + +static jit_reloc_t +bunordr_d(jit_state_t * _jit, int32_t r1, int32_t r2) +{ + em_wp(_jit, _C_UN_D(r1, r2)); + jit_reloc_t w = bct(_jit); + return (w); +} diff --git a/deps/lightening/lightening/mips.c b/deps/lightening/lightening/mips.c new file mode 100644 index 0000000..ac77e2f --- /dev/null +++ b/deps/lightening/lightening/mips.c @@ -0,0 +1,282 @@ +/* + * Copyright (C) 2012-2019 Free Software Foundation, Inc. + * + * This file is part of GNU lightning. + * + * GNU lightning is free software; you can redistribute it and/or modify it + * under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation; either version 3, or (at your option) + * any later version. + * + * GNU lightning is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public + * License for more details. 
+ * + * Authors: + * Paulo Cesar Pereira de Andrade + */ + +#include "mips-cpu.c" +#include "mips-fpu.c" + +static const jit_gpr_t abi_gpr_args[] = { + _A0, _A1, _A2, _A3, +#if NEW_ABI + _A4, _A5, _A6, _A7, +#endif +}; + +static const jit_fpr_t abi_fpr_args[] = { +#if NEW_ABI + _F12, _F13, _F14, _F15, _F16, _F17, _F18, _F19 +#else + _F12, _F14 +#endif +}; + +static const int abi_gpr_arg_count = + sizeof(abi_gpr_args) / sizeof(abi_gpr_args[0]); +static const int abi_fpr_arg_count = + sizeof(abi_fpr_args) / sizeof(abi_fpr_args[0]); + +struct abi_arg_iterator { + const jit_operand_t *args; + int argc; + + int arg_idx; +#if !NEW_ABI + int gpr_used; + int gpr_idx; + int fpr_idx; +#endif + jit_word_t stack_size; + int stack_padding; +}; + +static size_t page_size; + +/* + * Implementation + */ + +static jit_bool_t +has_fpu() +{ +#if __mips_hard_float + return 1; +#else + return 0; +#endif +} + +jit_bool_t +jit_get_cpu(void) +{ + page_size = sysconf(_SC_PAGE_SIZE); + /* + * TODO: extensions? + */ + return has_fpu(); +} + +jit_bool_t +jit_init(jit_state_t * _jit) +{ + (void) _jit; + return has_fpu(); +} + +static size_t +jit_initial_frame_size(void) +{ + return 0; +} + +static void +reset_abi_arg_iterator(struct abi_arg_iterator *iter, size_t argc, + const jit_operand_t * args) +{ + memset(iter, 0, sizeof(*iter)); + iter->argc = argc; + iter->args = args; +#if !NEW_ABI + iter->stack_size = 16; +#endif +} + +#if !NEW_ABI +static int +jit_operand_abi_sizeof(enum jit_operand_abi abi) +{ + switch (abi) { + case JIT_OPERAND_ABI_UINT8: + case JIT_OPERAND_ABI_INT8: + return 1; + case JIT_OPERAND_ABI_UINT16: + case JIT_OPERAND_ABI_INT16: + return 2; + case JIT_OPERAND_ABI_UINT32: + case JIT_OPERAND_ABI_INT32: + return 4; + case JIT_OPERAND_ABI_UINT64: + case JIT_OPERAND_ABI_INT64: + return 8; + case JIT_OPERAND_ABI_POINTER: + return CHOOSE_32_64(4, 8); + case JIT_OPERAND_ABI_FLOAT: + return 4; + case JIT_OPERAND_ABI_DOUBLE: + return 8; + default: + abort(); + } +} +#endif + 
+static void +next_abi_arg(struct abi_arg_iterator *iter, jit_operand_t * arg) +{ + ASSERT(iter->arg_idx < iter->argc); + enum jit_operand_abi abi = iter->args[iter->arg_idx].abi; +#if NEW_ABI + int idx = iter->arg_idx++; + /* + * on new abi the first eight arguments of any type are passed in + * registers + */ + if (is_gpr_arg(abi) && idx < 8) { + *arg = jit_operand_gpr(abi, abi_gpr_args[idx]); + return; + } + + if (is_fpr_arg(abi) && idx < 8) { + *arg = jit_operand_fpr(abi, abi_fpr_args[idx]); + return; + } + + *arg = jit_operand_mem(abi, JIT_SP, iter->stack_size); + iter->stack_size += 8; +#else + /* + * O32 argument passing is a bit of a mess + */ + iter->arg_idx++; + if (is_gpr_arg(abi) && iter->gpr_idx < abi_gpr_arg_count) { + *arg = jit_operand_gpr(abi, abi_gpr_args[iter->gpr_idx]); + iter->gpr_used = 1; + iter->gpr_idx++; + return; + } + + if (is_fpr_arg(abi) && iter->gpr_idx <= 3) { + if (abi == JIT_OPERAND_ABI_DOUBLE && iter->gpr_idx % 2 != 0) + iter->gpr_idx++; + + if (!iter->gpr_used && iter->fpr_idx < abi_fpr_arg_count) + *arg = jit_operand_fpr(abi, abi_fpr_args[iter->fpr_idx]); + else if (abi == JIT_OPERAND_ABI_FLOAT) { + *arg = jit_operand_gpr(abi, abi_gpr_args[iter->gpr_idx]); + } else { + *arg = jit_operand_gpr_pair(abi, + abi_gpr_args[iter->gpr_idx + 0], + abi_gpr_args[iter->gpr_idx + 1]); + } + + iter->fpr_idx++; + iter->gpr_idx += abi == JIT_OPERAND_ABI_DOUBLE ? 
2 : 1; + return; + } + + size_t abi_size = jit_operand_abi_sizeof(abi); + abi_size = jit_align_up(abi_size, 4); + + iter->stack_size = jit_align_up(iter->stack_size, abi_size); + *arg = jit_operand_mem(abi, JIT_SP, iter->stack_size); + + iter->stack_size += abi_size; +#endif + +} + +static void +jit_flush(void *fptr, void *tptr) +{ + jit_word_t f = (jit_word_t) fptr & -page_size; + jit_word_t t = (((jit_word_t) tptr) + page_size - 1) & -page_size; + /* libgcc's __clear_cache is apparently in some situations a no-op: + * https://gcc.gnu.org/bugzilla/show_bug.cgi?id=90929 + * + * use __builtin_ instead, seems to work on real hardware + */ + __builtin___clear_cache((void *) f, (void *) t); +} + +static inline size_t +jit_stack_alignment(void) +{ +#if NEW_ABI + return 16; +#else + return 8; +#endif +} + +static void +jit_try_shorten(jit_state_t * _jit, jit_reloc_t reloc, jit_pointer_t addr) +{ + (void) _jit; + (void) reloc; + (void) addr; +} + +static void * +bless_function_pointer(void *ptr) +{ + return ptr; +} + +/* + * Jumps around the veneer + */ + +static void +patch_jmp_without_veneer(jit_state_t * _jit, uint32_t * loc) +{ + patch_jmp_offset(loc, _jit->pc.ui - loc); +} + +static uint32_t * +jmp_without_veneer(jit_state_t * _jit) +{ + uint32_t *loc = _jit->pc.ui; + emit_u32(_jit, _BEQ(rn(_ZERO), rn(_ZERO), 0)); + /* branch delay slot */ + emit_u32(_jit, _NOP(1)); + return loc; +} + +/* + * Load from pool offset + */ +static void +patch_load_from_pool_offset(uint32_t * loc, int32_t v) +{ + /* + * not used by this backend + */ + (void) loc; + (void) v; + abort(); +} + +static int32_t +read_load_from_pool_offset(uint32_t * loc) +{ + /* + * not used by this backend + */ + (void) loc; + abort(); + return 0; +} diff --git a/deps/lightening/lightening/mips.h b/deps/lightening/lightening/mips.h new file mode 100644 index 0000000..62950f4 --- /dev/null +++ b/deps/lightening/lightening/mips.h @@ -0,0 +1,203 @@ +/* + * Copyright (C) 2012-2017 Free Software Foundation, Inc. 
+ * + * This file is part of GNU lightning. + * + * GNU lightning is free software; you can redistribute it and/or modify it + * under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation; either version 3, or (at your option) + * any later version. + * + * GNU lightning is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public + * License for more details. + * + * Authors: + * Paulo Cesar Pereira de Andrade + */ + +#ifndef _jit_mips_h +#define _jit_mips_h + +#if _MIPS_SIM != _ABIO32 +#define NEW_ABI 1 +#endif + +#define JIT_NEEDS_LITERAL_POOL 1 +#define JIT_USE_IMMEDIATE_RELOC 1 + +#if !NEW_ABI +#define JIT_PASS_DOUBLES_IN_GPR_PAIRS 1 +#define JIT_PASS_FLOATS_IN_GPRS 1 +#endif + +#define _ZERO JIT_GPR(0) +#define _AT JIT_GPR(1) +#define _V0 JIT_GPR(2) +#define _V1 JIT_GPR(3) +#define _A0 JIT_GPR(4) +#define _A1 JIT_GPR(5) +#define _A2 JIT_GPR(6) +#define _A3 JIT_GPR(7) +#define _A4 JIT_GPR(8) + +#if NEW_ABI +#define _A5 JIT_GPR(9) +#define _A6 JIT_GPR(10) +#define _A7 JIT_GPR(11) +#else +#define _T0 JIT_GPR(9) +#define _T1 JIT_GPR(10) +#define _T2 JIT_GPR(11) +#endif + +#define _T4 JIT_GPR(12) +#define _T5 JIT_GPR(13) +#define _T6 JIT_GPR(14) +#define _T7 JIT_GPR(15) + +#define _S0 JIT_GPR(16) +#define _S1 JIT_GPR(17) +#define _S2 JIT_GPR(18) +#define _S3 JIT_GPR(19) +#define _S4 JIT_GPR(20) +#define _S5 JIT_GPR(21) +#define _S6 JIT_GPR(22) +#define _S7 JIT_GPR(23) + +#define _T8 JIT_GPR(24) +#define _T9 JIT_GPR(25) + +/* + * k0-k1 reserved for kernel usage + */ + +#define _GP JIT_GPR(28) +#define _SP JIT_GPR(29) +#define _FP JIT_GPR(30) + +#define _RA JIT_GPR(31) + +#define _F0 JIT_FPR(0) +#define _F1 JIT_FPR(1) +#define _F2 JIT_FPR(2) +#define _F3 JIT_FPR(3) +#define _F4 JIT_FPR(4) +#define _F5 JIT_FPR(5) +#define _F6 JIT_FPR(6) +#define _F7 JIT_FPR(7) +#define _F8 
JIT_FPR(8) +#define _F9 JIT_FPR(9) +#define _F10 JIT_FPR(10) +#define _F11 JIT_FPR(11) +#define _F12 JIT_FPR(12) +#define _F13 JIT_FPR(13) +#define _F14 JIT_FPR(14) +#define _F15 JIT_FPR(15) +#define _F16 JIT_FPR(16) +#define _F17 JIT_FPR(17) +#define _F18 JIT_FPR(18) +#define _F19 JIT_FPR(19) +#define _F20 JIT_FPR(20) +#define _F21 JIT_FPR(21) +#define _F22 JIT_FPR(22) +#define _F23 JIT_FPR(23) +#define _F24 JIT_FPR(24) +#define _F25 JIT_FPR(25) +#define _F26 JIT_FPR(26) +#define _F27 JIT_FPR(27) +#define _F28 JIT_FPR(28) +#define _F29 JIT_FPR(29) +#define _F30 JIT_FPR(30) +#define _F31 JIT_FPR(31) + +#define JIT_LR _RA +#define JIT_SP _SP +#define JIT_FP _FP + +#define JIT_R0 _A0 +#define JIT_R1 _A1 +#define JIT_R2 _A2 +#define JIT_R3 _A3 + +#if NEW_ABI +#define JIT_R4 _A4 +#define JIT_R5 _A5 +#define JIT_R6 _A6 +#define JIT_R7 _A7 +#else +#define JIT_R4 _T0 +#define JIT_R5 _T1 +#define JIT_R6 _T2 +#define JIT_R7 _T3 +#endif + +#define JIT_CARRY _T4 +#define JIT_TMP0 _T5 +#define JIT_TMP1 _T6 +#define JIT_TMP2 _T7 +#define JIT_TMP3 _T8 +/* _T9 is used as a kind of link register, and as such can be overwritten whenever */ +//#define JIT_TMP4 _T9 + +#define JIT_V0 _S0 +#define JIT_V1 _S1 +#define JIT_V2 _S2 +#define JIT_V3 _S3 +#define JIT_V4 _S4 +#define JIT_V5 _S5 +#define JIT_V6 _S6 +#define JIT_V7 _S7 + +#define JIT_F0 _F0 +#define JIT_F1 _F2 +#define JIT_F2 _F6 +#define JIT_F3 _F8 +#define JIT_F4 _F10 +#define JIT_F5 _F12 +#define JIT_F6 _F14 +#define JIT_F7 _F16 +#define JIT_F8 _F18 +#define JIT_FTMP _F4 + +#if NEW_ABI +#define JIT_F9 _F20 +#define JIT_F10 _F22 +#else +#define JIT_VF4 _F20 +#define JIT_VF5 _F22 +#endif + +#define JIT_VF0 _F24 +#define JIT_VF1 _F26 +#define JIT_VF2 _F28 +#define JIT_VF3 _F30 + +#if NEW_ABI +/* + * _RA is in some documents referred to as caller-save, but storing it in + * the function stack frame works equally well, which is what we do here + * (gcc apparently does this) + */ +#define JIT_PLATFORM_CALLEE_SAVE_GPRS _SP, _FP, 
_GP, _RA +#else +#define JIT_PLATFORM_CALLEE_SAVE_GPRS _SP, _FP, _RA +#endif + +#define JIT_PLATFORM_CALLEE_SAVE_FPRS + +#define JIT_JMP_MAX_SIZE (sizeof(uint32_t) * 2) + +#if __WORDSIZE == 64 +#define JIT_LITERAL_MAX_SIZE ((sizeof(uint32_t) * 6) + JIT_JMP_MAX_SIZE) +#else +#define JIT_LITERAL_MAX_SIZE ((sizeof(uint32_t) * 2) + JIT_JMP_MAX_SIZE) +#endif + +#define JIT_INST_MAX_SIZE JIT_JMP_MAX_SIZE + +// A little bit overkill, but just to be safe +#define JIT_EXTRA_SPACE (16 * sizeof(uint32_t)) + +#endif /* _jit_mips_h */ diff --git a/deps/lightening/lightening/ppc-cpu.c b/deps/lightening/lightening/ppc-cpu.c new file mode 100644 index 0000000..a56c207 --- /dev/null +++ b/deps/lightening/lightening/ppc-cpu.c @@ -0,0 +1,3136 @@ +/* + * Copyright (C) 2012-2017, 2019 Free Software Foundation, Inc. + * + * This file is part of GNU lightning. + * + * GNU lightning is free software; you can redistribute it and/or modify it + * under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation; either version 3, or (at your option) + * any later version. + * + * GNU lightning is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public + * License for more details. 
+ * + * Authors: + * Paulo Cesar Pereira de Andrade + */ + +# if __WORDSIZE == 32 +# define gpr_save_area 72 /* r14~r31 = 18 * 4 */ +# define params_offset 24 +# define can_sign_extend_int_p(im) 1 +# define can_zero_extend_int_p(im) 1 +# define fits_uint32_p(im) 1 +# else +# define gpr_save_area 144 /* r14~r31 = 18 * 8 */ +# if ABI_ELFv2 +# define params_offset 32 +# else +# define params_offset 48 +# endif +# define can_sign_extend_int_p(im) \ + (((im) >= 0 && (long)(im) <= 0x7fffffffL) || \ + ((im) < 0 && (long)(im) >= -0x80000000L)) +# define can_zero_extend_int_p(im) \ + ((im) >= 0 && (im) < 0x80000000L) +/* argument parenthesized so expression arguments are masked as a whole */ +# define fits_uint32_p(im) (((im) & 0xffffffff00000000L) == 0) +# endif +# define fpr_save_area 64 + +# define can_sign_extend_short_p(im) ((im) >= -32768 && (im) <= 32767) +# define can_zero_extend_short_p(im) ((im) >= 0 && (im) <= 65535) +# define can_sign_extend_jump_p(im) ((im) >= -33554432 && (im) <= 33554431) + +/* true when i0 fits a signed 14-bit / 24-bit immediate, i.e. [-2^(n-1), 2^(n-1)-1] */ +#define simm14_p(i0) ((i0) <= 0x1fff && (i0) >= -0x2000) +#define simm24_p(i0) ((i0) <= 0x7fffff && (i0) >= -0x800000) + +# define _FXO(o,d,a,b,e,x) FXO(o,d,a,b,e,x,0) +# define _FXO_(o,d,a,b,e,x) FXO(o,d,a,b,e,x,1) +# define _FDs(o,d,a,s) FDs(o,d,a,s) +# define _FDu(o,d,a,s) FDu(o,d,a,s) +# define _FX(o,d,a,b,x) FX(o,d,a,b,x,0) +# define _FX_(o,d,a,b,x) FX(o,d,a,b,x,1) +# define _FI(o,t,a,k) FI(o,t,a,k) +# define _FB(o,bo,bi,t,a,k) FB(o,bo,bi,t,a,k) +# define _FXL(o,bo,bi,x) FXL(o,bo,bi,x,0) +# define _FXL_(o,bo,bi,x) FXL(o,bo,bi,x,1) +# define _FC(o,d,l,a,b,x) FC(o,d,l,a,b,x) +# define _FCI(o,d,l,a,s) FCI(o,d,l,a,s) +# define _FXFX(o,s,x,f) FXFX(o,s,x,f) +# define _FM(o,s,a,h,b,e,r) FM(o,s,a,h,b,e,r) +# if __WORDSIZE == 64 +# define _FMDS(o,s,a,b,e,x) FMDS(o,s,a,b,e,x,0) +# define _FMDS_(o,s,a,b,e,x) FMDS(o,s,a,b,e,x,1) +# define _FMD(o,s,a,h,b,x,i) FMD(o,s,a,h,b,x,i,0) +# define _FMD_(o,s,a,h,b,x,i) FMD(o,s,a,h,b,x,i,1) +# define _FXS(o,d,a,h,x,i) FXS(o,d,a,h,x,i,0) +# define _FXS_(o,d,a,h,x,i) FXS(o,d,a,h,x,i,1) +# endif + +static uint32_t 
FXO(int,int,int,int,int,int,int); +static uint32_t FDs(int,int,int,int); +static uint32_t FDu(int,int,int,int); +static uint32_t FX(int,int,int,int,int,int); +static uint32_t FI(int,int,int,int); +static uint32_t FB(int,int,int,int,int,int); +static uint32_t FXL(int,int,int,int,int); +static uint32_t FC(int,int,int,int,int,int); +static uint32_t FCI(int,int,int,int,int); +static uint32_t FXFX(int,int,int,int); +static uint32_t FM(int,int,int,int,int,int,int); + +# if __WORDSIZE == 64 +static uint32_t FMD(int,int,int,int,int,int,int,int); +static uint32_t FXS(int,int,int,int,int,int,int); +# endif + +# define CR_0 0 +# define CR_1 1 +# define CR_2 2 +# define CR_3 3 +# define CR_4 4 +# define CR_5 5 +# define CR_6 6 +# define CR_7 7 +# define CR_LT 0 +# define CR_GT 1 +# define CR_EQ 2 +# define CR_SO 3 +# define CR_UN 3 +# define BCC_F 4 +# define BCC_T 12 +/* Naming convention in this table: a trailing underscore selects the _FXO_/_FX_ encoder (last encoder argument 1), the plain name selects _FXO/_FX (last argument 0). */ +# define _ADD(d,a,b) _FXO(31,d,a,b,0,266) +# define _ADD_(d,a,b) _FXO_(31,d,a,b,0,266) +# define _ADDO(d,a,b) _FXO(31,d,a,b,1,266) +# define _ADDO_(d,a,b) _FXO_(31,d,a,b,1,266) +# define _ADDC(d,a,b) _FXO(31,d,a,b,0,10) +# define _ADDC_(d,a,b) _FXO_(31,d,a,b,0,10) +# define _ADDCO(d,a,b) _FXO(31,d,a,b,1,10) +# define _ADDCO_(d,a,b) _FXO_(31,d,a,b,1,10) +# define _ADDE(d,a,b) _FXO(31,d,a,b,0,138) +# define _ADDE_(d,a,b) _FXO_(31,d,a,b,0,138) +# define _ADDEO(d,a,b) _FXO(31,d,a,b,1,138) +# define _ADDEO_(d,a,b) _FXO_(31,d,a,b,1,138) +# define _ADDI(d,a,s) _FDs(14,d,a,s) +# define _ADDIC(d,a,s) _FDs(12,d,a,s) +# define _ADDIC_(d,a,s) _FDs(13,d,a,s) +# define _ADDIS(d,a,s) _FDs(15,d,a,s) +# define _LIS(d,s) _ADDIS(d,0,s) +# define _ADDME(d,a) _FXO(31,d,a,0,0,234) +# define _ADDME_(d,a) _FXO_(31,d,a,0,0,234) +# define _ADDMEO(d,a) _FXO(31,d,a,0,1,234) +# define _ADDMEO_(d,a) _FXO_(31,d,a,0,1,234) +# define _ADDZE(d,a) _FXO(31,d,a,0,0,202) +# define _ADDZE_(d,a) _FXO_(31,d,a,0,0,202) +# define _ADDZEO(d,a) _FXO(31,d,a,0,1,202) +# define _ADDZEO_(d,a) _FXO_(31,d,a,0,1,202) +# define _AND(d,a,b) _FX(31,a,d,b,28) +# 
define _ANDC(d,a,b) _FXO(31,a,d,b,0,60) +# define _ANDC_(d,a,b) _FXO_(31,a,d,b,0,60) +/* same operand placement as _AND: source a, destination d, second source b */ +# define _AND_(d,a,b) _FX_(31,a,d,b,28) +# define _ANDI_(d,a,u) _FDu(28,a,d,u) +# define _ANDIS_(d,a,u) _FDu(29,a,d,u) +# define _B(t) _FI(18,t,0,0) +# define _BA(t) _FI(18,t,1,0) +# define _BL(t) _FI(18,t,0,1) +# define _BLA(t) _FI(18,t,1,1) +# define _BC(o,i,t) _FB(16,o,i,t,0,0) +# define _BCA(o,i,t) _FB(16,o,i,t,1,0) +# define _BCL(o,i,t) _FB(16,o,i,t,0,1) +# define _BCLA(o,i,t) _FB(16,o,i,t,1,1) +# define _BLT(t) _BC(BCC_T,CR_LT,t) +# define _BLE(t) _BC(BCC_F,CR_GT,t) +# define _BEQ(t) _BC(BCC_T,CR_EQ,t) +# define _BGE(t) _BC(BCC_F,CR_LT,t) +# define _BGT(t) _BC(BCC_T,CR_GT,t) +# define _BNE(t) _BC(BCC_F,CR_EQ,t) +# define _BUN(t) _BC(BCC_T,CR_UN,t) +# define _BNU(t) _BC(BCC_F,CR_UN,t) +# define _BCCTR(o,i) _FXL(19,o,i,528) +# define _BCCTRL(o,i) _FXL_(19,o,i,528) +# define _BLTCTR() _BCCTR(BCC_T,CR_LT) +# define _BLECTR() _BCCTR(BCC_F,CR_GT) +# define _BEQCTR() _BCCTR(BCC_T,CR_EQ) +# define _BGECTR() _BCCTR(BCC_F,CR_LT) +# define _BGTCTR() _BCCTR(BCC_T,CR_GT) +# define _BNECTR() _BCCTR(BCC_F,CR_EQ) +# define _BCTR() _BCCTR(20,0) +# define _BCTRL() _BCCTRL(20,0) +# define _BCLR(o,i) _FXL(19,o,i,16) +# define _BCLRL(o,i) _FXL_(19,o,i,16) +# define _BLTLR() _BCLR(BCC_T,CR_LT) +# define _BLELR() _BCLR(BCC_F,CR_GT) +# define _BEQLR() _BCLR(BCC_T,CR_EQ) +# define _BGELR() _BCLR(BCC_F,CR_LT) +# define _BGTLR() _BCLR(BCC_T,CR_GT) +# define _BNELR() _BCLR(BCC_F,CR_EQ) +# define _BLR() _BCLR(20,0) +# define _BLRL() _BCLRL(20,0) +# define _XCMP(cr,l,a,b) _FC(31,cr,l,a,b,0) +# define _CMPD(a,b) _XCMP(0,1,a,b) +# define _CMPW(a,b) _XCMP(0,0,a,b) +# define _XCMPI(cr,l,a,s) _FCI(11,cr,l,a,s) +# define _CMPDI(a,s) _XCMPI(0,1,a,s) +# define _CMPWI(a,s) _XCMPI(0,0,a,s) +# define _XCMPL(cr,l,a,b) _FC(31,cr,l,a,b,32) +# define _CMPLD(a,b) _XCMPL(0,1,a,b) +# define _CMPLW(a,b) _XCMPL(0,0,a,b) +# define _XCMPLI(cr,l,a,u) _FCI(10,cr,l,a,u) +# define _CMPLDI(a,s) _XCMPLI(0,1,a,s) +# define 
_CMPLWI(a,s) _XCMPLI(0,0,a,s) +# define _CNTLZW(a,s) _FX(31,s,a,0,26) +# define _CNTLZW_(a,s) _FX_(31,s,a,0,26) +# define _CRAND(d,a,b) _FX(19,d,a,b,257) +# define _CRANDC(d,a,b) _FX(19,d,a,b,129) +# define _CREQV(d,a,b) _FX(19,d,a,b,289) +# define _CRSET(d) _CREQV(d,d,d) +# define _CRNAND(d,a,b) _FX(19,d,a,b,225) +# define _CRNOR(d,a,b) _FX(19,d,a,b,33) +# define _CRNOT(d,a) _CRNOR(d,a,a) +# define _CROR(d,a,b) _FX(19,d,a,b,449) +# define _CRMOVE(d,a) _CROR(d,a,a) +# define _CRORC(d,a,b) _FX(19,d,a,b,417) +# define _CRXOR(d,a,b) _FX(19,d,a,b,193) +# define _CRCLR(d) _CRXOR(d,d,d) +# define _DCBA(a,b) _FX(31,0,a,b,758) +# define _DCBF(a,b) _FX(31,0,a,b,86) +# define _DCBI(a,b) _FX(31,0,a,b,470) +# define _DCBST(a,b) _FX(31,0,a,b,54) +# define _DCBT(a,b) _FX(31,0,a,b,278) +# define _DCBTST(a,b) _FX(31,0,a,b,246) +# define _DCBZ(a,b) _FX(31,0,a,b,1014) +# define _DIVW(d,a,b) _FXO(31,d,a,b,0,491) +# define _DIVW_(d,a,b) _FXO_(31,d,a,b,0,491) +# define _DIVWO(d,a,b) _FXO(31,d,a,b,1,491) +# define _DIVWO_(d,a,b) _FXO_(31,d,a,b,1,491) +# define _DIVWU(d,a,b) _FXO(31,d,a,b,0,459) +# define _DIVWU_(d,a,b) _FXO_(31,d,a,b,0,459) +# define _DIVWUO(d,a,b) _FXO(31,d,a,b,1,459) +# define _DIVWUO_(d,a,b) _FXO_(31,d,a,b,1,459) +# define _DIVD(d,a,b) _FXO(31,d,a,b,0,489) +# define _DIVD_(d,a,b) _FXO_(31,d,a,b,0,489) +# define _DIVDO(d,a,b) _FXO(31,d,a,b,1,489) +# define _DIVDO_(d,a,b) _FXO_(31,d,a,b,1,489) +# define _DIVDU(d,a,b) _FXO(31,d,a,b,0,457) +# define _DIVDU_(d,a,b) _FXO_(31,d,a,b,0,457) +# define _DIVDUO(d,a,b) _FXO(31,d,a,b,1,457) +# define _DIVDUO_(d,a,b) _FXO_(31,d,a,b,1,457) +# define _ECIWX(d,a,b) _FX(31,d,a,b,310) +# define _ECOWX(s,a,b) _FX(31,s,a,b,438) +# define _EIEIO() _FX(31,0,0,0,854) +# define _EQV(d,a,b) _FX(31,a,d,b,284) +# define _EQV_(d,a,b) _FX_(31,a,d,b,284) +# define _EXTSB(d,a) _FX(31,a,d,0,954) +# define _EXTSB_(d,a) _FX_(31,a,d,0,954) +# define _EXTSH(d,a) _FX(31,a,d,0,922) +# define _EXTSH_(d,a) _FX_(31,a,d,0,922) +# define _EXTSW(d,a) 
_FX(31,a,d,0,986) +# define _EXTSW_(d,a) _FX_(31,a,d,0,986) +# define _ICIB(a,b) _FX(31,0,a,b,982) +# define _ISYNC() _FXL(19,0,0,150) +# define _LBZ(d,a,s) _FDs(34,d,a,s) +# define _LBZU(d,a,s) _FDs(35,d,a,s) +# define _LBZUX(d,a,b) _FX(31,d,a,b,119) +# define _LBZX(d,a,b) _FX(31,d,a,b,87) +# define _LHA(d,a,s) _FDs(42,d,a,s) +# define _LHAU(d,a,s) _FDs(43,d,a,s) +# define _LHAUX(d,a,b) _FX(31,d,a,b,375) +# define _LHAX(d,a,b) _FX(31,d,a,b,343) +# define _LHRBX(d,a,b) _FX(31,d,a,b,790) +# define _LHZ(d,a,s) _FDs(40,d,a,s) +# define _LHZU(d,a,s) _FDs(41,d,a,s) +# define _LHZUX(d,a,b) _FX(31,d,a,b,311) +# define _LHZX(d,a,b) _FX(31,d,a,b,279) +# define _LA(d,a,s) _ADDI(d,a,s) +# define _LI(d,s) _ADDI(d,0,s) +# define _LMW(d,a,s) _FDs(46,d,a,s) +# define _LSWI(d,a,n) _FX(31,d,a,n,597) +# define _LSWX(d,a,b) _FX(31,d,a,b,533) +# define _LWARX(d,a,b) _FX(31,d,a,b,20) +# define _LWBRX(d,a,b) _FX(31,d,a,b,534) +# define _LWA(d,a,s) _FDs(58,d,a,s|2) +# define _LWAUX(d,a,b) _FX(31,d,a,b,373) +# define _LWAX(d,a,b) _FX(31,d,a,b,341) +# define _LWZ(d,a,s) _FDs(32,d,a,s) +# define _LWZU(d,a,s) _FDs(33,d,a,s) +# define _LWZUX(d,a,b) _FX(31,d,a,b,55) +# define _LWZX(d,a,b) _FX(31,d,a,b,23) +# define _LD(d,a,s) _FDs(58,d,a,s) +# define _LDX(d,a,b) _FX(31,d,a,b,21) +# define _MCRF(d,s) _FXL(19,d<<2,(s)<<2,0) +/* In case instruction is emulated, check the kernel can handle it. + Will only generate it if DEBUG is enabled. +""" +Chapter 6. Optional Facilities and Instructions that are being +Phased Out of the Architecture +... +6.1 Move To Condition Register from XER +The mcrxr instruction is being phased out of the archi- +tecture. Its description is included here as an aid to +constructing operating system code to emulate it. + +Move to Condition Register from XER +X-form +mcrxr BF +31 BF // /// /// 512 / +0 6 9 11 16 21 31 +CR(4xBF:4xBF+3) <- XER(32:35) +XER(32:35) <- 0b0000 +The contents of XER(32:35) are copied to Condition Reg- +ister field BF. XER(32:35) are set to zero. 
+Special Registers Altered: +CR field BF XER(32:35) + +Programming Note +Warning: This instruction has been phased out of +the architecture. Attempting to execute this +instruction will cause the system illegal instruction +error handler to be invoked +""" + */ +static void mcrxr(jit_state_t*, int32_t); +# define _MFCR(d) _FX(31,d,0,0,19) +# define _MFMSR(d) _FX(31,d,0,0,83) +# define _MFSPR(d,s) _FXFX(31,d,s<<5,339) +# define _MFXER(d) _MFSPR(d,1) +# define _MFLR(d) _MFSPR(d,8) +# define _MFCTR(d) _MFSPR(d,9) +# define _MFSR(d,s) _FX(31,d,s,0,595) +# define _MFSRIN(d,b) _FX(31,d,0,b,659) +# define _MFTB(d,x,y) _FXFX(31,d,(x)|((y)<<5),371) +# define _MFTBL(d) _MFTB(d,8,12) +# define _MFTBU(d) _MFTB(d,8,13) +# define _MTCRF(c,s) _FXFX(31,s,c<<1,144) +# define _MTCR(s) _MTCRF(0xff,s) +# define _MTMSR(s) _FX(31,s,0,0,146) +# define _MTSPR(d,s) _FXFX(31,d,s<<5,467) +# define _MTXER(d) _MTSPR(d,1) +# define _MTLR(d) _MTSPR(d,8) +# define _MTCTR(d) _MTSPR(d,9) +# define _MTSR(r,s) _FX(31,s<<1,r,0,210) +# define _MTSRIN(r,b) _FX(31,r<<1,0,b,242) +# define _MULLI(d,a,s) _FDs(07,d,a,s) +# define _MULHW(d,a,b) _FXO(31,d,a,b,0,75) +# define _MULHW_(d,a,b) _FXO_(31,d,a,b,0,75) +# define _MULHWU(d,a,b) _FXO(31,d,a,b,0,11) +# define _MULHWU_(d,a,b) _FXO_(31,d,a,b,0,11) +# define _MULLW(d,a,b) _FXO(31,d,a,b,0,235) +# define _MULLW_(d,a,b) _FXO_(31,d,a,b,0,235) +# define _MULLWO(d,a,b) _FXO(31,d,a,b,1,235) +# define _MULLWO_(d,a,b) _FXO_(31,d,a,b,1,235) +# define _MULHD(d,a,b) _FXO(31,d,a,b,0,73) +# define _MULHD_(d,a,b) _FXO_(31,d,a,b,0,73) +# define _MULHDU(d,a,b) _FXO(31,d,a,b,0,9) +# define _MULHDU_(d,a,b) _FXO_(31,d,a,b,0,9) +# define _MULLD(d,a,b) _FXO(31,d,a,b,0,233) +# define _MULLD_(d,a,b) _FXO_(31,d,a,b,0,233) +# define _MULLDO(d,a,b) _FXO(31,d,a,b,1,233) +# define _MULLDO_(d,a,b) _FXO_(31,d,a,b,1,233) +# define _NAND(d,a,b) _FX(31,a,d,b,476) +# define _NAND_(d,a,b) _FX_(31,a,d,b,476) +# define _NEG(d,a) _FXO(31,d,a,0,0,104) +# define _NEG_(d,a) _FXO_(31,d,a,0,0,104) +# 
define _NEGO(d,a) _FXO(31,d,a,0,1,104) +# define _NEGO_(d,a) _FXO_(31,d,a,0,1,104) +# define _NOR(d,a,b) _FX(31,a,d,b,124) +# define _NOR_(d,a,b) _FX_(31,a,d,b,124) +# define _NOT(d,s) _NOR(d,s,s) +# define _OR(d,a,b) _FX(31,a,d,b,444) +# define _OR_(d,a,b) _FX_(31,a,d,b,444) +# define _MR(d,a) _OR(d,a,a) +# define _ORC(d,a,b) _FX(31,a,d,b,412) +# define _ORC_(d,a,b) _FX_(31,a,d,b,412) +# define _ORI(d,a,u) _FDu(24,a,d,u) +# define _NOP() _ORI(0,0,0) +# define _ORIS(d,a,u) _FDu(25,a,d,u) +# define _RFI() _FXL(19,0,0,50) +# define _RLWIMI(d,s,h,b,e) _FM(20,s,d,h,b,e,0) +# define _RLWIMI_(d,s,h,b,e) _FM(20,s,d,h,b,e,1) +# define _INSLWI(a,s,n,b) _RLWIMI(a,s,32-b,b,b+n-1) +# define _INSRWI(a,s,n,b) _RLWIMI(a,s,32-(b+n),b,(b+n)-1) +# define _RLWINM(a,s,h,b,e) _FM(21,s,a,h,b,e,0) +# define _RLWINM_(a,s,h,b,e) _FM(21,s,a,h,b,e,1) +# define _EXTLWI(a,s,n,b) _RLWINM(a,s,b,0,n-1) +# define _EXTRWI(a,s,n,b) _RLWINM(a,s,b+n,32-n,31) +# define _ROTLWI(a,s,n) _RLWINM(a,s,n,0,31) +# define _ROTRWI(a,s,n) _RLWINM(a,s,32-n,0,31) +# define _SLWI(a,s,n) _RLWINM(a,s,n,0,31-n) +# define _SRWI(a,s,n) _RLWINM(a,s,32-n,n,31) +# define _CLRLWI(a,s,n) _RLWINM(a,s,0,n,31) +# define _CLRRWI(a,s,n) _RLWINM(a,s,0,0,31-n) +# define _CLRLSWI(a,s,b,n) _RLWINM(a,s,n,b-n,31-n) +# define _RLWNM(a,s,b,m,e) _FM(23,s,a,b,m,e,0) +# define _RLWNM_(a,s,b,m,e) _FM(23,s,a,b,m,e,1) +# define _ROTLW(a,s,b) _RLWNM(a,s,b,0,31) +# define _SC() _FDu(17,0,0,2) +# define _SLW(a,s,b) _FX(31,s,a,b,24) +# define _SLW_(a,s,b) _FX_(31,s,a,b,24) +# define _SRAW(a,s,b) _FX(31,s,a,b,792) +# define _SRAW_(a,s,b) _FX_(31,s,a,b,792) +# define _SRAWI(a,s,h) _FX(31,s,a,h,824) +# define _SRAWI_(a,s,h) _FX_(31,s,a,h,824) +# define _SRW(a,s,b) _FX(31,s,a,b,536) +# define _SRW_(a,s,b) _FX_(31,s,a,b,536) +# if __WORDSIZE == 64 +# define _RLDICL(a,s,h,b) _FMD(30,s,a,h&~32,b,0,h>>5) +# define _RLDICL_(a,s,h,b) _FMD_(30,s,a,h&~32,b,0,h>>5) +# define _EXTRDI(x,y,n,b) _RLDICL(x,y,(b+n),(64-n)) +# define _SRDI(x,y,n) _RLDICL(x,y,(64-n),n) 
+# define _CLRLDI(x,y,n) _RLDICL(x,y,0,n) +# define _RLDICR(a,s,h,e) _FMD(30,s,a,h&~32,e,1,h>>5) +# define _RLDICR_(a,s,h,e) _FMD_(30,s,a,h&~32,e,1,h>>5) +# define _EXTRLI(x,y,n,b) _RLDICR(x,y,b,(n-1)) +# define _SLDI(x,y,n) _RLDICR(x,y,n,(63-n)) +# define _CLRRDI(x,y,n) _RLDICR(x,y,0,(63-n)) +# define _RLDIC(a,s,h,b) _FMD(30,s,a,h&~32,b,2,h>>5) +# define _RLDIC_(a,s,h,b) _FMD_(30,s,a,h&~32,b,2,h>>5) +# define _CLRLSLDI(x,y,b,n) _RLDIC(x,y,n,(b-n)) +# define _RLDCL(a,s,h,b) _FMDS(30,s,a,h,b,8) +# define _RLDCL_(a,s,h,b) _FMDS_(30,s,a,h,b,8) +# define _ROTLD(x,y,z) _RLDCL(x,y,z,0) +# define _RLDCR(a,s,b,e) _FMDS(30,s,a,b,e,0) +# define _RLDCR_(a,s,b,e) _FMDS_(30,s,a,b,e,0) +# define _RLDIMI(a,s,h,b) _FMD(30,s,a,h&~32,b,3,h>>5) +# define _RLDIMI_(a,s,h,b) _FMD_(30,s,a,h&~32,b,3,h>>5) +# define _INSRDI(x,y,n,b) _RLDIMI(x,y,(64-(b+n)),b) +# define _SLD(a,s,b) _FX(31,s,a,b,27) +# define _SLD_(a,s,b) _FX_(31,s,a,b,27) +# define _SRD(a,s,b) _FX(31,s,a,b,539) +# define _SRD_(a,s,b) _FX_(31,s,a,b,539) +# define _SRADI(a,s,h) _FXS(31,s,a,h&~32,413,h>>5) +# define _SRADI_(a,s,h) _FXS_(31,s,a,h&~32,413,h>>5) +# define _SRAD(a,s,b) _FX(31,s,a,b,794) +# define _SRAD_(a,s,b) _FX_(31,s,a,b,794) +# endif +# define _STB(s,a,d) _FDs(38,s,a,d) +# define _STBU(s,a,d) _FDs(39,s,a,d) +# define _STBUX(s,a,b) _FX(31,s,a,b,247) +# define _STBX(s,a,b) _FX(31,s,a,b,215) +# define _STH(s,a,d) _FDs(44,s,a,d) +# define _STHBRX(s,a,b) _FX(31,s,a,b,918) +# define _STHU(s,a,d) _FDs(45,s,a,d) +# define _STHUX(s,a,b) _FX(31,s,a,b,439) +# define _STHX(s,a,b) _FX(31,s,a,b,407) +# define _STMW(s,a,d) _FDs(47,s,a,d) +# define _STWSI(s,a,nb) _FX(31,s,a,nb,725) +# define _STSWX(s,a,b) _FX(31,s,a,b,661) +# define _STW(s,a,d) _FDs(36,s,a,d) +# define _STWBRX(s,a,b) _FX(31,s,a,b,662) +# define _STWCX_(s,a,b) _FX_(31,s,a,b,150) +# define _STWU(s,a,d) _FDs(37,s,a,d) +# define _STWUX(s,a,b) _FX(31,s,a,b,183) +# define _STWX(s,a,b) _FX(31,s,a,b,151) +# define _STD(s,a,d) _FDs(62,s,a,d) +# define _STDX(s,a,b) 
_FX(31,s,a,b,149) +# define _STDCX(s,a,b) _FX_(31,s,a,b,214) +# define _STDU(s,a,d) _FDs(62,s,a,d|1) +# define _STDUX(s,a,b) _FX(31,s,a,b,181) +# define _SUBF(d,a,b) _FXO(31,d,a,b,0,40) +# define _SUBF_(d,a,b) _FXO_(31,d,a,b,0,40) +# define _SUBFO(d,a,b) _FXO(31,d,a,b,1,40) +# define _SUBFO_(d,a,b) _FXO_(31,d,a,b,1,40) +# define _SUB(d,a,b) _SUBF(d,b,a) +# define _SUB_(d,a,b) _SUBF_(d,b,a) +# define _SUBO(d,a,b) _SUBFO(d,b,a) +# define _SUBO_(d,a,b) _SUBFO_(d,b,a) +# define _SUBI(d,a,s) _ADDI(d,a,-s) +# define _SUBIS(d,a,s) _ADDIS(d,a,-s) +# define _SUBFC(d,a,b) _FXO(31,d,a,b,0,8) +# define _SUBFC_(d,a,b) _FXO_(31,d,a,b,0,8) +# define _SUBFCO(d,a,b) _FXO(31,d,a,b,1,8) +# define _SUBFCO_(d,a,b) _FXO_(31,d,a,b,1,8) +# define _SUBC(d,a,b) _SUBFC(d,b,a) +# define _SUBIC(d,a,s) _ADDIC(d,a,-s) +# define _SUBIC_(d,a,s) _ADDIC_(d,a,-s) +# define _SUBFE(d,a,b) _FXO(31,d,a,b,0,136) +# define _SUBFE_(d,a,b) _FXO_(31,d,a,b,0,136) +# define _SUBFEO(d,a,b) _FXO(31,d,a,b,1,136) +# define _SUBFEO_(d,a,b) _FXO_(31,d,a,b,1,136) +# define _SUBE(d,a,b) _SUBFE(d,b,a) +# define _SUBFIC(d,a,s) _FDs(8,d,a,s) +# define _SUBFME(d,a) _FXO(31,d,a,0,0,232) +# define _SUBFME_(d,a) _FXO_(31,d,a,0,0,232) +# define _SUBFMEO(d,a) _FXO(31,d,a,0,1,232) +# define _SUBFMEO_(d,a) _FXO_(31,d,a,0,1,232) +# define _SUBFZE(d,a) _FXO(31,d,a,0,0,200) +# define _SUBFZE_(d,a) _FXO_(31,d,a,0,0,200) +# define _SUBFZEO(d,a) _FXO(31,d,a,0,1,200) +# define _SUBFZEO_(d,a) _FXO_(31,d,a,0,1,200) +# define _SYNC(l, sc) _FX(31,l,sc,0,598) +# define _TLBIA() _FX(31,0,0,0,370) +# define _TLBIE(b) _FX(31,0,0,b,306) +# define _TLBSYNC() _FX(31,0,0,0,566) +# define _TW(t,a,b) _FX(31,t,a,b,4) +# define _TWEQ(a,b) _FX(31,4,a,b,4) +# define _TWLGE(a,b) _FX(31,5,a,b,4) +# define _TRAP() _FX(31,31,0,0,4) +# define _TWI(t,a,s) _FDs(3,t,a,s) +# define _TWGTI(a,s) _TWI(8,a,s) +# define _TWLLEI(a,s) _TWI(6,a,s) +# define _XOR(d,a,b) _FX(31,a,d,b,316) +# define _XOR_(d,a,b) _FX_(31,a,d,b,316) +# define _XORI(s,a,u) _FDu(26,a,s,u) +# 
define _XORIS(s,a,u) _FDu(27,a,s,u) + +// Atomics +# define _LDARX(rt, ra, rb) _FX(31, rt, ra, rb, 84) +# define _HWSYNC() _SYNC(0, 0) + +/* Word-size-neutral aliases: each name resolves to the doubleword opcode macro on 64-bit builds and to the word opcode macro on 32-bit builds. */ +#if __WORDSIZE == 64 +# define _STX(r0, r1, o) _STD(r0, r1, o) +# define _LXX(r0, r1, o) _LD(r0, r1, o) +# define _CMPX(r0, r1) _CMPD(r0, r1) +# define _CMPXI(r0, i0) _CMPDI(r0, i0) +# define _CMPLX(r0, r1) _CMPLD(r0, r1) +# define _CMPLXI(r0, u0) _CMPLDI(r0, u0) +# define _LXARX(r0, r1) _LDARX(r0, 0, r1) +# define _STXCX(r0, r1) _STDCX(r0, 0, r1) +#else +# define _STX(r0, r1, o) _STW(r0, r1, o) +/* the opcode table spells the word load _LWZ and the word store-conditional _STWCX_ */ +# define _LXX(r0, r1, o) _LWZ(r0, r1, o) +# define _CMPX(r0, r1) _CMPW(r0, r1) +# define _CMPXI(r0, i0) _CMPWI(r0, i0) +# define _CMPLX(r0, r1) _CMPLW(r0, r1) +# define _CMPLXI(r0, u0) _CMPLWI(r0, u0) +# define _LXARX(r0, r1) _LWARX(r0, 0, r1) +# define _STXCX(r0, r1) _STWCX_(r0, 0, r1) +#endif + +static void nop(jit_state_t*,int32_t); +static void movr(jit_state_t*,int32_t,int32_t); +static void movi(jit_state_t*,int32_t,jit_word_t); +static jit_reloc_t mov_addr(jit_state_t *,int32_t); +static jit_reloc_t movi_from_immediate(jit_state_t*,int32_t); +static void emit_immediate_reloc(jit_state_t*,int32_t,jit_bool_t); + +static void bswapr_us(jit_state_t*,int32_t,int32_t); +static void bswapr_ui(jit_state_t*,int32_t,int32_t); +static void bswapr_ul(jit_state_t*,int32_t,int32_t); + +static void addr(jit_state_t*,int32_t,int32_t,int32_t); +static void addi(jit_state_t*,int32_t,int32_t,jit_word_t); + +static void addcr(jit_state_t*,int32_t,int32_t,int32_t); +static void addci(jit_state_t*,int32_t,int32_t,jit_word_t); + +static void addxr(jit_state_t*,int32_t,int32_t,int32_t); +static void addxi(jit_state_t*,int32_t,int32_t,jit_word_t); + +static void subr(jit_state_t*,int32_t,int32_t,int32_t); +static void subi(jit_state_t*,int32_t,int32_t,jit_word_t); + +static void subcr(jit_state_t*,int32_t,int32_t,int32_t); +static void subci(jit_state_t*,int32_t,int32_t,jit_word_t); + +static void subxr(jit_state_t*,int32_t,int32_t,int32_t); +static void 
subxi(jit_state_t*,int32_t,int32_t,jit_word_t); + +# if __WORDSIZE == 32 +# define _MULR(r0,r1,r2) _MULLW(r0,r1,r2) +# define _MULLR(r0,r1,r2) _MULLW(r0,r1,r2) +# define _MULHR(r0,r1,r2) _MULHW(r0,r1,r2) +# define _MULHR_U(r0,r1,r2) _MULHWU(r0,r1,r2) +# else +# define _MULR(r0,r1,r2) _MULLD(r0,r1,r2) +# define _MULLR(r0,r1,r2) _MULLD(r0,r1,r2) +# define _MULHR(r0,r1,r2) _MULHD(r0,r1,r2) +# define _MULHR_U(r0,r1,r2) _MULHDU(r0,r1,r2) +# endif + +static void mulr(jit_state_t*,int32_t,int32_t,int32_t); +static void muli(jit_state_t*,int32_t,int32_t,jit_word_t); + +static void qmulr(jit_state_t*,int32_t,int32_t,int32_t,int32_t); +static void qmulr_u(jit_state_t*,int32_t,int32_t,int32_t,int32_t); +static void iqmulr(jit_state_t*,int32_t,int32_t, + int32_t,int32_t,jit_bool_t); + +static void qmuli(jit_state_t*,int32_t,int32_t,int32_t,jit_word_t); +static void qmuli_u(jit_state_t*,int32_t,int32_t,int32_t,jit_word_t); +static void iqmuli(jit_state_t*,int32_t,int32_t, + int32_t,jit_word_t,jit_bool_t); +# if __WORDSIZE == 32 +# define _DIVR(r0,r1,r2) _DIVW(r0,r1,r2) +# else +# define _DIVR(r0,r1,r2) _DIVD(r0,r1,r2) +# endif + +static void divr(jit_state_t*,int32_t,int32_t,int32_t); +static void divi(jit_state_t*,int32_t,int32_t,jit_word_t); + +# if __WORDSIZE == 32 +# define _DIVR_U(r0,r1,r2) _DIVWU(r0,r1,r2) +# else +# define _DIVR_U(r0,r1,r2) _DIVDU(r0,r1,r2) +# endif + +static void divr_u(jit_state_t*,int32_t,int32_t,int32_t); +static void divi_u(jit_state_t*,int32_t,int32_t,jit_word_t); + +static void qdivr(jit_state_t*,int32_t,int32_t,int32_t,int32_t); +static void qdivr_u(jit_state_t*,int32_t,int32_t,int32_t,int32_t); +static void iqdivr(jit_state_t*,int32_t,int32_t, + int32_t,int32_t,jit_bool_t); + +static void qdivi(jit_state_t*,int32_t,int32_t,int32_t,jit_word_t); +static void qdivi_u(jit_state_t*,int32_t,int32_t,int32_t,jit_word_t); +static void iqdivi(jit_state_t*,int32_t,int32_t, + int32_t,jit_word_t,jit_bool_t); + +static void 
remr(jit_state_t*,int32_t,int32_t,int32_t); +static void remi(jit_state_t*,int32_t,int32_t,jit_word_t); +static void remr_u(jit_state_t*,int32_t,int32_t,int32_t); +static void remi_u(jit_state_t*,int32_t,int32_t,jit_word_t); + +static void andr(jit_state_t*,int32_t,int32_t,int32_t); +static void andi(jit_state_t*,int32_t,int32_t,jit_word_t); + +static void orr(jit_state_t*,int32_t,int32_t,int32_t); +static void ori(jit_state_t*,int32_t,int32_t,jit_word_t); + +static void xorr(jit_state_t*,int32_t,int32_t,int32_t); +static void xori(jit_state_t*,int32_t,int32_t,jit_word_t); + +# if __WORDSIZE == 32 +# define _LSHR(r0,r1,r2) _SLW(r0,r1,r2) +# else +# define _LSHR(r0,r1,r2) _SLD(r0,r1,r2) +# endif + +static void lshr(jit_state_t*,int32_t,int32_t,int32_t); +static void lshi(jit_state_t*,int32_t,int32_t,jit_word_t); + +# if __WORDSIZE == 32 +# define _RSHR(r0,r1,r2) _SRAW(r0,r1,r2) +# else +# define _RSHR(r0,r1,r2) _SRAD(r0,r1,r2) +# endif + +static void rshr(jit_state_t*,int32_t,int32_t,int32_t); +static void rshi(jit_state_t*,int32_t,int32_t,jit_word_t); + +# if __WORDSIZE == 32 +# define _RSHR_U(r0,r1,r2) _SRW(r0,r1,r2) +# else +# define _RSHR_U(r0,r1,r2) _SRD(r0,r1,r2) +# endif + +static void rshr_u(jit_state_t*,int32_t,int32_t,int32_t); +static void rshi_u(jit_state_t*,int32_t,int32_t,jit_word_t); + +static jit_reloc_t bltr(jit_state_t*,int32_t,int32_t); +static jit_reloc_t blti(jit_state_t*,int32_t,jit_word_t); +static jit_reloc_t bltr_u(jit_state_t*,int32_t,int32_t); +static jit_reloc_t blti_u(jit_state_t*,int32_t,jit_word_t); +static jit_reloc_t bler(jit_state_t*,int32_t,int32_t); +static jit_reloc_t blei(jit_state_t*,int32_t,jit_word_t); +static jit_reloc_t bler_u(jit_state_t*,int32_t,int32_t); +static jit_reloc_t blei_u(jit_state_t*,int32_t,jit_word_t); +static jit_reloc_t beqr(jit_state_t*,int32_t,int32_t); +static jit_reloc_t beqi(jit_state_t*,int32_t,jit_word_t); +static jit_reloc_t bger(jit_state_t*,int32_t,int32_t); +static jit_reloc_t 
bgei(jit_state_t*,int32_t,jit_word_t); +static jit_reloc_t bger_u(jit_state_t*,int32_t,int32_t); +static jit_reloc_t bgei_u(jit_state_t*,int32_t,jit_word_t); +static jit_reloc_t bgtr(jit_state_t*,int32_t,int32_t); +static jit_reloc_t bgti(jit_state_t*,int32_t,jit_word_t); +static jit_reloc_t bgtr_u(jit_state_t*,int32_t,int32_t); +static jit_reloc_t bgti_u(jit_state_t*,int32_t,jit_word_t); +static jit_reloc_t bner(jit_state_t*,int32_t,int32_t); +static jit_reloc_t bnei(jit_state_t*,int32_t,jit_word_t); +static jit_reloc_t bmsr(jit_state_t*,int32_t,int32_t); +static jit_reloc_t bmsi(jit_state_t*,int32_t,jit_word_t); +static jit_reloc_t bmcr(jit_state_t*,int32_t,int32_t); +static jit_reloc_t bmci(jit_state_t*,int32_t,jit_word_t); +static jit_reloc_t boaddr(jit_state_t*,int32_t,int32_t); +static jit_reloc_t boaddi(jit_state_t*,int32_t,jit_word_t); +static jit_reloc_t bxaddr(jit_state_t*,int32_t,int32_t); +static jit_reloc_t bxaddi(jit_state_t*,int32_t,jit_word_t); +static jit_reloc_t bosubr(jit_state_t*,int32_t,int32_t); +static jit_reloc_t bosubi(jit_state_t*,int32_t,jit_word_t); +static jit_reloc_t bxsubr(jit_state_t*,int32_t,int32_t); +static jit_reloc_t bxsubi(jit_state_t*,int32_t,jit_word_t); +static jit_reloc_t boaddr_u(jit_state_t*,int32_t,int32_t); +static jit_reloc_t boaddi_u(jit_state_t*,int32_t,jit_word_t); +static jit_reloc_t bxaddr_u(jit_state_t*,int32_t,int32_t); +static jit_reloc_t bxaddi_u(jit_state_t*,int32_t,jit_word_t); +static jit_reloc_t bosubr_u(jit_state_t*,int32_t,int32_t); +static jit_reloc_t bosubi_u(jit_state_t*,int32_t,jit_word_t); +static jit_reloc_t bxsubr_u(jit_state_t*,int32_t,int32_t); +static jit_reloc_t bxsubi_u(jit_state_t*,int32_t,jit_word_t); + +static void ldr_c(jit_state_t*,int32_t,int32_t); +static void ldi_c(jit_state_t*,int32_t,jit_word_t); +static void ldxr_c(jit_state_t*,int32_t,int32_t,int32_t); +static void ldxi_c(jit_state_t*,int32_t,int32_t,jit_word_t); +static void ldr_uc(jit_state_t*,int32_t,int32_t); +static void 
ldi_uc(jit_state_t*,int32_t,jit_word_t); +static void ldxr_uc(jit_state_t*,int32_t,int32_t,int32_t); +static void ldxi_uc(jit_state_t*,int32_t,int32_t,jit_word_t); +static void ldr_s(jit_state_t*,int32_t,int32_t); +static void ldi_s(jit_state_t*,int32_t,jit_word_t); +static void ldxr_s(jit_state_t*,int32_t,int32_t,int32_t); +static void ldxi_s(jit_state_t*,int32_t,int32_t,jit_word_t); +static void ldr_us(jit_state_t*,int32_t,int32_t); +static void ldi_us(jit_state_t*,int32_t,jit_word_t); +static void ldxr_us(jit_state_t*,int32_t,int32_t,int32_t); +static void ldxi_us(jit_state_t*,int32_t,int32_t,jit_word_t); + +# if __WORDSIZE == 32 +# define _LDR_I(r0,r1) _LWZX(r0, rn(_R0), r1) +# else +# define _LDR_I(r0,r1) _LWAX(r0, rn(_R0), r1) +# endif + +static void ldr_i(jit_state_t*,int32_t,int32_t); +static void ldi_i(jit_state_t*,int32_t,jit_word_t); +static void ldxr_i(jit_state_t*,int32_t,int32_t,int32_t); +static void ldxi_i(jit_state_t*,int32_t,int32_t,jit_word_t); + +# if __WORDSIZE == 64 +static void ldr_ui(jit_state_t*,int32_t,int32_t); +static void ldi_ui(jit_state_t*,int32_t,jit_word_t); +static void ldxr_ui(jit_state_t*,int32_t,int32_t,int32_t); +static void ldxi_ui(jit_state_t*,int32_t,int32_t,jit_word_t); +static void ldr_l(jit_state_t*,int32_t,int32_t); +static void ldi_l(jit_state_t*,int32_t,jit_word_t); +static void ldxr_l(jit_state_t*,int32_t,int32_t,int32_t); +static void ldxi_l(jit_state_t*,int32_t,int32_t,jit_word_t); +# endif + +static void str_c(jit_state_t*,int32_t,int32_t); +static void sti_c(jit_state_t*,jit_word_t,int32_t); +static void stxr_c(jit_state_t*,int32_t,int32_t,int32_t); +static void stxi_c(jit_state_t*,jit_word_t,int32_t,int32_t); +static void str_s(jit_state_t*,int32_t,int32_t); +static void sti_s(jit_state_t*,jit_word_t,int32_t); +static void stxr_s(jit_state_t*,int32_t,int32_t,int32_t); +static void stxi_s(jit_state_t*,jit_word_t,int32_t,int32_t); +static void str_i(jit_state_t*,int32_t,int32_t); +static void 
sti_i(jit_state_t*,jit_word_t,int32_t); +static void stxr_i(jit_state_t*,int32_t,int32_t,int32_t); +static void stxi_i(jit_state_t*,jit_word_t,int32_t,int32_t); + +# if __WORDSIZE == 64 +static void str_l(jit_state_t*,int32_t,int32_t); +static void sti_l(jit_state_t*,jit_word_t,int32_t); +static void stxr_l(jit_state_t*,int32_t,int32_t,int32_t); +static void stxi_l(jit_state_t*,jit_word_t,int32_t,int32_t); +# endif + +static jit_reloc_t jmp(jit_state_t*); +static void jmpr(jit_state_t*,int32_t); +static void jmpi(jit_state_t*,jit_word_t); +static void callr(jit_state_t*,int32_t); +static void calli(jit_state_t*,jit_word_t); + +static void push_link_register(jit_state_t *); +static void pop_link_register(jit_state_t *); + +# define _u16(v) ((v) & 0xffff) +# define _u26(v) ((v) & 0x3ffffff) +static uint32_t +FXO(int o, int d, int a, int b, int e, int x, int r) +{ + assert(!(o & ~((1 << 6) - 1))); + assert(!(d & ~((1 << 5) - 1))); + assert(!(a & ~((1 << 5) - 1))); + assert(!(b & ~((1 << 5) - 1))); + assert(!(e & ~((1 << 1) - 1))); + assert(!(x & ~((1 << 9) - 1))); + assert(!(r & ~((1 << 1) - 1))); + instr_t ins = {.XO = {.po = o, .rt = d, .ra = a, .rb = b, .u0 = e, .xo = x, .u1 = r}}; + return ins.w; +} + +static uint32_t +FDs(int o, int d, int a, int s) +{ + assert(!(o & ~((1 << 6) - 1))); + assert(!(d & ~((1 << 5) - 1))); + assert(!(a & ~((1 << 5) - 1))); + assert(can_sign_extend_short_p(s)); + instr_t ins = {.D = {.po = o, .rx = d, .ra = a, .d = _u16(s)}}; + return ins.w; +} + +static uint32_t +FDu(int o, int d, int a, int s) +{ + assert(!(o & ~((1 << 6) - 1))); + assert(!(d & ~((1 << 5) - 1))); + assert(!(a & ~((1 << 5) - 1))); + assert(can_zero_extend_short_p(s)); + instr_t ins = {.D = {.po = o, .rx = d, .ra = a, .d = _u16(s)}}; + return ins.w; +} + +static uint32_t +FX(int o, int s, int a, int b, int x, int r) +{ + assert(!(o & ~((1 << 6) - 1))); + assert(!(s & ~((1 << 5) - 1))); + assert(!(a & ~((1 << 5) - 1))); + assert(!(b & ~((1 << 5) - 1))); + assert(!(x & 
~((1 << 10) - 1))); + assert(!(r & ~((1 << 1) - 1))); + instr_t ins = {.X = {.po = o, .f0 = s, .ra = a, .rb = b, .xo = x, .u0 = r}}; + return ins.w; +} + +static uint32_t +FI(int o, int t, int a, int k) +{ + assert(!(o & ~(( 1 << 6) - 1))); + assert(!(t & 3) && can_sign_extend_jump_p(t)); + assert(!(a & ~(( 1 << 1) - 1))); + assert(!(k & ~(( 1 << 1) - 1))); + instr_t ins = {.I = {.po = o, .li = _u26(t) >> 2, .aa = a, .lk = k}}; + return ins.w; +} + +static uint32_t +FB(int o, int bo, int bi, int t, int a, int k) +{ + assert(!( o & ~((1 << 6) - 1))); + assert(!(bo & ~((1 << 5) - 1))); + assert(!(bi & ~((1 << 5) - 1))); + assert(!(t & 3) && can_sign_extend_short_p(t)); + assert(!(a & ~(( 1 << 1) - 1))); + assert(!(k & ~(( 1 << 1) - 1))); + instr_t ins = {.B = {.po = o, .bo = bo, .bi = bi, .bd = _u16(t) >> 2, .aa = a, .lk = k}}; + return ins.w; +} + +static uint32_t +FXL(int o, int bo, int bi, int x, int k) +{ + assert(!( o & ~((1 << 6) - 1))); + assert(!(bo & ~((1 << 5) - 1))); + assert(!(bi & ~((1 << 5) - 1))); + assert(!(x & ~(( 1 << 10) - 1))); + assert(!(k & ~(( 1 << 1) - 1))); + instr_t ins = {.XL = {.po = o, .bo = bo, .ba = bi, .bb = 0, .xo = x, .lk = k}}; + return ins.w; +} + +static uint32_t +FC(int o, int d, int l, int a, int b, int x) +{ + // NOTE: this seems to one variation on the X format + assert(!(o & ~((1 << 6) - 1))); + assert(!(d & ~((1 << 3) - 1))); + assert(!(l & ~((1 << 1) - 1))); + assert(!(a & ~((1 << 5) - 1))); + assert(!(b & ~((1 << 5) - 1))); + assert(!(x & ~((1 << 10) - 1))); + instr_t ins = {.X = {.po = o, .f0 = d << 3 | l, .ra = a, .rb = b, .xo = x, .u0 = 0}}; + return ins.w; +} + +static uint32_t +FCI(int o, int d, int l, int a, int s) +{ + assert(!(o & ~((1 << 6) - 1))); + assert(!(d & ~((1 << 3) - 1))); + assert(!(l & ~((1 << 1) - 1))); + assert(!(a & ~((1 << 5) - 1))); + if (o == 11) assert(can_sign_extend_short_p(s)); + else if (o == 10) assert(can_zero_extend_short_p(s)); +#if DEBUG + else abort(); +#endif + instr_t ins = {.D = {.po 
= o, .rx = d << 2 | l, .ra = a, .d = _u16(s)}}; + return ins.w; +} + +static uint32_t +FXFX(int o, int d, int x, int f) +{ + assert(!(o & ~((1 << 6) - 1))); + assert(!(d & ~((1 << 5) - 1))); + assert(!(x & ~((1 << 10) - 1))); + assert(!(f & ~((1 << 10) - 1))); + instr_t ins = {.XFX = {.po = o, .rs = d, .fx = x, .xo = f, .u0 = 0}}; + return ins.w; +} + +static uint32_t +FM(int o, int s, int a, int h, int b, int e, int r) +{ + assert(!(o & ~((1 << 6) - 1))); + assert(!(s & ~((1 << 5) - 1))); + assert(!(a & ~((1 << 5) - 1))); + assert(!(h & ~((1 << 5) - 1))); + assert(!(b & ~((1 << 5) - 1))); + assert(!(e & ~((1 << 5) - 1))); + assert(!(r & ~((1 << 1) - 1))); + instr_t ins = {.M = {.po = o, .rs = s, .ra = a, .rb = h, .mb = b, .me = e, .rc = r}}; + return ins.w; +} + +# if __WORDSIZE == 64 +static uint32_t +FMD(int o, int s, int a, int h, int e, int x, int i, int r) +{ + assert(!(o & ~((1 << 6) - 1))); + assert(!(s & ~((1 << 5) - 1))); + assert(!(a & ~((1 << 5) - 1))); + assert(!(h & ~((1 << 5) - 1))); + assert(!(e & ~((1 << 6) - 1))); + assert(!(x & ~((1 << 3) - 1))); + assert(!(i & ~((1 << 1) - 1))); + assert(!(r & ~((1 << 1) - 1))); + e = (e >> 5) | ((e << 1) & 63); + instr_t ins = {.MD = {.po = o, .rs = s, .ra = a, .s0 = h, .mx = e, .xo = x, .s1 = i, .rc = r}}; + return ins.w; +} + +static uint32_t +FXS(int o, int s, int a, int h, int x, int i, int r) +{ + assert(!(o & ~((1 << 6) - 1))); + assert(!(s & ~((1 << 5) - 1))); + assert(!(a & ~((1 << 5) - 1))); + assert(!(h & ~((1 << 5) - 1))); + assert(!(x & ~((1 << 9) - 1))); + assert(!(i & ~((1 << 1) - 1))); + assert(!(r & ~((1 << 1) - 1))); + instr_t ins = {.XS = {.po = o, .rs = s, .ra = a, .s0 = h, .xo = x, .s1 = i, .rc = r}}; + return ins.w; +} +#endif + +#if !DEBUG +/* + * Use the sequence commented at + * http://tenfourfox.blogspot.com/2011/04/attention-g5-owners-your-javascript-no.html + */ +static void +mcrxr(jit_state_t *_jit, int32_t cr) +{ + int32_t reg = rn(get_temp_gpr(_jit)); + em_wp(_jit, _MFXER(reg)); + 
em_wp(_jit, _MTCRF(128, reg)); + em_wp(_jit, _RLWINM(reg, reg, 0, 0, 28)); + em_wp(_jit, _MTXER(reg)); + unget_temp_gpr(_jit); +} + +#else +static void +mcrxr(jit_state_t *_jit, int32_t cr) +{ + em_wp(_jit, _FX(31, cr << 2, 0, 0, 512)); +} +#endif + +static int32_t +read_jmp_offset(uint32_t *loc) +{ + // FIXME unsigned to signed? + instr_t *i = (instr_t *)loc; + return i->I.li; +} + +static int32_t +read_jcc_offset(uint32_t *loc) +{ + instr_t *i = (instr_t *)loc; + return i->B.bd; +} + +static void +patch_jmp_offset(uint32_t *loc, ptrdiff_t offset) +{ + assert(simm24_p(offset)); + instr_t *i = (instr_t *)loc; + i->I.li = offset; +} + +static void +patch_veneer_jmp_offset(uint32_t *loc, ptrdiff_t offset) +{ + patch_jmp_offset(loc, offset); +} + +static void +patch_veneer(uint32_t *loc, jit_pointer_t addr) +{ + patch_immediate_reloc(loc, addr); +} + +static void +emit_veneer(jit_state_t *_jit, jit_pointer_t target) +{ + jit_pointer_t veneer = jit_address(_jit); + + jit_gpr_t reg = get_temp_gpr(_jit); + emit_immediate_reloc(_jit, rn(reg), 1); + + // see mips-cpu.c:emit_veneer() + if (!jit_has_overflow(_jit)) + patch_veneer(veneer, target); + + emit_u32(_jit, _MTCTR(rn(reg))); + emit_u32(_jit, _BCTR()); + unget_temp_gpr(_jit); +} + +static void +patch_veneer_jcc_offset(uint32_t *loc, ptrdiff_t offset) +{ + patch_jcc_offset(loc, offset); +} + +static int +offset_in_jmp_range(ptrdiff_t offset, int flags) +{ + (void)flags; + return simm24_p(offset); +} + +static int +offset_in_jcc_range(ptrdiff_t offset, int flags) +{ + (void)flags; + return simm14_p(offset); +} + +static void +patch_jcc_offset(uint32_t *loc, ptrdiff_t offset) +{ + assert(simm14_p(offset)); + instr_t *i = (instr_t *)loc; + i->B.bd = offset; +} + +static uint32_t +patch_cc_jump(uint32_t inst, int32_t offset) +{ + instr_t i; + i.w = inst; + i.B.bd = offset; + return i.w; +} + +static jit_reloc_t +emit_cc_jump(jit_state_t *_jit, uint32_t inst) +{ + while (1) { + uint8_t *pc_base = _jit->pc.uc; + int32_t off 
= ((uint8_t *)jit_address(_jit)) - pc_base; + jit_reloc_t w = jit_reloc(_jit, JIT_RELOC_JCC_WITH_VENEER, 0, _jit->pc.uc, + pc_base, + 2); + uint8_t jump_width = 14; + + if (add_pending_literal(_jit, w, jump_width - 1)) { + em_wp(_jit, patch_cc_jump(inst, off >> 2)); + return w; + } + } +} + +static jit_reloc_t +emit_atomic_jump(jit_state_t *_jit, uint32_t inst) +{ + while (1) { + uint8_t *pc_base = _jit->pc.uc; + int32_t off = ((uint8_t *)jit_address(_jit)) - pc_base; + jit_reloc_t w = jit_reloc(_jit, JIT_RELOC_JCC_WITH_VENEER, 0, _jit->pc.uc, + pc_base, + 2); + uint8_t jump_width = 14; + + // TODO is JCC_WITH_VENEER fine here? + if (add_pending_literal(_jit, w, jump_width - 1)) { + emit_u32(_jit, patch_cc_jump(inst, off >> 2)); + return w; + } + } +} + +static uint32_t patch_jump(uint32_t inst, int32_t offset) +{ + instr_t i; + i.w = inst; + i.I.li = offset; + return i.w; +} + +static jit_reloc_t +emit_jump(jit_state_t *_jit, uint32_t inst) +{ + while(1) { + uint8_t *pc_base = _jit->pc.uc; + int32_t off = ((uint8_t *)jit_address(_jit)) - pc_base; + jit_reloc_t w = jit_reloc(_jit, JIT_RELOC_JMP_WITH_VENEER, 0, _jit->pc.uc, + pc_base, + 2); + uint8_t jump_width = 24; + + if (add_pending_literal(_jit, w, jump_width - 1)) { + em_wp(_jit, patch_jump(inst, off >> 2)); + return w; + } + } +} + +static void +nop(jit_state_t *_jit, int32_t i0) +{ + for (; i0 > 0; i0 -= 4) + em_wp(_jit, _NOP()); + assert(i0 == 0); +} + +static void +movr(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + if (r0 != r1) + em_wp(_jit, _MR(r0, r1)); +} + +static void +movi(jit_state_t *_jit, int32_t r0, jit_word_t i0) +{ + if (can_sign_extend_short_p(i0)) + em_wp(_jit, _LI(r0, i0)); + else { + if (can_sign_extend_int_p(i0)) + em_wp(_jit, _LIS(r0, (int16_t)(i0 >> 16))); + else if (can_zero_extend_int_p(i0)) { + if (i0 & 0xffff0000) { + em_wp(_jit, _ORI(r0, r0, (uint16_t)(i0 >> 16))); + em_wp(_jit, _SLWI(r0, r0, 16)); + } + } +# if __WORDSIZE == 64 + else { + movi(_jit, r0, (uint32_t)(i0 >> 32)); + 
if (i0 & 0xffff0000) { + em_wp(_jit, _SLDI(r0, r0, 16)); + em_wp(_jit, _ORI(r0, r0, (uint16_t)(i0 >> 16))); + em_wp(_jit, _SLDI(r0, r0, 16)); + } + else + em_wp(_jit, _SLDI(r0, r0, 32)); + } +# endif + if (i0 & 0xffff) + em_wp(_jit, _ORI(r0, r0, (uint16_t)i0)); + } +} + +typedef struct { +#if __WORDSIZE == 64 + instr_t lis0; + instr_t ori0; + instr_t sldi0; + + instr_t ori1; + instr_t sldi1; + instr_t ori2; +#else + instr_t lis; + instr_t ori; +#endif +} immediate_t; + +static void +patch_immediate_reloc(uint32_t *loc, jit_pointer_t addr) +{ + immediate_t *i = (immediate_t *)loc; + jit_word_t a = (jit_word_t)addr; +#if __WORDSIZE == 64 + i->lis0.D.d = a >> 48; + i->ori0.D.d = a >> 32; + i->ori1.D.d = a >> 16; + i->ori2.D.d = a & 0xffff; +#else + i->lis.D.d = a >> 16; + i->ori.D.d = a & 0xffff; +#endif +} + +static void +emit_immediate_reloc(jit_state_t *_jit, int32_t r0, jit_bool_t in_veneer) +{ + void (*emit)(jit_state_t * _jit, uint32_t u32) = + in_veneer ? emit_u32 : emit_u32_with_pool; + + emit(_jit, _LIS(r0, 0)); + emit(_jit, _ORI(r0, r0, 0)); +# if __WORDSIZE == 64 + emit(_jit, _SLDI(r0, r0, 16)); + emit(_jit, _ORI(r0, r0, 0)); + emit(_jit, _SLDI(r0, r0, 16)); + emit(_jit, _ORI(r0, r0, 0)); +# endif +} + +static jit_reloc_t +movi_from_immediate(jit_state_t *_jit, int32_t r0) +{ + uint8_t *pc_base = _jit->pc.uc; + jit_reloc_t w = jit_reloc(_jit, JIT_RELOC_IMMEDIATE, 0, _jit->pc.uc, pc_base, 0); + emit_immediate_reloc(_jit, r0, 0); + return w; +} + +static jit_reloc_t +mov_addr(jit_state_t *_jit, int32_t r0) +{ + return movi_from_immediate(_jit, r0); +} + +static void +negr(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + em_wp(_jit, _NEG(r0, r1)); +} + +static void +comr(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + em_wp(_jit, _NOT(r0, r1)); +} + +static void +extr_c(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + em_wp(_jit, _EXTSB(r0, r1)); +} + +static void +extr_uc(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + em_wp(_jit, _ANDI_(r0, r1, 0xff)); +} + 
+static void +extr_s(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + em_wp(_jit, _EXTSH(r0, r1)); +} + +static void +extr_us(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + em_wp(_jit, _ANDI_(r0, r1, 0xffff)); +} + +static void +bswapr_us(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + jit_gpr_t t0 = get_temp_gpr(_jit); + rshi(_jit, rn(t0), r1, 8); + andi(_jit, r0, r1, 0xff); + andi(_jit, rn(t0), rn(t0), 0xff); + lshi(_jit, r0, r0, 8); + orr(_jit, r0, r0, rn(t0)); + unget_temp_gpr(_jit); +} + +static void +bswapr_ui(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + jit_gpr_t reg = get_temp_gpr(_jit); + em_wp(_jit, _ROTLWI(rn(reg), r1, 8)); + em_wp(_jit, _RLWIMI(rn(reg), r1, 24, 0, 7)); + em_wp(_jit, _RLWIMI(rn(reg), r1, 24, 16, 23)); + em_wp(_jit, _CLRLDI(r0, rn(reg), 32)); + unget_temp_gpr(_jit); +} + +#if __WORDSIZE == 64 +static void +bswapr_ul(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + jit_gpr_t reg = get_temp_gpr(_jit); + rshi_u(_jit, rn(reg), r1, 32); + bswapr_ui(_jit, r0, r1); + bswapr_ui(_jit, rn(reg), rn(reg)); + lshi(_jit, r0, r0, 32); + orr(_jit, r0, r0, rn(reg)); + unget_temp_gpr(_jit); +} +#endif + +static void +addr(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2) +{ + em_wp(_jit, _ADD(r0, r1, r2)); +} + +static void +addi(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0) +{ + if (can_sign_extend_short_p(i0)) + em_wp(_jit, _ADDI(r0, r1, i0)); + else if (can_zero_extend_int_p(i0) && !(i0 & 0x0000ffff)) + em_wp(_jit, _ADDIS(r0, r1, i0 >> 16)); + else { + jit_gpr_t reg = get_temp_gpr(_jit); + movi(_jit, rn(reg), i0); + em_wp(_jit, _ADD(r0, r1, rn(reg))); + unget_temp_gpr(_jit); + } +} + +static void +addcr(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2) +{ + em_wp(_jit, _ADDC(r0, r1, r2)); +} + +static void +addci(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0) +{ + if (can_sign_extend_short_p(i0)) + em_wp(_jit, _ADDIC(r0, r1, i0)); + else { + jit_gpr_t reg = get_temp_gpr(_jit); + movi(_jit, rn(reg), i0); + addcr(_jit, 
r0, r1, rn(reg)); + unget_temp_gpr(_jit); + } +} + +static void +addxr(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2) +{ + em_wp(_jit, _ADDE(r0, r1, r2)); +} + +static void +addxi(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0) +{ + jit_gpr_t reg = get_temp_gpr(_jit); + movi(_jit, rn(reg), i0); + addxr(_jit, r0, r1, rn(reg)); + unget_temp_gpr(_jit); +} + +static void +subr(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2) +{ + em_wp(_jit, _SUB(r0, r1, r2)); +} + +static void +subi(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0) +{ + jit_word_t ni0 = -i0; + if (can_sign_extend_short_p(ni0)) { + em_wp(_jit, _ADDI(r0, r1, ni0)); + } else if (can_zero_extend_int_p(ni0) && !(ni0 & 0x0000ffff)) { + em_wp(_jit, _ADDIS(r0, r1, ni0 >> 16)); + } else { + jit_gpr_t reg = get_temp_gpr(_jit); + movi(_jit, rn(reg), i0); + subr(_jit, r0, r1, rn(reg)); + unget_temp_gpr(_jit); + } +} + +static void +subcr(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2) +{ + em_wp(_jit, _SUBC(r0, r1, r2)); +} + +static void +subci(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0) +{ + jit_gpr_t reg = get_temp_gpr(_jit); + movi(_jit, rn(reg), i0); + subcr(_jit, r0, r1, rn(reg)); + unget_temp_gpr(_jit); +} + +static void +subxr(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2) +{ + em_wp(_jit, _SUBFE(r0, r2, r1)); +} + +static void +subxi(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0) +{ + jit_gpr_t reg = get_temp_gpr(_jit); + movi(_jit, rn(reg), i0); + subxr(_jit, r0, rn(reg), r1); + unget_temp_gpr(_jit); +} + +static void +mulr(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2) +{ + em_wp(_jit, _MULR(r0, r1, r2)); +} + +static void +muli(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0) +{ + if (can_sign_extend_short_p(i0)) + em_wp(_jit, _MULLI(r0, r1, i0)); + else { + jit_gpr_t reg = get_temp_gpr(_jit); + movi(_jit, rn(reg), i0); + mulr(_jit, r0, r1, rn(reg)); + unget_temp_gpr(_jit); + } +} + +static void 
+iqmulr(jit_state_t *_jit, int32_t r0, int32_t r1, + int32_t r2, int32_t r3, jit_bool_t sign) +{ + jit_gpr_t reg; + if (r0 == r2 || r0 == r3) { + reg = get_temp_gpr(_jit); + em_wp(_jit, _MULLR(rn(reg), r2, r3)); + } else { + em_wp(_jit, _MULLR(r0, r2, r3)); + } + + if (sign) + em_wp(_jit, _MULHR(r1, r2, r3)); + else + em_wp(_jit, _MULHR_U(r1, r2, r3)); + + if (r0 == r2 || r0 == r3) { + movr(_jit, r0, rn(reg)); + unget_temp_gpr(_jit); + } +} + +static void +qmulr(jit_state_t *_jit, int32_t r0, int32_t r1, + int32_t r2, int32_t r3) +{ + iqmulr(_jit, r0, r1, r2, r3, 1); +} + +static void +qmulr_u(jit_state_t *_jit, int32_t r0, int32_t r1, + int32_t r2, int32_t r3) +{ + iqmulr(_jit, r0, r1, r2, r3, 0); +} + +static void +iqmuli(jit_state_t *_jit, int32_t r0, int32_t r1, + int32_t r2, jit_word_t i0, jit_bool_t sign) +{ + jit_gpr_t reg = get_temp_gpr(_jit); + movi(_jit, rn(reg), i0); + iqmulr(_jit, r0, r1, r2, rn(reg), sign); + unget_temp_gpr(_jit); +} + +static void +qmuli(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2, + jit_word_t i0) +{ + iqmuli(_jit, r0, r1, r2, i0, 1); +} + +static void +qmuli_u(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2, + jit_word_t i0) +{ + iqmuli(_jit, r0, r1, r2, i0, 0); +} + +static void +divr(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2) +{ + em_wp(_jit, _DIVR(r0, r1, r2)); +} + +static void +divi(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0) +{ + jit_gpr_t reg = get_temp_gpr(_jit); + movi(_jit, rn(reg), i0); + divr(_jit, r0, r1, rn(reg)); + unget_temp_gpr(_jit); +} + +static void +divr_u(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2) +{ + em_wp(_jit, _DIVR_U(r0, r1, r2)); +} + +static void +divi_u(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0) +{ + jit_gpr_t reg = get_temp_gpr(_jit); + movi(_jit, rn(reg), i0); + divr_u(_jit, r0, r1, rn(reg)); + unget_temp_gpr(_jit); +} + +static void +iqdivr(jit_state_t *_jit, int32_t r0, int32_t r1, + int32_t r2, int32_t r3, jit_bool_t sign) +{ + 
jit_gpr_t sv0; int32_t rg0; + jit_gpr_t sv1; int32_t rg1; + + if (r0 == r2 || r0 == r3) { + sv0 = get_temp_gpr(_jit); + rg0 = rn(sv0); + } else { + rg0 = r0; + } + + if (r1 == r2 || r1 == r3) { + sv1 = get_temp_gpr(_jit); + rg1 = rn(sv1); + } else { + rg1 = r1; + } + + if (sign) + divr(_jit, rg0, r2, r3); + else + divr_u(_jit, rg0, r2, r3); + + mulr(_jit, rg1, r3, rg0); + subr(_jit, rg1, r2, rg1); + + if (rg0 != r0) { + movr(_jit, r0, rg0); + unget_temp_gpr(_jit); + } + + if (rg1 != r1) { + movr(_jit, r1, rg1); + unget_temp_gpr(_jit); + } +} + +static void +qdivr(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2, int32_t r3) +{ + iqdivr(_jit, r0, r1, r2, r3, 1); +} + +static void +qdivr_u(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2, int32_t r3) +{ + iqdivr(_jit, r0, r1, r2, r3, 0); +} + +static void +iqdivi(jit_state_t *_jit, int32_t r0, int32_t r1, + int32_t r2, jit_word_t i0, jit_bool_t sign) +{ + jit_gpr_t reg = get_temp_gpr(_jit); + movi(_jit, rn(reg), i0); + iqdivr(_jit, r0, r1, r2, rn(reg), sign); + unget_temp_gpr(_jit); +} + +static void +qdivi(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2, jit_word_t i0) +{ + iqdivi(_jit, r0, r1, r2, i0, 1); +} + +static void +qdivi_u(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2, jit_word_t u0) +{ + iqdivi(_jit, r0, r1, r2, u0, 0); +} + +static void +remr(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2) +{ + if (r0 == r1 || r0 == r2) { + jit_gpr_t reg = get_temp_gpr(_jit); + divr(_jit, rn(reg), r1, r2); + mulr(_jit, rn(reg), r2, rn(reg)); + subr(_jit, r0, r1, rn(reg)); + unget_temp_gpr(_jit); + } + else { + divr(_jit, r0, r1, r2); + mulr(_jit, r0, r2, r0); + subr(_jit, r0, r1, r0); + } +} + +static void +remi(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0) +{ + jit_gpr_t reg = get_temp_gpr(_jit); + movi(_jit, rn(reg), i0); + remr(_jit, r0, r1, rn(reg)); + unget_temp_gpr(_jit); +} + +static void +remr_u(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2) +{ + if (r0 == r1 || 
r0 == r2) { + jit_gpr_t reg = get_temp_gpr(_jit); + divr_u(_jit, rn(reg), r1, r2); + mulr(_jit, rn(reg), r2, rn(reg)); + subr(_jit, r0, r1, rn(reg)); + unget_temp_gpr(_jit); + } + else { + divr_u(_jit, r0, r1, r2); + mulr(_jit, r0, r2, r0); + subr(_jit, r0, r1, r0); + } +} + +static void +remi_u(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0) +{ + jit_gpr_t reg = get_temp_gpr(_jit); + movi(_jit, rn(reg), i0); + remr_u(_jit, r0, r1, rn(reg)); + unget_temp_gpr(_jit); +} + +static void +andr(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2) +{ + em_wp(_jit, _AND(r0, r1, r2)); +} + +static void +andi(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0) +{ + if (can_zero_extend_short_p(i0)) { + em_wp(_jit, _ANDI_(r0, r1, i0)); + } else if (can_zero_extend_int_p(i0) && !(i0 & 0x0000ffff)) { + em_wp(_jit, _ANDIS_(r0, r1, (jit_uword_t)i0 >> 16)); + } else { + jit_gpr_t reg = get_temp_gpr(_jit); + movi(_jit, rn(reg), i0); + em_wp(_jit, _AND(r0, r1, rn(reg))); + unget_temp_gpr(_jit); + } +} + +static void +orr(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2) +{ + em_wp(_jit, _OR(r0, r1, r2)); +} + +static void +ori(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0) +{ + if (can_zero_extend_short_p(i0)) { + em_wp(_jit, _ORI(r0, r1, i0)); + } else if (can_zero_extend_int_p(i0) && !(i0 & 0x0000ffff)) { + em_wp(_jit, _ORIS(r0, r1, (jit_uword_t)i0 >> 16)); + } else { + jit_gpr_t reg = get_temp_gpr(_jit); + movi(_jit, rn(reg), i0); + orr(_jit, r0, r1, rn(reg)); + unget_temp_gpr(_jit); + } +} + +static void +xorr(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2) +{ + em_wp(_jit, _XOR(r0, r1, r2)); +} + +static void +xori(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0) +{ + if (can_zero_extend_short_p(i0)) { + em_wp(_jit, _XORI(r0, r1, i0)); + } else if (can_zero_extend_int_p(i0) && !(i0 & 0x0000ffff)) { + em_wp(_jit, _XORIS(r0, r1, (jit_uword_t)i0 >> 16)); + } else { + jit_gpr_t reg = get_temp_gpr(_jit); + movi(_jit, rn(reg), i0); + 
xorr(_jit, r0, r1, rn(reg)); + unget_temp_gpr(_jit); + } +} + +static void +lshr(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2) +{ + em_wp(_jit, _LSHR(r0, r1, r2)); +} + +static void +lshi(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0) +{ + if (i0 == 0) { + movr(_jit, r0, r1); + } else { +# if __WORDSIZE == 32 + em_wp(_jit, _SLWI(r0, r1, i0)); +# else + em_wp(_jit, _SLDI(r0, r1, i0)); +# endif + } +} + +static void +rshr(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2) +{ + em_wp(_jit, _RSHR(r0, r1, r2)); +} + +static void +rshi(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0) +{ + if (i0 == 0) { + movr(_jit, r0, r1); + } else { +# if __WORDSIZE == 32 + em_wp(_jit, _SRAWI(r0, r1, i0)); +# else + em_wp(_jit, _SRADI(r0, r1, i0)); +# endif + } +} + +static void +rshr_u(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2) +{ + em_wp(_jit, _RSHR_U(r0, r1, r2)); +} + +static void +rshi_u(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0) +{ + if (i0 == 0) { + movr(_jit, r0, r1); + } else { +# if __WORDSIZE == 32 + em_wp(_jit, _SRWI(r0, r1, i0)); +# else + em_wp(_jit, _SRDI(r0, r1, i0)); +# endif + } +} + +static jit_reloc_t +bltr(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + em_wp(_jit, _CMPX(r0, r1)); + return emit_cc_jump(_jit, _BLT(0)); +} + +static jit_reloc_t +blti(jit_state_t *_jit, int32_t r0, jit_word_t i1) +{ + if (can_sign_extend_short_p(i1)) { + em_wp(_jit, _CMPXI(r0, i1)); + } else { + jit_gpr_t reg = get_temp_gpr(_jit); + movi(_jit, rn(reg), i1); + em_wp(_jit, _CMPX(r0, rn(reg))); + unget_temp_gpr(_jit); + } + + return emit_cc_jump(_jit, _BLT(0)); +} + +static jit_reloc_t +bltr_u(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + em_wp(_jit, _CMPLX(r0, r1)); + return emit_cc_jump(_jit, _BLT(0)); +} + +static jit_reloc_t +blti_u(jit_state_t *_jit, int32_t r0, jit_word_t i1) +{ + if (can_zero_extend_short_p(i1)) { + em_wp(_jit, _CMPLXI(r0, i1)); + } else { + jit_gpr_t reg = get_temp_gpr(_jit); + movi(_jit, rn(reg), 
i1); + em_wp(_jit, _CMPLX(r0, rn(reg))); + unget_temp_gpr(_jit); + } + + return emit_cc_jump(_jit, _BLT(0)); +} + +static jit_reloc_t +bler(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + em_wp(_jit, _CMPX(r0, r1)); + return emit_cc_jump(_jit, _BLE(0)); +} + +static jit_reloc_t +blei(jit_state_t *_jit, int32_t r0, jit_word_t i1) +{ + if (can_sign_extend_short_p(i1)) { + em_wp(_jit, _CMPXI(r0, i1)); + } else { + jit_gpr_t reg = get_temp_gpr(_jit); + movi(_jit, rn(reg), i1); + em_wp(_jit, _CMPX(r0, rn(reg))); + unget_temp_gpr(_jit); + } + + return emit_cc_jump(_jit, _BLE(0)); +} + +static jit_reloc_t +bler_u(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + em_wp(_jit, _CMPLX(r0, r1)); + return emit_cc_jump(_jit, _BLE(0)); +} + +static jit_reloc_t +blei_u(jit_state_t *_jit, int32_t r0, jit_word_t i1) +{ + if (can_zero_extend_short_p(i1)) { + em_wp(_jit, _CMPLXI(r0, i1)); + } else { + jit_gpr_t reg = get_temp_gpr(_jit); + movi(_jit, rn(reg), i1); + em_wp(_jit, _CMPLX(r0, rn(reg))); + unget_temp_gpr(_jit); + } + + return emit_cc_jump(_jit, _BLE(0)); +} + +static jit_reloc_t +beqr(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + em_wp(_jit, _CMPX(r0, r1)); + return emit_cc_jump(_jit, _BEQ(0)); +} + +static jit_reloc_t +beqi(jit_state_t *_jit, int32_t r0, jit_word_t i1) +{ + if (can_sign_extend_short_p(i1)) { + em_wp(_jit, _CMPXI(r0, i1)); + } else if (can_zero_extend_short_p(i1)) { + em_wp(_jit, _CMPLXI(r0, i1)); + } else { + jit_gpr_t reg = get_temp_gpr(_jit); + movi(_jit, rn(reg), i1); + em_wp(_jit, _CMPX(r0, rn(reg))); + unget_temp_gpr(_jit); + } + + return emit_cc_jump(_jit, _BEQ(0)); +} + +static jit_reloc_t +bger(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + em_wp(_jit, _CMPX(r0, r1)); + return emit_cc_jump(_jit, _BGE(0)); +} + +static jit_reloc_t +bgei(jit_state_t *_jit, int32_t r0, jit_word_t i1) +{ + if (can_sign_extend_short_p(i1)) { + em_wp(_jit, _CMPXI(r0, i1)); + } else { + jit_gpr_t reg = get_temp_gpr(_jit); + movi(_jit, rn(reg), i1); + em_wp(_jit, 
_CMPX(r0, rn(reg))); + unget_temp_gpr(_jit); + } + + return emit_cc_jump(_jit, _BGE(0)); +} + +static jit_reloc_t +bger_u(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + em_wp(_jit, _CMPLX(r0, r1)); + return emit_cc_jump(_jit, _BGE(0)); +} + +static jit_reloc_t +bgei_u(jit_state_t *_jit, int32_t r0, jit_word_t i1) +{ + if (can_zero_extend_short_p(i1)) { + em_wp(_jit, _CMPLXI(r0, i1)); + } else { + jit_gpr_t reg = get_temp_gpr(_jit); + movi(_jit, rn(reg), i1); + em_wp(_jit, _CMPLX(r0, rn(reg))); + unget_temp_gpr(_jit); + } + return emit_cc_jump(_jit, _BGE(0)); +} + +static jit_reloc_t +bgtr(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + em_wp(_jit, _CMPX(r0, r1)); + return emit_cc_jump(_jit, _BGT(0)); +} + +static jit_reloc_t +bgti(jit_state_t *_jit, int32_t r0, jit_word_t i1) +{ + if (can_sign_extend_short_p(i1)) { + em_wp(_jit, _CMPXI(r0, i1)); + } else { + jit_gpr_t reg = get_temp_gpr(_jit); + movi(_jit, rn(reg), i1); + em_wp(_jit, _CMPX(r0, rn(reg))); + unget_temp_gpr(_jit); + } + return emit_cc_jump(_jit, _BGT(0)); +} + +static jit_reloc_t +bgtr_u(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + em_wp(_jit, _CMPLX(r0, r1)); + return emit_cc_jump(_jit, _BGT(0)); +} + +static jit_reloc_t +bgti_u(jit_state_t *_jit, int32_t r0, jit_word_t i1) +{ + if (can_zero_extend_short_p(i1)) { + em_wp(_jit, _CMPLXI(r0, i1)); + } else { + jit_gpr_t reg = get_temp_gpr(_jit); + movi(_jit, rn(reg), i1); + em_wp(_jit, _CMPLX(r0, rn(reg))); + unget_temp_gpr(_jit); + } + + return emit_cc_jump(_jit, _BGT(0)); +} + +static jit_reloc_t +bner(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + em_wp(_jit, _CMPX(r0, r1)); + return emit_cc_jump(_jit, _BNE(0)); +} + +static jit_reloc_t +bnei(jit_state_t *_jit, int32_t r0, jit_word_t i1) +{ + if (can_sign_extend_short_p(i1)) { + em_wp(_jit, _CMPXI(r0, i1)); + } else if (can_zero_extend_short_p(i1)) { + em_wp(_jit, _CMPLXI(r0, i1)); + } else { + jit_gpr_t reg = get_temp_gpr(_jit); + movi(_jit, rn(reg), i1); + em_wp(_jit, _CMPX(r0, rn(reg))); + 
unget_temp_gpr(_jit); + } + + return emit_cc_jump(_jit, _BNE(0)); +} + +static jit_reloc_t +bmsr(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + jit_gpr_t reg = get_temp_gpr(_jit); + andr(_jit, rn(reg), r0, r1); + jit_reloc_t w = bnei(_jit, rn(reg), 0); + unget_temp_gpr(_jit); + return w; +} + +static jit_reloc_t +bmsi(jit_state_t *_jit, int32_t r0, jit_word_t i1) +{ + jit_gpr_t reg = get_temp_gpr(_jit); + andi(_jit, rn(reg), r0, i1); + jit_reloc_t w = bnei(_jit, rn(reg), 0); + unget_temp_gpr(_jit); + return w; +} + +static jit_reloc_t +bmcr(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + jit_gpr_t reg = get_temp_gpr(_jit); + andr(_jit, rn(reg), r0, r1); + jit_reloc_t w = beqi(_jit, rn(reg), 0); + unget_temp_gpr(_jit); + return w; +} + +static jit_reloc_t +bmci(jit_state_t *_jit, int32_t r0, jit_word_t i1) +{ + jit_gpr_t reg = get_temp_gpr(_jit); + andi(_jit, rn(reg), r0, i1); + jit_reloc_t w = beqi(_jit, rn(reg), 0); + unget_temp_gpr(_jit); + return w; +} + +static jit_reloc_t +boaddr(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + em_wp(_jit, _ADDO(r0, r0, r1)); + mcrxr(_jit, CR_0); + return emit_cc_jump(_jit, _BGT(0)); /* GT = bit 1 of XER = OV */ +} + +static jit_reloc_t +boaddi(jit_state_t *_jit, int32_t r0, jit_word_t i1) +{ + jit_gpr_t reg = get_temp_gpr(_jit); + movi(_jit, rn(reg), i1); + jit_reloc_t w = boaddr(_jit, r0, rn(reg)); + unget_temp_gpr(_jit); + return w; +} + +static jit_reloc_t +bxaddr(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + em_wp(_jit, _ADDO(r0, r0, r1)); + mcrxr(_jit, CR_0); + return emit_cc_jump(_jit, _BLE(0)); +} + +static jit_reloc_t +bxaddi(jit_state_t *_jit, int32_t r0, jit_word_t i1) +{ + jit_gpr_t reg = get_temp_gpr(_jit); + movi(_jit, rn(reg), i1); + jit_reloc_t w = bxaddr(_jit, r0, rn(reg)); + unget_temp_gpr(_jit); + return w; +} + +static jit_reloc_t +bosubr(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + em_wp(_jit, _SUBO(r0, r0, r1)); + mcrxr(_jit, CR_0); + return emit_cc_jump(_jit, _BGT(0)); +} + +static jit_reloc_t 
+bosubi(jit_state_t *_jit, int32_t r0, jit_word_t i1) +{ + jit_gpr_t reg = get_temp_gpr(_jit); + movi(_jit, rn(reg), i1); + jit_reloc_t w = bosubr(_jit, r0, rn(reg)); + unget_temp_gpr(_jit); + return w; +} + +static jit_reloc_t +bxsubr(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + em_wp(_jit, _SUBO(r0, r0, r1)); + mcrxr(_jit, CR_0); + return emit_cc_jump(_jit, _BLE(0)); +} + +static jit_reloc_t +bxsubi(jit_state_t *_jit, int32_t r0, jit_word_t i1) +{ + jit_gpr_t reg = get_temp_gpr(_jit); + movi(_jit, rn(reg), i1); + jit_reloc_t w = bxsubr(_jit, r0, rn(reg)); + unget_temp_gpr(_jit); + return w; +} + +static jit_reloc_t +boaddr_u(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + em_wp(_jit, _ADDC(r0, r0, r1)); + mcrxr(_jit, CR_0); + return emit_cc_jump(_jit, _BEQ(0)); /* EQ = bit 2 of XER = CA */ +} + +static jit_reloc_t +boaddi_u(jit_state_t *_jit, int32_t r0, jit_word_t i1) +{ + if (can_sign_extend_short_p(i1)) { + em_wp(_jit, _ADDIC(r0, r0, i1)); + mcrxr(_jit, CR_0); + return emit_cc_jump(_jit, _BEQ(0)); + } + + jit_gpr_t reg = get_temp_gpr(_jit); + movi(_jit, rn(reg), i1); + jit_reloc_t w = boaddr_u(_jit, r0, rn(reg)); + unget_temp_gpr(_jit); + return w; +} + +static jit_reloc_t +bxaddr_u(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + em_wp(_jit, _ADDC(r0, r0, r1)); + mcrxr(_jit, CR_0); + return emit_cc_jump(_jit, _BNE(0)); +} + +static jit_reloc_t +bxaddi_u(jit_state_t *_jit, int32_t r0, jit_word_t i1) +{ + if (can_sign_extend_short_p(i1)) { + em_wp(_jit, _ADDIC(r0, r0, i1)); + mcrxr(_jit, CR_0); + return emit_cc_jump(_jit, _BNE(0)); + } + + jit_gpr_t reg = get_temp_gpr(_jit); + movi(_jit, rn(reg), i1); + jit_reloc_t w = bxaddr_u(_jit, r0, rn(reg)); + unget_temp_gpr(_jit); + return w; +} + +static jit_reloc_t +bosubr_u(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + em_wp(_jit, _SUBC(r0, r0, r1)); + mcrxr(_jit, CR_0); + return emit_cc_jump(_jit, _BNE(0)); /* PPC uses "carry" not "borrow" */ +} + +static jit_reloc_t +bosubi_u(jit_state_t *_jit, int32_t r0, 
jit_word_t i1) +{ + jit_gpr_t reg = get_temp_gpr(_jit); + movi(_jit, rn(reg), i1); + jit_reloc_t w = bosubr_u(_jit, r0, rn(reg)); + unget_temp_gpr(_jit); + return w; +} + +static jit_reloc_t +bxsubr_u(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + em_wp(_jit, _SUBC(r0, r0, r1)); + mcrxr(_jit, CR_0); + return emit_cc_jump(_jit, _BEQ(0)); +} + +static jit_reloc_t +bxsubi_u(jit_state_t *_jit, int32_t r0, jit_word_t i1) +{ + jit_gpr_t reg = get_temp_gpr(_jit); + movi(_jit, rn(reg), i1); + jit_reloc_t w = bxsubr_u(_jit, r0, rn(reg)); + unget_temp_gpr(_jit); + return w; +} + +#if __WORDSIZE == 64 +static void +extr_i(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + em_wp(_jit, _EXTSW(r0, r1)); +} + +static void +extr_ui(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + em_wp(_jit, _CLRLDI(r0, r1, 32)); +} +#endif + +static void +ldr_c(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + ldr_uc(_jit, r0, r1); + extr_c(_jit, r0, r0); +} + +static void +ldi_c(jit_state_t *_jit, int32_t r0, jit_word_t i0) +{ + ldi_uc(_jit, r0, i0); + extr_c(_jit, r0, r0); +} + +static void +ldxr_c(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2) +{ + ldxr_uc(_jit, r0, r1, r2); + extr_c(_jit, r0, r0); +} + +static void +ldxi_c(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0) +{ + ldxi_uc(_jit, r0, r1, i0); + extr_c(_jit, r0, r0); +} + +static void +ldr_uc(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + em_wp(_jit, _LBZX(r0, rn(_R0), r1)); +} + +static void +ldi_uc(jit_state_t *_jit, int32_t r0, jit_word_t i0) +{ + if (can_sign_extend_short_p(i0)) { + em_wp(_jit, _LBZ(r0, rn(_R0), i0)); + } else if (can_sign_extend_int_p(i0)) { + jit_word_t hi = (int16_t)((i0 >> 16) + ((uint16_t)i0 >> 15)); + jit_word_t lo = (int16_t)(i0 - (hi << 16)); + jit_gpr_t reg = get_temp_gpr(_jit); + em_wp(_jit, _LIS(rn(reg), hi)); + em_wp(_jit, _LBZ(r0, rn(reg), lo)); + unget_temp_gpr(_jit); + } + else { + jit_gpr_t reg = get_temp_gpr(_jit); + movi(_jit, rn(reg), i0); + ldr_uc(_jit, r0, rn(reg)); + 
unget_temp_gpr(_jit); + } +} + +static void +ldxr_uc(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2) +{ + if (r1 == rn(_R0)) { + if (r2 != rn(_R0)) { + em_wp(_jit, _LBZX(r0, r2, r1)); + } else { + jit_gpr_t reg = get_temp_gpr(_jit); + movr(_jit, rn(reg), r1); + em_wp(_jit, _LBZX(r0, rn(reg), r2)); + unget_temp_gpr(_jit); + } + } else { + em_wp(_jit, _LBZX(r0, r1, r2)); + } +} + +static void +ldxi_uc(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0) +{ + if (i0 == 0) { + ldr_uc(_jit, r0, r1); + } else if (can_sign_extend_short_p(i0)) { + if (r1 == rn(_R0)) { + jit_gpr_t reg = get_temp_gpr(_jit); + movr(_jit, rn(reg), r1); + em_wp(_jit, _LBZ(r0, rn(reg), i0)); + unget_temp_gpr(_jit); + } else { + em_wp(_jit, _LBZ(r0, r1, i0)); + } + } + else { + jit_gpr_t reg = get_temp_gpr(_jit); + movi(_jit, rn(reg), i0); + ldxr_uc(_jit, r0, r1, rn(reg)); + unget_temp_gpr(_jit); + } +} + +static void +ldr_s(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + em_wp(_jit, _LHAX(r0, rn(_R0), r1)); +} + +static void +ldi_s(jit_state_t *_jit, int32_t r0, jit_word_t i0) +{ + if (can_sign_extend_short_p(i0)) { + em_wp(_jit, _LHA(r0, rn(_R0), i0)); + } else if (can_sign_extend_int_p(i0)) { + jit_word_t hi = (int16_t)((i0 >> 16) + ((uint16_t)i0 >> 15)); + jit_word_t lo = (int16_t)(i0 - (hi << 16)); + jit_gpr_t reg = get_temp_gpr(_jit); + em_wp(_jit, _LIS(rn(reg), hi)); + em_wp(_jit, _LHA(r0, rn(reg), lo)); + unget_temp_gpr(_jit); + } + else { + jit_gpr_t reg = get_temp_gpr(_jit); + movi(_jit, rn(reg), i0); + ldr_s(_jit, r0, rn(reg)); + unget_temp_gpr(_jit); + } +} + +static void +ldxr_s(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2) +{ + if (r1 == rn(_R0)) { + if (r2 != rn(_R0)) { + em_wp(_jit, _LHAX(r0, r2, r1)); + } else { + jit_gpr_t reg = get_temp_gpr(_jit); + movr(_jit, rn(reg), r1); + em_wp(_jit, _LHAX(r0, rn(reg), r2)); + unget_temp_gpr(_jit); + } + } else { + em_wp(_jit, _LHAX(r0, r1, r2)); + } +} + +static void +ldxi_s(jit_state_t *_jit, int32_t r0, int32_t r1, 
jit_word_t i0) +{ + if (i0 == 0) { + ldr_s(_jit, r0, r1); + } else if (can_sign_extend_short_p(i0)) { + if (r1 == rn(_R0)) { + jit_gpr_t reg = get_temp_gpr(_jit); + movr(_jit, rn(reg), r1); + em_wp(_jit, _LHA(r0, rn(reg), i0)); + unget_temp_gpr(_jit); + } else { + em_wp(_jit, _LHA(r0, r1, i0)); + } + } else { + jit_gpr_t reg = get_temp_gpr(_jit); + movi(_jit, rn(reg), i0); + ldxr_s(_jit, r0, r1, rn(reg)); + unget_temp_gpr(_jit); + } +} + +static void +ldr_us(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + em_wp(_jit, _LHZX(r0, rn(_R0), r1)); +} + +static void +ldi_us(jit_state_t *_jit, int32_t r0, jit_word_t i0) +{ + if (can_sign_extend_short_p(i0)) { + em_wp(_jit, _LHZ(r0, rn(_R0), i0)); + } else if (can_sign_extend_int_p(i0)) { + jit_word_t hi = (int16_t)((i0 >> 16) + ((uint16_t)i0 >> 15)); + jit_word_t lo = (int16_t)(i0 - (hi << 16)); + jit_gpr_t reg = get_temp_gpr(_jit); + em_wp(_jit, _LIS(rn(reg), hi)); + em_wp(_jit, _LHZ(r0, rn(reg), lo)); + unget_temp_gpr(_jit); + } else { + jit_gpr_t reg = get_temp_gpr(_jit); + movi(_jit, rn(reg), i0); + ldr_us(_jit, r0, rn(reg)); + unget_temp_gpr(_jit); + } +} + +static void +ldxr_us(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2) +{ + if (r1 == rn(_R0)) { + if (r2 != rn(_R0)) { + em_wp(_jit, _LHZX(r0, r2, r1)); + } else { + jit_gpr_t reg = get_temp_gpr(_jit); + movr(_jit, rn(reg), r1); + em_wp(_jit, _LHZX(r0, rn(reg), r2)); + unget_temp_gpr(_jit); + } + } else { + em_wp(_jit, _LHZX(r0, r1, r2)); + } +} + +static void +ldxi_us(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0) +{ + if (i0 == 0) { + ldr_us(_jit, r0, r1); + } else if (can_sign_extend_short_p(i0)) { + if (r1 == rn(_R0)) { + jit_gpr_t reg = get_temp_gpr(_jit); + movr(_jit, rn(reg), r1); + em_wp(_jit, _LHZ(r0, rn(reg), i0)); + unget_temp_gpr(_jit); + } else { + em_wp(_jit, _LHZ(r0, r1, i0)); + } + } else { + jit_gpr_t reg = get_temp_gpr(_jit); + movi(_jit, rn(reg), i0); + ldxr_us(_jit, r0, r1, rn(reg)); + unget_temp_gpr(_jit); + } +} + +static 
void +ldr_i(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + em_wp(_jit, _LDR_I(r0, r1)); +} + +# if __WORDSIZE == 32 +static void +ldi_i(jit_state_t *_jit, int32_t r0, jit_word_t i0) +{ + if (can_sign_extend_short_p(i0)) { + LWZ(r0, rn(_R0), i0); + } else if (can_sign_extend_int_p(i0)) { + jit_word_t hi = (int16_t)((i0 >> 16) + ((uint16_t)i0 >> 15)); + jit_word_t lo = (int16_t)(i0 - (hi << 16)); + jit_gpr_t reg = get_temp_gpr(_jit); + em_wp(_jit, _LIS(rn(reg), hi)); + em_wp(_jit, _LWZ(r0, rn(reg), lo)); + unget_temp_gpr(_jit); + } else { + jit_gpr_t reg = get_temp_gpr(_jit); + movi(_jit, rn(reg), i0); + ldr_i(_jit, r0, rn(reg)); + unget_temp_gpr(_jit); + } +} + +static void +ldxr_i(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2) +{ + if (r1 == rn(_R0)) { + if (r2 != rn(_R0)) { + em_wp(_jit, _LWZX(r0, r2, r1)); + } else { + jit_gpr_t reg = get_temp_gpr(_jit); + movr(_jit, rn(reg), r1); + em_wp(_jit, _LWZX(r0, rn(reg), r2)); + unget_temp_gpr(_jit); + } + } else { + em_wp(_jit, _LWZX(r0, r1, r2)); + } +} + +static void +ldxi_i(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0) +{ + if (i0 == 0) { + ldr_i(_jit, r0, r1); + } else if (can_sign_extend_short_p(i0)) { + if (r1 == rn(_R0)) { + jit_gpr_t reg = get_temp_gpr(_jit); + movr(_jit, rn(reg), r1); + em_wp(_jit, _LW(r0, rn(reg), i0)); + unget_temp_gpr(_jit); + } else { + em_wp(_jit, _LW(r0, r1, i0)); + } + } else { + jit_gpr_t reg = get_temp_gpr(_jit); + movi(_jit, rn(reg), i0); + ldxr_i(_jit, r0, r1, rn(reg)); + unget_temp_gpr(_jit); + } +} + +# else +static void +ldi_i(jit_state_t *_jit, int32_t r0, jit_word_t i0) +{ + if (can_sign_extend_short_p(i0)) { + em_wp(_jit, _LWA(r0, rn(_R0), i0)); + } else if (can_sign_extend_int_p(i0)) { + jit_word_t hi = (int16_t)((i0 >> 16) + ((uint16_t)i0 >> 15)); + jit_word_t lo = (int16_t)(i0 - (hi << 16)); + jit_gpr_t reg = get_temp_gpr(_jit); + em_wp(_jit, _LIS(rn(reg), hi)); + em_wp(_jit, _LWA(r0, rn(reg), lo)); + unget_temp_gpr(_jit); + } else { + jit_gpr_t reg = 
get_temp_gpr(_jit); + movi(_jit, rn(reg), i0); + ldr_i(_jit, r0, rn(reg)); + unget_temp_gpr(_jit); + } +} + +static void +ldxr_i(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2) +{ + if (r1 == rn(_R0)) { + if (r2 != rn(_R0)) { + em_wp(_jit, _LWAX(r0, r2, r1)); + } else { + jit_gpr_t reg = get_temp_gpr(_jit); + movr(_jit, rn(reg), r1); + em_wp(_jit, _LWAX(r0, rn(reg), r2)); + unget_temp_gpr(_jit); + } + } else { + em_wp(_jit, _LWAX(r0, r1, r2)); + } +} + +static void +ldxi_i(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0) +{ + if (i0 == 0) { + ldr_i(_jit, r0, r1); + } else if (can_sign_extend_short_p(i0)) { + if (r1 == rn(_R0)) { + jit_gpr_t reg = get_temp_gpr(_jit); + movr(_jit, rn(reg), r1); + em_wp(_jit, _LWA(r0, rn(reg), i0)); + unget_temp_gpr(_jit); + } else { + em_wp(_jit, _LWA(r0, r1, i0)); + } + } else { + jit_gpr_t reg = get_temp_gpr(_jit); + movi(_jit, rn(reg), i0); + ldxr_i(_jit, r0, r1, rn(reg)); + unget_temp_gpr(_jit); + } +} + +static void +ldr_ui(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + em_wp(_jit, _LWZX(r0, rn(_R0), r1)); +} + +static void +ldi_ui(jit_state_t *_jit, int32_t r0, jit_word_t i0) +{ + if (can_sign_extend_short_p(i0)) { + em_wp(_jit, _LWZ(r0, rn(_R0), i0)); + } else if (can_sign_extend_int_p(i0)) { + jit_word_t hi = (int16_t)((i0 >> 16) + ((uint16_t)i0 >> 15)); + jit_word_t lo = (int16_t)(i0 - (hi << 16)); + jit_gpr_t reg = get_temp_gpr(_jit); + em_wp(_jit, _LIS(rn(reg), hi)); + em_wp(_jit, _LWZ(r0, rn(reg), lo)); + unget_temp_gpr(_jit); + } else { + jit_gpr_t reg = get_temp_gpr(_jit); + movi(_jit, rn(reg), i0); + ldr_ui(_jit, r0, rn(reg)); + unget_temp_gpr(_jit); + } +} + +static void +ldxr_ui(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2) +{ + if (r1 == rn(_R0)) { + if (r2 != rn(_R0)) { + em_wp(_jit, _LWZX(r0, r2, r1)); + } else { + jit_gpr_t reg = get_temp_gpr(_jit); + movr(_jit, rn(reg), r1); + em_wp(_jit, _LWZX(r0, rn(reg), r2)); + unget_temp_gpr(_jit); + } + } else { + em_wp(_jit, _LWZX(r0, r1, r2)); + 
} +} + +static void +ldxi_ui(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0) +{ + if (i0 == 0) { + ldr_i(_jit, r0, r1); + } else if (can_sign_extend_short_p(i0)) { + if (r1 == rn(_R0)) { + jit_gpr_t reg = get_temp_gpr(_jit); + movr(_jit, rn(reg), r1); + em_wp(_jit, _LWZ(r0, rn(reg), i0)); + unget_temp_gpr(_jit); + } else { + em_wp(_jit, _LWZ(r0, r1, i0)); + } + } else { + jit_gpr_t reg = get_temp_gpr(_jit); + movi(_jit, rn(reg), i0); + ldxr_ui(_jit, r0, r1, rn(reg)); + unget_temp_gpr(_jit); + } +} + +static void +ldr_l(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + em_wp(_jit, _LDX(r0, rn(_R0), r1)); +} + +static void +ldi_l(jit_state_t *_jit, int32_t r0, jit_word_t i0) +{ + if (can_sign_extend_short_p(i0)) { + em_wp(_jit, _LD(r0, rn(_R0), i0)); + } else if (can_sign_extend_int_p(i0)) { + jit_word_t hi = (int16_t)((i0 >> 16) + ((uint16_t)i0 >> 15)); + jit_word_t lo = (int16_t)(i0 - (hi << 16)); + jit_gpr_t reg = get_temp_gpr(_jit); + em_wp(_jit, _LIS(rn(reg), hi)); + em_wp(_jit, _LD(r0, rn(reg), lo)); + unget_temp_gpr(_jit); + } else { + jit_gpr_t reg = get_temp_gpr(_jit); + movi(_jit, rn(reg), i0); + ldr_l(_jit, r0, rn(reg)); + unget_temp_gpr(_jit); + } +} + +static void +ldxr_l(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2) +{ + if (r1 == rn(_R0)) { + if (r2 != rn(_R0)) { + em_wp(_jit, _LDX(r0, r2, r1)); + } else { + jit_gpr_t reg = get_temp_gpr(_jit); + movr(_jit, rn(reg), r1); + em_wp(_jit, _LDX(r0, rn(reg), r2)); + unget_temp_gpr(_jit); + } + } else { + em_wp(_jit, _LDX(r0, r1, r2)); + } +} + +static void +ldxi_l(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0) +{ + if (i0 == 0) { + ldr_l(_jit, r0, r1); + } else if (can_sign_extend_short_p(i0)) { + if (r1 == rn(_R0)) { + jit_gpr_t reg = get_temp_gpr(_jit); + movr(_jit, rn(reg), r1); + em_wp(_jit, _LD(r0, rn(reg), i0)); + unget_temp_gpr(_jit); + } else { + em_wp(_jit, _LD(r0, r1, i0)); + } + } else { + jit_gpr_t reg = get_temp_gpr(_jit); + movi(_jit, rn(reg), i0); + ldxr_l(_jit, r0, 
r1, rn(reg)); + unget_temp_gpr(_jit); + } +} +# endif + +static void +str_c(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + em_wp(_jit, _STBX(r1, rn(_R0), r0)); +} + +static void +sti_c(jit_state_t *_jit, jit_word_t i0, int32_t r0) +{ + if (can_sign_extend_short_p(i0)) { + em_wp(_jit, _STB(r0, rn(_R0), i0)); + } else if (can_sign_extend_int_p(i0)) { + jit_word_t hi = (int16_t)((i0 >> 16) + ((uint16_t)i0 >> 15)); + jit_word_t lo = (int16_t)(i0 - (hi << 16)); + jit_gpr_t reg = get_temp_gpr(_jit); + em_wp(_jit, _LIS(rn(reg), hi)); + em_wp(_jit, _STB(r0, rn(reg), lo)); + unget_temp_gpr(_jit); + } else { + jit_gpr_t reg = get_temp_gpr(_jit); + movi(_jit, rn(reg), i0); + str_c(_jit, rn(reg), r0); + unget_temp_gpr(_jit); + } +} + +static void +stxr_c(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2) +{ + if (r0 == rn(_R0)) { + if (r1 != rn(_R0)) { + em_wp(_jit, _STBX(r2, r1, r0)); + } else { + jit_gpr_t reg = get_temp_gpr(_jit); + movr(_jit, rn(reg), r0); + em_wp(_jit, _STBX(r2, rn(reg), r1)); + unget_temp_gpr(_jit); + } + } else { + em_wp(_jit, _STBX(r2, r0, r1)); + } +} + +static void +stxi_c(jit_state_t *_jit, jit_word_t i0, int32_t r0, int32_t r1) +{ + if (i0 == 0) { + str_c(_jit, r0, r1); + } else if (can_sign_extend_short_p(i0)) { + if (r0 == rn(_R0)) { + jit_gpr_t reg = get_temp_gpr(_jit); + movr(_jit, rn(reg), i0); + em_wp(_jit, _STB(r1, rn(reg), i0)); + unget_temp_gpr(_jit); + } else { + em_wp(_jit, _STB(r1, r0, i0)); + } + } else { + jit_gpr_t reg = get_temp_gpr(_jit); + movi(_jit, rn(reg), i0); + stxr_c(_jit, rn(reg), r0, r1); + unget_temp_gpr(_jit); + } +} + +static void +str_s(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + em_wp(_jit, _STHX(r1, rn(_R0), r0)); +} + +static void +sti_s(jit_state_t *_jit, jit_word_t i0, int32_t r0) +{ + if (can_sign_extend_short_p(i0)) { + em_wp(_jit, _STH(r0, rn(_R0), i0)); + } else if (can_sign_extend_int_p(i0)) { + jit_word_t hi = (int16_t)((i0 >> 16) + ((uint16_t)i0 >> 15)); + jit_word_t lo = (int16_t)(i0 - (hi << 
16)); + jit_gpr_t reg = get_temp_gpr(_jit); + em_wp(_jit, _LIS(rn(reg), hi)); + em_wp(_jit, _STH(r0, rn(reg), lo)); + unget_temp_gpr(_jit); + } else { + jit_gpr_t reg = get_temp_gpr(_jit); + movi(_jit,rn(reg), i0); + str_s(_jit, rn(reg), r0); + unget_temp_gpr(_jit); + } +} + +static void +stxr_s(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2) +{ + if (r0 == rn(_R0)) { + if (r1 != rn(_R0)) { + em_wp(_jit, _STHX(r2, r1, r0)); + } else { + jit_gpr_t reg = get_temp_gpr(_jit); + movr(_jit, rn(reg), r0); + em_wp(_jit, _STHX(r2, rn(reg), r1)); + unget_temp_gpr(_jit); + } + } else { + em_wp(_jit, _STHX(r2, r0, r1)); + } +} + +static void +stxi_s(jit_state_t *_jit, jit_word_t i0, int32_t r0, int32_t r1) +{ + if (i0 == 0) { + str_s(_jit, r0, r1); + } else if (can_sign_extend_short_p(i0)) { + if (r0 == rn(_R0)) { + jit_gpr_t reg = get_temp_gpr(_jit); + movr(_jit, rn(reg), i0); + em_wp(_jit, _STH(r1, rn(reg), i0)); + unget_temp_gpr(_jit); + } else { + em_wp(_jit, _STH(r1, r0, i0)); + } + } else { + jit_gpr_t reg = get_temp_gpr(_jit); + movi(_jit, rn(reg), i0); + stxr_s(_jit, rn(reg), r0, r1); + unget_temp_gpr(_jit); + } +} + +static void +str_i(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + em_wp(_jit, _STWX(r1, rn(_R0), r0)); +} + +static void +sti_i(jit_state_t *_jit, jit_word_t i0, int32_t r0) +{ + if (can_sign_extend_short_p(i0)) { + em_wp(_jit, _STW(r0, rn(_R0), i0)); + } else if (can_sign_extend_int_p(i0)) { + jit_word_t hi = (int16_t)((i0 >> 16) + ((uint16_t)i0 >> 15)); + jit_word_t lo = (int16_t)(i0 - (hi << 16)); + jit_gpr_t reg = get_temp_gpr(_jit); + em_wp(_jit, _LIS(rn(reg), hi)); + em_wp(_jit, _STW(r0, rn(reg), lo)); + unget_temp_gpr(_jit); + } else { + jit_gpr_t reg = get_temp_gpr(_jit); + movi(_jit, rn(reg), i0); + str_i(_jit, rn(reg), r0); + unget_temp_gpr(_jit); + } +} + +static void +stxr_i(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2) +{ + if (r0 == rn(_R0)) { + if (r1 != rn(_R0)) { + em_wp(_jit, _STWX(r2, r1, r0)); + } else { + jit_gpr_t 
reg = get_temp_gpr(_jit); + movr(_jit, rn(reg), r0); + em_wp(_jit, _STWX(r2, rn(reg), r1)); + unget_temp_gpr(_jit); + } + } else { + em_wp(_jit, _STWX(r2, r0, r1)); + } +} + +static void +stxi_i(jit_state_t *_jit, jit_word_t i0, int32_t r0, int32_t r1) +{ + if (i0 == 0) { + str_i(_jit, r0, r1); + } else if (can_sign_extend_short_p(i0)) { + if (r0 == rn(_R0)) { + jit_gpr_t reg = get_temp_gpr(_jit); + movr(_jit, rn(reg), i0); + em_wp(_jit, _STW(r1, rn(reg), i0)); + unget_temp_gpr(_jit); + } else { + em_wp(_jit, _STW(r1, r0, i0)); + } + } else { + jit_gpr_t reg = get_temp_gpr(_jit); + movi(_jit, rn(reg), i0); + stxr_i(_jit, rn(reg), r0, r1); + unget_temp_gpr(_jit); + } +} + +# if __WORDSIZE == 64 +static void +str_l(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + em_wp(_jit, _STDX(r1, rn(_R0), r0)); +} + +static void +sti_l(jit_state_t *_jit, jit_word_t i0, int32_t r0) +{ + if (can_sign_extend_short_p(i0)) { + em_wp(_jit, _STD(r0, rn(_R0), i0)); + } else if (can_sign_extend_int_p(i0)) { + jit_word_t hi = (int16_t)((i0 >> 16) + ((uint16_t)i0 >> 15)); + jit_word_t lo = (int16_t)(i0 - (hi << 16)); + jit_gpr_t reg = get_temp_gpr(_jit); + em_wp(_jit, _LIS(rn(reg), hi)); + em_wp(_jit, _STD(r0, rn(reg), lo)); + unget_temp_gpr(_jit); + } else { + jit_gpr_t reg = get_temp_gpr(_jit); + movi(_jit, rn(reg), i0); + str_l(_jit, rn(reg), r0); + unget_temp_gpr(_jit); + } +} + +static void +stxr_l(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2) +{ + if (r0 == rn(_R0)) { + if (r1 != rn(_R0)) { + em_wp(_jit, _STDX(r2, r1, r0)); + } else { + jit_gpr_t reg = get_temp_gpr(_jit); + movr(_jit, rn(reg), r0); + em_wp(_jit, _STDX(r2, rn(reg), r1)); + unget_temp_gpr(_jit); + } + } else { + em_wp(_jit, _STDX(r2, r0, r1)); + } +} + +static void +stxi_l(jit_state_t *_jit, jit_word_t i0, int32_t r0, int32_t r1) +{ + if (i0 == 0) { + str_l(_jit, r0, r1); + } else if (can_sign_extend_short_p(i0)) { + if (r0 == rn(_R0)) { + jit_gpr_t reg = get_temp_gpr(_jit); + movr(_jit, rn(reg), i0); + 
em_wp(_jit, _STD(r1, rn(reg), i0)); + unget_temp_gpr(_jit); + } else { + em_wp(_jit, _STD(r1, r0, i0)); + } + } else { + jit_gpr_t reg = get_temp_gpr(_jit); + movi(_jit, rn(reg), i0); + stxr_l(_jit, rn(reg), r0, r1); + unget_temp_gpr(_jit); + } +} +# endif + +static void +jmpr(jit_state_t *_jit, int32_t r0) +{ + emit_u32(_jit, _MTCTR(r0)); + emit_u32(_jit, _BCTR()); +} + +static void +jmpr_with_link(jit_state_t *_jit, int32_t r0) +{ + // Some kind of linking stuff? + if (r0 != rn(_R12)) + emit_u32(_jit, _MR(rn(_R12), r0)); + + emit_u32(_jit, _MR(rn(JIT_LR), r0)); + emit_u32(_jit, _MTCTR(rn(_R12))); + emit_u32(_jit, _BCTRL()); +} + +static void +jmpi_with_link(jit_state_t *_jit, jit_word_t i0) +{ + movi(_jit, rn(_R12), i0); + jmpr_with_link(_jit, rn(_R12)); +} + +static void +jmpi(jit_state_t *_jit, jit_word_t i0) +{ + jit_gpr_t reg = get_temp_gpr(_jit); + movi(_jit, rn(reg), i0); + jmpr(_jit, rn(reg)); + unget_temp_gpr(_jit); +} + +static jit_reloc_t +jmp(jit_state_t *_jit) +{ + return emit_jump(_jit, _B(0)); +} + +static void +build_tmp_frame(jit_state_t *_jit) +{ + emit_u32(_jit, _STX(rn(JIT_FP), rn(JIT_SP), 0)); + emit_u32(_jit, _STX(rn(JIT_LR), rn(JIT_SP), 16)); + emit_u32(_jit, _STX(rn(_R2), rn(JIT_SP), 24)); + emit_u32(_jit, _MR(rn(JIT_FP), rn(JIT_SP))); +} + +static void +destroy_tmp_frame(jit_state_t *_jit) +{ + emit_u32(_jit, _LXX(rn(_R2), rn(JIT_SP), 24)); + emit_u32(_jit, _LXX(rn(JIT_FP), rn(JIT_SP), 0)); +} + +// Heavily assumes prepare_call_args() has been called beforehand +static void +callr(jit_state_t *_jit, int32_t r0) +{ + build_tmp_frame(_jit); + + if (r0 != rn(_R12)) + emit_u32(_jit, _MR(rn(_R12), r0)); + + emit_u32(_jit, _MTCTR(rn(_R12))); + emit_u32(_jit, _BCTRL()); + + destroy_tmp_frame(_jit); +} + +/* assume fixed address or reachable address */ +static void +calli(jit_state_t *_jit, jit_word_t i0) +{ + movi(_jit, rn(_R12), i0); + callr(_jit, rn(_R12)); +} + +static void +ret(jit_state_t *_jit) +{ + em_wp(_jit, _BLR()); +} + +static void 
+retr(jit_state_t *_jit, int32_t u) +{ + if (rn(JIT_RET) != u) + movr(_jit, rn(JIT_RET), u); + + ret(_jit); +} + +static void +reti(jit_state_t *_jit, jit_word_t u) +{ + movi(_jit, rn(JIT_RET), u); + ret(_jit); +} + + +static void +retval_c(jit_state_t *_jit, int32_t r0) +{ + extr_c(_jit, r0, rn(JIT_RET)); +} + +static void +retval_uc(jit_state_t *_jit, int32_t r0) +{ + extr_uc(_jit, r0, rn(JIT_RET)); +} + +static void +retval_s(jit_state_t *_jit, int32_t r0) +{ + extr_s(_jit, r0, rn(JIT_RET)); +} + +static void +retval_us(jit_state_t *_jit, int32_t r0) +{ + extr_us(_jit, r0, rn(JIT_RET)); +} + +static void +retval_i(jit_state_t *_jit, int32_t r0) +{ +#if __WORDSIZE == 32 + if (r0 != rn(JIT_RET)) + movr(_jit, r0, rn(JIT_RET)); +#else + extr_i(_jit, r0, rn(JIT_RET)); +#endif +} + +#if __WORDSIZE == 64 +static void +retval_ui(jit_state_t *_jit, int32_t r0) +{ + extr_ui(_jit, r0, rn(JIT_RET)); +} + +static void +retval_l(jit_state_t *_jit, int32_t r0) +{ + if (r0 != rn(JIT_RET)) + movr(_jit, r0, rn(JIT_RET)); +} +#endif + +static void +ldr_atomic(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + emit_u32(_jit, _HWSYNC()); + emit_u32(_jit, _LXX(r0, r1, 0)); + emit_u32(_jit, _CMPX(r0, r0)); + jit_reloc_t w = emit_atomic_jump(_jit, _BNE(0)); + jit_patch_here(_jit, w); + emit_u32(_jit, _ISYNC()); +} + +static void +str_atomic(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + emit_u32(_jit, _HWSYNC()); + emit_u32(_jit, _STX(r1, r0, 0)); +} + +static void +swap_atomic(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2) +{ + // if r0 == r1, we might overwrite something if we didn't use temporaries + jit_gpr_t t0 = get_temp_gpr(_jit); + + emit_u32(_jit, _HWSYNC()); + jit_pointer_t a = jit_address(_jit); + emit_u32(_jit, _LXARX(rn(t0), r1)); + emit_u32(_jit, _STXCX(r2, r1)); + jit_reloc_t w = emit_atomic_jump(_jit, _BNE(0)); + jit_patch_there(_jit, w, a); + emit_u32(_jit, _ISYNC()); + movr(_jit, r0, rn(t0)); + + unget_temp_gpr(_jit); +} + +static void +cas_atomic(jit_state_t 
*_jit, int32_t r0, int32_t r1, int32_t r2, int32_t r3) +{ + jit_gpr_t t0 = get_temp_gpr(_jit); + + emit_u32(_jit, _HWSYNC()); + jit_pointer_t loop = jit_address(_jit); + emit_u32(_jit, _LXARX(rn(t0), r1)); + emit_u32(_jit, _CMPX(r2, rn(t0))); + + jit_reloc_t s = emit_atomic_jump(_jit, _BNE(0)); + + emit_u32(_jit, _STXCX(r3, r1)); + + jit_reloc_t w = emit_atomic_jump(_jit, _BNE(0)); + + jit_patch_here(_jit, s); + + jit_patch_there(_jit, w, loop); + + emit_u32(_jit, _ISYNC()); + movr(_jit, r0, r1); + unget_temp_gpr(_jit); +} + +static void +pop_link_register(jit_state_t *_jit) +{ + em_wp(_jit, _MFLR(rn(_R0))); +} + +static void +push_link_register(jit_state_t *_jit) +{ + em_wp(_jit, _MTLR(rn(_R0))); +} + +static void +breakpoint(jit_state_t *_jit) +{ + (void)_jit; +} diff --git a/deps/lightening/lightening/ppc-fpu.c b/deps/lightening/lightening/ppc-fpu.c new file mode 100644 index 0000000..392e9ea --- /dev/null +++ b/deps/lightening/lightening/ppc-fpu.c @@ -0,0 +1,935 @@ +/* + * Copyright (C) 2012-2017 Free Software Foundation, Inc. + * + * This file is part of GNU lightning. + * + * GNU lightning is free software; you can redistribute it and/or modify it + * under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation; either version 3, or (at your option) + * any later version. + * + * GNU lightning is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public + * License for more details. 
+ * + * Authors: + * Paulo Cesar Pereira de Andrade + */ + +# define fn(x) jit_fpr_regno(x) + +#define _FA(o,d,a,b,c,x) FA(o,d,a,b,c,x,0) +#define _FA_(o,d,a,b,c,x) FA(o,d,a,b,c,x,1) +static uint32_t FA(int,int,int,int,int,int,int); + +#define _FXFL(o,m,b,x) FXFL(o,m,b,x,0) +#define _FXFL_(o,m,b,x) FXFL(o,m,b,x,1) +static uint32_t FXFL(int,int,int,int,int) maybe_unused; + +# define _FABS(d,b) _FX(63,d,0,b,264) +# define _FABS_(d,b) _FX_(63,d,0,b,264) +# define _FADD(d,a,b) _FA(63,d,a,b,0,21) +# define _FADD_(d,a,b) _FA_(63,d,a,b,0,21) +# define _FADDS(d,a,b) _FA(59,d,a,b,0,21) +# define _FADDS_(d,a,b) _FA_(59,d,a,b,0,21) +# define _FCFID(d,b) _FX(63,d,0,b,846) +# define _FCMPO(cr,a,b) _FC(63,cr,0,a,b,32) +# define _FCMPU(cr,a,b) _FC(63,cr,0,a,b,0) +# define _FCTIW(d,b) _FX(63,d,0,b,14) +# define _FCTIW_(d,b) _FX_(63,d,0,b,14) +# define _FCTIWZ(d,b) _FX(63,d,0,b,15) +# define _FCTIWZ_(d,b) _FX_(63,d,0,b,15) +# define _FCTID(d,b) _FX(63,d,0,b,814) +# define _FCTID_(d,b) _FX_(63,d,0,b,814) +# define _FCTIDZ(d,b) _FX(63,d,0,b,815) +# define _FCTIDZ_(d,b) _FX_(63,d,0,b,815) +# define _FDIV(d,a,b) _FA(63,d,a,b,0,18) +# define _FDIV_(d,a,b) _FA_(63,d,a,b,0,18) +# define _FDIVS(d,a,b) _FA(59,d,a,b,0,18) +# define _FDIVS_(d,a,b) _FA_(59,d,a,b,0,18) +# define _FMADD(d,a,b,c) _FA(63,d,a,b,c,29) +# define _FMADD_(d,a,b,c) _FA(63,d,a,b,c,29) +# define _FMADDS(d,a,b,c) _FA(59,d,a,b,c,29) +# define _FMADDS_(d,a,b,c) _FA(59,d,a,b,c,29) +# define _FMR(d,b) _FX(63,d,0,b,72) +# define _FMR_(d,b) _FX_(63,d,0,b,72) +# define _FMSUB(d,a,b,c) _FA(63,d,a,b,c,28) +# define _FMSUB_(d,a,b,c) _FA(63,d,a,b,c,28) +# define _FMSUBS(d,a,b,c) _FA(59,d,a,b,c,28) +# define _FMSUBS_(d,a,b,c) _FA(59,d,a,b,c,28) +# define _FMUL(d,a,c) _FA(63,d,a,0,c,25) +# define _FMUL_(d,a,c) _FA_(63,d,a,0,c,25) +# define _FMULS(d,a,c) _FA(59,d,a,0,c,25) +# define _FMULS_(d,a,c) _FA_(59,d,a,0,c,25) +# define _FNABS(d,b) _FX(63,d,0,b,136) +# define _FNABS_(d,b) _FX_(63,d,0,b,136) +# define _FNEG(d,b) _FX(63,d,0,b,40) 
# define _FNEG_(d,b) _FX_(63,d,0,b,40)
# define _FNMADD(d,a,b,c) _FA(63,d,a,b,c,31)
# define _FNMADD_(d,a,b,c) _FA_(63,d,a,b,c,31)
# define _FNMADDS(d,a,b,c) _FA(59,d,a,b,c,31)
# define _FNMADDS_(d,a,b,c) _FA_(59,d,a,b,c,31)
# define _FNMSUB(d,a,b,c) _FA(63,d,a,b,c,30)
# define _FNMSUB_(d,a,b,c) _FA_(63,d,a,b,c,30)
# define _FNMSUBS(d,a,b,c) _FA(59,d,a,b,c,30)
# define _FNMSUBS_(d,a,b,c) _FA_(59,d,a,b,c,30)
# define _FRES(d,b) _FA(59,d,0,b,0,24)
# define _FRES_(d,b) _FA_(59,d,0,b,0,24)
# define _FRSP(d,b) _FA(63,d,0,b,0,12)
# define _FRSP_(d,b) _FA_(63,d,0,b,0,12)
# define _FRSQTRE(d,b) _FA(63,d,0,b,0,26)
# define _FRSQTRE_(d,b) _FA_(63,d,0,b,0,26)
# define _FSEL(d,a,b,c) _FA(63,d,a,b,c,23)
# define _FSEL_(d,a,b,c) _FA_(63,d,a,b,c,23)
# define _FSQRT(d,b) _FA(63,d,0,b,0,22)
# define _FSQRT_(d,b) _FA_(63,d,0,b,0,22)
# define _FSQRTS(d,b) _FA(59,d,0,b,0,22)
# define _FSQRTS_(d,b) _FA_(59,d,0,b,0,22)
/* _FSUB_ and _FSUBS_ previously expanded to plain _FA, making them
   identical to _FSUB/_FSUBS; they now use the `_`-suffixed encoder like
   every other NAME_ entry in this table. */
# define _FSUB(d,a,b) _FA(63,d,a,b,0,20)
# define _FSUB_(d,a,b) _FA_(63,d,a,b,0,20)
# define _FSUBS(d,a,b) _FA(59,d,a,b,0,20)
# define _FSUBS_(d,a,b) _FA_(59,d,a,b,0,20)
# define _LFD(d,a,s) _FDs(50,d,a,s)
# define _LFDU(d,a,s) _FDs(51,d,a,s)
# define _LFDUX(d,a,b) _FX(31,d,a,b,631)
# define _LFDX(d,a,b) _FX(31,d,a,b,599)
# define _LFS(d,a,s) _FDs(48,d,a,s)
# define _LFSU(d,a,s) _FDs(49,d,a,s)
# define _LFSUX(d,a,b) _FX(31,d,a,b,567)
# define _LFSX(d,a,b) _FX(31,d,a,b,535)
/* Shifted arguments are parenthesised so that an expression argument
   (e.g. `x + 1`) is shifted as a whole. */
# define _MCRFS(d,s) _FXL(63,(d)<<2,(s)<<2,64)
# define _MFFS(d) _FX(63,d,0,0,583)
# define _MFFS_(d) _FX_(63,d,0,0,583)
# define _MTFSB0(d) _FX(63,d,0,0,70)
# define _MTFSB0_(d) _FX_(63,d,0,0,70)
# define _MTFSB1(d) _FX(63,d,0,0,38)
# define _MTFSB1_(d) _FX_(63,d,0,0,38)
# define _MTFSF(m,b) _FXFL(63,m,b,711)
# define _MTFSF_(m,b) _FXFL_(63,m,b,711)
# define _MTFSFI(d,i) _FX(63,(d)<<2,0,(i)<<1,134)
# define _MTFSFI_(d,i) _FX_(63,(d)<<2,0,(i)<<1,134)
# define _STFD(s,a,d) _FDs(54,s,a,d)
# define _STFDU(s,a,d) _FDs(55,s,a,d)
# define _STFDUX(s,a,b) _FX(31,s,a,b,759)
#
define _STFDX(s,a,b) _FX(31,s,a,b,727) +# define _STFIWX(s,a,b) _FX(31,s,a,b,983) +# define _STFS(s,a,d) _FDs(52,s,a,d) +# define _STFSU(s,a,d) _FDs(53,s,a,d) +# define _STFSUX(s,a,b) _FX(31,s,a,b,695) +# define _STFSX(s,a,b) _FX(31,s,a,b,663) + +static void movr_f(jit_state_t*,int32_t,int32_t); +static void movr_d(jit_state_t*,int32_t,int32_t); + +static void movi_f(jit_state_t*,int32_t,jit_float32_t); +static void movi_d(jit_state_t*,int32_t,jit_float64_t); + +static void extr_f(jit_state_t*,int32_t,int32_t); +static void extr_d(jit_state_t*,int32_t,int32_t); + +static void truncr_f_i(jit_state_t*,int32_t,int32_t); +static void truncr_d_i(jit_state_t*,int32_t,int32_t); + +# if __WORDSIZE == 64 +static void truncr_f_l(jit_state_t*,int32_t,int32_t); +static void truncr_d_l(jit_state_t*,int32_t,int32_t); +# endif + +static void extr_d_f(jit_state_t*,int32_t,int32_t); +static void extr_f_d(jit_state_t*,int32_t,int32_t); + +static void absr_f(jit_state_t*,int32_t,int32_t); +static void absr_d(jit_state_t*,int32_t,int32_t); + +static void negr_f(jit_state_t*,int32_t,int32_t); +static void negr_d(jit_state_t*,int32_t,int32_t); + +static void sqrtr_f(jit_state_t*,int32_t,int32_t); +static void sqrtr_d(jit_state_t*,int32_t,int32_t); + +static void addr_f(jit_state_t*,int32_t,int32_t,int32_t); +static void addr_d(jit_state_t*,int32_t,int32_t,int32_t); + +static void subr_f(jit_state_t*,int32_t,int32_t,int32_t); +static void subr_d(jit_state_t*,int32_t,int32_t,int32_t); + +static void mulr_f(jit_state_t*,int32_t,int32_t,int32_t); +static void mulr_d(jit_state_t*,int32_t,int32_t,int32_t); + +static void divr_f(jit_state_t*,int32_t,int32_t,int32_t); +static void divr_d(jit_state_t*,int32_t,int32_t,int32_t); + +static jit_reloc_t bltr_f(jit_state_t*,int32_t,int32_t); +static jit_reloc_t bltr_d(jit_state_t*,int32_t,int32_t); +static jit_reloc_t bler_f(jit_state_t*,int32_t,int32_t); +static jit_reloc_t bler_d(jit_state_t*,int32_t,int32_t); +static jit_reloc_t 
beqr_f(jit_state_t*,int32_t,int32_t); +static jit_reloc_t beqr_d(jit_state_t*,int32_t,int32_t); +static jit_reloc_t bger_f(jit_state_t*,int32_t,int32_t); +static jit_reloc_t bger_d(jit_state_t*,int32_t,int32_t); +static jit_reloc_t bgtr_f(jit_state_t*,int32_t,int32_t); +static jit_reloc_t bgtr_d(jit_state_t*,int32_t,int32_t); +static jit_reloc_t bner_f(jit_state_t*,int32_t,int32_t); +static jit_reloc_t bner_d(jit_state_t*,int32_t,int32_t); +static jit_reloc_t bunltr_f(jit_state_t*,int32_t,int32_t); +static jit_reloc_t bunltr_d(jit_state_t*,int32_t,int32_t); +static jit_reloc_t bunler_f(jit_state_t*,int32_t,int32_t); +static jit_reloc_t bunler_d(jit_state_t*,int32_t,int32_t); +static jit_reloc_t buneqr_f(jit_state_t*,int32_t,int32_t); +static jit_reloc_t buneqr_d(jit_state_t*,int32_t,int32_t); +static jit_reloc_t bunger_f(jit_state_t*,int32_t,int32_t); +static jit_reloc_t bunger_d(jit_state_t*,int32_t,int32_t); +static jit_reloc_t bungtr_f(jit_state_t*,int32_t,int32_t); +static jit_reloc_t bungtr_d(jit_state_t*,int32_t,int32_t); +static jit_reloc_t bltgtr_f(jit_state_t*,int32_t,int32_t); +static jit_reloc_t bltgtr_d(jit_state_t*,int32_t,int32_t); +static jit_reloc_t bordr_f(jit_state_t*,int32_t,int32_t); +static jit_reloc_t bordr_d(jit_state_t*,int32_t,int32_t); +static jit_reloc_t bunordr_f(jit_state_t*,int32_t,int32_t); +static jit_reloc_t bunordr_d(jit_state_t*,int32_t,int32_t); + +static void ldr_f(jit_state_t*,int32_t,int32_t); +static void ldi_f(jit_state_t*,int32_t,jit_word_t); + +static void ldxr_f(jit_state_t*,int32_t,int32_t,int32_t); +static void ldxi_f(jit_state_t*,int32_t,int32_t,jit_word_t); + +static void str_f(jit_state_t*,int32_t,int32_t); +static void sti_f(jit_state_t*,jit_word_t,int32_t); + +static void stxr_f(jit_state_t*,int32_t,int32_t,int32_t); +static void stxi_f(jit_state_t*,jit_word_t,int32_t,int32_t); + +static void ldr_d(jit_state_t*,int32_t,int32_t); +static void ldi_d(jit_state_t*,int32_t,jit_word_t); + +static void 
ldxr_d(jit_state_t*,int32_t,int32_t,int32_t); +static void ldxi_d(jit_state_t*,int32_t,int32_t,jit_word_t); + +static void str_d(jit_state_t*,int32_t,int32_t); +static void sti_d(jit_state_t*,jit_word_t,int32_t); + +static void stxr_d(jit_state_t*,int32_t,int32_t,int32_t); +static void stxi_d(jit_state_t*,jit_word_t,int32_t,int32_t); + +# define _u16(v) ((v) & 0xffff) +static uint32_t +FA(int o, int d, int a, int b, int c, int x, int r) +{ + assert(!(o & ~((1 << 6) - 1))); + assert(!(d & ~((1 << 5) - 1))); + assert(!(a & ~((1 << 5) - 1))); + assert(!(b & ~((1 << 5) - 1))); + assert(!(c & ~((1 << 5) - 1))); + assert(!(x & ~((1 << 5) - 1))); + assert(!(r & ~((1 << 1) - 1))); + instr_t i = {.A = {.po = o, .ft = d, .fa = a, .fb = b, .fc = c, .xo = x, .rc = r}}; + return i.w; +} + +static uint32_t +FXFL(int o, int m, int b, int x, int r) +{ + assert(!(o & ~((1 << 6) - 1))); + assert(!(m & ~((1 << 8) - 1))); + assert(!(b & ~((1 << 5) - 1))); + assert(!(x & ~((1 << 10) - 1))); + assert(!(r & ~((1 << 1) - 1))); + instr_t i = {.XFL = {.po = o, .l = 0, .fm = m, .w = 0, .fb = b, .xo = x, .rc = r}}; + return i.w; +} + +static void +movr_d(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + if (r0 != r1) + em_wp(_jit, _FMR(r0,r1)); +} + +static void +movr_f(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + movr_d(_jit, r0, r1); +} + +static void +movi_f(jit_state_t *_jit, int32_t r0, jit_float32_t i0) +{ + union { + int32_t i; + jit_float32_t f; + } data; + + data.f = i0; + jit_fpr_t reg = get_temp_fpr(_jit); + movi(_jit, fn(reg), data.i & 0xffffffff); + stxi_i(_jit, -8, rn(_FP), fn(reg)); + unget_temp_fpr(_jit); + + ldxi_f(_jit, r0, rn(_FP), -8); +} + +static void +movi_d(jit_state_t *_jit, int32_t r0, jit_float64_t i0) +{ + union { + int32_t i[2]; + jit_word_t w; + jit_float64_t d; + } data; + + data.d = i0; + jit_gpr_t reg = get_temp_gpr(_jit); +# if __WORDSIZE == 32 + movi(_jit, rn(reg), data.i[0]); + stxi_i(_jit, -4, rn(_FP), rn(reg)); + movi(_jit, rn(reg), data.i[1]); + 
stxi_i(_jit, -8, rn(_FP), rn(reg)); +# else + movi(_jit, rn(reg), data.w); + stxi_l(_jit, -8, rn(_FP), rn(reg)); +# endif + unget_temp_gpr(_jit); + ldxi_d(_jit, r0, rn(_FP), -8); +} + +/* should only work on newer ppc (fcfid is a ppc64 instruction) */ +static void +extr_d(jit_state_t *_jit, int32_t r0, int32_t r1) +{ +# if __WORDSIZE == 32 + jit_gpr_t reg = get_temp_gpr(_jit); + rshi(_jit, rn(reg), r1, 31); + /* use reserved 8 bytes area */ + stxi_i(_jit, -8, rn(_FP), r1); + stxi_i(_jit, -4, rn(_FP), rn(reg)); + unget_temp_gpr(_jit); +# else + stxi_l(_jit, -8, rn(_FP), r1); +# endif + ldxi_d(_jit, r0, rn(_FP), -8); + em_wp(_jit, _FCFID(r0, r0)); +} + +static void +extr_f(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + extr_d(_jit, r0, r1); +} + +static void +extr_d_f(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + em_wp(_jit, _FRSP(r0, r1)); +} + +static void +extr_f_d(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + movr_d(_jit, r0, r1); +} + +static void +absr_d(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + em_wp(_jit, _FABS(r0, r1)); +} + +static void +absr_f(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + absr_d(_jit, r0, r1); +} + +static void +sqrtr_d(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + em_wp(_jit, _FSQRT(r0, r1)); +} + +static void +sqrtr_f(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + em_wp(_jit, _FSQRTS(r0, r1)); +} + +static void +addr_d(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2) +{ + em_wp(_jit, _FADD(r0, r1, r2)); +} + +static void +addr_f(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2) +{ + em_wp(_jit, _FADDS(r0, r1, r2)); +} + +static void +subr_f(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2) +{ + em_wp(_jit, _FSUBS(r0, r1, r2)); +} + +static void +subr_d(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2) +{ + em_wp(_jit, _FSUB(r0, r1, r2)); +} + +static void +negr_d(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + em_wp(_jit, _FNEG(r0, r1)); +} + +static void +negr_f(jit_state_t *_jit, int32_t r0, int32_t 
r1) +{ + negr_d(_jit, r0, r1); +} + +static void +mulr_f(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2) +{ + em_wp(_jit, _FMULS(r0, r1, r2)); +} + +static void +mulr_d(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2) +{ + em_wp(_jit, _FMUL(r0, r1, r2)); +} + +static void +divr_f(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2) +{ + em_wp(_jit, _FDIVS(r0, r1, r2)); +} + +static void +divr_d(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2) +{ + em_wp(_jit, _FDIV(r0, r1, r2)); +} + +static void +truncr_d_i(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + jit_fpr_t reg = get_temp_fpr(_jit); + em_wp(_jit, _FCTIWZ(fn(reg), r1)); + /* use reserved 8 bytes area */ + stxi_d(_jit, -8, rn(_FP), fn(reg)); +#if __BYTE_ORDER == __BIG_ENDIAN + ldxi_i(_jit, r0, rn(_FP), -4); +#else + ldxi_i(_jit, r0, rn(_FP), -8); +#endif + unget_temp_fpr(_jit); +} + +static void +truncr_f_i(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + truncr_d_i(_jit, r0, r1); +} + +# if __WORDSIZE == 64 +static void +truncr_d_l(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + jit_fpr_t reg = get_temp_fpr(_jit); + em_wp(_jit, _FCTIDZ(fn(reg), r1)); + /* use reserved 8 bytes area */ + stxi_d(_jit, -8, rn(_FP), fn(reg)); + ldxi_l(_jit, r0, rn(_FP), -8); + unget_temp_fpr(_jit); +} + +static void +truncr_f_l(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + truncr_d_l(_jit, r0, r1); +} +# endif + + +static jit_reloc_t +bltr_d(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + em_wp(_jit, _FCMPO(CR_0, r0, r1)); + return emit_cc_jump(_jit, _BLT(0)); +} + +static jit_reloc_t +bltr_f(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + return bltr_d(_jit, r0, r1); +} + +static jit_reloc_t +bler_d(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + em_wp(_jit, _FCMPO(CR_0, r0, r1)); + em_wp(_jit, _CREQV(CR_GT, CR_GT, CR_UN)); + return emit_cc_jump(_jit, _BGT(0)); +} + +static jit_reloc_t +bler_f(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + return bler_d(_jit, r0, r1); +} + +static jit_reloc_t 
+beqr_d(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + em_wp(_jit, _FCMPO(CR_0, r0, r1)); + return emit_cc_jump(_jit, _BEQ(0)); +} + +static jit_reloc_t +beqr_f(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + return beqr_d(_jit, r0, r1); +} + +static jit_reloc_t +bger_d(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + em_wp(_jit, _FCMPO(CR_0, r0, r1)); + em_wp(_jit, _CREQV(CR_LT, CR_LT, CR_UN)); + return emit_cc_jump(_jit, _BLT(0)); +} + +static jit_reloc_t +bger_f(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + return bger_d(_jit, r0, r1); +} + +static jit_reloc_t +bgtr_d(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + em_wp(_jit, _FCMPO(CR_0, r0, r1)); + return emit_cc_jump(_jit, _BGT(0)); +} + +static jit_reloc_t +bgtr_f(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + return bgtr_d(_jit, r0, r1); +} + +static jit_reloc_t +bner_d(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + em_wp(_jit, _FCMPO(CR_0, r0, r1)); + return emit_cc_jump(_jit, _BNE(0)); +} + +static jit_reloc_t +bner_f(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + return bner_d(_jit, r0, r1); +} + +static jit_reloc_t +bunltr_d(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + em_wp(_jit, _FCMPU(CR_0, r0, r1)); + em_wp(_jit, _CROR(CR_LT, CR_LT, CR_UN)); + return emit_cc_jump(_jit, _BLT(0)); +} + +static jit_reloc_t +bunltr_f(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + return bunltr_d(_jit, r0, r1); +} + +static jit_reloc_t +bunler_d(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + em_wp(_jit, _FCMPU(CR_0, r0, r1)); + return emit_cc_jump(_jit, _BLE(0)); +} + +static jit_reloc_t +bunler_f(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + return bunler_d(_jit, r0, r1); +} + +static jit_reloc_t +buneqr_d(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + em_wp(_jit, _FCMPU(CR_0, r0, r1)); + em_wp(_jit, _CROR(CR_EQ, CR_EQ, CR_UN)); + return emit_cc_jump(_jit, _BEQ(0)); +} + +static jit_reloc_t +buneqr_f(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + return buneqr_d(_jit, r0, r1); +} + +static jit_reloc_t 
+bunger_d(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + em_wp(_jit, _FCMPU(CR_0, r0, r1)); + return emit_cc_jump(_jit, _BGE(0)); +} + +static jit_reloc_t +bunger_f(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + return bunger_d(_jit, r0, r1); +} + +static jit_reloc_t +bungtr_d(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + em_wp(_jit, _FCMPU(CR_0, r0, r1)); + em_wp(_jit, _CROR(CR_GT, CR_GT, CR_UN)); + return emit_cc_jump(_jit, _BGT(0)); +} + +static jit_reloc_t +bungtr_f(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + return bungtr_d(_jit, r0, r1); +} + +static jit_reloc_t +bltgtr_d(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + em_wp(_jit, _FCMPU(CR_0, r0, r1)); + em_wp(_jit, _CROR(CR_EQ, CR_LT, CR_GT)); + return emit_cc_jump(_jit, _BEQ(0)); +} + +static jit_reloc_t +bltgtr_f(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + return bltgtr_d(_jit, r0, r1); +} + +static jit_reloc_t +bordr_d(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + em_wp(_jit, _FCMPU(CR_0, r0, r1)); + return emit_cc_jump(_jit, _BNU(0)); +} + +static jit_reloc_t +bordr_f(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + return bordr_d(_jit, r0, r1); +} + +static jit_reloc_t +bunordr_d(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + em_wp(_jit, _FCMPU(CR_0, r0, r1)); + return emit_cc_jump(_jit, _BUN(0)); +} + +static jit_reloc_t +bunordr_f(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + return bunordr_d(_jit, r0, r1); +} + +static void +ldr_f(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + em_wp(_jit, _LFSX(r0, rn(_R0), r1)); +} + +static void +ldi_f(jit_state_t *_jit, int32_t r0, jit_word_t i0) +{ + if (can_sign_extend_short_p(i0)) { + em_wp(_jit, _LFS(r0, rn(_R0), i0)); + } else if (can_sign_extend_int_p(i0)) { + jit_word_t hi = (int16_t)((i0 >> 16) + ((uint16_t)i0 >> 15)); + jit_word_t lo = (int16_t)(i0 - (hi << 16)); + jit_gpr_t reg = get_temp_gpr(_jit); + em_wp(_jit, _LIS(rn(reg), hi)); + em_wp(_jit, _LFS(r0, rn(reg), lo)); + unget_temp_gpr(_jit); + } else { + jit_gpr_t reg = 
get_temp_gpr(_jit); + movi(_jit, rn(reg), i0); + ldr_f(_jit, r0, rn(reg)); + unget_temp_gpr(_jit); + } +} + +static void +ldr_d(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + em_wp(_jit, _LFDX(r0, rn(_R0), r1)); +} + +static void +ldi_d(jit_state_t *_jit, int32_t r0, jit_word_t i0) +{ + if (can_sign_extend_short_p(i0)) { + em_wp(_jit, _LFD(r0, rn(_R0), i0)); + } else if (can_sign_extend_int_p(i0)) { + jit_word_t hi = (int16_t)((i0 >> 16) + ((uint16_t)i0 >> 15)); + jit_word_t lo = (int16_t)(i0 - (hi << 16)); + jit_gpr_t reg = get_temp_gpr(_jit); + em_wp(_jit, _LIS(rn(reg), hi)); + em_wp(_jit, _LFD(r0, rn(reg), lo)); + unget_temp_gpr(_jit); + } else { + jit_gpr_t reg = get_temp_gpr(_jit); + movi(_jit, rn(reg), i0); + ldr_d(_jit, r0, rn(reg)); + unget_temp_gpr(_jit); + } +} + +static void +ldxr_f(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2) +{ + if (r1 == rn(_R0)) { + if (r2 != rn(_R0)) { + em_wp(_jit, _LFSX(r0, r2, r1)); + } else { + jit_gpr_t reg = get_temp_gpr(_jit); + movr(_jit, rn(reg), r1); + em_wp(_jit, _LFSX(r0, rn(reg), r2)); + unget_temp_gpr(_jit); + } + } else { + em_wp(_jit, _LFSX(r0, r1, r2)); + } +} + +static void +ldxr_d(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2) +{ + if (r1 == rn(_R0)) { + if (r2 != rn(_R0)) { + em_wp(_jit, _LFDX(r0, r2, r1)); + } else { + jit_gpr_t reg = get_temp_gpr(_jit); + movr(_jit, rn(reg), r1); + em_wp(_jit, _LFDX(r0, rn(reg), r2)); + unget_temp_gpr(_jit); + } + } else { + em_wp(_jit, _LFDX(r0, r1, r2)); + } +} + +static void +ldxi_f(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0) +{ + if (i0 == 0) { + ldr_f(_jit, r0, r1); + } else if (can_sign_extend_short_p(i0)) { + if (r1 == rn(_R0)) { + jit_gpr_t reg = get_temp_gpr(_jit); + movr(_jit, rn(reg), r1); + em_wp(_jit, _LFS(r0, rn(reg), i0)); + unget_temp_gpr(_jit); + } else { + em_wp(_jit, _LFS(r0, r1, i0)); + } + } else { + jit_gpr_t reg = get_temp_gpr(_jit); + movi(_jit, rn(reg), i0); + ldxr_f(_jit, r0, r1, rn(reg)); + unget_temp_gpr(_jit); + } 
+} + +static void +ldxi_d(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0) +{ + if (i0 == 0) { + ldr_d(_jit, r0, r1); + } else if (can_sign_extend_short_p(i0)) { + if (r1 == rn(_R0)) { + jit_gpr_t reg = get_temp_gpr(_jit); + movr(_jit, rn(reg), r1); + em_wp(_jit, _LFD(r0, rn(reg), i0)); + unget_temp_gpr(_jit); + } else { + em_wp(_jit, _LFD(r0, r1, i0)); + } + } else { + jit_gpr_t reg = get_temp_gpr(_jit); + movi(_jit, rn(reg), i0); + ldxr_d(_jit, r0, r1, rn(reg)); + unget_temp_gpr(_jit); + } +} + +static void +str_f(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + em_wp(_jit, _STFSX(r1, rn(_R0), r0)); +} + +static void +sti_f(jit_state_t *_jit, jit_word_t i0, int32_t r0) +{ + if (can_sign_extend_short_p(i0)) { + em_wp(_jit, _STFS(r0, rn(_R0), i0)); + } else if (can_sign_extend_int_p(i0)) { + jit_word_t hi = (int16_t)((i0 >> 16) + ((uint16_t)i0 >> 15)); + jit_word_t lo = (int16_t)(i0 - (hi << 16)); + jit_gpr_t reg = get_temp_gpr(_jit); + em_wp(_jit, _LIS(rn(reg), hi)); + em_wp(_jit, _STFS(r0, rn(reg), lo)); + unget_temp_gpr(_jit); + } else { + jit_gpr_t reg = get_temp_gpr(_jit); + movi(_jit, rn(reg), i0); + str_f(_jit, rn(reg), r0); + unget_temp_gpr(_jit); + } +} + +static void +str_d(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + em_wp(_jit, _STFDX(r1, rn(_R0), r0)); +} + +static void +sti_d(jit_state_t *_jit, jit_word_t i0, int32_t r0) +{ + if (can_sign_extend_short_p(i0)) { + em_wp(_jit, _STFD(r0, rn(_R0), i0)); + } else if (can_sign_extend_int_p(i0)) { + jit_word_t hi = (int16_t)((i0 >> 16) + ((uint16_t)i0 >> 15)); + jit_word_t lo = (int16_t)(i0 - (hi << 16)); + jit_gpr_t reg = get_temp_gpr(_jit); + em_wp(_jit, _LIS(rn(reg), hi)); + em_wp(_jit, _STFD(r0, rn(reg), lo)); + unget_temp_gpr(_jit); + } else { + jit_gpr_t reg = get_temp_gpr(_jit); + movi(_jit, rn(reg), i0); + str_d(_jit, rn(reg), r0); + unget_temp_gpr(_jit); + } +} + +static void +stxr_f(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2) +{ + if (r0 == rn(_R0)) { + if (r1 != rn(_R0)) { + 
em_wp(_jit, _STFSX(r2, r1, r0)); + } else { + jit_gpr_t reg = get_temp_gpr(_jit); + movr(_jit, rn(reg), r1); + em_wp(_jit, _STFSX(r2, rn(reg), r0)); + unget_temp_gpr(_jit); + } + } else { + em_wp(_jit, _STFSX(r2, r0, r1)); + } +} + +static void +stxr_d(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2) +{ + if (r0 == rn(_R0)) { + if (r1 != rn(_R0)) { + em_wp(_jit, _STFDX(r2, r1, r0)); + } else { + jit_gpr_t reg = get_temp_gpr(_jit); + movr(_jit, rn(reg), r0); + em_wp(_jit, _STFDX(r2, rn(reg), r1)); + unget_temp_gpr(_jit); + } + } else { + em_wp(_jit, _STFDX(r2, r0, r1)); + } +} + +static void +stxi_f(jit_state_t *_jit, jit_word_t i0, int32_t r0, int32_t r1) +{ + if (i0 == 0) { + str_f(_jit, r0, r1); + } else if (can_sign_extend_short_p(i0)) { + if (r0 == rn(_R0)) { + jit_gpr_t reg = get_temp_gpr(_jit); + movr(_jit, rn(reg), i0); + em_wp(_jit, _STFS(r1, rn(reg), i0)); + unget_temp_gpr(_jit); + } else { + em_wp(_jit, _STFS(r1, r0, i0)); + } + } else { + jit_gpr_t reg = get_temp_gpr(_jit); + movi(_jit, rn(reg), i0); + stxr_f(_jit, rn(reg), r0, r1); + unget_temp_gpr(_jit); + } +} + +static void +stxi_d(jit_state_t *_jit, jit_word_t i0, int32_t r0, int32_t r1) +{ + if (i0 == 0) { + str_d(_jit, r0, r1); + } else if (can_sign_extend_short_p(i0)) { + if (r0 == rn(_R0)) { + jit_gpr_t reg = get_temp_gpr(_jit); + movr(_jit, rn(reg), i0); + em_wp(_jit, _STFD(r1, rn(reg), i0)); + unget_temp_gpr(_jit); + } else { + em_wp(_jit, _STFD(r1, r0, i0)); + } + } else { + jit_gpr_t reg = get_temp_gpr(_jit); + movi(_jit, rn(reg), i0); + stxr_d(_jit, rn(reg), r0, r1); + unget_temp_gpr(_jit); + } +} + +static void +retr_f(jit_state_t *_jit, int32_t r0) +{ + if (rn(JIT_RET) != r0) + movr_f(_jit, fn(JIT_FRET), r0); + + ret(_jit); +} + +static void +retr_d(jit_state_t *_jit, int32_t r0) +{ + if (fn(JIT_FRET) != r0) + movr_d(_jit, fn(JIT_FRET), r0); + + ret(_jit); +} + +static void +retval_d(jit_state_t *_jit, int32_t r0) +{ + if (r0 != fn(JIT_FRET)) + movr_d(_jit, r0, fn(JIT_FRET)); +} + 
+static void +retval_f(jit_state_t *_jit, int32_t r0) +{ + retval_d(_jit, r0); +} diff --git a/deps/lightening/lightening/ppc.c b/deps/lightening/lightening/ppc.c new file mode 100644 index 0000000..7c8ec5d --- /dev/null +++ b/deps/lightening/lightening/ppc.c @@ -0,0 +1,476 @@ +/* + * Copyright (C) 2012-2019 Free Software Foundation, Inc. + * + * This file is part of GNU lightning. + * + * GNU lightning is free software; you can redistribute it and/or modify it + * under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation; either version 3, or (at your option) + * any later version. + * + * GNU lightning is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public + * License for more details. + * + * Authors: + * Paulo Cesar Pereira de Andrade + */ + +# define JIT_RA0 _R3 +# define JIT_FA0 _F1 +# define JIT_RET _R3 +# define JIT_FRET _F1 + +# define rn(x) jit_gpr_regno(x) + +#if __BYTE_ORDER == __LITTLE_ENDIAN +# define C_DISP 0 +# define S_DISP 0 +# define I_DISP 0 +# define F_DISP 0 +#else +# define C_DISP (__WORDSIZE >> 3) - sizeof(int8_t) +# define S_DISP (__WORDSIZE >> 3) - sizeof(int16_t) +# define I_DISP (__WORDSIZE >> 3) - sizeof(int32_t) +# define F_DISP (__WORDSIZE >> 3) - sizeof(jit_float32_t) +#endif + +static const jit_gpr_t abi_gpr_args[] = { + _R3, _R4, _R5, _R6, _R7, _R8, _R9, _R10 +}; + +static const jit_fpr_t abi_fpr_args[] = { + _F1, _F2, _F3, _F4, _F5, _F6, _F7, _F8, _F9, _F10, _F11, _F12, _F13 +}; + +static const int abi_gpr_arg_count = sizeof(abi_gpr_args) / sizeof(abi_gpr_args[0]); +static const int abi_fpr_arg_count = sizeof(abi_fpr_args) / sizeof(abi_fpr_args[0]); + +/* + * Types + */ +typedef jit_pointer_t jit_va_list_t; + +/* libgcc */ +extern void __clear_cache(void *, void *); + +#define em_wp(_jit, x) emit_u32_with_pool(_jit, (x)) + +typedef 
union { +#if __BYTE_ORDER == __BIG_ENDIAN + struct { + uint32_t po:6; + uint32_t ft:5; + uint32_t fa:5; + uint32_t fb:5; + uint32_t fc:5; + uint32_t xo:5; + uint32_t rc:1; + } A; + + struct { + uint32_t po:6; + uint32_t rt:5; + uint32_t ra:5; + uint32_t rb:5; + uint32_t u0:1; + uint32_t xo:9; + uint32_t u1:1; + } XO; + + struct { + uint32_t po:6; + uint32_t rx:5; + uint32_t ra:5; + uint32_t d:16; + } D; + + struct { + uint32_t po:6; + uint32_t f0:5; + uint32_t ra:5; + uint32_t rb:5; + uint32_t xo:10; + uint32_t u0:1; + } X; + + struct { + uint32_t po:6; + int32_t li:24; + uint32_t aa:1; + uint32_t lk:1; + } I; + + struct { + uint32_t po:6; + uint32_t bo:5; + uint32_t bi:5; + int32_t bd:14; + uint32_t aa:1; + uint32_t lk:1; + } B; + + struct { + uint32_t po:6; + uint32_t bo:5; + uint32_t ba:5; + uint32_t bb:5; + uint32_t xo:10; + uint32_t lk:1; + } XL; + + struct { + uint32_t po:6; + uint32_t rs:5; + uint32_t fx:10; + uint32_t xo:10; + uint32_t u0:1; + } XFX; + + struct { + uint32_t po:6; + uint32_t l:1; + uint32_t fm:8; + uint32_t w:1; + uint32_t fb:5; + uint32_t xo:10; + uint32_t rc:1; + } XFL; + + struct { + uint32_t po:6; + uint32_t rs:5; + uint32_t ra:5; + uint32_t rb:5; + uint32_t mb:5; + uint32_t me:5; + uint32_t rc:1; + } M; + +#if __WORDSIZE == 64 + struct { + uint32_t po:6; + uint32_t rs:5; + uint32_t ra:5; + uint32_t rb:5; + uint32_t mx:6; + uint32_t xo:4; + uint32_t rc:1; + } MDS; + + struct { + uint32_t po:6; + uint32_t rs:5; + uint32_t ra:5; + uint32_t s0:5; + uint32_t mx:6; + uint32_t xo:3; + uint32_t s1:1; + uint32_t rc:1; + } MD; + + struct { + uint32_t po:6; + uint32_t rs:5; + uint32_t ra:5; + uint32_t s0:5; + uint32_t xo:9; + uint32_t s1:1; + uint32_t rc:1; + } XS; +#endif +#else + struct { + uint32_t rc:1; + uint32_t xo:5; + uint32_t fc:5; + uint32_t fb:5; + uint32_t fa:5; + uint32_t ft:5; + uint32_t po:6; + } A; + + struct { + uint32_t u1:1; + uint32_t xo:9; + uint32_t u0:1; + uint32_t rb:5; + uint32_t ra:5; + uint32_t rt:5; + uint32_t po:6; + } 
XO; + + struct { + uint32_t d:16; + uint32_t ra:5; + uint32_t rx:5; + uint32_t po:6; + } D; + + struct { + uint32_t u0:1; + uint32_t xo:10; + uint32_t rb:5; + uint32_t ra:5; + uint32_t f0:5; + uint32_t po:6; + } X; + + struct { + uint32_t lk:1; + uint32_t aa:1; + int32_t li:24; + uint32_t po:6; + } I; + + struct { + uint32_t lk:1; + uint32_t aa:1; + int32_t bd:14; + uint32_t bi:5; + uint32_t bo:5; + uint32_t po:6; + } B; + + struct { + uint32_t lk:1; + uint32_t xo:10; + uint32_t bb:5; + uint32_t ba:5; + uint32_t bo:5; + uint32_t po:6; + } XL; + + struct { + uint32_t u0:1; + uint32_t xo:10; + uint32_t fx:10; + uint32_t rs:5; + uint32_t po:6; + } XFX; + + struct { + uint32_t rc:1; + uint32_t xo:10; + uint32_t fb:5; + uint32_t w:1; + uint32_t fm:8; + uint32_t l:1; + uint32_t po:6; + } XFL; + + struct { + uint32_t rc:1; + uint32_t me:5; + uint32_t mb:5; + uint32_t rb:5; + uint32_t ra:5; + uint32_t rs:5; + uint32_t po:6; + } M; + +#if __WORDSIZE == 64 + struct { + uint32_t rc:1; + uint32_t xo:4; + uint32_t mx:6; + uint32_t rb:5; + uint32_t ra:5; + uint32_t rs:5; + uint32_t po:6; + } MDS; + + struct { + uint32_t rc:1; + uint32_t s1:1; + uint32_t xo:3; + uint32_t mx:6; + uint32_t s0:5; + uint32_t ra:5; + uint32_t rs:5; + uint32_t po:6; + } MD; + + struct { + uint32_t rc:1; + uint32_t s1:1; + uint32_t xo:9; + uint32_t s0:5; + uint32_t ra:5; + uint32_t rs:5; + uint32_t po:6; + } XS; +#endif +#endif + uint32_t w; +} instr_t; + +#include "ppc-cpu.c" +#include "ppc-fpu.c" + +struct abi_arg_iterator +{ + const jit_operand_t *args; + size_t argc; + + size_t flags; + + size_t arg_idx; + size_t gpr_idx; + size_t fpr_idx; + + size_t stack_size; + size_t stack_padding; +}; + +static size_t page_size; + +/* + * Implementation + */ +static jit_bool_t +jit_get_cpu(void) +{ + page_size = sysconf(_SC_PAGE_SIZE); + // FIXME check hardware fp support? 
+ return 1; +} + +static jit_bool_t +jit_init(jit_state_t *_jit) +{ + return 1; +} + +static size_t +jit_initial_frame_size(void) +{ + return 32; +} + +static size_t +jit_stack_alignment() +{ + return 16; +} + +static void +jit_flush(void *fptr, void *tptr) +{ +#if defined(__GNUC__) + __clear_cache((void *)fptr, (void *)tptr); +#endif +} + +static void +patch_jmp_without_veneer(jit_state_t *_jit, uint32_t *loc) +{ + patch_jmp_offset(loc, _jit->pc.ui - loc); +} + +static uint32_t * +jmp_without_veneer(jit_state_t *_jit) +{ + uint32_t *loc = _jit->pc.ui; + emit_u32(_jit, _B(0)); + return loc; +} + +static void +patch_load_from_pool_offset(uint32_t *loc, int32_t v) +{ + /* + * not used by this backend + */ + (void)loc; + (void)v; + abort(); +} + +static int32_t +read_load_from_pool_offset(uint32_t *loc) +{ + /* + * not used by this backend + */ + (void)loc; + abort(); + return 0; +} + +static void +jit_try_shorten(jit_state_t *_jit, jit_reloc_t reloc, jit_pointer_t addr) +{ + (void)_jit; + (void)reloc; + (void)addr; +} + +static void * +bless_function_pointer(void *ptr) +{ + return ptr; +} + +static void +reset_call_arg_iterator(struct abi_arg_iterator *iter, size_t argc, + const jit_operand_t *args) +{ + memset(iter, 0, sizeof(*iter)); + iter->argc = argc; + iter->args = args; + iter->stack_size = 32; +} + +static void +reset_load_arg_iterator(struct abi_arg_iterator *iter, size_t argc, + const jit_operand_t *args) +{ + memset(iter, 0, sizeof(*iter)); + iter->argc = argc; + iter->args = args; + // Skip over initial frame + iter->stack_size = 0; +} + +static void +next_abi_arg(struct abi_arg_iterator *iter, jit_operand_t *arg) +{ + ASSERT(iter->arg_idx < iter->argc); + enum jit_operand_abi abi = iter->args[iter->arg_idx].abi; + iter->arg_idx++; + + if (is_gpr_arg(abi) && iter->gpr_idx < abi_gpr_arg_count) { + *arg = jit_operand_gpr(abi, abi_gpr_args[iter->gpr_idx++]); + return; + } + + if (is_fpr_arg(abi) && iter->fpr_idx < abi_fpr_arg_count) { + *arg = 
jit_operand_fpr(abi, abi_fpr_args[iter->fpr_idx++]); + iter->gpr_idx++; + return; + } + + // if this is the first time here, append register save area + if (!iter->flags) { + iter->stack_size += (iter->arg_idx - 1) * 8; + iter->flags = 1; + } + + *arg = jit_operand_mem(abi, JIT_SP, iter->stack_size); + iter->stack_size += 8; +} + +// Prepare _R0 to be saved to stack. Slightly hacky? +static void +jit_prolog(jit_state_t *_jit) +{ + pop_link_register(_jit); +} + +static void +jit_epilog(jit_state_t *_jit) +{ + push_link_register(_jit); +} diff --git a/deps/lightening/lightening/ppc.h b/deps/lightening/lightening/ppc.h new file mode 100644 index 0000000..a6c8675 --- /dev/null +++ b/deps/lightening/lightening/ppc.h @@ -0,0 +1,193 @@ +/* + * Copyright (C) 2012-2017 Free Software Foundation, Inc. + * + * This file is part of GNU lightning. + * + * GNU lightning is free software; you can redistribute it and/or modify it + * under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation; either version 3, or (at your option) + * any later version. + * + * GNU lightning is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public + * License for more details. 
+ * + * Authors: + * Paulo Cesar Pereira de Andrade + */ + +#ifndef _jit_ppc_h +#define _jit_ppc_h + +#define JIT_NEEDS_LITERAL_POOL 1 +#define JIT_USE_IMMEDIATE_RELOC 1 +#define JIT_NEEDS_PROLOG 1 +#define JIT_ASYMMETRIC_STACK 1 + +#if __powerpc__ +# if _CALL_ELF == 2 +/* __BYTE_ORDER == __LITTLE_ENDIAN */ +# define ABI_ELFv2 1 +# endif +#endif + +#define _R0 JIT_GPR(0) +#define _R1 JIT_GPR(1) +#define _R2 JIT_GPR(2) +#define _R3 JIT_GPR(3) +#define _R4 JIT_GPR(4) +#define _R5 JIT_GPR(5) +#define _R6 JIT_GPR(6) +#define _R7 JIT_GPR(7) +#define _R8 JIT_GPR(8) +#define _R9 JIT_GPR(9) +#define _R10 JIT_GPR(10) +#define _R11 JIT_GPR(11) +#define _R12 JIT_GPR(12) +#define _R13 JIT_GPR(13) +#define _R14 JIT_GPR(14) +#define _R15 JIT_GPR(15) +#define _R16 JIT_GPR(16) +#define _R17 JIT_GPR(17) +#define _R18 JIT_GPR(18) +#define _R19 JIT_GPR(19) +#define _R20 JIT_GPR(20) +#define _R21 JIT_GPR(21) +#define _R22 JIT_GPR(22) +#define _R23 JIT_GPR(23) +#define _R24 JIT_GPR(24) +#define _R25 JIT_GPR(25) +#define _R26 JIT_GPR(26) +#define _R27 JIT_GPR(27) +#define _R28 JIT_GPR(28) +#define _R29 JIT_GPR(29) +#define _R30 JIT_GPR(30) +#define _R31 JIT_GPR(31) + +#define _FP _R31 + +#define _F0 JIT_FPR(0) +#define _F1 JIT_FPR(1) +#define _F2 JIT_FPR(2) +#define _F3 JIT_FPR(3) +#define _F4 JIT_FPR(4) +#define _F5 JIT_FPR(5) +#define _F6 JIT_FPR(6) +#define _F7 JIT_FPR(7) +#define _F8 JIT_FPR(8) +#define _F9 JIT_FPR(9) +#define _F10 JIT_FPR(10) +#define _F11 JIT_FPR(11) +#define _F12 JIT_FPR(12) +#define _F13 JIT_FPR(13) +#define _F14 JIT_FPR(14) +#define _F15 JIT_FPR(15) +#define _F16 JIT_FPR(16) +#define _F17 JIT_FPR(17) +#define _F18 JIT_FPR(18) +#define _F19 JIT_FPR(19) +#define _F20 JIT_FPR(20) +#define _F21 JIT_FPR(21) +#define _F22 JIT_FPR(22) +#define _F23 JIT_FPR(23) +#define _F24 JIT_FPR(24) +#define _F25 JIT_FPR(25) +#define _F26 JIT_FPR(26) +#define _F27 JIT_FPR(27) +#define _F28 JIT_FPR(28) +#define _F29 JIT_FPR(29) +#define _F30 JIT_FPR(30) +#define _F31 JIT_FPR(31) + 
+#define JIT_R0 _R3 +#define JIT_R1 _R4 +#define JIT_R2 _R5 +#define JIT_R3 _R6 +#define JIT_R4 _R7 +#define JIT_R5 _R8 +#define JIT_R6 _R9 +#define JIT_R7 _R10 + +#define JIT_V0 _R14 +#define JIT_V1 _R15 +#define JIT_V2 _R16 +#define JIT_V3 _R17 +#define JIT_V4 _R18 +#define JIT_V5 _R19 +#define JIT_V6 _R20 +#define JIT_V7 _R21 +#define JIT_V8 _R22 +#define JIT_V9 _R23 +#define JIT_V10 _R24 +#define JIT_V11 _R25 +#define JIT_V12 _R26 +#define JIT_V13 _R27 +#define JIT_TMP0 _R28 +#define JIT_TMP1 _R29 +#define JIT_TMP2 _R30 + +#define JIT_FP _R31 +#define JIT_SP _R1 + +// TODO shouldn't these be one-to-one? +#define JIT_F0 _F1 +#define JIT_F1 _F2 +#define JIT_F2 _F3 +#define JIT_F3 _F4 +#define JIT_F4 _F5 +#define JIT_F5 _F6 +#define JIT_F6 _F7 +#define JIT_F7 _F8 +#define JIT_F8 _F9 +#define JIT_F9 _F10 +#define JIT_F10 _F11 +#define JIT_F11 _F12 +#define JIT_F12 _F13 + +#define JIT_VF0 _F14 +#define JIT_VF1 _F15 +#define JIT_VF2 _F16 +#define JIT_VF3 _F17 +#define JIT_VF4 _F18 +#define JIT_VF5 _F19 +#define JIT_VF6 _F20 +#define JIT_VF7 _F21 +#define JIT_VF8 _F22 +#define JIT_VF9 _F23 +#define JIT_VF10 _F24 +#define JIT_VF11 _F25 +#define JIT_VF12 _F26 +#define JIT_VF13 _F27 +#define JIT_VF14 _F28 +#define JIT_VF15 _F29 +#define JIT_VF16 _F30 +#define JIT_VF17 _F31 + +#define JIT_FTMP _F0 + +#define JIT_LR _R0 + +#define JIT_PLATFORM_CALLEE_SAVE_GPRS _R0, JIT_SP, JIT_FP, JIT_TMP0, JIT_TMP1, JIT_TMP2 +#define JIT_PLATFORM_CALLEE_SAVE_FPRS + +// At most, we need MTCTR & BCTR, so two instructions per jump. 
+#define JIT_JMP_MAX_SIZE (sizeof(uint32_t) * 2) + +#if __WORDSIZE == 64 +#define JIT_LITERAL_MAX_SIZE ((sizeof(uint32_t) * 6) + JIT_JMP_MAX_SIZE) +#else +#define JIT_LITERAL_MAX_SIZE ((sizeof(uint32_t) * 2) + JIT_JMP_MAX_SIZE) +#endif + +#define JIT_INST_MAX_SIZE JIT_JMP_MAX_SIZE + +// For example atomics are fairly long unbreakable sequences +// that can cause the pretty tight literal pool limits to run over, +// so make sure we give ourselves enough space to emit at least one +// uninterrupted sequence. +// +// TODO: check if this is sound reasoning or just a hack. +#define JIT_EXTRA_SPACE (16 * sizeof(uint32_t)) + +#endif /* _jit_ppc_h */ diff --git a/deps/lightening/lightening/s390-cpu.c b/deps/lightening/lightening/s390-cpu.c new file mode 100644 index 0000000..02f2675 --- /dev/null +++ b/deps/lightening/lightening/s390-cpu.c @@ -0,0 +1,3848 @@ +/* + * Copyright (C) 2013-2017 Free Software Foundation, Inc. + * + * This file is part of GNU lightning. + * + * GNU lightning is free software; you can redistribute it and/or modify it + * under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation; either version 3, or (at your option) + * any later version. + * + * GNU lightning is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public + * License for more details. 
+ * + * Authors: + * Paulo Cesar Pereira de Andrade + */ + +#if PROTO +# if __WORDSIZE == 32 +# define ldr(r0,r1) ldr_i(r0,r1) +# define ldxr(r0,r1,r2) ldxr_i(r0,r1,r2) +# define ldxi(r0,r1,i0) ldxi_i(r0,r1,i0) +# define stxi(i0,r0,r1) stxi_i(i0,r0,r1) +# else +# define ldr(r0,r1) ldr_l(r0,r1) +# define ldxr(r0,r1,r2) ldxr_l(r0,r1,r2) +# define ldxi(r0,r1,i0) ldxi_l(r0,r1,i0) +# define stxi(i0,r0,r1) stxi_l(i0,r0,r1) +# endif +# define is(i) *_jit->pc.us++ = i +# if __WORDSIZE == 32 +# define stack_framesize 96 +# else +# define stack_framesize 160 +# endif +# define _R0_REGNO 0 +# define _R1_REGNO 1 +# define _R7_REGNO 7 +# define _R13_REGNO 13 +# define _FP_REGNO _R13_REGNO +# define _R14_REGNO 14 +# define _R15_REGNO 15 +# define u12_p(i0) ((i0) >= 0 && (i0) <= 4095) +# define s16_p(i0) ((i0) >= -32768 && (i0) <= 32767) +# define x16(i0) ((i0) & 0xffff) +# define s20_p(i0) ((i0) >= -524288 && (i0) <= 524287) +# define x20(i0) ((i0) & 0xfffff) +# if __WORDSIZE == 32 +# define s32_p(i0) 1 +# else +# define s32_p(i0) \ + ((i0) >= -2147483648L && (i0) < 2147483647L) +# endif + +/* + Condition Code Instruction (Mask) Bit Mask Value + 0 8 8 + 1 9 4 + 2 10 2 + 3 11 1 + +AGR: + 0 Zero + 1 < zero + 2 > zero + 3 Overflow +-- +1 -> overflow CC_O +14 -> no overflow CC_NO + +ALGR: + 0 Zero, no carry + 1 Not zero, no carry + 2 Zero, carry + 3 Not zero, carry +-- +2|1 -> carry CC_NLE +8|4 -> no carry CC_LE + +SGR: + 0 Zero + 1 < zero + 2 > zero + 3 Overflow +-- +1 -> overflow CC_O +14 -> no overflow CC_NO + +SLGR: + 0 -- + 1 Not zero, borrow + 2 Zero, no borrow + 3 Not zero, no borrow +-- +4 -> borrow CC_L +11 -> no borrow CC_NL + */ + +# define CC_NV 0x0 +# define CC_O 0x1 +# define CC_H 0x2 +# define CC_NLE 0x3 +# define CC_L 0x4 +# define CC_NHE 0x5 +# define CC_LH 0x6 +# define CC_NE 0x7 +# define CC_E 0x8 +# define CC_NLH 0x9 +# define CC_HE 0xA +# define CC_NL 0xB +# define CC_LE 0xC +# define CC_NH 0xD +# define CC_NO 0xE +# define CC_AL 0xF +# define _us uint16_t +# 
define _ui uint32_t +# define E_(Op) _E(_jit,Op) +static void _E(jit_state_t*,_ui); +# define I_(Op,I) _I(_jit,Op,I) +static void _I(jit_state_t*,_ui,_ui); +# define RR_(Op,R1,R2) _RR(_jit,Op,R1,R2) +static void _RR(jit_state_t*,_ui,_ui,_ui); +# define RRE_(Op,R1,R2) _RRE(_jit,Op,R1,R2) +static void _RRE(jit_state_t*,_ui,_ui,_ui); +# define RRF_(Op,R3,M4,R1,R2) _RRF(_jit,Op,R3,M4,R1,R2) +static void _RRF(jit_state_t*,_ui,_ui,_ui,_ui,_ui); +# define RX_(Op,R1,X2,B2,D2) _RX(_jit,Op,R1,X2,B2,D2) +static void _RX(jit_state_t*,_ui,_ui,_ui,_ui,_ui); +# define RXE_(Op,R1,X2,B2,D2,Op2) _RXE(_jit,Op,R1,X2,B2,D2,Op2) +static void _RXE(jit_state_t*,_ui,_ui,_ui,_ui,_ui,_ui); +# define RXF_(Op,R3,X2,B2,D2,R1,Op2) _RXF(_jit,Op,R3,X2,B2,D2,R1,Op2) +static void _RXF(jit_state_t*,_ui,_ui,_ui,_ui,_ui,_ui,_ui); +# define RXY_(Op,R1,X2,B2,D2,Op2) _RXY(_jit,Op,R1,X2,B2,D2,Op2) +static void _RXY(jit_state_t*,_ui,_ui,_ui,_ui,_ui,_ui); +# define RS_(Op,R1,R3,B2,D2) _RS(_jit,Op,R1,R3,B2,D2) +static void _RS(jit_state_t*,_ui,_ui,_ui,_ui,_ui); +# define RSY_(Op,R1,R3,B2,D2,Op2) RXY_(Op,R1,R3,B2,D2,Op2) +# define RSL_(Op,L1,B1,D1,Op2) _RSL(_jit,Op,L1,B1,D1,Op2) +static void _RSL(jit_state_t*,_ui,_ui,_ui,_ui,_ui); +# define RSI_(Op,R1,R3,I2) _RSI(_jit,Op,R1,R3,I2) +static void _RSI(jit_state_t*,_ui,_ui,_ui,_ui); +# define RI_(Op,R1,Op2,I2) RSI_(Op,R1,Op2,I2) +# define RIE_(Op,R1,R3,I2,Op2) _RIE(_jit,Op,R1,R3,I2,Op2) +static void _RIE(jit_state_t*,_ui,_ui,_ui,_ui,_ui); +# define RIL_(Op,R1,Op2,I2) _RIL(_jit,Op,R1,Op2,I2) +static void _RIL(jit_state_t*,_ui,_ui,_ui,_ui); +# define SI_(Op,I2,B1,D1) _SI(_jit,Op,I2,B1,D1) +static void _SI(jit_state_t*,_ui,_ui,_ui,_ui); +# define SIY_(Op,I2,B1,D1,Op2) _SIY(_jit,Op,I2,B1,D1,Op2) +static void _SIY(jit_state_t*,_ui,_ui,_ui,_ui,_ui); +# define S_(Op,B2,D2) _S(_jit,Op,B2,D2) +static void _S(jit_state_t*,_ui,_ui,_ui); +# define SSL_(Op,L,B1,D1,B2,D2) SS_(Op,(L)>>4,(L)&0xF,B1,D1,B2,D2) +# define SS_(Op,LL,LH,B1,D1,B2,D2) _SS(_jit,Op,LL,LH,B1,D1,B2,D2) 
+static void _SS(jit_state_t*,_ui,_ui,_ui,_ui,_ui,_ui,_ui); +# define SSE_(Op,B1,D1,B2,D2) _SSE(_jit,Op,B1,D1,B2,D2) +static void _SSE(jit_state_t*,_ui,_ui,_ui,_ui,_ui); +# undef _us +# undef _ui +# define nop(c) _nop(_jit,c) +static void _nop(jit_state_t*,int32_t); +# if __WORDSIZE == 32 +# define ADD_(r0,r1) AR(r0,r1) +# define ADDI_(r0,i0) AHI(r0,i0) +# define ADDC_(r0,r1) ALR(r0,r1) +# define ADDX_(r0,r1) ALCR(r0,r1) +# define AND_(r0,r1) NR(r0,r1) +# define CMP_(r0,r1) CR(r0,r1) +# define CMPU_(r0,r1) CLR(r0,r1) +# define DIVREM_(r0,r1) DR(r0,r1) +# define DIVREMU_(r0,r1) DLR(r0,r1) +# define OR_(r0,r1) OR(r0,r1) +# define MUL_(r0,r1) MSR(r0,r1) +# define MULI_(r0,i0) MHI(r0,i0) +# define MULU_(r0,r1) MLR(r0,r1) +# define SUB_(r0,r1) SR(r0,r1) +# define SUBC_(r0,r1) SLR(r0,r1) +# define SUBX_(r0,r1) SLBR(r0,r1) +# define TEST_(r0,r1) LTR(r0,r1) +# define XOR_(r0,r1) XR(r0,r1) +# else +# define ADD_(r0,r1) AGR(r0,r1) +# define ADDI_(r0,i0) AGHI(r0,i0) +# define ADDC_(r0,r1) ALGR(r0,r1) +# define ADDX_(r0,r1) ALCGR(r0,r1) +# define AND_(r0,r1) NGR(r0,r1) +# define CMP_(r0,r1) CGR(r0,r1) +# define CMPU_(r0,r1) CLGR(r0,r1) +# define DIVREM_(r0,r1) DSGR(r0,r1) +# define DIVREMU_(r0,r1) DLGR(r0,r1) +# define MUL_(r0,r1) MSGR(r0,r1) +# define MULI_(r0,i0) MGHI(r0,i0) +# define MULU_(r0,r1) MLGR(r0,r1) +# define OR_(r0,r1) OGR(r0,r1) +# define SUB_(r0,r1) SGR(r0,r1) +# define SUBC_(r0,r1) SLGR(r0,r1) +# define SUBX_(r0,r1) SLBGR(r0,r1) +# define TEST_(r0,r1) LTGR(r0,r1) +# define XOR_(r0,r1) XGR(r0,r1) +# endif +/**************************************************************** + * General Instructions * + ****************************************************************/ +/* ADD */ +# define AR(R1,R2) RR_(0x1A,R1,R2) +# define AGR(R1,R2) RRE_(0xB908,R1,R2) +# define AGFR(R1,R2) RRE_(0xB918,R1,R2) +# define A(R1,D2,X2,B2) RX_(0x5A,R1,X2,B2,D2) +# define AY(R1,D2,X2,B2) RXY_(0xE3,R1,X2,B2,D2,0x5A) +# define AG(R1,D2,X2,B2) RXY_(0xE3,R1,X2,B2,D2,0x08) +# define 
AGF(R1,D2,X2,B2) RXY_(0xE3,R1,X2,B2,D2,0x18) +/* ADD HALFWORD */ +# define AH(R1,D2,X2,B2) RX_(0x4A,R1,X2,B2,D2) +# define AHY(R1,D2,X2,B2) RXY_(0xE3,R1,X2,B2,D2,0x7A) +/* ADD HALFWORD IMMEDIATE */ +# define AHI(R1,I2) RI_(0xA7,R1,0xA,I2) +# define AGHI(R1,I2) RI_(0xA7,R1,0xB,I2) +/* ADD LOGICAL */ +# define ALR(R1,R2) RR_(0x1E,R1,R2) +# define ALGR(R1,R2) RRE_(0xB90A,R1,R2) +# define ALGFR(R1,R2) RRE_(0xB91A,R1,R2) +# define AL(R1,D2,X2,B2) RX_(0x5E,R1,X2,B2,D2) +# define ALY(R1,D2,X2,B2) RXY_(0xE3,R1,X2,B2,D2,0x5E) +# define ALG(R1,D2,X2,B2) RXY_(0xE3,R1,X2,B2,D2,0x0A) +# define ALGF(R1,D2,X2,B2) RXY_(0xE3,R1,X2,B2,D2,0x1A) +/* ADD LOGICAL WITH CARRY */ +# define ALCR(R1,R2) RRE_(0xB998,R1,R2) +# define ALCGR(R1,R2) RRE_(0xB988,R1,R2) +# define ALC(R1,D2,X2,B2) RXY_(0xE3,R1,X2,B2,D2,0x98) +# define ALCG(R1,D2,X2,B2) RXY_(0xE3,R1,X2,B2,D2,0x88) +/* AND */ +# define NR(R1,R2) RR_(0x14,R1,R2) +# define NGR(R1,R2) RRE_(0xB980,R1,R2) +# define N(R1,D2,X2,B2) RX_(0x54,R1,X2,B2,D2) +# define NY(R1,D2,X2,B2) RXY_(0xE3,R1,X2,B2,D2,0x54) +# define NG(R1,D2,X2,B2) RXY_(0xE3,R1,X2,B2,D2,0x80) +# define NI(D1,B1,I2) SI_(0x94,I2,B1,D1) +# define NIY(D1,B1,I2) SIY_(0xEB,I2,B1,D1,0x54) +# define NC(D1,L,B1,D2,B2) SSL_(0xD4,L,B1,D1,B2,D2) +/* AND IMMEDIATE */ +# define NIHH(R1,I2) RI_(0xA5,R1,0x4,I2) +# define NIHL(R1,I2) RI_(0xA5,R1,0x5,I2) +# define NILH(R1,I2) RI_(0xA5,R1,0x6,I2) +# define NILL(R1,I2) RI_(0xA5,R1,0x7,I2) +/* BRANCH AND LINK */ +# define BALR(R1,R2) RR_(0x05,R1,R2) +# define BAL(R1,D2,X2,B2) RX_(0x45,R1,X2,B2,D2) +/* BRANCH AND SAVE */ +# define BASR(R1,R2) RR_(0x0D,R1,R2) +# define BAS(R1,D2,X2,B2) RX_(0x4D,R1,X2,B2,D2) +/* BRANCH AND SAVE AND SET MODE */ +# define BASSM(R1,R2) RR_(0x0C,R1,R2) +/* BRANCH AND SET MODE */ +# define BSM(R1,R2) RR_(0x0B,R1,R2) +/* BRANCH ON CONDITION */ +# define BCR(M1,R2) RR_(0x07,M1,R2) +# define BR(R2) BCR(CC_AL,R2) +# define NOPR(R2) BCR(CC_NV,R2) +# define BC(M1,D2,X2,B2) RX_(0x47,M1,X2,B2,D2) +/* BRANCH ON COUNT */ +# 
define BCTR(R1,R2) RR_(0x06,R1,R2) +# define BCTGR(R1,R2) RRE_(0xB946,R1,R2) +# define BCT(R1,D2,X2,B2) RX_(0x46,R1,X2,B2,D2) +# define BCTG(R1,D2,X2,B2) RXY_(0xE3,R1,X2,B2,D2,0x46) +/* BRANCH ON INDEX HIGH */ +# define BXH(R1,R3,D2,B2) RS_(0x86,R1,R3,B2,D2) +# define BXHG(R1,R3,B2,D2) RSY_(0xEB,R1,R3,B2,D2,0x44) +/* BRANCH ON INDEX LOW OR EQUAL */ +# define BXLE(R1,R3,D2,B2) RS_(0x87,R1,R3,B2,D2) +# define BXLEG(R1,R3,B2,D2) RSY_(0xEB,R1,R3,B2,D2,0x45) +/* BRANCH RELATIVE AND SAVE */ +# define BRAS(R1,I2) RI_(0xA7,R1,0x5,I2) +/* BRANCH RELATIVE AND SAVE LONG */ +# define BRASL(R1,I2) RIL_(0xC0,R1,0x5,I2) +/* BRANCH RELATIVE ON CONDITION */ +# define BRC(M1,I2) RI_(0xA7,M1,0x4,I2) +# define J(I2) BRC(CC_AL,I2) +/* BRANCH RELATIVE ON CONDITION LONG */ +# define BRCL(M1,I2) RIL_(0xC0,M1,0x4,I2) +# define BRL(I2) BRCL(CC_AL,I2) +/* BRANCH RELATIVE ON COUNT */ +# define BRCT(M1,I2) RI_(0xA7,M1,0x6,I2) +# define BRCTG(M1,I2) RI_(0xA7,M1,0x7,I2) +/* BRANCH RELATIVE ON INDEX HIGH */ +# define BRXH(R1,R3,I2) RSI_(0x84,R1,R3,I2) +# define BRXHG(R1,R3,I2) RIE_(0xEC,R1,R3,I2,0x44) +/* BRANCH RELATIVE ON INDEX LOW OR EQUAL */ +# define BRXLE(R1,R3,I2) RSI_(0x85,R1,R3,I2) +# define BRXLEG(R1,R3,I2) RIE_(0xEC,R1,R3,I2,0x45) +/* CHECKSUM */ +# define CKSUM(R1,R2) RRE_(0xB241,R1,R2) +/* CIPHER MESAGE (KM) */ +# define KM(R1,R2) RRE_(0xB92E,R1,R2) +/* CIPHER MESAGE WITH CHAINING (KMC) */ +# define KMC(R1,R2) RRE_(0xB92F,R1,R2) +/* COMPARE */ +# define CR(R1,R2) RR_(0x19,R1,R2) +# define CGR(R1,R2) RRE_(0xB920,R1,R2) +# define CGFR(R1,R2) RRE_(0xB930,R1,R2) +# define C(R1,D2,X2,B2) RX_(0x59,R1,X2,B2,D2) +# define CY(R1,D2,X2,B2) RXY_(0xE3,R1,X2,B2,D2,0x59) +# define CG(R1,D2,X2,B2) RXY_(0xE3,R1,X2,B2,D2,0x20) +# define CGF(R1,D2,X2,B2) RXY_(0xE3,R1,X2,B2,D2,0x30) +/* COMPARE AND FORM CODEWORD */ +# define CFC(D2,B2) S_(0xB21A,B2,D2) +/* COMPARE AND SWAP */ +# define CS(R1,R3,D2,B2) RS_(0xBA,R1,R3,B2,D2) +# define CSY(R1,R3,D2,B2) RSY_(0xEB,R1,R3,B2,D2,0x14) +# define 
CSG(R1,R3,D2,B2) RSY_(0xEB,R1,R3,B2,D2,0x30) +/* COMPARE DOUBLE AND SWAP -- NOTE(review): CSDY/CSDG look like transposed spellings of the CDSY/CDSG mnemonics; confirm before renaming */ +# define CDS(R1,R3,D2,B2) RS_(0xBB,R1,R3,B2,D2) +# define CSDY(R1,R3,D2,B2) RSY_(0xEB,R1,R3,B2,D2,0x31) +# define CSDG(R1,R3,D2,B2) RSY_(0xEB,R1,R3,B2,D2,0x3E) +/* COMPARE HALFWORD */ +# define CH(R1,D2,X2,B2) RX_(0x49,R1,X2,B2,D2) +# define CHY(R1,D2,X2,B2) RXY_(0xE3,R1,X2,B2,D2,0x79) +/* COMPARE HALFWORD IMMEDIATE */ +# define CHI(R1,I2) RI_(0xA7,R1,0xE,I2) +# define CGHI(R1,I2) RI_(0xA7,R1,0xF,I2) +/* COMPARE LOGICAL */ +# define CLR(R1,R2) RR_(0x15,R1,R2) +# define CLGR(R1,R2) RRE_(0xB921,R1,R2) +# define CLGFR(R1,R2) RRE_(0xB931,R1,R2) +# define CL(R1,D2,X2,B2) RX_(0x55,R1,X2,B2,D2) +# define CLY(R1,D2,X2,B2) RXY_(0xE3,R1,X2,B2,D2,0x55) +# define CLG(R1,D2,X2,B2) RXY_(0xE3,R1,X2,B2,D2,0x21) +# define CLGF(R1,D2,X2,B2) RXY_(0xE3,R1,X2,B2,D2,0x31) +# define CLI(D1,B1,I2) SI_(0x95,I2,B1,D1) +# define CLIY(D1,B1,I2) SIY_(0xEB,I2,B1,D1,0x55) +# define CLC(D1,L,B1,D2,B2) SSL_(0xD5,L,B1,D1,B2,D2) +/* COMPARE LOGICAL CHARACTERS UNDER MASK */ +# define CLM(R1,M3,D2,B2) RS_(0xBD,R1,M3,B2,D2) +# define CLMY(R1,M3,D2,B2) RSY_(0xEB,R1,M3,B2,D2,0x21) +# define CLMH(R1,M3,D2,B2) RSY_(0xEB,R1,M3,B2,D2,0x20) +/* COMPARE LOGICAL LONG */ +# define CLCL(R1,R2) RR_(0x0F,R1,R2) +/* COMPARE LOGICAL LONG EXTENDED */ +# define CLCLE(R1,R3,D2,B2) RS_(0xA9,R1,R3,B2,D2) +/* COMPARE LOGICAL LONG UNICODE */ +# define CLCLU(R1,R3,D2,B2) RSY_(0xEB,R1,R3,B2,D2,0x8F) +/* COMPARE LOGICAL STRING */ +# define CLST(R1,R2) RRE_(0xB25D,R1,R2) +/* COMPARE UNTIL SUBSTRING EQUAL */ +# define CUSE(R1,R2) RRE_(0xB257,R1,R2) +/* COMPRESSION CALL */ +# define CMPSC(R1,R2) RRE_(0xB263,R1,R2) +/* COMPUTE INTERMEDIATE MESSAGE DIGEST (KIMD) */ +# define KIMD(R1,R2) RRE_(0xB93E,R1,R2) +/* COMPUTE LAST MESSAGE DIGEST (KLMD) */ +# define KLMD(R1,R2) RRE_(0xB93F,R1,R2) +/* COMPUTE MESSAGE AUTHENTICATION CODE (KMAC) */ +# define KMAC(R1,R2) RRE_(0xB91E,R1,R2) +/* CONVERT TO BINARY */ +# define CVB(R1,D2,X2,B2) RX_(0x4F,R1,X2,B2,D2) +# 
define CVBY(R1,D2,X2,B2) RXY_(0xE3,R1,X2,B2,D2,0x06) +# define CVBG(R1,D2,X2,B2) RXY_(0xE3,R1,X2,B2,D2,0x0e) +/* CONVERT TO DECIMAL */ +# define CVD(R1,D2,X2,B2) RX_(0x4E,R1,X2,B2,D2) +# define CVDY(R1,D2,X2,B2) RXY_(0xE3,R1,X2,B2,D2,0x26) +# define CVDG(R1,D2,X2,B2) RXY_(0xE3,R1,X2,B2,D2,0x2E) +/* CONVERT UNICODE TO UTF-8 */ +# define CUUTF(R1,R2) RRE_(0xB2A6,R1,R2) +/* CONVERT UTF-8 TO UNICODE */ +# define CUTFU(R1,R2) RRE_(0xB2A7,R1,R2) +/* COPY ACCESS */ +# define CPYA(R1,R2) RRE_(0xB24D,R1,R2) +/* DIVIDE */ +# define DR(R1,R2) RR_(0x1D,R1,R2) +# define D(R1,D2,X2,B2) RX_(0x5D,R1,X2,B2,D2) +/* DIVIDE LOGICAL */ +# define DLR(R1,R2) RRE_(0xB997,R1,R2) +# define DLGR(R1,R2) RRE_(0xB987,R1,R2) +# define DL(R1,D2,X2,B2) RXY_(0xE3,R1,X2,B2,D2,0x97) +# define DLG(R1,D2,X2,B2) RXY_(0xE3,R1,X2,B2,D2,0x87) +/* DIVIDE SINGLE */ +# define DSGR(R1,R2) RRE_(0xB90D,R1,R2) +# define DSGFR(R1,R2) RRE_(0xB91D,R1,R2) +# define DSG(R1,D2,X2,B2) RXY_(0xE3,R1,X2,B2,D2,0x0D) +# define DSGF(R1,D2,X2,B2) RXY_(0xE3,R1,X2,B2,D2,0x1D) +/* EXCLUSIVE OR */ +# define XR(R1,R2) RR_(0x17,R1,R2) +# define XGR(R1,R2) RRE_(0xB982,R1,R2) +# define X(R1,D2,X2,B2) RX_(0x57,R1,X2,B2,D2) +# define XY(R1,D2,X2,B2) RXY_(0xE3,R1,X2,B2,D2,0x57) +# define XG(R1,D2,X2,B2) RXY_(0xE3,R1,X2,B2,D2,0x82) +# define XI(D1,B1,I2) SI_(0x97,I2,B1,D1) +# define XIY(D1,B1,I2) SIY_(0xEB,I2,B1,D1,0x57) +# define XC(D1,L,B1,D2,B2) SSL_(0xD7,L,B1,D1,B2,D2) +/* EXECUTE */ +# define EX(R1,D2,X2,B2) RX_(0x44,R1,X2,B2,D2) +/* EXTRACT ACCESS */ +# define EAR(R1,R2) RRE_(0xB24F,R1,R2) +/* EXTRACT PSW */ +# define EPSW(R1,R2) RRE_(0xB98D,R1,R2) +/* INSERT CHARACTER */ +# define IC(R1,D2,X2,B2) RX_(0x43,R1,X2,B2,D2) +# define ICY(R1,D2,X2,B2) RXY_(0xE3,R1,X2,B2,D2,0x73) +/* INSERT CHARACTERS UNDER MASK */ +# define ICM(R1,M3,D2,B2) RS_(0xBF,R1,M3,B2,D2) +# define ICMY(R1,M3,D2,B2) RSY_(0xEB,R1,M3,B2,D2,0x81) +# define ICMH(R1,M3,D2,B2) RSY_(0xEB,R1,M3,B2,D2,0x80) +/* INSERT IMMEDIATE */ +# define IIHH(R1,I2) RI_(0xA5,R1,0x0,I2) 
+# define IIHL(R1,I2) RI_(0xA5,R1,0x1,I2) +# define IILH(R1,I2) RI_(0xA5,R1,0x2,I2) +# define IILL(R1,I2) RI_(0xA5,R1,0x3,I2) +/* INSERT PROGRAM MASK */ +# define IPM(R1) RRE_(0xB222,R1,0) +/* LOAD */ +# define LR(R1,R2) RR_(0x18,R1,R2) +# define LGR(R1,R2) RRE_(0xB904,R1,R2) +# define LGFR(R1,R2) RRE_(0xB914,R1,R2) +# define L(R1,D2,X2,B2) RX_(0x58,R1,X2,B2,D2) +# define LY(R1,D2,X2,B2) RXY_(0xE3,R1,X2,B2,D2,0x58) +# define LG(R1,D2,X2,B2) RXY_(0xE3,R1,X2,B2,D2,0x04) +# define LGF(R1,D2,X2,B2) RXY_(0xE3,R1,X2,B2,D2,0x14) +/* LOAD ACCESS MULTIPLE */ +# define LAM(R1,R3,D2,B2) RS_(0x9A,R1,R3,B2,D2) +# define LAMY(R1,R3,D2,B2) RSY_(0xEB,R1,R3,B2,D2,0x9A) +/* LOAD ADDRESS */ +# define LA(R1,D2,X2,B2) RX_(0x41,R1,X2,B2,D2) +# define LAY(R1,D2,X2,B2) RXY_(0xE3,R1,X2,B2,D2,0x71) +/* LOAD ADDRESS EXTENDED */ +# define LAE(R1,D2,X2,B2) RX_(0x51,R1,X2,B2,D2) +/* LOAD ADDRESS RELATIVE LONG */ +# define LARL(R1,I2) RIL_(0xC0,R1,0x0,I2) +/* LOAD AND TEST */ +# define LTR(R1,R2) RR_(0x12,R1,R2) +# define LTGR(R1,R2) RRE_(0xB902,R1,R2) +# define LTGFR(R1,R2) RRE_(0xB912,R1,R2) +/* LOAD BYTE */ +# define LGBR(R1,R2) RRE_(0xB906,R1,R2) /* disasm */ +# define LB(R1,D2,X2,B2) RXY_(0xE3,R1,X2,B2,D2,0x76) +# define LGB(R1,D2,X2,B2) RXY_(0xE3,R1,X2,B2,D2,0x77) +/* LOAD COMPLEMENT */ +# define LCR(R1,R2) RR_(0x13,R1,R2) +# define LCGR(R1,R2) RRE_(0xB903,R1,R2) +# define LCGFR(R1,R2) RRE_(0xB913,R1,R2) +/* LOAD HALFWORD */ +# define LH(R1,D2,X2,B2) RX_(0x48,R1,X2,B2,D2) +# define LHY(R1,D2,X2,B2) RXY_(0xE3,R1,X2,B2,D2,0x78) +# define LGHR(R1,R2) RRE_(0xB907,R1,R2) /* disasm */ +# define LGH(R1,D2,X2,B2) RXY_(0xE3,R1,X2,B2,D2,0x15) +/* LOAD HALFWORD IMMEDIATE */ +# define LHI(R1,I2) RI_(0xA7,R1,0x8,I2) +# define LGHI(R1,I2) RI_(0xA7,R1,0x9,I2) +/* LOAD LOGICAL */ +# define LLGFR(R1,R2) RRE_(0xB916,R1,R2) +# define LLGF(R1,D2,X2,B2) RXY_(0xE3,R1,X2,B2,D2,0x16) +/* LOAD LOGICAL CHARACTER */ +# define LLGCR(R1,R2) RRE_(0xB984,R1,R2) /* disasm */ +# define LLGC(R1,D2,X2,B2) 
RXY_(0xE3,R1,X2,B2,D2,0x90) +/* LOAD LOGICAL HALFWORD */ +# define LLGHR(R1,R2) RRE_(0xB985,R1,R2) /* disasm */ +# define LLGH(R1,D2,X2,B2) RXY_(0xE3,R1,X2,B2,D2,0x91) +/* LOAD LOGICAL IMMEDIATE */ +# define LLIHH(R1,I2) RI_(0xA5,R1,0xC,I2) +# define LLIHL(R1,I2) RI_(0xA5,R1,0xD,I2) +# define LLILH(R1,I2) RI_(0xA5,R1,0xE,I2) +# define LLILL(R1,I2) RI_(0xA5,R1,0xF,I2) +/* LOAD LOGICAL THIRTY ONE BITS */ +# define LLGTR(R1,R2) RRE_(0xB917,R1,R2) +# define LLGT(R1,D2,X2,B2) RXY_(0xE3,R1,X2,B2,D2,0x17) +/* LOAD MULTIPLE */ +# define LM(R1,R3,D2,B2) RS_(0x98,R1,R3,B2,D2) +# define LMY(R1,R3,D2,B2) RSY_(0xEB,R1,R3,B2,D2,0x98) +# define LMG(R1,R3,D2,B2) RSY_(0xEB,R1,R3,B2,D2,0x04) +/* LOAD MULTIPLE DISJOINT */ +# define LMD(R1,R3,D2,B2,D4,B4) SS_(0xEF,R1,R3,B2,D2,B4,D4) +/* LOAD MULTIPLE HIGH */ +# define LMH(R1,R3,D2,B2) RSY_(0xEB,R1,R3,B2,D2,0x96) +/* LOAD NEGATIVE */ +# define LNR(R1,R2) RR_(0x11,R1,R2) +# define LNGR(R1,R2) RRE_(0xB901,R1,R2) +# define LNGFR(R1,R2) RRE_(0xB911,R1,R2) +/* LOAD PAIR FROM QUADWORD */ +# define LPQ(R1,D2,X2,B2) RXY_(0xE3,R1,X2,B2,D2,0x8F) +/* LOAD POSITIVE */ +# define LPR(R1,R2) RR_(0x10,R1,R2) +# define LPGR(R1,R2) RRE_(0xB900,R1,R2) +# define LPGFR(R1,R2) RRE_(0xB910,R1,R2) +/* LOAD REVERSED */ +# define LRVR(R1,R2) RRE_(0xB91F,R1,R2) +# define LRVGR(R1,R2) RRE_(0xB90F,R1,R2) +# define LRVH(R1,D2,X2,B2) RXY_(0xE3,R1,X2,B2,D2,0x1F) +# define LRV(R1,D2,X2,B2) RXY_(0xE3,R1,X2,B2,D2,0x1E) +# define LRVG(R1,D2,X2,B2) RXY_(0xE3,R1,X2,B2,D2,0x0F) +/* MONITOR CALL */ +# define MC(D1,B1,I2) SI_(0xAF,I2,B1,D1) +/* MOVE */ +# define MVI(D1,B1,I2) SI_(0x92,I2,B1,D1) +# define MVIY(D1,B1,I2) SIY_(0xEB,I2,B1,D1,0x52) +# define MVC(D1,L,B1,D2,B2) SSL_(0xD2,L,B1,D1,B2,D2) +/* MOVE INVERSE */ +# define MVCIN(D1,L,B1,D2,B2) SSL_(0xE8,L,B1,D1,B2,D2) +/* MOVE LONG */ +# define MVCL(R1,R2) RR_(0x0E,R1,R2) +/* MOVE LONG EXTENDED */ +# define MVCLE(R1,R3,D2,B2) RS_(0xA8,R1,R3,B2,D2) +/* MOVE LONG UNICODE */ +# define MVCLU(R1,R3,D2,B2) 
RSY_(0xEB,R1,R3,B2,D2,0x8E) +/* MOVE NUMERICS */ +# define MVN(D1,L,B1,D2,B2) SSL_(0xD1,L,B1,D1,B2,D2) +/* MOVE STRING */ +# define MVST(R1,R2) RRE_(0xB255,R1,R2) +/* MOVE WITH OFFSET */ +# define MVO(D1,L1,B1,D2,L2,B2) SS_(0xF1,L1,L2,B1,D1,B2,D2) +/* MOVE ZONES */ +# define MVZ(D1,L,B1,D2,B2) SSL_(0xD3,L,B1,D1,B2,D2) +/* MULTIPLY */ +# define MR(R1,R2) RR_(0x1C,R1,R2) +# define M(R1,D2,X2,B2) RX_(0x5C,R1,X2,B2,D2) +/* MULTIPLY HALFWORD */ +# define MH(R1,D2,X2,B2) RX_(0x4C,R1,X2,B2,D2) +/* MULTIPLY HALFWORD IMMEDIATE */ +# define MHI(R1,I2) RI_(0xA7,R1,0xC,I2) +# define MGHI(R1,I2) RI_(0xA7,R1,0xD,I2) +/* MULTIPLY LOGICAL */ +# define MLR(R1,R2) RRE_(0xB996,R1,R2) +# define MLGR(R1,R2) RRE_(0xB986,R1,R2) +# define ML(R1,D2,X2,B2) RXY_(0xE3,R1,X2,B2,D2,0x96) +# define MLG(R1,D2,X2,B2) RXY_(0xE3,R1,X2,B2,D2,0x86) +/* MULTIPLY SINGLE */ +# define MSR(R1,R2) RRE_(0xB252,R1,R2) +# define MSGR(R1,R2) RRE_(0xB90C,R1,R2) +# define MSGFR(R1,R2) RRE_(0xB91C,R1,R2) +# define MS(R1,D2,X2,B2) RX_(0x71,R1,X2,B2,D2) +# define MSY(R1,D2,X2,B2) RXY_(0xE3,R1,X2,B2,D2,0x51) +# define MSG(R1,D2,X2,B2) RXY_(0xE3,R1,X2,B2,D2,0x0C) +# define MSGF(R1,D2,X2,B2) RXY_(0xE3,R1,X2,B2,D2,0x1C) +/* OR */ +# define OR(R1,R2) RR_(0x16,R1,R2) +# define OGR(R1,R2) RRE_(0xB981,R1,R2) +# define O(R1,D2,X2,B2) RX_(0x56,R1,X2,B2,D2) +# define OY(R1,D2,X2,B2) RXY_(0xE3,R1,X2,B2,D2,0x56) +# define OG(R1,D2,X2,B2) RXY_(0xE3,R1,X2,B2,D2,0x81) +# define OI(D1,B1,I2) SI_(0x96,I2,B1,D1) +# define OIY(D1,B1,I2) SIY_(0xEB,I2,B1,D1,0x56) +# define OC(D1,L,B1,D2,B2) SSL_(0xD6,L,B1,D1,B2,D2) +/* OR IMMEDIATE */ +# define OIHH(R1,I2) RI_(0xA5,R1,0x8,I2) +# define OIHL(R1,I2) RI_(0xA5,R1,0x9,I2) +# define OILH(R1,I2) RI_(0xA5,R1,0xA,I2) +# define OILL(R1,I2) RI_(0xA5,R1,0xB,I2) +/* PACK */ +# define PACK(D1,L1,B1,D2,L2,B2) SS_(0xF2,L1,L2,B1,D1,B2,D2) +/* PACK ASCII */ +# define PKA(D1,B1,D2,L2,B2) SSL_(0xE9,L2,B1,D1,B2,D2) +/* PACK UNICODE */ +# define PKU(D1,B1,D2,L2,B2) SSL_(0xE1,L2,B1,D1,B2,D2) +/* PERFORM LOCKED 
OPERATION */ +# define PLO(R1,D2,B2,R3,D4,B4) SS_(0xEE,R1,R3,B2,D2,B4,D4) +/* ROTATE LEFT SINGLE LOGICAL */ +# define RLL(R1,R3,D2,B2) RSY_(0xEB,R1,R3,B2,D2,0x1D) +# define RLLG(R1,R3,D2,B2) RSY_(0xEB,R1,R3,B2,D2,0x1C) +/* SEARCH STRING */ +# define SRST(R1,R2) RRE_(0xB25E,R1,R2) +/* SET ACCESS */ +# define SAR(R1,R2) RRE_(0xB24E,R1,R2) +/* SET ADDRESSING MODE */ +# define SAM24() E_(0x10C) +# define SAM31() E_(0x10D) +# define SAM64() E_(0x10E) +/* SET PROGRAM MASK */ +# define SPM(R1) RR_(0x04,R1,0) +/* SHIFT LEFT DOUBLE */ +# define SLDA(R1,D2,B2) RS_(0x8F,R1,0,B2,D2) +/* SHIFT LEFT DOUBLE LOGICAL */ +# define SLDL(R1,D2,B2) RS_(0x8D,R1,0,B2,D2) +/* SHIFT LEFT SINGLE */ +# define SLA(R1,D2,B2) RS_(0x8B,R1,0,B2,D2) +# define SLAG(R1,R3,D2,B2) RSY_(0xEB,R1,R3,B2,D2,0x0B) +/* SHIFT LEFT SINGLE LOGICAL */ +# define SLL(R1,D2,B2) RS_(0x89,R1,0,B2,D2) +# define SLLG(R1,R3,D2,B2) RSY_(0xEB,R1,R3,B2,D2,0x0D) +/* SHIFT RIGHT DOUBLE */ +# define SRDA(R1,D2,B2) RS_(0x8E,R1,0,B2,D2) +/* SHIFT RIGHT DOUBLE LOGICAL */ +# define SRDL(R1,D2,B2) RS_(0x8C,R1,0,B2,D2) +/* SHIFT RIGHT SINGLE */ +# define SRA(R1,D2,B2) RS_(0x8A,R1,0,B2,D2) +# define SRAG(R1,R3,D2,B2) RSY_(0xEB,R1,R3,B2,D2,0x0A) +/* SHIFT RIGHT SINGLE LOGICAL */ +# define SRL(R1,D2,B2) RS_(0x88,R1,0,B2,D2) +# define SRLG(R1,R3,D2,B2) RSY_(0xEB,R1,R3,B2,D2,0x0C) +/* STORE */ +# define ST(R1,D2,X2,B2) RX_(0x50,R1,X2,B2,D2) +# define STY(R1,D2,X2,B2) RXY_(0xE3,R1,X2,B2,D2,0x50) +# define STG(R1,D2,X2,B2) RXY_(0xE3,R1,X2,B2,D2,0x24) +/* STORE ACCESS MULTIPLE */ +# define STAM(R1,R3,D2,B2) RS_(0x9B,R1,R3,B2,D2) +# define STAMY(R1,R3,D2,B2) RSY_(0xEB,R1,R3,B2,D2,0x9B) +/* STORE CHARACTER */ +# define STC(R1,D2,X2,B2) RX_(0x42,R1,X2,B2,D2) +# define STCY(R1,D2,X2,B2) RXY_(0xE3,R1,X2,B2,D2,0x72) +/* STORE CHARACTERS UNDER MASK */ +# define STCM(R1,M3,D2,B2) RS_(0xBE,R1,M3,B2,D2) +# define STCMY(R1,M3,D2,B2) RSY_(0xEB,R1,M3,B2,D2,0x2D) +# define STCMH(R1,M3,D2,B2) RSY_(0xEB,R1,M3,B2,D2,0x2C) +/* STORE CLOCK */ +# define 
STCK(D2,B2) S_(0xB205,B2,D2) +/* STORE CLOCK EXTENDED */ +# define STCKE(D2,B2) S_(0xB278,B2,D2) +/* STORE HALFWORD */ +# define STH(R1,D2,X2,B2) RX_(0x40,R1,X2,B2,D2) +# define STHY(R1,D2,X2,B2) RXY_(0xE3,R1,X2,B2,D2,0x70) +/* STORE MULTIPLE */ +# define STM(R1,R3,D2,B2) RS_(0x90,R1,R3,B2,D2) +# define STMY(R1,R3,D2,B2) RSY_(0xEB,R1,R3,B2,D2,0x90) +# define STMG(R1,R3,D2,B2) RSY_(0xEB,R1,R3,B2,D2,0x24) +/* STORE MULTIPLE HIGH */ +# define STMH(R1,R3,D2,B2) RSY_(0xEB,R1,R3,B2,D2,0x26) +/* STORE PAIR TO QUADWORD */ +# define STPQ(R1,D2,X2,B2) RXY_(0xE3,R1,X2,B2,D2,0x8E) +/* STORE REVERSED */ +# define STRVH(R1,D2,X2,B2) RXY_(0xE3,R1,X2,B2,D2,0x3F) +# define STRV(R1,D2,X2,B2) RXY_(0xE3,R1,X2,B2,D2,0x3E) +# define STRVG(R1,D2,X2,B2) RXY_(0xE3,R1,X2,B2,D2,0x2F) +/* SUBTRACT */ +# define SR(R1,R2) RR_(0x1B,R1,R2) +# define SGR(R1,R2) RRE_(0xB909,R1,R2) +# define SGFR(R1,R2) RRE_(0xB919,R1,R2) +# define S(R1,D2,X2,B2) RX_(0x5B,R1,X2,B2,D2) +# define SY(R1,D2,X2,B2) RXY_(0xE3,R1,X2,B2,D2,0x5B) +# define SG(R1,D2,X2,B2) RXY_(0xE3,R1,X2,B2,D2,0x09) +# define SGF(R1,D2,X2,B2) RXY_(0xE3,R1,X2,B2,D2,0x19) +/* SUBTRACT HALFWORD */ +# define SH(R1,D2,X2,B2) RX_(0x4B,R1,X2,B2,D2) +# define SHY(R1,D2,X2,B2) RXY_(0xE3,R1,X2,B2,D2,0x7B) +/* SUBTRACT LOGICAL */ +# define SLR(R1,R2) RR_(0x1F,R1,R2) +# define SLGR(R1,R2) RRE_(0xB90B,R1,R2) +# define SLGFR(R1,R2) RRE_(0xB91B,R1,R2) +# define SL(R1,D2,X2,B2) RX_(0x5F,R1,X2,B2,D2) +# define SLY(R1,D2,X2,B2) RXY_(0xE3,R1,X2,B2,D2,0x5F) +# define SLG(R1,D2,X2,B2) RXY_(0xE3,R1,X2,B2,D2,0x0B) +# define SLGF(R1,D2,X2,B2) RXY_(0xE3,R1,X2,B2,D2,0x1B) +/* SUBTRACT LOGICAL WITH BORROW */ +# define SLBR(R1,R2) RRE_(0xB999,R1,R2) +# define SLBGR(R1,R2) RRE_(0xB989,R1,R2) +# define SLB(R1,D2,X2,B2) RXY_(0xE3,R1,X2,B2,D2,0x99) +# define SLBG(R1,D2,X2,B2) RXY_(0xE3,R1,X2,B2,D2,0x89) +/* SUPERVISOR CALL */ +# define SVC(I) I_(0xA,I) +/* TEST ADDRESSING MODE */ +# define TAM() E_(0x10B) +/* TEST AND SET */ +# define TS(D2,B2) RS_(0x93,0,0,B2,D2) +/* TEST 
UNDER MASK (TEST UNDER MASK HIGH, TEST UNDER MASK LOW) */ +# define TM(D1,B1,I2) SI_(0x91,I2,B1,D1) +# define TMY(D1,B1,I2) SIY_(0xEB,I2,B1,D1,0x51) +# define TMHH(R1,I2) RI_(0xA7,R1,0x2,I2) +# define TMHL(R1,I2) RI_(0xA7,R1,0x3,I2) +# define TMLH(R1,I2) RI_(0xA7,R1,0x0,I2) +# define TMH(R1,I2) TMLH(R1,I2) +# define TMLL(R1,I2) RI_(0xA7,R1,0x1,I2) +# define TML(R1,I2) TMLL(R1,I2) +/* TRANSLATE */ +# define TR(D1,L,B1,D2,B2) SSL_(0xDC,L,B1,D1,B2,D2) +/* TRANSLATE AND TEST */ +# define TRT(D1,L,B1,D2,B2) SSL_(0xDD,L,B1,D1,B2,D2) +/* TRANSLATE EXTENDED */ +# define TRE(R1,R2) RRE_(0xB2A5,R1,R2) +/* TRANSLATE ONE TO ONE */ +# define TROO(R1,R2) RRE_(0xB993,R1,R2) +/* TRANSLATE ONE TO TWO */ +# define TROT(R1,R2) RRE_(0xB992,R1,R2) +/* TRANSLATE TWO TO ONE */ +# define TRTO(R1,R2) RRE_(0xB991,R1,R2) +/* TRANSLATE TWO TO TWO */ +# define TRTT(R1,R2) RRE_(0xB990,R1,R2) +/* UNPACK */ +# define UNPK(D1,L1,B1,D2,L2,B2) SS_(0xF3,L1,L2,B1,D1,B2,D2) +/* UNPACK ASCII */ +# define UNPKA(D1,L1,B1,D2,L2,B2) SS_(0xEA,L1,L2,B1,D1,B2,D2) +/* UNPACK UNICODE */ +# define UNPKU(D1,L1,B1,D2,L2,B2) SS_(0xE2,L1,L2,B1,D1,B2,D2) +/* UPDATE TREE */ +# define UPT() E_(0x0102) +/**************************************************************** + * Decimal Instructions * + ****************************************************************/ +/* ADD DECIMAL */ +# define AP(D1,L1,B1,D2,L2,B2) SS_(0xFA,L1,L2,B1,D1,B2,D2) +/* COMPARE DECIMAL */ +# define CP(D1,L1,B1,D2,L2,B2) SS_(0xF9,L1,L2,B1,D1,B2,D2) +/* DIVIDE DECIMAL */ +# define DP(D1,L1,B1,D2,L2,B2) SS_(0xFD,L1,L2,B1,D1,B2,D2) +/* EDIT */ +# define ED(D1,L,B1,D2,B2) SSL_(0xDE,L,B1,D1,B2,D2) +/* EDIT AND MARK (opcode 0xDF; 0xDE is plain EDIT and must not be reused here) */ +# define EDMK(D1,L,B1,D2,B2) SSL_(0xDF,L,B1,D1,B2,D2) +/* MULTIPLY DECIMAL */ +# define MP(D1,L1,B1,D2,L2,B2) SS_(0xFC,L1,L2,B1,D1,B2,D2) +/* SHIFT AND ROUND DECIMAL */ +# define SRP(D1,L1,B1,D2,L2,B2) SS_(0xF0,L1,L2,B1,D1,B2,D2) +/* SUBTRACT DECIMAL */ +# define SP(D1,L1,B1,D2,L2,B2) SS_(0xFB,L1,L2,B1,D1,B2,D2) +/* TEST DECIMAL */ +# 
define TP(D1,L1,B1) RSL_(0xEB,L1,B1,D1,0xC0) +/* ZERO AND ADD */ +# define ZAP(D1,L1,B1,D2,L2,B2) SS_(0xF8,L1,L2,B1,D1,B2,D2) +/**************************************************************** + * Control Instructions * + ****************************************************************/ +/* BRANCH AND SET AUTHORITY */ +# define BSA(R1,R2) RRE_(0xB25A,R1,R2) +/* BRANCH AND STACK */ +# define BAKR(R1,R2) RRE_(0xB240,R1,R2) +/* BRANCH IN SUBSPACE GROUP */ +# define BSG(R1,R2) RRE_(0xB258,R1,R2) +/* COMPARE AND SWAP AND PURGE */ +# define CSP(R1,R2) RRE_(0xB250,R1,R2) +# define CSPG(R1,R2) RRE_(0xB98A,R1,R2) +/* DIAGNOSE */ +# define DIAG() SI_(0x83,0,0,0) +/* EXTRACT AND SET EXTENDED AUTHORITY */ +# define ESEA(R1) RRE_(0xB99D,R1,0) +/* EXTRACT PRIMARY ASN */ +# define EPAR(R1) RRE_(0xB226,R1,0) +/* EXTRACT SECONDARY ASN */ +# define ESAR(R1) RRE_(0xB227,R1,0) +/* EXTRACT STACKED REGISTERS */ +# define EREG(R1,R2) RRE_(0xB249,R1,R2) +# define EREGG(R1,R2) RRE_(0xB90E,R1,R2) +/* EXTRACT STACKED STATE */ +# define ESTA(R1,R2) RRE_(0xB24A,R1,R2) +/* INSERT ADDRESS SPACE CONTROL */ +# define IAC(R1) RRE_(0xB224,R1,0) +/* INSERT PSW KEY */ +# define IPK() S_(0xB20B,0,0) +/* INSERT STORAGE KEY EXTENDED */ +# define ISKE(R1,R2) RRE_(0xB229,R1,R2) +/* INSERT VIRTUAL STORAGE KEY */ +# define IVSK(R1,R2) RRE_(0xB223,R1,R2) +/* INVALIDATE DAT TABLE ENTRY */ +# define IDTE(R1,R2,R3) RRF_(0xB98E,R3,0,R1,R2) +/* INVALIDATE PAGE TABLE ENTRY */ +# define IPTE(R1,R2) RRE_(0xB221,R1,R2) +/* LOAD ADDRESS SPACE PARAMETER */ +# define LASP(D1,B1,D2,B2) SSE_(0xE500,B1,D1,B2,D2) +/* LOAD CONTROL */ +# define LCTL(R1,R3,D2,B2) RS_(0xB7,R1,R3,B2,D2) +# define LCTLG(R1,R3,D2,B2) RSY_(0xEB,R1,R3,B2,D2,0x2F) +/* LOAD PSW */ +# define LPSW(D2,B2) SI_(0x82,0,B2,D2) +/* LOAD PSW EXTENDED */ +# define LPSWE(D2,B2) S_(0xB2B2,B2,D2) +/* LOAD REAL ADDRESS */ +# define LRA(R1,D2,X2,B2) RX_(0xB1,R1,X2,B2,D2) +# define LRAY(R1,D2,X2,B2) RXY_(0xE3,R1,X2,B2,D2,0x13) +# define LRAG(R1,D2,X2,B2) 
RXY_(0xE3,R1,X2,B2,D2,0x03) +/* LOAD USING REAL ADDRESS */ +# define LURA(R1,R2) RRE_(0xB24B,R1,R2) +# define LURAG(R1,R2) RRE_(0xB905,R1,R2) +/* MODIFY STACKED STATE */ +# define MSTA(R1) RRE_(0xB247,R1,0) +/* MOVE PAGE */ +# define MVPG(R1,R2) RRE_(0xB254,R1,R2) +/* MOVE TO PRIMARY */ +# define MVCP(D1,R1,B1,D2,B2,R3) SS_(0xDA,R1,R3,B1,D1,B2,D2) +/* MOVE TO SECONDARY */ +# define MVCS(D1,R1,B1,D2,B2,R3) SS_(0xDB,R1,R3,B1,D1,B2,D2) +/* MOVE WITH DESTINATION KEY */ +# define MVCDK(D1,B1,D2,B2) SSE_(0xE50F,B1,D1,B2,D2) +/* MOVE WITH KEY */ +# define MVCK(D1,R1,B1,D2,B2,R3) SS_(0xD9,R1,R3,B1,D1,B2,D2) +/* MOVE WITH SOURCE KEY */ +# define MVCSK(D1,B1,D2,B2) SSE_(0xE50E,B1,D1,B2,D2) +/* PAGE IN */ +# define PGIN(R1,R2) RRE_(0xB22E,R1,R2) +/* PAGE OUT */ +# define PGOUT(R1,R2) RRE_(0xB22F,R1,R2) +/* PROGRAM CALL */ +# define PC(D2,B2) S_(0xB218,B2,D2) +/* PROGRAM RETURN */ +# define PR() E_(0x0101) +/* PROGRAM TRANSFER */ +# define PT(R1,R2) RRE_(0xB228,R1,R2) +/* PURGE ALB */ +# define PALB() RRE_(0xB248,0,0) +/* PURGE TLB */ +# define PTLB() S_(0xB20D,0,0) +/* RESET REFERENCE BIT EXTENDED */ +# define RRBE(R1,R2) RRE_(0xB22A,R1,R2) +/* RESUME PROGRAM */ +# define RP(D2,B2) S_(0xB277,B2,D2) +/* SET ADDRESS SPACE CONTROL */ +# define SAC(D2,B2) S_(0xB219,B2,D2) +/* SET ADDRESS SPACE CONTROL FAST */ +# define SACF(D2,B2) S_(0xB279,B2,D2) +/* SET CLOCK */ +# define SCK(D2,B2) S_(0xB204,B2,D2) +/* SET CLOCK COMPARATOR */ +# define SCKC(D2,B2) S_(0xB206,B2,D2) +/* SET CLOCK PROGRAMMABLE FIELD */ +# define SCKPF() E_(0x0107) +/* SET CPU TIMER */ +# define SPT(D2,B2) S_(0xB208,B2,D2) +/* SET PREFIX */ +# define SPX(D2,B2) S_(0xB210,B2,D2) +/* SET PSW KEY FROM ADDRESS */ +# define SPKA(D2,B2) S_(0xB20A,B2,D2) +/* SET SECONDARY ASN */ +# define SSAR(R1) RRE_(0xB225,R1,0) +/* SET STORAGE KEY EXTENDED */ +# define SSKE(R1,R2) RRE_(0xB22B,R1,R2) +/* SET SYSTEM MASK */ +# define SSM(D2,B2) SI_(0x80,0,B2,D2) +/* SIGNAL PROCESSOR */ +# define SIGP(R1,R3,D2,B2) RS_(0xAE,R1,R3,B2,D2) +/* 
STORE CLOCK COMPARATOR */ +# define STCKC(D2,B2) S_(0xB207,B2,D2) +/* STORE CONTROL */ +# define STCTL(R1,R3,D2,B2) RS_(0xB6,R1,R3,B2,D2) +# define STCTG(R1,R3,D2,B2) RSY_(0xEB,R1,R3,B2,D2,0x25) +/* STORE CPU ADDRESS */ +# define STAP(D2,B2) S_(0xB212,B2,D2) +/* STORE CPU ID */ +# define STIDP(D2,B2) S_(0xB202,B2,D2) +/* STORE CPU TIMER */ +# define STPT(D2,B2) S_(0xB209,B2,D2) +/* STORE FACILITY LIST */ +# define STFL(D2,B2) S_(0xB2B1,B2,D2) +/* STORE PREFIX */ +# define STPX(D2,B2) S_(0xB211,B2,D2) +/* STORE REAL ADDRESS */ +# define STRAG(D1,B1,D2,B2) SSE_(0xE502,B1,D1,B2,D2) +/* STORE SYSTEM INFORMATION */ +# define STSI(D2,B2) S_(0xB27D,B2,D2) +/* STORE THEN AND SYSTEM MASK */ +# define STNSM(D1,B1,I2) SI_(0xAC,I2,B1,D1) +/* STORE THEN OR SYSTEM MASK */ +# define STOSM(D1,B1,I2) SI_(0xAD,I2,B1,D1) +/* STORE USING REAL ADDRESS */ +# define STURA(R1,R2) RRE_(0xB246,R1,R2) +# define STURG(R1,R2) RRE_(0xB925,R1,R2) +/* TEST ACCESS */ +# define TAR(R1,R2) RRE_(0xB24C,R1,R2) +/* TEST BLOCK */ +# define TB(R1,R2) RRE_(0xB22C,R1,R2) +/* TEST PROTECTION */ +# define TPROT(D1,B1,D2,B2) SSE_(0xE501,B1,D1,B2,D2) +/* TRACE */ +# define TRACE(R1,R3,D2,B2) RS_(0x99,R1,R3,B2,D2) +# define TRACG(R1,R3,D2,B2) RSY_(0xEB,R1,R3,B2,D2,0x0F) +/* TRAP */ +# define TRAP2() E_(0x01FF) +# define TRAP4(D2,B2) S_(0xB2FF,B2,D2) +/**************************************************************** + * I/O Instructions * + ****************************************************************/ +/* CANCEL SUBCHANNEL */ +# define XSCH() S_(0xB276,0,0) +/* CLEAR SUBCHANNEL */ +# define CSCH() S_(0xB230,0,0) +/* HALT SUBCHANNEL */ +# define HSCH() S_(0xB231,0,0) +/* MODIFY SUBCHANNEL */ +# define MSCH(D2,B2) S_(0xB232,B2,D2) +/* RESET CHANNEL PATH */ +# define RCHP() S_(0xB23B,0,0) +/* RESUME SUBCHANNEL */ +# define RSCH() S_(0xB238,0,0) +/* SET ADDRESS LIMIT */ +# define SAL() S_(0xB237,0,0) +/* SET CHANNEL MONITOR */ +# define SCHM() S_(0xB23C,0,0) +/* START SUBCHANNEL */ +# define SSCH(D2,B2) 
S_(0xB233,B2,D2) +/* STORE CHANNEL PATH STATUS */ +# define STCPS(D2,B2) S_(0xB23A,B2,D2) +/* STORE CHANNEL REPORT WORD */ +# define STCRW(D2,B2) S_(0xB239,B2,D2) +/* STORE SUBCHANNEL */ +# define STSCH(D2,B2) S_(0xB234,B2,D2) +/* TEST PENDING INTERRUPTION */ +# define TPI(D2,B2) S_(0xB236,B2,D2) +/* TEST SUBCHANNEL */ +# define TSCH(D2,B2) S_(0xB235,B2,D2) +# define xdivr(r0,r1) _xdivr(_jit,r0,r1) +static int32_t _xdivr(jit_state_t*,int32_t,int32_t); +# define xdivr_u(r0,r1) _xdivr_u(_jit,r0,r1) +static int32_t _xdivr_u(jit_state_t*,int32_t,int32_t); +# define xdivi(r0,i0) _xdivi(_jit,r0,i0) +static int32_t _xdivi(jit_state_t*,int32_t,jit_word_t); +# define xdivi_u(r0,i0) _xdivi_u(_jit,r0,i0) +static int32_t _xdivi_u(jit_state_t*,int32_t,jit_word_t); +# define crr(cc,r0,r1,r2) _crr(_jit,cc,r0,r1,r2) +static void _crr(jit_state_t*, + int32_t,int32_t,int32_t,int32_t); +# define cri(cc,r0,r1,i0) _cri(_jit,cc,r0,r1,i0) +static void _cri(jit_state_t*, + int32_t,int32_t,int32_t,jit_word_t); +# define crr_u(cc,r0,r1,r2) _crr_u(_jit,cc,r0,r1,r2) +static void _crr_u(jit_state_t*, + int32_t,int32_t,int32_t,int32_t); +# define cri_u(cc,r0,r1,i0) _cri_u(_jit,cc,r0,r1,i0) +static void _cri_u(jit_state_t*, + int32_t,int32_t,int32_t,jit_word_t); +# define brr(cc,i0,r0,r1) _brr(_jit,cc,i0,r0,r1) +static void _brr(jit_state_t*,int32_t, + jit_word_t,int32_t,int32_t); +# define brr_p(cc,i0,r0,r1) _brr_p(_jit,cc,i0,r0,r1) +static jit_word_t _brr_p(jit_state_t*,int32_t, + jit_word_t,int32_t,int32_t); +# define bri(cc,i0,r0,i1) _bri(_jit,cc,i0,r0,i1) +static void _bri(jit_state_t*,int32_t, + jit_word_t,int32_t,jit_word_t); +# define bri_p(cc,i0,r0,i1) _bri_p(_jit,cc,i0,r0,i1) +static jit_word_t _bri_p(jit_state_t*,int32_t, + jit_word_t,int32_t,jit_word_t); +# define brr_u(cc,i0,r0,r1) _brr_u(_jit,cc,i0,r0,r1) +static void _brr_u(jit_state_t*,int32_t, + jit_word_t,int32_t,int32_t); +# define brr_u_p(cc,i0,r0,r1) _brr_u_p(_jit,cc,i0,r0,r1) +static jit_word_t 
_brr_u_p(jit_state_t*,int32_t, + jit_word_t,int32_t,int32_t); +# define bri_u(cc,i0,r0,i1) _bri_u(_jit,cc,i0,r0,i1) +static void _bri_u(jit_state_t*,int32_t, + jit_word_t,int32_t,jit_word_t); +# define bri_u_p(cc,i0,r0,i1) _bri_u_p(_jit,cc,i0,r0,i1) +static jit_word_t _bri_u_p(jit_state_t*,int32_t, + jit_word_t,int32_t,jit_word_t); +# define baddr(c,s,i0,r0,r1) _baddr(_jit,c,s,i0,r0,r1) +static void _baddr(jit_state_t*,int32_t,jit_bool_t, + jit_word_t,int32_t,int32_t); +# define baddr_p(c,s,i0,r0,r1) _baddr_p(_jit,c,s,i0,r0,r1) +static jit_word_t _baddr_p(jit_state_t*,int32_t,jit_bool_t, + jit_word_t,int32_t,int32_t); +# define baddi(c,s,i0,r0,i1) _baddi(_jit,c,s,i0,r0,i1) +static void _baddi(jit_state_t*,int32_t,jit_bool_t, + jit_word_t,int32_t,jit_word_t); +# define baddi_p(c,s,i0,r0,i1) _baddi_p(_jit,c,s,i0,r0,i1) +static jit_word_t _baddi_p(jit_state_t*,int32_t,jit_bool_t, + jit_word_t,int32_t,jit_word_t); +# define bsubr(c,s,i0,r0,r1) _bsubr(_jit,c,s,i0,r0,r1) +static void _bsubr(jit_state_t*,int32_t,jit_bool_t, + jit_word_t,int32_t,int32_t); +# define bsubr_p(c,s,i0,r0,r1) _bsubr_p(_jit,c,s,i0,r0,r1) +static jit_word_t _bsubr_p(jit_state_t*,int32_t,jit_bool_t, + jit_word_t,int32_t,int32_t); +# define bsubi(c,s,i0,r0,i1) _bsubi(_jit,c,s,i0,r0,i1) +static void _bsubi(jit_state_t*,int32_t,jit_bool_t, + jit_word_t,int32_t,jit_word_t); +# define bsubi_p(c,s,i0,r0,i1) _bsubi_p(_jit,c,s,i0,r0,i1) +static jit_word_t _bsubi_p(jit_state_t*,int32_t,jit_bool_t, + jit_word_t,int32_t,jit_word_t); +# define bmxr(cc,i0,r0,r1) _bmxr(_jit,cc,i0,r0,r1) +static void _bmxr(jit_state_t*,int32_t, + jit_word_t,int32_t,int32_t); +# define bmxr_p(cc,i0,r0,r1) _bmxr_p(_jit,cc,i0,r0,r1) +static jit_word_t _bmxr_p(jit_state_t*,int32_t, + jit_word_t,int32_t,int32_t); +# define bmxi(cc,i0,r0,i1) _bmxi(_jit,cc,i0,r0,i1) +static void _bmxi(jit_state_t*,int32_t, + jit_word_t,int32_t,jit_word_t); +# define bmxi_p(cc,i0,r0,i1) _bmxi_p(_jit,cc,i0,r0,i1) +static jit_word_t 
_bmxi_p(jit_state_t*,int32_t, + jit_word_t,int32_t,jit_word_t); +# define movr(r0,r1) _movr(_jit,r0,r1) +static void _movr(jit_state_t*,int32_t,int32_t); +# define movi(r0,i0) _movi(_jit,r0,i0) +static void _movi(jit_state_t*,int32_t,jit_word_t); +# define movi_p(r0,i0) _movi_p(_jit,r0,i0) +static jit_word_t _movi_p(jit_state_t*,int32_t,jit_word_t); +# define addr(r0,r1,r2) _addr(_jit,r0,r1,r2) +static void _addr(jit_state_t*,int32_t,int32_t,int32_t); +# define addi(r0,r1,i0) _addi(_jit,r0,r1,i0) +static void _addi(jit_state_t*,int32_t,int32_t,jit_word_t); +# define addcr(r0,r1,r2) _addcr(_jit,r0,r1,r2) +static void _addcr(jit_state_t*,int32_t,int32_t,int32_t); +# define addci(r0,r1,i0) _addci(_jit,r0,r1,i0) +static void _addci(jit_state_t*,int32_t,int32_t,jit_word_t); +# define addxr(r0,r1,r2) _addxr(_jit,r0,r1,r2) +static void _addxr(jit_state_t*,int32_t,int32_t,int32_t); +# define addxi(r0,r1,i0) _addxi(_jit,r0,r1,i0) +static void _addxi(jit_state_t*,int32_t,int32_t,jit_word_t); +# define subr(r0,r1,r2) _subr(_jit,r0,r1,r2) +static void _subr(jit_state_t*,int32_t,int32_t,int32_t); +# define subi(r0,r1,i0) _subi(_jit,r0,r1,i0) +static void _subi(jit_state_t*,int32_t,int32_t,jit_word_t); +# define subcr(r0,r1,r2) _subcr(_jit,r0,r1,r2) +static void _subcr(jit_state_t*,int32_t,int32_t,int32_t); +# define subci(r0,r1,i0) _subci(_jit,r0,r1,i0) +static void _subci(jit_state_t*,int32_t,int32_t,jit_word_t); +# define subxr(r0,r1,r2) _subxr(_jit,r0,r1,r2) +static void _subxr(jit_state_t*,int32_t,int32_t,int32_t); +# define subxi(r0,r1,i0) _subxi(_jit,r0,r1,i0) +static void _subxi(jit_state_t*,int32_t,int32_t,jit_word_t); +# define rsbi(r0, r1, i0) _rsbi(_jit, r0, r1, i0) +static void _rsbi(jit_state_t*,int32_t,int32_t,jit_word_t); +# define mulr(r0,r1,r2) _mulr(_jit,r0,r1,r2) +static void _mulr(jit_state_t*,int32_t,int32_t,int32_t); +# define muli(r0,r1,i0) _muli(_jit,r0,r1,i0) +static void _muli(jit_state_t*,int32_t,int32_t,jit_word_t); +# define qmulr(r0,r1,r2,r3) 
_qmulr(_jit,r0,r1,r2,r3) +static void _qmulr(jit_state_t*,int32_t, + int32_t,int32_t,int32_t); +# define qmuli(r0,r1,r2,i0) _qmuli(_jit,r0,r1,r2,i0) +static void _qmuli(jit_state_t*,int32_t, + int32_t,int32_t,jit_word_t); +# define qmulr_u(r0,r1,r2,r3) _qmulr_u(_jit,r0,r1,r2,r3) +static void _qmulr_u(jit_state_t*,int32_t, + int32_t,int32_t,int32_t); +# define qmuli_u(r0,r1,r2,i0) _qmuli_u(_jit,r0,r1,r2,i0) +static void _qmuli_u(jit_state_t*,int32_t, + int32_t,int32_t,jit_word_t); +# define divr(r0,r1,r2) _divr(_jit,r0,r1,r2) +static void _divr(jit_state_t*,int32_t,int32_t,int32_t); +# define divi(r0,r1,i0) _divi(_jit,r0,r1,i0) +static void _divi(jit_state_t*,int32_t,int32_t,jit_word_t); +# define divr_u(r0,r1,r2) _divr_u(_jit,r0,r1,r2) +static void _divr_u(jit_state_t*,int32_t,int32_t,int32_t); +# define divi_u(r0,r1,i0) _divi_u(_jit,r0,r1,i0) +static void _divi_u(jit_state_t*,int32_t,int32_t,jit_word_t); +# define remr(r0,r1,r2) _remr(_jit,r0,r1,r2) +static void _remr(jit_state_t*,int32_t,int32_t,int32_t); +# define remi(r0,r1,i0) _remi(_jit,r0,r1,i0) +static void _remi(jit_state_t*,int32_t,int32_t,jit_word_t); +# define remr_u(r0,r1,r2) _remr_u(_jit,r0,r1,r2) +static void _remr_u(jit_state_t*,int32_t,int32_t,int32_t); +# define remi_u(r0,r1,i0) _remi_u(_jit,r0,r1,i0) +static void _remi_u(jit_state_t*,int32_t,int32_t,jit_word_t); +# define qdivr(r0,r1,r2,r3) _qdivr(_jit,r0,r1,r2,r3) +static void _qdivr(jit_state_t*,int32_t, + int32_t,int32_t,int32_t); +# define qdivi(r0,r1,r2,i0) _qdivi(_jit,r0,r1,r2,i0) +static void _qdivi(jit_state_t*,int32_t, + int32_t,int32_t,jit_word_t); +# define qdivr_u(r0,r1,r2,r3) _qdivr_u(_jit,r0,r1,r2,r3) +static void _qdivr_u(jit_state_t*,int32_t, + int32_t,int32_t,int32_t); +# define qdivi_u(r0,r1,r2,i0) _qdivi_u(_jit,r0,r1,r2,i0) +static void _qdivi_u(jit_state_t*,int32_t, + int32_t,int32_t,jit_word_t); +# if __WORDSIZE == 32 +# define lshr(r0,r1,r2) _lshr(_jit,r0,r1,r2) +static void _lshr(jit_state_t*,int32_t,int32_t,int32_t); +# 
else +# define lshr(r0,r1,r2) SLLG(r0,r1,0,r2) +# endif +# define lshi(r0,r1,i0) _lshi(_jit,r0,r1,i0) +static void _lshi(jit_state_t*,int32_t,int32_t,jit_word_t); +# if __WORDSIZE == 32 +# define rshr(r0,r1,r2) _rshr(_jit,r0,r1,r2) +static void _rshr(jit_state_t*,int32_t,int32_t,int32_t); +# else +# define rshr(r0,r1,r2) SRAG(r0,r1,0,r2) +# endif +# define rshi(r0,r1,i0) _rshi(_jit,r0,r1,i0) +static void _rshi(jit_state_t*,int32_t,int32_t,jit_word_t); +# if __WORDSIZE == 32 +# define rshr_u(r0,r1,r2) _rshr_u(_jit,r0,r1,r2) +static void _rshr_u(jit_state_t*,int32_t,int32_t,int32_t); +# else +# define rshr_u(r0,r1,r2) SRLG(r0,r1,0,r2) +# endif +# define rshi_u(r0,r1,i0) _rshi_u(_jit,r0,r1,i0) +static void _rshi_u(jit_state_t*,int32_t,int32_t,jit_word_t); +# if __WORDSIZE == 32 +# define negr(r0,r1) LCR(r0,r1) +# else +# define negr(r0,r1) LCGR(r0,r1) +# endif +# define comr(r0,r1) _comr(_jit,r0,r1) +static void _comr(jit_state_t*,int32_t,int32_t); +# define andr(r0,r1,r2) _andr(_jit,r0,r1,r2) +static void _andr(jit_state_t*,int32_t,int32_t,int32_t); +# define andi(r0,r1,i0) _andi(_jit,r0,r1,i0) +static void _andi(jit_state_t*,int32_t,int32_t,jit_word_t); +# define orr(r0,r1,r2) _orr(_jit,r0,r1,r2) +static void _orr(jit_state_t*,int32_t,int32_t,int32_t); +# define ori(r0,r1,i0) _ori(_jit,r0,r1,i0) +static void _ori(jit_state_t*,int32_t,int32_t,jit_word_t); +# define xorr(r0,r1,r2) _xorr(_jit,r0,r1,r2) +static void _xorr(jit_state_t*,int32_t,int32_t,int32_t); +# define xori(r0,r1,i0) _xori(_jit,r0,r1,i0) +static void _xori(jit_state_t*,int32_t,int32_t,jit_word_t); +# define bswapr_us(r0,r1) extr_us(r0,r1) +# if __WORDSIZE == 32 +# define bswapr_ui(r0,r1) movr(r0,r1) +# else +# define bswapr_ui(r0,r1) extr_ui(r0,r1) +/* 64-bit builds only: the word-sized variant expands to a plain register move */ +# define bswapr_ul(r0,r1) movr(r0,r1) +# endif +# define extr_c(r0,r1) LGBR(r0,r1) +# define extr_uc(r0,r1) LLGCR(r0,r1) +# define extr_s(r0,r1) LGHR(r0,r1) +# define extr_us(r0,r1) LLGHR(r0,r1) +# if __WORDSIZE == 64 +# define extr_i(r0,r1) 
LGFR(r0,r1) +# define extr_ui(r0,r1) LLGFR(r0,r1) +# endif +# define ldr_c(r0,r1) LGB(r0,0,0,r1) +# define ldi_c(r0,i0) _ldi_c(_jit,r0,i0) +static void _ldi_c(jit_state_t*,int32_t,jit_word_t); +# define ldxr_c(r0,r1,r2) _ldxr_c(_jit,r0,r1,r2) +static void _ldxr_c(jit_state_t*,int32_t,int32_t,int32_t); +# define ldxi_c(r0,r1,i0) _ldxi_c(_jit,r0,r1,i0) +static void _ldxi_c(jit_state_t*,int32_t,int32_t,jit_word_t); +# define ldr_uc(r0,r1) LLGC(r0,0,0,r1) +# define ldi_uc(r0,i0) _ldi_uc(_jit,r0,i0) +static void _ldi_uc(jit_state_t*,int32_t,jit_word_t); +# define ldxr_uc(r0,r1,r2) _ldxr_uc(_jit,r0,r1,r2) +static void _ldxr_uc(jit_state_t*,int32_t,int32_t,int32_t); +# define ldxi_uc(r0,r1,i0) _ldxi_uc(_jit,r0,r1,i0) +static void _ldxi_uc(jit_state_t*,int32_t,int32_t,jit_word_t); +# if __WORDSIZE == 32 +# define ldr_s(r0,r1) LH(r0,0,0,r1) +# else +# define ldr_s(r0,r1) LGH(r0,0,0,r1) +# endif +# define ldi_s(r0,i0) _ldi_s(_jit,r0,i0) +static void _ldi_s(jit_state_t*,int32_t,jit_word_t); +# define ldxr_s(r0,r1,r2) _ldxr_s(_jit,r0,r1,r2) +static void _ldxr_s(jit_state_t*,int32_t,int32_t,int32_t); +# define ldxi_s(r0,r1,i0) _ldxi_s(_jit,r0,r1,i0) +static void _ldxi_s(jit_state_t*,int32_t,int32_t,jit_word_t); +# define ldr_us(r0,r1) LLGH(r0,0,0,r1) +# define ldi_us(r0,i0) _ldi_us(_jit,r0,i0) +static void _ldi_us(jit_state_t*,int32_t,jit_word_t); +# define ldxr_us(r0,r1,r2) _ldxr_us(_jit,r0,r1,r2) +static void _ldxr_us(jit_state_t*,int32_t,int32_t,int32_t); +# define ldxi_us(r0,r1,i0) _ldxi_us(_jit,r0,r1,i0) +static void _ldxi_us(jit_state_t*,int32_t,int32_t,jit_word_t); +# if __WORDSIZE == 32 +# define ldr_i(r0,r1) LLGF(r0,0,0,r1) +# else +# define ldr_i(r0,r1) LGF(r0,0,0,r1) +# endif +# define ldi_i(r0,i0) _ldi_i(_jit,r0,i0) +static void _ldi_i(jit_state_t*,int32_t,jit_word_t); +# define ldxr_i(r0,r1,r2) _ldxr_i(_jit,r0,r1,r2) +static void _ldxr_i(jit_state_t*,int32_t,int32_t,int32_t); +# define ldxi_i(r0,r1,i0) _ldxi_i(_jit,r0,r1,i0) +static void 
_ldxi_i(jit_state_t*,int32_t,int32_t,jit_word_t); +# if __WORDSIZE == 64 +# define ldr_ui(r0,r1) LLGF(r0,0,0,r1) +# define ldi_ui(r0,i0) _ldi_ui(_jit,r0,i0) +static void _ldi_ui(jit_state_t*,int32_t,jit_word_t); +# define ldxr_ui(r0,r1,r2) _ldxr_ui(_jit,r0,r1,r2) +static void _ldxr_ui(jit_state_t*,int32_t,int32_t,int32_t); +# define ldxi_ui(r0,r1,i0) _ldxi_ui(_jit,r0,r1,i0) +static void _ldxi_ui(jit_state_t*,int32_t,int32_t,jit_word_t); +# define ldr_l(r0,r1) LG(r0,0,0,r1) +# define ldi_l(r0,i0) _ldi_l(_jit,r0,i0) +static void _ldi_l(jit_state_t*,int32_t,jit_word_t); +# define ldxr_l(r0,r1,r2) _ldxr_l(_jit,r0,r1,r2) +static void _ldxr_l(jit_state_t*,int32_t,int32_t,int32_t); +# define ldxi_l(r0,r1,i0) _ldxi_l(_jit,r0,r1,i0) +static void _ldxi_l(jit_state_t*,int32_t,int32_t,jit_word_t); +# endif +# define str_c(r0,r1) STC(r1,0,0,r0) +# define sti_c(i0,r0) _sti_c(_jit,i0,r0) +static void _sti_c(jit_state_t*,jit_word_t,int32_t); +# define stxr_c(r0,r1,r2) _stxr_c(_jit,r0,r1,r2) +static void _stxr_c(jit_state_t*,int32_t,int32_t,int32_t); +# define stxi_c(i0,r0,r1) _stxi_c(_jit,i0,r0,r1) +static void _stxi_c(jit_state_t*,jit_word_t,int32_t,int32_t); +# define str_s(r0,r1) STH(r1,0,0,r0) +# define sti_s(i0,r0) _sti_s(_jit,i0,r0) +static void _sti_s(jit_state_t*,jit_word_t,int32_t); +# define stxr_s(r0,r1,r2) _stxr_s(_jit,r0,r1,r2) +static void _stxr_s(jit_state_t*,int32_t,int32_t,int32_t); +# define stxi_s(i0,r0,r1) _stxi_s(_jit,i0,r0,r1) +static void _stxi_s(jit_state_t*,jit_word_t,int32_t,int32_t); +# define str_i(r0,r1) ST(r1,0,0,r0) +# define sti_i(i0,r0) _sti_i(_jit,i0,r0) +static void _sti_i(jit_state_t*,jit_word_t,int32_t); +# define stxr_i(r0,r1,r2) _stxr_i(_jit,r0,r1,r2) +static void _stxr_i(jit_state_t*,int32_t,int32_t,int32_t); +# define stxi_i(i0,r0,r1) _stxi_i(_jit,i0,r0,r1) +static void _stxi_i(jit_state_t*,jit_word_t,int32_t,int32_t); +# if __WORDSIZE == 64 +# define str_l(r0,r1) STG(r1,0,0,r0) +# define sti_l(i0,r0) _sti_l(_jit,i0,r0) +static void 
_sti_l(jit_state_t*,jit_word_t,int32_t); +# define stxr_l(r0,r1,r2) _stxr_l(_jit,r0,r1,r2) +static void _stxr_l(jit_state_t*,int32_t,int32_t,int32_t); +# define stxi_l(i0,r0,r1) _stxi_l(_jit,i0,r0,r1) +static void _stxi_l(jit_state_t*,jit_word_t,int32_t,int32_t); +# endif +# define ltr(r0,r1,r2) crr(CC_L,r0,r1,r2) +# define lti(r0,r1,i0) cri(CC_L,r0,r1,i0) +# define ltr_u(r0,r1,r2) crr_u(CC_L,r0,r1,r2) +# define lti_u(r0,r1,i0) cri_u(CC_L,r0,r1,i0) +# define ler(r0,r1,r2) crr(CC_LE,r0,r1,r2) +# define lei(r0,r1,i0) cri(CC_LE,r0,r1,i0) +# define ler_u(r0,r1,r2) crr_u(CC_LE,r0,r1,r2) +# define lei_u(r0,r1,i0) cri_u(CC_LE,r0,r1,i0) +# define eqr(r0,r1,r2) crr(CC_E,r0,r1,r2) +# define eqi(r0,r1,i0) cri(CC_E,r0,r1,i0) +# define ger(r0,r1,r2) crr(CC_HE,r0,r1,r2) +# define gei(r0,r1,i0) cri(CC_HE,r0,r1,i0) +# define ger_u(r0,r1,r2) crr_u(CC_HE,r0,r1,r2) +# define gei_u(r0,r1,i0) cri_u(CC_HE,r0,r1,i0) +# define gtr(r0,r1,r2) crr(CC_H,r0,r1,r2) +# define gti(r0,r1,i0) cri(CC_H,r0,r1,i0) +# define gtr_u(r0,r1,r2) crr_u(CC_H,r0,r1,r2) +# define gti_u(r0,r1,i0) cri_u(CC_H,r0,r1,i0) +# define ner(r0,r1,r2) crr(CC_NE,r0,r1,r2) +# define nei(r0,r1,i0) cri(CC_NE,r0,r1,i0) +# define bltr(i0,r0,r1) brr(CC_L,i0,r0,r1) +# define bltr_p(i0,r0,r1) brr_p(CC_L,i0,r0,r1) +# define blti(i0,r0,i1) bri(CC_L,i0,r0,i1) +# define blti_p(i0,r0,i1) bri_p(CC_L,i0,r0,i1) +# define bltr_u(i0,r0,r1) brr_u(CC_L,i0,r0,r1) +# define bltr_u_p(i0,r0,r1) brr_u_p(CC_L,i0,r0,r1) +# define blti_u(i0,r0,i1) bri_u(CC_L,i0,r0,i1) +# define blti_u_p(i0,r0,i1) bri_u_p(CC_L,i0,r0,i1) +# define bler(i0,r0,r1) brr(CC_LE,i0,r0,r1) +# define bler_p(i0,r0,r1) brr_p(CC_LE,i0,r0,r1) +# define blei(i0,r0,i1) bri(CC_LE,i0,r0,i1) +# define blei_p(i0,r0,i1) bri_p(CC_LE,i0,r0,i1) +# define bler_u(i0,r0,r1) brr_u(CC_LE,i0,r0,r1) +# define bler_u_p(i0,r0,r1) brr_u_p(CC_LE,i0,r0,r1) +# define blei_u(i0,r0,i1) bri_u(CC_LE,i0,r0,i1) +# define blei_u_p(i0,r0,i1) bri_u_p(CC_LE,i0,r0,i1) +# define beqr(i0,r0,r1) brr(CC_E,i0,r0,r1) +# 
define beqr_p(i0,r0,r1) brr_p(CC_E,i0,r0,r1) +# define beqi(i0,r0,i1) bri(CC_E,i0,r0,i1) +# define beqi_p(i0,r0,i1) bri_p(CC_E,i0,r0,i1) +# define bger(i0,r0,r1) brr(CC_HE,i0,r0,r1) +# define bger_p(i0,r0,r1) brr_p(CC_HE,i0,r0,r1) +# define bgei(i0,r0,i1) bri(CC_HE,i0,r0,i1) +# define bgei_p(i0,r0,i1) bri_p(CC_HE,i0,r0,i1) +# define bger_u(i0,r0,r1) brr_u(CC_HE,i0,r0,r1) +# define bger_u_p(i0,r0,r1) brr_u_p(CC_HE,i0,r0,r1) +# define bgei_u(i0,r0,i1) bri_u(CC_HE,i0,r0,i1) +# define bgei_u_p(i0,r0,i1) bri_u_p(CC_HE,i0,r0,i1) +# define bgtr(i0,r0,r1) brr(CC_H,i0,r0,r1) +# define bgtr_p(i0,r0,r1) brr_p(CC_H,i0,r0,r1) +# define bgti(i0,r0,i1) bri(CC_H,i0,r0,i1) +# define bgti_p(i0,r0,i1) bri_p(CC_H,i0,r0,i1) +# define bgtr_u(i0,r0,r1) brr_u(CC_H,i0,r0,r1) +# define bgtr_u_p(i0,r0,r1) brr_u_p(CC_H,i0,r0,r1) +# define bgti_u(i0,r0,i1) bri_u(CC_H,i0,r0,i1) +# define bgti_u_p(i0,r0,i1) bri_u_p(CC_H,i0,r0,i1) +# define bner(i0,r0,r1) brr(CC_NE,i0,r0,r1) +# define bner_p(i0,r0,r1) brr_p(CC_NE,i0,r0,r1) +# define bnei(i0,r0,i1) bri(CC_NE,i0,r0,i1) +# define bnei_p(i0,r0,i1) bri_p(CC_NE,i0,r0,i1) +# define boaddr(i0,r0,r1) baddr(CC_O,1,i0,r0,r1) +# define boaddr_p(i0,r0,r1) baddr_p(CC_O,1,i0,r0,r1) +# define boaddi(i0,r0,i1) baddi(CC_O,1,i0,r0,i1) +# define boaddi_p(i0,r0,i1) baddi_p(CC_O,1,i0,r0,i1) +# define boaddr_u(i0,r0,r1) baddr(CC_NLE,0,i0,r0,r1) +# define boaddr_u_p(i0,r0,r1) baddr_p(CC_NLE,0,i0,r0,r1) +# define boaddi_u(i0,r0,i1) baddi(CC_NLE,0,i0,r0,i1) +# define boaddi_u_p(i0,r0,i1) baddi_p(CC_NLE,0,i0,r0,i1) +# define bxaddr(i0,r0,r1) baddr(CC_NO,1,i0,r0,r1) +# define bxaddr_p(i0,r0,r1) baddr_p(CC_NO,1,i0,r0,r1) +# define bxaddi(i0,r0,i1) baddi(CC_NO,1,i0,r0,i1) +# define bxaddi_p(i0,r0,i1) baddi_p(CC_NO,1,i0,r0,i1) +# define bxaddr_u(i0,r0,r1) baddr(CC_LE,0,i0,r0,r1) +# define bxaddr_u_p(i0,r0,r1) baddr_p(CC_LE,0,i0,r0,r1) +# define bxaddi_u(i0,r0,i1) baddi(CC_LE,0,i0,r0,i1) +# define bxaddi_u_p(i0,r0,i1) baddi_p(CC_LE,0,i0,r0,i1) +# define bosubr(i0,r0,r1) 
bsubr(CC_O,1,i0,r0,r1) +# define bosubr_p(i0,r0,r1) bsubr_p(CC_O,1,i0,r0,r1) +# define bosubi(i0,r0,i1) bsubi(CC_O,1,i0,r0,i1) +# define bosubi_p(i0,r0,i1) bsubi_p(CC_O,1,i0,r0,i1) +# define bosubr_u(i0,r0,r1) bsubr(CC_L,0,i0,r0,r1) +# define bosubr_u_p(i0,r0,r1) bsubr_p(CC_L,0,i0,r0,r1) +# define bosubi_u(i0,r0,i1) bsubi(CC_L,0,i0,r0,i1) +# define bosubi_u_p(i0,r0,i1) bsubi_p(CC_L,0,i0,r0,i1) +# define bxsubr(i0,r0,r1) bsubr(CC_NO,1,i0,r0,r1) +# define bxsubr_p(i0,r0,r1) bsubr_p(CC_NO,1,i0,r0,r1) +# define bxsubi(i0,r0,i1) bsubi(CC_NO,1,i0,r0,i1) +# define bxsubi_p(i0,r0,i1) bsubi_p(CC_NO,1,i0,r0,i1) +# define bxsubr_u(i0,r0,r1) bsubr(CC_NL,0,i0,r0,r1) +# define bxsubr_u_p(i0,r0,r1) bsubr_p(CC_NL,0,i0,r0,r1) +# define bxsubi_u(i0,r0,i1) bsubi(CC_NL,0,i0,r0,i1) +# define bxsubi_u_p(i0,r0,i1) bsubi_p(CC_NL,0,i0,r0,i1) +# define bmsr(i0,r0,r1) bmxr(CC_NE,i0,r0,r1) +# define bmsr_p(i0,r0,r1) bmxr_p(CC_NE,i0,r0,r1) +# define bmsi(i0,r0,i1) bmxi(CC_NE,i0,r0,i1) +# define bmsi_p(i0,r0,i1) bmxi_p(CC_NE,i0,r0,i1) +# define bmcr(i0,r0,r1) bmxr(CC_E,i0,r0,r1) +# define bmcr_p(i0,r0,r1) bmxr_p(CC_E,i0,r0,r1) +# define bmci(i0,r0,i1) bmxi(CC_E,i0,r0,i1) +# define bmci_p(i0,r0,i1) bmxi_p(CC_E,i0,r0,i1) +# define jmpr(r0) BR(r0) +# define jmpi(i0) _jmpi(_jit,i0) +static void _jmpi(jit_state_t*,jit_word_t); +# define jmpi_p(i0) _jmpi_p(_jit,i0) +static jit_word_t _jmpi_p(jit_state_t*,jit_word_t); +# define callr(r0) BALR(_R14_REGNO,r0) +# define calli(i0) _calli(_jit,i0) +static void _calli(jit_state_t*,jit_word_t); +# define calli_p(i0) _calli_p(_jit,i0) +static jit_word_t _calli_p(jit_state_t*,jit_word_t); +# define prolog(i0) _prolog(_jit,i0) +static void _prolog(jit_state_t*,jit_node_t*); +# define epilog(i0) _epilog(_jit,i0) +static void _epilog(jit_state_t*,jit_node_t*); +# define vastart(r0) _vastart(_jit, r0) +static void _vastart(jit_state_t*, int32_t); +# define vaarg(r0, r1) _vaarg(_jit, r0, r1) +static void _vaarg(jit_state_t*, int32_t, int32_t); +# define 
patch_at(instr,label) _patch_at(_jit,instr,label) +static void _patch_at(jit_state_t*,jit_word_t,jit_word_t); +#endif + +#if CODE +# define _us uint16_t +# define _ui uint32_t +static void +_E(jit_state_t *_jit, _ui Op) +{ + union { + struct { + _us op; + } b; + _us s; + } i0; + i0.b.op = Op; + assert(i0.b.op == Op); + is(i0.s); +} + +static void +_I(jit_state_t *_jit, _ui Op, _ui I) +{ + union { + struct { + _us op : 8; + _us i : 8; + } b; + _us s; + } i0; + i0.b.op = Op; + i0.b.i = I; + assert(i0.b.op == Op); + assert(i0.b.i == I); + is(i0.s); +} + +static void +_RR(jit_state_t *_jit, _ui Op, _ui R1, _ui R2) +{ + union { + struct { + _us op : 8; + _us r1 : 4; + _us r2 : 4; + } b; + _us s; + } i0; + i0.b.op = Op; + i0.b.r1 = R1; + i0.b.r2 = R2; + assert(i0.b.op == Op); + assert(i0.b.r1 == R1); + assert(i0.b.r2 == R2); + is(i0.s); +} + +static void +_RRE(jit_state_t *_jit, _ui Op, _ui R1, _ui R2) +{ + union { + struct { + _us op; + } b; + _us s; + } i0; + union { + struct { + _us _ : 8; + _us r1 : 4; + _us r2 : 4; + } b; + _us s; + } i1; + i0.b.op = Op; + i1.b._ = 0; + i1.b.r1 = R1; + i1.b.r2 = R2; + assert(i0.b.op == Op); + assert(i1.b.r1 == R1); + assert(i1.b.r2 == R2); + is(i0.s); + is(i1.s); +} + +static void +_RRF(jit_state_t *_jit, _ui Op, _ui R3, _ui M4, _ui R1, _ui R2) +{ + union { + struct { + _us op; + } b; + _us s; + } i0; + union { + struct { + _us r3 : 4; + _us m4 : 4; + _us r1 : 4; + _us r2 : 4; + } b; + _us s; + } i1; + i0.b.op = Op; + i1.b.r3 = R3; + i1.b.m4 = M4; + i1.b.r1 = R1; + i1.b.r2 = R2; + assert(i0.b.op == Op); + assert(i1.b.r3 == R3); + assert(i1.b.m4 == M4); + assert(i1.b.r1 == R1); + assert(i1.b.r2 == R2); + is(i0.s); + is(i1.s); +} + +static void +_RX(jit_state_t *_jit, _ui Op, _ui R1, _ui X2, _ui B2, _ui D2) +{ + union { + struct { + _us op : 8; + _us r1 : 4; + _us x2 : 4; + } b; + _us s; + } i0; + union { + struct { + _us b2 : 4; + _us d2 : 12; + } b; + _us s; + } i1; + i0.b.op = Op; + i0.b.r1 = R1; + i0.b.x2 = X2; + i1.b.b2 = B2; + 
i1.b.d2 = D2; + assert(i0.b.op == Op); + assert(i0.b.r1 == R1); + assert(i0.b.x2 == X2); + assert(i1.b.b2 == B2); + assert(i1.b.d2 == D2); + is(i0.s); + is(i1.s); +} + +/* Emit an RXE-format instruction as three halfwords: +   Op|R1|X2, then B2|D2, then 0|Op2.  The asserts check that each +   operand fits its bit-field. */ +static void +_RXE(jit_state_t *_jit, _ui Op, _ui R1, _ui X2, _ui B2, _ui D2, _ui Op2) +{ + union { + struct { + _us op : 8; + _us r1 : 4; + _us x2 : 4; + } b; + _us s; + } i0; + union { + struct { + _us b2 : 4; + _us d2 : 12; + } b; + /* Must be a halfword like the bit-field struct it overlays; a wider +   view would read bytes that are never written. */ + _us s; + } i1; + union { + struct { + _us _ : 8; + _us op : 8; + } b; + _us s; + } i2; + i2.b._ = 0; + i0.b.op = Op; + i0.b.r1 = R1; + i0.b.x2 = X2; + i1.b.b2 = B2; + i1.b.d2 = D2; + i2.b.op = Op2; + assert(i0.b.op == Op); + assert(i0.b.r1 == R1); + assert(i0.b.x2 == X2); + assert(i1.b.b2 == B2); + assert(i1.b.d2 == D2); + assert(i2.b.op == Op2); + is(i0.s); + is(i1.s); + is(i2.s); +} + +static void +_RXF(jit_state_t *_jit, _ui Op, _ui R3, _ui X2, _ui B2, _ui D2, _ui R1, _ui Op2) +{ + union { + struct { + _us op : 8; + _us r3 : 4; + _us x2 : 4; + } b; + _us s; + } i0; + union { + struct { + _us b2 : 4; + _us d2 : 12; + } b; + _us s; + } i1; + union { + struct { + _us r1 : 4; + _us _ : 4; + _us op : 8; + } b; + _us s; + } i2; + i2.b._ = 0; + i0.b.op = Op; + i0.b.r3 = R3; + i0.b.x2 = X2; + i1.b.b2 = B2; + i1.b.d2 = D2; + i2.b.r1 = R1; + i2.b.op = Op2; + assert(i0.b.op == Op); + assert(i0.b.r3 == R3); + assert(i0.b.x2 == X2); + assert(i1.b.b2 == B2); + assert(i1.b.d2 == D2); + assert(i2.b.r1 == R1); + assert(i2.b.op == Op2); + is(i0.s); + is(i1.s); + is(i2.s); +} + +static void +_RXY(jit_state_t *_jit, _ui Op, _ui R1, _ui X2, _ui B2, _ui D2, _ui Op2) +{ + union { + struct { + _us op : 8; + _us r1 : 4; + _us x2 : 4; + } b; + _us s; + } i0; + union { + struct { + _us b2 : 4; + _us dl : 12; + } b; + _us s; + } i1; + union { + struct { + _us dh : 8; + _us op : 8; + } b; + _us s; + } i2; + i0.s = i1.s = i2.s = 0; + i0.b.op = Op; + i0.b.r1 = R1; + i0.b.x2 = X2; + i1.b.b2 = B2; + i1.b.dl = D2 & 0xfff; + i2.b.dh = D2 >> 12; + i2.b.op = Op2; + assert(i0.b.op == Op); +
assert(i0.b.r1 == R1); + assert(i0.b.x2 == X2); + assert(i1.b.b2 == B2); + assert(i2.b.dh == D2 >> 12); + assert(i2.b.op == Op2); + is(i0.s); + is(i1.s); + is(i2.s); +} + +static void +_RS(jit_state_t *_jit, _ui Op, _ui R1, _ui R3, _ui B2, _ui D2) +{ + union { + struct { + _us op : 8; + _us r1 : 4; + _us r3 : 4; + } b; + _us s; + } i0; + union { + struct { + _us b2 : 4; + _us d2 : 12; + } b; + _us s; + } i1; + i0.s = i1.s = 0; + i0.b.op = Op; + i0.b.r1 = R1; + i0.b.r3 = R3; + i1.b.b2 = B2; + i1.b.d2 = D2; + assert(i0.b.op == Op); + assert(i0.b.r1 == R1); + assert(i0.b.r3 == R3); + assert(i1.b.b2 == B2); + assert(i1.b.d2 == D2); + is(i0.s); + is(i1.s); +} + +static void +_RSL(jit_state_t *_jit, _ui Op, _ui L1, _ui B1, _ui D1, _ui Op2) +{ + union { + struct { + _us op : 8; + _us l1 : 4; + _us _ : 4; + } b; + _us s; + } i0; + union { + struct { + _us b1 : 4; + _us d1 : 12; + } b; + _us s; + } i1; + union { + struct { + _us _ : 8; + _us op : 8; + } b; + _us s; + } i2; + i0.b._ = 0; + i2.b._ = 0; + i0.b.op = Op; + i0.b.l1 = L1; + i1.b.b1 = B1; + i1.b.d1 = D1; + i2.b.op = Op2; + assert(i0.b.op == Op); + assert(i0.b.l1 == L1); + assert(i1.b.b1 == B1); + assert(i1.b.d1 == D1); + assert(i2.b.op == Op2); + is(i0.s); + is(i1.s); + is(i2.s); +} + +static void +_RSI(jit_state_t *_jit, _ui Op, _ui R1, _ui R3, _ui I2) +{ + union { + struct { + _us op : 8; + _us r1 : 4; + _us r3 : 4; + } b; + _us s; + } i0; + union { + struct { + _us i2; + } b; + _us s; + } i1; + i0.b.op = Op; + i0.b.r1 = R1; + i0.b.r3 = R3; + i1.b.i2 = I2; + assert(i0.b.op == Op); + assert(i0.b.r1 == R1); + assert(i0.b.r3 == R3); + assert(i1.b.i2 == I2); + is(i0.s); + is(i1.s); +} + +static void +_RIE(jit_state_t *_jit, _ui Op, _ui R1, _ui R3, _ui I2, _ui Op2) +{ + union { + struct { + _us op : 8; + _us r1 : 4; + _us r3 : 4; + } b; + _us s; + } i0; + union { + struct { + _us i2; + } b; + _us s; + } i1; + union { + struct { + _us _ : 8; + _us op : 8; + } b; + _us s; + } i2; + i2.b._ = 0; + i0.b.op = Op; + i0.b.r1 
= R1; + i0.b.r3 = R3; + i1.b.i2 = I2; + i2.b.op = Op2; + assert(i0.b.op == Op); + assert(i0.b.r1 == R1); + assert(i0.b.r3 == R3); + assert(i1.b.i2 == I2); + assert(i2.b.op == Op2); + is(i0.s); + is(i1.s); + is(i2.s); +} + +static void +_RIL(jit_state_t *_jit, _ui Op, _ui R1, _ui Op2, _ui I2) +{ + union { + struct { + _us o1 : 8; + _us r1 : 4; + _us o2 : 4; + } b; + _us s; + } i0; + union { + struct { + _ui ih : 16; + _ui il : 16; + } b; + _ui i; + } i12; + i0.b.o1 = Op; + i0.b.r1 = R1; + i0.b.o2 = Op2; + i12.i = I2; + assert(i0.b.o1 == Op); + assert(i0.b.r1 == R1); + assert(i0.b.o2 == Op2); + is(i0.s); + is(i12.b.ih); + is(i12.b.il); +} + +static void +_SI(jit_state_t *_jit, _ui Op, _ui I2, _ui B1, _ui D1) +{ + union { + struct { + _us op : 8; + _us i2 : 8; + } b; + _us s; + } i0; + union { + struct { + _us b1 : 4; + _us d1 : 12; + } b; + _us s; + } i1; + i0.b.op = Op; + i0.b.i2 = I2; + i1.b.b1 = B1; + i1.b.d1 = D1; + assert(i0.b.op == Op); + assert(i0.b.i2 == I2); + assert(i1.b.b1 == B1); + assert(i1.b.d1 == D1); + is(i0.s); + is(i1.s); +} + +/* Emit an SIY-format instruction as three halfwords: +   Op|I2, then B1|dl, then dh|Op2.  The displacement D1 is split into +   its low 12 bits (dl) and the bits above them (dh), the same split +   _RXY applies to D2. */ +static void +_SIY(jit_state_t *_jit, _ui Op, _ui I2, _ui B1, _ui D1, _ui Op2) +{ + union { + struct { + _us op : 8; + _us i2 : 8; + } b; + _us s; + } i0; + union { + struct { + _us b1 : 4; + _us dl : 12; + } b; + _us s; + } i1; + union { + struct { + _us dh : 8; + _us op : 8; + } b; + _us s; + } i2; + i0.b.op = Op; + i0.b.i2 = I2; + i1.b.b1 = B1; + i1.b.dl = D1 & 0xfff; + /* dl already holds bits 0-11, so dh starts at bit 12. */ + i2.b.dh = D1 >> 12; + i2.b.op = Op2; + assert(i0.b.op == Op); + assert(i0.b.i2 == I2); + assert(i1.b.b1 == B1); + assert(i2.b.dh == D1 >> 12); + assert(i2.b.op == Op2); + is(i0.s); + is(i1.s); + is(i2.s); +} + +static void +_S(jit_state_t *_jit, _ui Op, _ui B2, _ui D2) +{ + union { + struct { + _us op; + } b; + _us s; + } i0; + union { + struct { + _us b2 : 4; + _us d2 : 12; + } b; + _us s; + } i1; + i0.b.op = Op; + i1.b.b2 = B2; + i1.b.d2 = D2; + assert(i0.b.op == Op); + assert(i1.b.b2 == B2); + assert(i1.b.d2 == D2); + is(i0.s); + is(i1.s); +} + +static void
+_SS(jit_state_t *_jit, _ui Op, _ui LL, _ui LH, _ui B1, _ui D1, _ui B2, _ui D2) +{ + union { + struct { + _us op : 8; + _us ll : 4; + _us lh : 4; + } b; + _us s; + } i0; + union { + struct { + _us b1 : 4; + _us d1 : 12; + } b; + _us s; + } i1; + union { + struct { + _us b2 : 4; + _us d2 : 12; + } b; + _us s; + } i2; + i0.b.op = Op; + i0.b.ll = LL; + i0.b.lh = LH; + i1.b.b1 = B1; + i1.b.d1 = D1; + i2.b.b2 = B2; + i2.b.d2 = D2; + assert(i0.b.op == Op); + assert(i0.b.ll == LL); + assert(i0.b.lh == LH); + assert(i1.b.b1 == B1); + assert(i1.b.d1 == D1); + assert(i2.b.b2 == B2); + assert(i2.b.d2 == D2); + is(i0.s); + is(i1.s); + is(i2.s); +} + +static void +_SSE(jit_state_t *_jit, _ui Op, _ui B1, _ui D1, _ui B2, _ui D2) +{ + union { + struct { + _us op; + } b; + _us s; + } i0; + union { + struct { + _us b1 : 4; + _us d1 : 12; + } b; + _us s; + } i1; + union { + struct { + _us b2 : 4; + _us d2 : 12; + } b; + _us s; + } i2; + i0.b.op = Op; + i1.b.b1 = B1; + i1.b.d1 = D1; + i2.b.b2 = B2; + i2.b.d2 = D2; + assert(i0.b.op == Op); + assert(i1.b.b1 == B1); + assert(i1.b.d1 == D1); + assert(i2.b.b2 == B2); + assert(i2.b.d2 == D2); + is(i0.s); + is(i1.s); + is(i2.s); +} +# undef _us +# undef _ui + +static void +_nop(jit_state_t *_jit, int32_t c) +{ + assert(c >= 0 && !(c & 1)); + while (c) { + NOPR(_R7_REGNO); + c -= 2; + } +} + +static int32_t +_xdivr(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + int32_t regno; + regno = jit_get_reg_pair(); +#if __WORDSIZE == 32 + movr(rn(regno), r0); + SRDA(rn(regno), 32, 0); +#else + movr(rn(regno) + 1, r0); +#endif + DIVREM_(rn(regno), r1); + jit_unget_reg_pair(regno); + return (regno); +} + +static int32_t +_xdivr_u(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + int32_t regno; + regno = jit_get_reg_pair(); +#if __WORDSIZE == 32 + movr(rn(regno), r0); + SRDL(rn(regno), 32, 0); +#else + movr(rn(regno) + 1, r0); +#endif + movi(rn(regno), 0); + DIVREMU_(rn(regno), r1); + jit_unget_reg_pair(regno); + return (regno); +} + +static int32_t 
+_xdivi(jit_state_t *_jit, int32_t r0, jit_word_t i0) +{ + int32_t imm, regno; + regno = jit_get_reg_pair(); + imm = jit_get_reg(jit_class_gpr); +#if __WORDSIZE == 32 + movr(rn(regno), r0); + SRDA(rn(regno), 32, 0); +#else + movr(rn(regno) + 1, r0); +#endif + movi(rn(imm), i0); + DIVREM_(rn(regno), rn(imm)); + jit_unget_reg(imm); + jit_unget_reg_pair(regno); + return (regno); +} + +static int32_t +_xdivi_u(jit_state_t *_jit, int32_t r0, jit_word_t i0) +{ + /* cannot overlap because operand is 128-bit */ + int32_t imm, regno; + regno = jit_get_reg_pair(); + imm = jit_get_reg(jit_class_gpr); +#if __WORDSIZE == 32 + movr(rn(regno), r0); + SRDL(rn(regno), 32, 0); +#else + movr(rn(regno) + 1, r0); +#endif + movi(rn(regno), 0); + movi(rn(imm), i0); + DIVREMU_(rn(regno), rn(imm)); + jit_unget_reg(imm); + jit_unget_reg_pair(regno); + return (regno); +} + +static void +_crr(jit_state_t *_jit, int32_t cc, + int32_t r0, int32_t r1, int32_t r2) +{ + jit_word_t w; + int32_t reg, rg; + if (r0 == r1 || r0 == r2) { + reg = jit_get_reg(jit_class_gpr); + rg = rn(reg); + } + else + rg = r0; + movi(rg, 1); + CMP_(r1, r2); + w = _jit->pc.w; + BRC(cc, 0); + movi(rg, 0); + patch_at(w, _jit->pc.w); + if (r0 == r1 || r0 == r2) { + movr(r0, rg); + jit_unget_reg(reg); + } +} + +static void +_cri(jit_state_t *_jit, int32_t cc, + int32_t r0, int32_t r1, jit_word_t i0) +{ + int32_t reg; + reg = jit_get_reg(jit_class_gpr); + movi(rn(reg), i0); + crr(cc, r0, r1, rn(reg)); + jit_unget_reg(reg); +} + +static void +_crr_u(jit_state_t *_jit, int32_t cc, + int32_t r0, int32_t r1, int32_t r2) +{ + jit_word_t w; + int32_t reg, rg; + if (r0 == r1 || r0 == r2) { + reg = jit_get_reg(jit_class_gpr); + rg = rn(reg); + } + else + rg = r0; + movi(rg, 1); + CMPU_(r1, r2); + w = _jit->pc.w; + BRC(cc, 0); + movi(rg, 0); + patch_at(w, _jit->pc.w); + if (r0 == r1 || r0 == r2) { + movr(r0, rg); + jit_unget_reg(reg); + } +} + +static void +_cri_u(jit_state_t *_jit, int32_t cc, + int32_t r0, int32_t r1, jit_word_t i0) 
+{ + int32_t reg; + reg = jit_get_reg(jit_class_gpr); + movi(rn(reg), i0); + crr_u(cc, r0, r1, rn(reg)); + jit_unget_reg(reg); +} + +static void +_brr(jit_state_t *_jit, int32_t cc, + jit_word_t i0, int32_t r0, int32_t r1) +{ + jit_word_t d; + CMP_(r0, r1); + d = (i0 - _jit->pc.w) >> 1; + if (s16_p(d)) + BRC(cc, x16(d)); + else { + assert(s32_p(d)); + BRCL(cc, d); + } +} + +static jit_word_t +_brr_p(jit_state_t *_jit, int32_t cc, + jit_word_t i0, int32_t r0, int32_t r1) +{ + jit_word_t w; + CMP_(r0, r1); + w = _jit->pc.w; + BRCL(cc, 0); + return (w); +} + +static void +_bri(jit_state_t *_jit, int32_t cc, + jit_word_t i0, int32_t r0, jit_word_t i1) +{ + int32_t reg; + reg = jit_get_reg(jit_class_gpr|jit_class_nospill); + movi(rn(reg), i1); + brr(cc, i0, r0, rn(reg)); + jit_unget_reg(reg); +} + +static jit_word_t +_bri_p(jit_state_t *_jit, int32_t cc, + jit_word_t i0, int32_t r0, jit_word_t i1) +{ + jit_word_t w; + int32_t reg; + reg = jit_get_reg(jit_class_gpr|jit_class_nospill); + movi(rn(reg), i1); + w = brr_p(cc, i0, r0, rn(reg)); + jit_unget_reg(reg); + return (w); +} + +static void +_brr_u(jit_state_t *_jit, int32_t cc, + jit_word_t i0, int32_t r0, int32_t r1) +{ + jit_word_t d; + CMPU_(r0, r1); + d = (i0 - _jit->pc.w) >> 1; + if (s16_p(d)) + BRC(cc, x16(d)); + else { + assert(s32_p(d)); + BRCL(cc, d); + } +} + +static jit_word_t +_brr_u_p(jit_state_t *_jit, int32_t cc, + jit_word_t i0, int32_t r0, int32_t r1) +{ + jit_word_t w; + CMPU_(r0, r1); + w = _jit->pc.w; + BRCL(cc, 0); + return (w); +} + +static void +_bri_u(jit_state_t *_jit, int32_t cc, + jit_word_t i0, int32_t r0, jit_word_t i1) +{ + int32_t reg; + reg = jit_get_reg(jit_class_gpr|jit_class_nospill); + movi(rn(reg), i1); + brr_u(cc, i0, r0, rn(reg)); + jit_unget_reg(reg); +} + +static jit_word_t +_bri_u_p(jit_state_t *_jit, int32_t cc, + jit_word_t i0, int32_t r0, jit_word_t i1) +{ + jit_word_t w; + int32_t reg; + reg = jit_get_reg(jit_class_gpr|jit_class_nospill); + movi(rn(reg), i1); + w = 
brr_u_p(cc, i0, r0, rn(reg)); + jit_unget_reg(reg); + return (w); +} + +static void +_baddr(jit_state_t *_jit, int32_t c, jit_bool_t s, + jit_word_t i0, int32_t r0, int32_t r1) +{ + jit_word_t d; + if (s) addr(r0, r0, r1); + else addcr(r0, r0, r1); + d = (i0 - _jit->pc.w) >> 1; + if (s16_p(d)) + BRC(c, x16(d)); + else { + assert(s32_p(d)); + BRCL(c, d); + } +} + +static void +_baddi(jit_state_t *_jit, int32_t c, jit_bool_t s, + jit_word_t i0, int32_t r0, jit_word_t i1) +{ + int32_t reg; + reg = jit_get_reg(jit_class_gpr|jit_class_nospill); + movi(rn(reg), i1); + baddr(c, s, i0, r0, rn(reg)); + jit_unget_reg(reg); +} + +static jit_word_t +_baddr_p(jit_state_t *_jit, int32_t c, jit_bool_t s, + jit_word_t i0, int32_t r0, int32_t r1) +{ + jit_word_t d, w; + if (s) addr(r0, r0, r1); + else addcr(r0, r0, r1); + d = (i0 - _jit->pc.w) >> 1; + w = _jit->pc.w; + BRCL(c, d); + return (w); +} + +static jit_word_t +_baddi_p(jit_state_t *_jit, int32_t c, jit_bool_t s, + jit_word_t i0, int32_t r0, jit_word_t i1) +{ + jit_word_t w; + int32_t reg; + reg = jit_get_reg(jit_class_gpr|jit_class_nospill); + movi(rn(reg), i1); + w = baddr_p(c, s, i0, r0, rn(reg)); + jit_unget_reg(reg); + return (w); +} + +static void +_bsubr(jit_state_t *_jit, int32_t c, jit_bool_t s, + jit_word_t i0, int32_t r0, int32_t r1) +{ + jit_word_t d; + if (s) subr(r0, r0, r1); + else subcr(r0, r0, r1); + d = (i0 - _jit->pc.w) >> 1; + if (s16_p(d)) + BRC(c, x16(d)); + else { + assert(s32_p(d)); + BRCL(c, d); + } +} + +static void +_bsubi(jit_state_t *_jit, int32_t c, jit_bool_t s, + jit_word_t i0, int32_t r0, jit_word_t i1) +{ + int32_t reg; + reg = jit_get_reg(jit_class_gpr|jit_class_nospill); + movi(rn(reg), i1); + bsubr(c, s, i0, r0, rn(reg)); + jit_unget_reg(reg); +} + +static jit_word_t +_bsubr_p(jit_state_t *_jit, int32_t c, jit_bool_t s, + jit_word_t i0, int32_t r0, int32_t r1) +{ + jit_word_t d, w; + if (s) subr(r0, r0, r1); + else subcr(r0, r0, r1); + d = (i0 - _jit->pc.w) >> 1; + w = _jit->pc.w; + 
BRCL(c, d); + return (w); +} + +static jit_word_t +_bsubi_p(jit_state_t *_jit, int32_t c, jit_bool_t s, + jit_word_t i0, int32_t r0, jit_word_t i1) +{ + jit_word_t w; + int32_t reg; + reg = jit_get_reg(jit_class_gpr|jit_class_nospill); + movi(rn(reg), i1); + w = bsubr_p(c, s, i0, r0, rn(reg)); + jit_unget_reg(reg); + return (w); +} + +static void +_bmxr(jit_state_t *_jit, int32_t cc, + jit_word_t i0, int32_t r0, int32_t r1) +{ + jit_word_t d; + int32_t reg; + reg = jit_get_reg(jit_class_gpr); + movr(rn(reg), r0); + andr(rn(reg), rn(reg), r1); + TEST_(rn(reg), rn(reg)); + jit_unget_reg(reg); + d = (i0 - _jit->pc.w) >> 1; + if (s16_p(d)) + BRC(cc, x16(d)); + else { + assert(s32_p(d)); + BRCL(cc, d); + } +} + +static jit_word_t +_bmxr_p(jit_state_t *_jit, int32_t cc, + jit_word_t i0, int32_t r0, int32_t r1) +{ + jit_word_t w; + int32_t reg; + reg = jit_get_reg(jit_class_gpr); + movr(rn(reg), r0); + andr(rn(reg), rn(reg), r1); + TEST_(rn(reg), rn(reg)); + jit_unget_reg(reg); + w = _jit->pc.w; + BRCL(cc, 0); + return (w); +} + +static void +_bmxi(jit_state_t *_jit, int32_t cc, + jit_word_t i0, int32_t r0, jit_word_t i1) +{ + jit_word_t d; + int32_t reg; + reg = jit_get_reg(jit_class_gpr); + movi(rn(reg), i1); + andr(rn(reg), rn(reg), r0); + TEST_(rn(reg), rn(reg)); + jit_unget_reg(reg); + d = (i0 - _jit->pc.w) >> 1; + if (s16_p(d)) + BRC(cc, x16(d)); + else { + assert(s32_p(d)); + BRCL(cc, d); + } +} + +static jit_word_t +_bmxi_p(jit_state_t *_jit, int32_t cc, + jit_word_t i0, int32_t r0, jit_word_t i1) +{ + jit_word_t w; + int32_t reg; + reg = jit_get_reg(jit_class_gpr); + movi(rn(reg), i1); + andr(rn(reg), rn(reg), r0); + TEST_(rn(reg), rn(reg)); + jit_unget_reg(reg); + w = _jit->pc.w; + BRCL(cc, 0); + return (w); +} + +static void +_movr(jit_state_t *_jit, int32_t r0, int32_t r1) +{ +#if __WORDSIZE == 32 + if (r0 != r1) + LR(r0, r1); +#else + if (r0 != r1) + LGR(r0, r1); +#endif +} + +static void +_movi(jit_state_t *_jit, int32_t r0, jit_word_t i0) +{ + jit_word_t d; 
+#if __WORDSIZE == 64 + int32_t bits; +#endif + d = (i0 - _jit->pc.w) >> 1; + if (s16_p(i0)) { +#if __WORDSIZE == 32 + LHI(r0, x16(i0)); +#else + LGHI(r0, x16(i0)); +#endif + } + /* easy way of loading a large amount of 32 bit values and + * usually address of constants */ + else if (!(i0 & 1) && +#if __WORDSIZE == 32 + i0 > 0 +#else + s32_p(d) +#endif + ) + LARL(r0, d); + else { +#if __WORDSIZE == 32 + LHI(r0, x16(i0)); + IILH(r0, x16((jit_uword_t)i0 >> 16)); +#else + bits = 0; + if (i0 & 0xffffL) bits |= 1; + if (i0 & 0xffff0000L) bits |= 2; + if (i0 & 0xffff00000000L) bits |= 4; + if (i0 & 0xffff000000000000L) bits |= 8; + if (bits != 15) LGHI(r0, 0); + if (bits & 1) IILL(r0, x16(i0)); + if (bits & 2) IILH(r0, x16((jit_uword_t)i0 >> 16)); + if (bits & 4) IIHL(r0, x16((jit_uword_t)i0 >> 32)); + if (bits & 8) IIHH(r0, x16((jit_uword_t)i0 >> 48)); +#endif + } +} + +static jit_word_t +_movi_p(jit_state_t *_jit, int32_t r0, jit_word_t i0) +{ + jit_word_t w; + w = _jit->pc.w; +#if __WORDSIZE == 32 + LHI(r0, x16(i0)); +#else + IILL(r0, x16(i0)); +#endif + IILH(r0, x16((jit_uword_t)i0 >> 16)); +#if __WORDSIZE == 64 + IIHL(r0, x16((jit_uword_t)i0 >> 32)); + IIHH(r0, x16((jit_uword_t)i0 >> 48)); +#endif + return (w); +} + +static void +_addr(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2) +{ + if (r0 == r2) + ADD_(r0, r1); + else { + movr(r0, r1); + ADD_(r0, r2); + } +} + +static void +_addi(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0) +{ + int32_t reg; + if (r0 == r1 && s16_p(i0)) + ADDI_(r0, x16(i0)); +#if __WORDSIZE == 64 + else if (s20_p(i0)) + LAY(r0, x20(i0), 0, r1); +#endif + else { + reg = jit_get_reg(jit_class_gpr); + movi(rn(reg), i0); + addr(r0, r1, rn(reg)); + jit_unget_reg(reg); + } +} + +static void +_addcr(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2) +{ + if (r0 == r2) + ADDC_(r0, r1); + else { + movr(r0, r1); + ADDC_(r0, r2); + } +} + +static void +_addci(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0) +{ + int32_t 
reg; + reg = jit_get_reg(jit_class_gpr); + movi(rn(reg), i0); + addcr(r0, r1, rn(reg)); + jit_unget_reg(reg); +} + +static void +_addxr(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2) +{ + if (r0 == r2) + ADDX_(r0, r1); + else { + movr(r0, r1); + ADDX_(r0, r2); + } +} + +static void +_addxi(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0) +{ + int32_t reg; + reg = jit_get_reg(jit_class_gpr); + movi(rn(reg), i0); + addxr(r0, r1, rn(reg)); + jit_unget_reg(reg); +} + +static void +_subr(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2) +{ + int32_t reg; + if (r0 == r2) { + reg = jit_get_reg(jit_class_gpr); + movr(rn(reg), r2); + movr(r0, r1); + SUB_(r0, rn(reg)); + jit_unget_reg(reg); + } + else { + movr(r0, r1); + SUB_(r0, r2); + } +} + +static void +_subi(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0) +{ + int32_t reg; + if (r0 == r1 && s16_p(-i0)) + ADDI_(r0, x16(-i0)); +#if __WORDSIZE == 64 + else if (s20_p(-i0)) + LAY(r0, x20(-i0), 0, r1); +#endif + else { + reg = jit_get_reg(jit_class_gpr); + movi(rn(reg), i0); + subr(r0, r1, rn(reg)); + jit_unget_reg(reg); + } +} + +static void +_subcr(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2) +{ + int32_t reg; + if (r0 == r2) { + reg = jit_get_reg(jit_class_gpr); + movr(rn(reg), r2); + movr(r0, r1); + SUBC_(r0, rn(reg)); + jit_unget_reg(reg); + } + else { + movr(r0, r1); + SUBC_(r0, r2); + } +} + +static void +_subci(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0) +{ + int32_t reg; + reg = jit_get_reg(jit_class_gpr); + movi(rn(reg), i0); + subcr(r0, r1, rn(reg)); + jit_unget_reg(reg); +} + +static void +_subxr(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2) +{ + int32_t reg; + if (r0 == r2) { + reg = jit_get_reg(jit_class_gpr); + movr(rn(reg), r2); + movr(r0, r1); + SUBX_(r0, rn(reg)); + jit_unget_reg(reg); + } + else { + movr(r0, r1); + SUBX_(r0, r2); + } +} + +static void +_subxi(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0) +{ + int32_t reg; + reg = 
jit_get_reg(jit_class_gpr); + movi(rn(reg), i0); + subxr(r0, r1, rn(reg)); + jit_unget_reg(reg); +} + +static void +_rsbi(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0) +{ + subi(r0, r1, i0); + negr(r0, r0); +} + +static void +_mulr(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2) +{ + if (r0 == r2) + MUL_(r0, r1); + else { + movr(r0, r1); + MUL_(r0, r2); + } +} + +static void +_muli(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0) +{ + int32_t reg; + if (s16_p(i0)) { + movr(r0, r1); + MULI_(r0, x16(i0)); + } + else { + reg = jit_get_reg(jit_class_gpr); + movi(rn(reg), i0); + mulr(r0, r1, rn(reg)); + jit_unget_reg(reg); + } +} + +static void +_qmulr(jit_state_t *_jit, + int32_t r0, int32_t r1, int32_t r2, int32_t r3) +{ + int32_t reg; + /* The only invalid condition is r0 == r1 */ + int32_t t2, t3, s2, s3; + if (r2 == r0 || r2 == r1) { + s2 = jit_get_reg(jit_class_gpr); + t2 = rn(s2); + movr(t2, r2); + } + else + t2 = r2; + if (r3 == r0 || r3 == r1) { + s3 = jit_get_reg(jit_class_gpr); + t3 = rn(s3); + movr(t3, r3); + } + else + t3 = r3; + qmulr_u(r0, r1, r2, r3); + reg = jit_get_reg(jit_class_gpr); + /**/ + rshi(rn(reg), t2, 63); + mulr(rn(reg), rn(reg), t3); + addr(r1, r1, rn(reg)); + /**/ + rshi(rn(reg), t3, 63); + mulr(rn(reg), rn(reg), t2); + addr(r1, r1, rn(reg)); + jit_unget_reg(reg); + if (t2 != r2) + jit_unget_reg(s2); + if (t3 != r3) + jit_unget_reg(s3); +} + +static void +_qmuli(jit_state_t *_jit, + int32_t r0, int32_t r1, int32_t r2, jit_word_t i0) +{ + int32_t reg; + reg = jit_get_reg(jit_class_gpr); + movi(rn(reg), i0); + qmulr(r0, r1, r2, rn(reg)); + jit_unget_reg(reg); +} + +static void +_qmulr_u(jit_state_t *_jit, + int32_t r0, int32_t r1, int32_t r2, int32_t r3) +{ + int32_t regno; + regno = jit_get_reg_pair(); + movr(rn(regno) + 1, r2); + MULU_(rn(regno), r3); + movr(r0, rn(regno) + 1); + movr(r1, rn(regno)); + jit_unget_reg_pair(regno); +} + +static void +_qmuli_u(jit_state_t *_jit, + int32_t r0, int32_t r1, int32_t r2, 
jit_word_t i0) +{ + int32_t regno; + regno = jit_get_reg_pair(); + movr(rn(regno) + 1, r2); + movi(rn(regno), i0); + MULU_(rn(regno), rn(regno)); + movr(r0, rn(regno) + 1); + movr(r1, rn(regno)); + jit_unget_reg_pair(regno); +} + +static void +_divr(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2) +{ + int32_t regno; + regno = xdivr(r1, r2); + movr(r0, rn(regno) + 1); +} + +static void +_divi(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0) +{ + int32_t regno; + regno = xdivi(r1, i0); + movr(r0, rn(regno) + 1); +} + +static void +_divr_u(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2) +{ + int32_t regno; + regno = xdivr_u(r1, r2); + movr(r0, rn(regno) + 1); +} + +static void +_divi_u(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0) +{ + int32_t regno; + regno = xdivi_u(r1, i0); + movr(r0, rn(regno) + 1); +} + +static void +_remr(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2) +{ + int32_t regno; + regno = xdivr(r1, r2); + movr(r0, rn(regno)); +} + +static void +_remi(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0) +{ + int32_t regno; + regno = xdivi(r1, i0); + movr(r0, rn(regno)); +} + +static void +_remr_u(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2) +{ + int32_t regno; + regno = xdivr_u(r1, r2); + movr(r0, rn(regno)); +} + +static void +_remi_u(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0) +{ + int32_t regno; + regno = xdivi_u(r1, i0); + movr(r0, rn(regno)); +} + +static void +_qdivr(jit_state_t *_jit, + int32_t r0, int32_t r1, int32_t r2, int32_t r3) +{ + int32_t regno; + regno = xdivr(r2, r3); + movr(r0, rn(regno) + 1); + movr(r1, rn(regno)); +} + +static void +_qdivi(jit_state_t *_jit, + int32_t r0, int32_t r1, int32_t r2, jit_word_t i0) +{ + int32_t regno; + regno = xdivi(r2, i0); + movr(r0, rn(regno) + 1); + movr(r1, rn(regno)); +} + +static void +_qdivr_u(jit_state_t *_jit, + int32_t r0, int32_t r1, int32_t r2, int32_t r3) +{ + int32_t regno; + regno = xdivr_u(r2, r3); + movr(r0, 
rn(regno) + 1); + movr(r1, rn(regno)); +} + +static void +_qdivi_u(jit_state_t *_jit, + int32_t r0, int32_t r1, int32_t r2, jit_word_t i0) +{ + int32_t regno; + regno = xdivi_u(r2, i0); + movr(r0, rn(regno) + 1); + movr(r1, rn(regno)); +} + +# if __WORDSIZE == 32 +static void +_lshr(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2) +{ + int32_t reg; + if (r0 == r2) { + reg = jit_get_reg_but_zero(0); + movr(rn(reg), r2); + movr(r0, r1); + SLL(r0, 0, rn(reg)); + jit_unget_reg_but_zero(reg); + } + else { + movr(r0, r1); + SLL(r0, 0, r2); + } +} +#endif + +static void +_lshi(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0) +{ + int32_t reg; + reg = jit_get_reg_but_zero(0); + movi(rn(reg), i0); + lshr(r0, r1, rn(reg)); + jit_unget_reg_but_zero(reg); +} + +# if __WORDSIZE == 32 +static void +_rshr(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2) +{ + int32_t reg; + if (r0 == r2) { + reg = jit_get_reg_but_zero(0); + movr(rn(reg), r2); + movr(r0, r1); + SRA(r0, 0, rn(reg)); + jit_unget_reg_but_zero(reg); + } + else { + movr(r0, r1); + SRA(r0, 0, r2); + } +} +#endif + +static void +_rshi(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0) +{ + int32_t reg; + reg = jit_get_reg_but_zero(0); + movi(rn(reg), i0); + rshr(r0, r1, rn(reg)); + jit_unget_reg_but_zero(reg); +} + +# if __WORDSIZE == 32 +static void +_rshr_u(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2) +{ + int32_t reg; + if (r0 == r2) { + reg = jit_get_reg_but_zero(0); + movr(rn(reg), r2); + movr(r0, r1); + SRL(r0, 0, rn(reg)); + jit_unget_reg_but_zero(reg); + } + else { + movr(r0, r1); + SRL(r0, 0, r2); + } +} +#endif + +static void +_rshi_u(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0) +{ + int32_t reg; + reg = jit_get_reg_but_zero(0); + movi(rn(reg), i0); + rshr_u(r0, r1, rn(reg)); + jit_unget_reg_but_zero(reg); +} + +static void +_comr(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + int32_t reg; + reg = jit_get_reg(jit_class_gpr); + movi(rn(reg), -1); + movr(r0, r1); 
+ XOR_(r0, rn(reg)); + jit_unget_reg(reg); +} + +static void +_andr(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2) +{ + if (r0 == r2) + AND_(r0, r1); + else { + movr(r0, r1); + AND_(r0, r2); + } +} + +static void +_andi(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0) +{ + movr(r0, r1); + NILL(r0, x16(i0)); + NILH(r0, x16((jit_uword_t)i0 >> 16)); +#if __WORDSIZE == 64 + NIHL(r0, x16((jit_uword_t)i0 >> 32)); + NIHH(r0, x16((jit_uword_t)i0 >> 48)); +#endif +} + +static void +_orr(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2) +{ + if (r0 == r2) + OR_(r0, r1); + else { + movr(r0, r1); + OR_(r0, r2); + } +} + +static void +_ori(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0) +{ + movr(r0, r1); + OILL(r0, x16(i0)); + OILH(r0, x16((jit_uword_t)i0 >> 16)); +#if __WORDSIZE == 64 + OIHL(r0, x16((jit_uword_t)i0 >> 32)); + OIHH(r0, x16((jit_uword_t)i0 >> 48)); +#endif +} + +static void +_xorr(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2) +{ + if (r0 == r2) + XOR_(r0, r1); + else { + movr(r0, r1); + XOR_(r0, r2); + } +} + +static void +_xori(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0) +{ + int32_t reg; + reg = jit_get_reg(jit_class_gpr); + movi(rn(reg), i0); + xorr(r0, r1, rn(reg)); + jit_unget_reg(reg); +} + +static void +_ldi_c(jit_state_t *_jit, int32_t r0, jit_word_t i0) +{ + movi(r0, i0); + ldr_c(r0, r0); +} + +static void +_ldxr_c(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2) +{ + if (r0 == r2) { + addr(r0, r0, r1); + ldr_c(r0, r0); + } + else { + movr(r0, r1); + addr(r0, r0, r2); + ldr_c(r0, r0); + } +} + +static void +_ldxi_c(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0) +{ + int32_t reg; + if (s20_p(i0)) { +#if __WORDSIZE == 32 + LB(r0, x20(i0), 0, r1); +#else + LGB(r0, x20(i0), 0, r1); +#endif + } + else if (r0 != r1) { + movi(r0, i0); + addr(r0, r0, r1); + ldr_c(r0, r0); + } + else { + reg = jit_get_reg_but_zero(0); + movi(rn(reg), i0); + addr(rn(reg), rn(reg), r1); + ldr_c(r0, rn(reg)); 
+ jit_unget_reg_but_zero(reg); + } +} + +static void +_ldi_uc(jit_state_t *_jit, int32_t r0, jit_word_t i0) +{ + movi(r0, i0); + ldr_uc(r0, r0); +} + +static void +_ldxr_uc(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2) +{ + if (r0 == r2) { + addr(r0, r0, r1); + ldr_uc(r0, r0); + } + else { + movr(r0, r1); + addr(r0, r0, r2); + ldr_uc(r0, r0); + } +} + +static void +_ldxi_uc(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0) +{ + int32_t reg; + if (s20_p(i0)) + LLGC(r0, x20(i0), 0, r1); + else if (r0 != r1) { + movi(r0, i0); + addr(r0, r0, r1); + ldr_uc(r0, r0); + } + else { + reg = jit_get_reg_but_zero(0); + movi(rn(reg), i0); + addr(rn(reg), rn(reg), r1); + ldr_uc(r0, rn(reg)); + jit_unget_reg_but_zero(reg); + } +} + +static void +_ldi_s(jit_state_t *_jit, int32_t r0, jit_word_t i0) +{ + movi(r0, i0); + ldr_s(r0, r0); +} + +static void +_ldxr_s(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2) +{ + if (r0 == r2) { + addr(r0, r0, r1); + ldr_s(r0, r0); + } + else { + movr(r0, r1); + addr(r0, r0, r2); + ldr_s(r0, r0); + } +} + +static void +_ldxi_s(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0) +{ + int32_t reg; +#if __WORDSIZE == 32 + if (u12_p(i0)) + LH(r0, i0, 0, r1); + else +#endif + if (s20_p(i0)) { +#if __WORDSIZE == 32 + LHY(r0, x20(i0), 0, r1); +#else + LGH(r0, x20(i0), 0, r1); +#endif + } + else if (r0 != r1) { + movi(r0, i0); + addr(r0, r0, r1); + ldr_s(r0, r0); + } + else { + reg = jit_get_reg_but_zero(0); + movi(rn(reg), i0); + addr(rn(reg), rn(reg), r1); + ldr_s(r0, rn(reg)); + jit_unget_reg_but_zero(reg); + } +} + +static void +_ldi_us(jit_state_t *_jit, int32_t r0, jit_word_t i0) +{ + movi(r0, i0); + ldr_us(r0, r0); +} + +static void +_ldxr_us(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2) +{ + if (r0 == r2) { + addr(r0, r0, r1); + ldr_us(r0, r0); + } + else { + movr(r0, r1); + addr(r0, r0, r2); + ldr_us(r0, r0); + } +} + +static void +_ldxi_us(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0) +{ + 
int32_t reg; + if (s20_p(i0)) + LLGH(r0, x20(i0), 0, r1); + else if (r0 != r1) { + movi(r0, i0); + addr(r0, r0, r1); + ldr_us(r0, r0); + } + else { + reg = jit_get_reg_but_zero(0); + movi(rn(reg), i0); + addr(rn(reg), rn(reg), r1); + ldr_us(r0, rn(reg)); + jit_unget_reg_but_zero(reg); + } +} + +static void +_ldi_i(jit_state_t *_jit, int32_t r0, jit_word_t i0) +{ + movi(r0, i0); + ldr_i(r0, r0); +} + +static void +_ldxr_i(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2) +{ + if (r0 == r2) { + addr(r0, r0, r1); + ldr_i(r0, r0); + } + else { + movr(r0, r1); + addr(r0, r0, r2); + ldr_i(r0, r0); + } +} + +static void +_ldxi_i(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0) +{ + int32_t reg; + if (s20_p(i0)) + LGF(r0, x20(i0), 0, r1); + else if (r0 != r1) { + movi(r0, i0); + addr(r0, r0, r1); + ldr_i(r0, r0); + } + else { + reg = jit_get_reg_but_zero(0); + movi(rn(reg), i0); + addr(rn(reg), rn(reg), r1); + ldr_i(r0, rn(reg)); + jit_unget_reg_but_zero(reg); + } +} + +#if __WORDSIZE == 64 +static void +_ldi_ui(jit_state_t *_jit, int32_t r0, jit_word_t i0) +{ + movi(r0, i0); + ldr_ui(r0, r0); +} + +static void +_ldxr_ui(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2) +{ + if (r0 == r2) { + addr(r0, r0, r1); + ldr_ui(r0, r0); + } + else { + movr(r0, r1); + addr(r0, r0, r2); + ldr_ui(r0, r0); + } +} + +static void +_ldxi_ui(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0) +{ + int32_t reg; + if (s20_p(i0)) + LLGF(r0, x20(i0), 0, r1); + else if (r0 != r1) { + movi(r0, i0); + addr(r0, r0, r1); + ldr_ui(r0, r0); + } + else { + reg = jit_get_reg_but_zero(0); + movi(rn(reg), i0); + addr(rn(reg), rn(reg), r1); + ldr_ui(r0, rn(reg)); + jit_unget_reg_but_zero(reg); + } +} + +static void +_ldi_l(jit_state_t *_jit, int32_t r0, jit_word_t i0) +{ + movi(r0, i0); + ldr_l(r0, r0); +} + +static void +_ldxr_l(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2) +{ + if (r0 == r2) { + addr(r0, r0, r1); + ldr_l(r0, r0); + } + else { + movr(r0, r1); + addr(r0, 
r0, r2); + ldr_l(r0, r0); + } +} + +static void +_ldxi_l(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0) +{ + int32_t reg; + if (s20_p(i0)) + LG(r0, x20(i0), 0, r1); + else if (r0 != r1) { + movi(r0, i0); + addr(r0, r0, r1); + ldr_l(r0, r0); + } + else { + reg = jit_get_reg_but_zero(0); + movi(rn(reg), i0); + addr(rn(reg), rn(reg), r1); + ldr_l(r0, rn(reg)); + jit_unget_reg_but_zero(reg); + } +} +#endif + +static void +_sti_c(jit_state_t *_jit, jit_word_t i0, int32_t r0) +{ + int32_t reg; + reg = jit_get_reg_but_zero(0); + movi(rn(reg), i0); + str_c(rn(reg), r0); + jit_unget_reg_but_zero(reg); +} + +static void +_stxr_c(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2) +{ + int32_t reg; + reg = jit_get_reg_but_zero(0); + movr(rn(reg), r0); + addr(rn(reg), rn(reg), r1); + str_c(rn(reg), r2); + jit_unget_reg_but_zero(reg); +} + +static void +_stxi_c(jit_state_t *_jit, jit_word_t i0, int32_t r0, int32_t r1) +{ + int32_t reg; + if (u12_p(i0)) + STC(r1, i0, 0, r0); + else if (s20_p(i0)) + STCY(r1, x20(i0), 0, r0); + else { + reg = jit_get_reg_but_zero(0); + addi(rn(reg), r0, i0); + str_c(rn(reg), r1); + jit_unget_reg_but_zero(reg); + } +} + +static void +_sti_s(jit_state_t *_jit, jit_word_t i0, int32_t r0) +{ + int32_t reg; + reg = jit_get_reg_but_zero(0); + movi(rn(reg), i0); + str_s(rn(reg), r0); + jit_unget_reg_but_zero(reg); +} + +static void +_stxr_s(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2) +{ + int32_t reg; + reg = jit_get_reg_but_zero(0); + movr(rn(reg), r0); + addr(rn(reg), rn(reg), r1); + str_s(rn(reg), r2); + jit_unget_reg_but_zero(reg); +} + +static void +_stxi_s(jit_state_t *_jit, jit_word_t i0, int32_t r0, int32_t r1) +{ + int32_t reg; + if (u12_p(i0)) + STH(r1, i0, 0, r0); + else if (s20_p(i0)) + STHY(r1, x20(i0), 0, r0); + else { + reg = jit_get_reg_but_zero(0); + addi(rn(reg), r0, i0); + str_s(rn(reg), r1); + jit_unget_reg_but_zero(reg); + } +} + +static void +_sti_i(jit_state_t *_jit, jit_word_t i0, int32_t r0) +{ + int32_t 
reg; + reg = jit_get_reg_but_zero(0); + movi(rn(reg), i0); + str_i(rn(reg), r0); + jit_unget_reg_but_zero(reg); +} + +static void +_stxr_i(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2) +{ + int32_t reg; + reg = jit_get_reg_but_zero(0); + movr(rn(reg), r0); + addr(rn(reg), rn(reg), r1); + str_i(rn(reg), r2); + jit_unget_reg_but_zero(reg); +} + +static void +_stxi_i(jit_state_t *_jit, jit_word_t i0, int32_t r0, int32_t r1) +{ + int32_t reg; + if (u12_p(i0)) + ST(r1, i0, 0, r0); + else if (s20_p(i0)) + STY(r1, x20(i0), 0, r0); + else { + reg = jit_get_reg_but_zero(0); + addi(rn(reg), r0, i0); + str_i(rn(reg), r1); + jit_unget_reg_but_zero(reg); + } +} + +#if __WORDSIZE == 64 +static void +_sti_l(jit_state_t *_jit, jit_word_t i0, int32_t r0) +{ + int32_t reg; + reg = jit_get_reg_but_zero(0); + movi(rn(reg), i0); + str_l(rn(reg), r0); + jit_unget_reg_but_zero(reg); +} + +static void +_stxr_l(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2) +{ + int32_t reg; + reg = jit_get_reg_but_zero(0); + movr(rn(reg), r0); + addr(rn(reg), rn(reg), r1); + str_l(rn(reg), r2); + jit_unget_reg_but_zero(reg); +} + +static void +_stxi_l(jit_state_t *_jit, jit_word_t i0, int32_t r0, int32_t r1) +{ + int32_t reg; + if (s20_p(i0)) + STG(r1, x20(i0), 0, r0); + else { + reg = jit_get_reg_but_zero(0); + addi(rn(reg), r0, i0); + str_l(rn(reg), r1); + jit_unget_reg_but_zero(reg); + } +} +#endif + +static void +_jmpi(jit_state_t *_jit, jit_word_t i0) +{ + jit_word_t d; + int32_t reg; + d = (i0 - _jit->pc.w) >> 1; + if (s16_p(d)) + J(x16(d)); + else if (s32_p(d)) + BRL(d); + else { + reg = jit_get_reg_but_zero(jit_class_nospill); + movi(rn(reg), i0); + jmpr(rn(reg)); + jit_unget_reg_but_zero(reg); + } +} + +static jit_word_t +_jmpi_p(jit_state_t *_jit, jit_word_t i0) +{ + jit_word_t w; + int32_t reg; + reg = jit_get_reg_but_zero(jit_class_nospill); + w = movi_p(rn(reg), i0); + jmpr(rn(reg)); + jit_unget_reg_but_zero(reg); + return (w); +} + +static void +_calli(jit_state_t *_jit, 
jit_word_t i0) +{ + jit_word_t d; + int32_t reg; + d = (i0 - _jit->pc.w) >> 1; + if (s32_p(d)) + BRASL(_R14_REGNO, d); + else { + reg = jit_get_reg_but_zero(0); + movi(rn(reg), i0); + callr(rn(reg)); + jit_unget_reg_but_zero(reg); + } +} + +static jit_word_t +_calli_p(jit_state_t *_jit, jit_word_t i0) +{ + jit_word_t w; + int32_t reg; + reg = jit_get_reg_but_zero(0); + w = movi_p(rn(reg), i0); + callr(rn(reg)); + jit_unget_reg_but_zero(reg); + return (w); +} + +static int32_t gprs[] = { + _R2, _R3, _R4, _R5, + _R6, _R7, _R8, _R9, _R10, _R11, _R12, _R13 +}; + +static void +_prolog(jit_state_t *_jit, jit_node_t *i0) +{ + int32_t regno, offset; + if (_jitc->function->define_frame || _jitc->function->assume_frame) { + int32_t frame = -_jitc->function->frame; + assert(_jitc->function->self.aoff >= frame); + if (_jitc->function->assume_frame) + return; + _jitc->function->self.aoff = frame; + } + if (_jitc->function->allocar) + _jitc->function->self.aoff &= -8; + _jitc->function->stack = ((_jitc->function->self.alen - + /* align stack at 8 bytes */ + _jitc->function->self.aoff) + 7) & -8; + /* *IFF* a non variadic function, + * Lightning does not reserve stack space for spilling arguments + * in registers. + * S390x, as per gcc, has 8 stack slots for spilling arguments, + * (%r6 is callee save) and uses an alloca like approach to save + * callee save fpr registers. + * Since argument registers are not saved in any lightning port, + * use the 8 slots to spill any modified fpr register, and still + * use the same stack frame logic as gcc. + * Save at least %r13 to %r15, as %r13 is used as frame pointer. + * *IFF* a variadic function, a "standard" stack frame, with + * fpr registers saved in an alloca'ed area, is used. 
+ */ + if ((_jitc->function->self.call & jit_call_varargs) && + jit_arg_reg_p(_jitc->function->vagp)) + regno = _jitc->function->vagp; + else { + for (regno = 4; regno < jit_size(gprs) - 1; regno++) { + if (jit_regset_tstbit(&_jitc->function->regset, gprs[regno])) + break; + } + } +#if __WORDSIZE == 32 +# define FP_OFFSET 64 + if (_jitc->function->self.call & jit_call_varargs) + offset = regno * 4 + 8; + else + offset = (regno - 4) * 4 + 32; + STM(rn(gprs[regno]), _R15_REGNO, x20(offset), _R15_REGNO); +#else +# define FP_OFFSET 128 + if (_jitc->function->self.call & jit_call_varargs) + offset = regno * 8 + 16; + else + offset = (regno - 4) * 8 + 48; + STMG(rn(gprs[regno]), _R15_REGNO, x20(offset), _R15_REGNO); +#endif + +#define SPILL(R, O) \ + do { \ + if (jit_regset_tstbit(&_jitc->function->regset, R)) \ + stxi_d(O, _R15_REGNO, rn(R)); \ + } while (0) + if (_jitc->function->self.call & jit_call_varargs) { + for (regno = _jitc->function->vafp; jit_arg_f_reg_p(regno); ++regno) + stxi_d(FP_OFFSET + regno * 8, _R15_REGNO, rn(_F0 - regno)); + SPILL(_F8, _jitc->function->vaoff + offsetof(jit_va_list_t, f8)); + SPILL(_F9, _jitc->function->vaoff + offsetof(jit_va_list_t, f9)); + SPILL(_F10, _jitc->function->vaoff + offsetof(jit_va_list_t, f10)); + SPILL(_F11, _jitc->function->vaoff + offsetof(jit_va_list_t, f11)); + SPILL(_F12, _jitc->function->vaoff + offsetof(jit_va_list_t, f12)); + SPILL(_F13, _jitc->function->vaoff + offsetof(jit_va_list_t, f13)); + SPILL(_F14, _jitc->function->vaoff + offsetof(jit_va_list_t, f14)); + } + else { + /* First 4 in low address */ +#if __WORDSIZE == 32 + SPILL(_F10, 0); + SPILL(_F11, 8); + SPILL(_F12, 16); + SPILL(_F13, 24); + /* gpr registers here */ + SPILL(_F14, 72); + SPILL(_F8, 80); + SPILL(_F9, 88); +#else + SPILL(_F10, 16); + SPILL(_F11, 24); + SPILL(_F12, 32); + SPILL(_F13, 48); + /* Last 3 in high address */ + SPILL(_F14, 136); + SPILL(_F8, 144); + SPILL(_F9, 152); +#endif + } +#undef SPILL + movr(_R13_REGNO, _R15_REGNO); + 
subi(_R15_REGNO, _R15_REGNO, stack_framesize + _jitc->function->stack); + if (_jitc->function->allocar) { + regno = jit_get_reg(jit_class_gpr); + movi(rn(regno), _jitc->function->self.aoff); + stxi_i(_jitc->function->aoffoff, _R13_REGNO, rn(regno)); + jit_unget_reg(regno); + } +} + +static void +_epilog(jit_state_t *_jit, jit_node_t *i0) +{ + int32_t regno, offset; + if (_jitc->function->assume_frame) + return; + if ((_jitc->function->self.call & jit_call_varargs) && + jit_arg_reg_p(_jitc->function->vagp)) + regno = _jitc->function->vagp; + else { + for (regno = 4; regno < jit_size(gprs) - 1; regno++) { + if (jit_regset_tstbit(&_jitc->function->regset, gprs[regno])) + break; + } + } +#if __WORDSIZE == 32 + if (_jitc->function->self.call & jit_call_varargs) + offset = regno * 4 + 8; + else + offset = (regno - 4) * 4 + 32; +#else + if (_jitc->function->self.call & jit_call_varargs) + offset = regno * 8 + 16; + else + offset = (regno - 4) * 8 + 48; +#endif + movr(_R15_REGNO, _R13_REGNO); + +#define LOAD(R, O) \ + do { \ + if (jit_regset_tstbit(&_jitc->function->regset, R)) \ + ldxi_d(rn(R), _R15_REGNO, O); \ + } while (0) + if (_jitc->function->self.call & jit_call_varargs) { + LOAD(_F8, _jitc->function->vaoff + offsetof(jit_va_list_t, f8)); + LOAD(_F9, _jitc->function->vaoff + offsetof(jit_va_list_t, f9)); + LOAD(_F10, _jitc->function->vaoff + offsetof(jit_va_list_t, f10)); + LOAD(_F11, _jitc->function->vaoff + offsetof(jit_va_list_t, f11)); + LOAD(_F12, _jitc->function->vaoff + offsetof(jit_va_list_t, f12)); + LOAD(_F13, _jitc->function->vaoff + offsetof(jit_va_list_t, f13)); + LOAD(_F14, _jitc->function->vaoff + offsetof(jit_va_list_t, f14)); + } + else { +#if __WORDSIZE == 32 + LOAD(_F10, 0); + LOAD(_F11, 8); + LOAD(_F12, 16); + LOAD(_F13, 24); + LOAD(_F14, 72); + LOAD(_F8, 80); + LOAD(_F9, 88); +#else + LOAD(_F10, 16); + LOAD(_F11, 24); + LOAD(_F12, 32); + LOAD(_F13, 48); + LOAD(_F14, 136); + LOAD(_F8, 144); + LOAD(_F9, 152); +#endif + } +#undef LOAD +#if 
__WORDSIZE == 32 + LM(rn(gprs[regno]), _R15_REGNO, x20(offset), _R15_REGNO); +#else + LMG(rn(gprs[regno]), _R15_REGNO, x20(offset), _R15_REGNO); +#endif + BR(_R14_REGNO); +} + +static void +_vastart(jit_state_t *_jit, int32_t r0) +{ + int32_t reg; + + assert(_jitc->function->self.call & jit_call_varargs); + + /* Return jit_va_list_t in the register argument */ + addi(r0, _R13_REGNO, _jitc->function->vaoff); + reg = jit_get_reg(jit_class_gpr); + + /* Initialize gp offset in the save area. */ + movi(rn(reg), _jitc->function->vagp); + stxi(offsetof(jit_va_list_t, gpoff), r0, rn(reg)); + + /* Initialize fp offset in the save area. */ + movi(rn(reg), _jitc->function->vafp); + stxi(offsetof(jit_va_list_t, fpoff), r0, rn(reg)); + + /* Initialize overflow pointer to the first stack argument. */ + addi(rn(reg), _R13_REGNO, _jitc->function->self.size); + stxi(offsetof(jit_va_list_t, over), r0, rn(reg)); + + /* Initialize register save area pointer. */ + stxi(offsetof(jit_va_list_t, save), r0, _R13_REGNO); + + jit_unget_reg(reg); +} + +static void +_vaarg(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + int32_t rg0; + int32_t rg1; + int32_t rg2; + jit_word_t ge_code; + jit_word_t lt_code; + + assert(_jitc->function->self.call & jit_call_varargs); + + rg0 = jit_get_reg_but_zero(0); + rg1 = jit_get_reg_but_zero(0); + + /* Load the gp offset in save area in the first temporary. */ + ldxi(rn(rg0), r1, offsetof(jit_va_list_t, gpoff)); + + /* Jump over if there are no remaining arguments in the save area. */ + ge_code = bgei_p(_jit->pc.w, rn(rg0), 5); + + /* Load the save area pointer in the second temporary. */ + ldxi(rn(rg1), r1, offsetof(jit_va_list_t, save)); + + /* Scale offset */ + rg2 = jit_get_reg_but_zero(0); + lshi(rn(rg2), rn(rg0), +#if __WORDSIZE == 32 + 2 +#else + 3 +#endif + ); + /* Add offset to saved area. */ + addi(rn(rg2), rn(rg2), 2 * sizeof(jit_word_t)); + + /* Load the vararg argument in the first argument. 
*/ + ldxr(r0, rn(rg1), rn(rg2)); + jit_unget_reg_but_zero(rg2); + + /* Update the gp offset. */ + addi(rn(rg0), rn(rg0), 1); + stxi(offsetof(jit_va_list_t, gpoff), r1, rn(rg0)); + + /* Will only need one temporary register below. */ + jit_unget_reg_but_zero(rg1); + + /* Jump over overflow code. */ + lt_code = jmpi_p(_jit->pc.w); + + /* Where to land if argument is in overflow area. */ + patch_at(ge_code, _jit->pc.w); + + /* Load overflow pointer. */ + ldxi(rn(rg0), r1, offsetof(jit_va_list_t, over)); + + /* Load argument. */ + ldr(r0, rn(rg0)); + + /* Update overflow pointer. */ + addi(rn(rg0), rn(rg0), sizeof(jit_word_t)); + stxi(offsetof(jit_va_list_t, over), r1, rn(rg0)); + + /* Where to land if argument is in save area. */ + patch_at(lt_code, _jit->pc.w); + + jit_unget_reg_but_zero(rg0); +} + +static void +_patch_at(jit_state_t *_jit, jit_word_t instr, jit_word_t label) +{ + jit_word_t d; + union { + uint16_t *s; + jit_word_t w; + } u; + u.w = instr; + union { + struct { + uint16_t op : 8; + uint16_t r1 : 4; + uint16_t r3 : 4; + } b; + uint16_t s; + } i0; + union { + struct { + uint16_t i2; + } b; + uint16_t s; + } i1; + union { + struct { + uint32_t ih : 16; + uint32_t il : 16; + } b; + uint32_t i; + } i12; + i0.s = u.s[0]; + /* movi_p */ + if (i0.b.op == +#if __WORDSIZE == 32 + 0xA7 && i0.b.r3 == 8 +#else + 0xA5 +#endif + ) { +#if __WORDSIZE == 64 + assert(i0.b.r3 == 3); +#endif + i1.b.i2 = (jit_uword_t)label; + u.s[1] = i1.s; + i0.s = u.s[2]; + assert(i0.b.op == 0xA5 && i0.b.r3 == 2); + i1.b.i2 = (jit_uword_t)label >> 16; + u.s[3] = i1.s; +#if __WORDSIZE == 64 + i0.s = u.s[4]; + assert(i0.b.op == 0xA5 && i0.b.r3 == 1); + i1.b.i2 = (jit_uword_t)label >> 32; + u.s[5] = i1.s; + i0.s = u.s[6]; + assert(i0.b.op == 0xA5 && i0.b.r3 == 0); + i1.b.i2 = (jit_uword_t)label >> 48; + u.s[7] = i1.s; +#endif + } + /* BRC */ + else if (i0.b.op == 0xA7) { + assert(i0.b.r3 == 0x4); + d = (label - instr) >> 1; + assert(s16_p(d)); + i1.b.i2 = d; + u.s[1] = i1.s; + } + /* BRCL 
*/ + else if (i0.b.op == 0xC0) { + assert(i0.b.r3 == 0x4); + d = (label - instr) >> 1; + assert(s32_p(d)); + i12.i = d; + u.s[1] = i12.b.ih; + u.s[2] = i12.b.il; + } + else + abort(); +} +#endif diff --git a/deps/lightening/lightening/s390-fpu.c b/deps/lightening/lightening/s390-fpu.c new file mode 100644 index 0000000..3866643 --- /dev/null +++ b/deps/lightening/lightening/s390-fpu.c @@ -0,0 +1,1316 @@ +/* + * Copyright (C) 2013-2017 Free Software Foundation, Inc. + * + * This file is part of GNU lightning. + * + * GNU lightning is free software; you can redistribute it and/or modify it + * under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation; either version 3, or (at your option) + * any later version. + * + * GNU lightning is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public + * License for more details. 
+ * + * Authors: + * Paulo Cesar Pereira de Andrade + */ + +#if PROTO +# define RND_CUR 0 +# define RND_BIAS_NEAR 1 +# define RND_NEAR 4 +# define RND_ZERO 5 +# define RND_POS_INF 6 +# define RND_NEG_INF 7 +/**************************************************************** + * Floating Point Instructions * + ****************************************************************/ +/* CONVERT BFP TO HFP */ +# define THDER(R1,R2) RRE_(0xB358,R1,R2) +# define THDR(R1,R2) RRE_(0xB359,R1,R2) +/* CONVERT HFP TO BFP */ +# define TBEDR(R1,R2) RRE_(0xB350,R1,R2) +# define TBDR(R1,R2) RRE_(0xB351,R1,R2) +/* LOAD */ +# define LER(R1,R2) RR_(0x38,R1,R2) +# define LDR(R1,R2) RR_(0x28,R1,R2) +# define LXR(R1,R2) RRE_(0xB365,R1,R2) +# define LE(R1,D2,X2,B2) RX_(0x78,R1,X2,B2,D2) +# define LD(R1,D2,X2,B2) RX_(0x68,R1,X2,B2,D2) +# define LEY(R1,D2,X2,B2) RXY_(0xED,R1,X2,B2,D2,0x64) +# define LDY(R1,D2,X2,B2) RXY_(0xED,R1,X2,B2,D2,0x65) +/* LOAD ZERO */ +# define LZER(R1) RRE_(0xB374,R1,0) +# define LZDR(R1) RRE_(0xB375,R1,0) +# define LZXR(R1) RRE_(0xB376,R1,0) +/* STORE */ +# define STE(R1,D2,X2,B2) RX_(0x70,R1,X2,B2,D2) +# define STD(R1,D2,X2,B2) RX_(0x60,R1,X2,B2,D2) +# define STEY(R1,D2,X2,B2) RXY_(0xED,R1,X2,B2,D2,0x66) +# define STDY(R1,D2,X2,B2) RXY_(0xED,R1,X2,B2,D2,0x67) +/**************************************************************** + * Hexadecimal Floating Point Instructions * + ****************************************************************/ +/* ADD NORMALIZED */ +# define AER(R1,R2) RR_(0x3A,R1,R2) +# define ADR(R1,R2) RR_(0x2A,R1,R2) +# define AXR(R1,R2) RR_(0x36,R1,R2) +# define AE(R1,D2,X2,B2) RX_(0x7A,R1,X2,B2,D2) +# define AD(R1,D2,X2,B2) RX_(0x6A,R1,X2,B2,D2) +/* ADD UNNORMALIZED */ +# define AUR(R1,R2) RR_(0x3E,R1,R2) +# define AWR(R1,R2) RR_(0x2E,R1,R2) +# define AU(R1,D2,X2,B2) RX_(0x7E,R1,X2,B2,D2) +# define AW(R1,D2,X2,B2) RX_(0x6E,R1,X2,B2,D2) +/* COMPARE */ +# define CER(R1,R2) RR_(0x39,R1,R2) +# define CDR(R1,R2) RR_(0x29,R1,R2) +# define CXR(R1,R2) 
RRE_(0xB369,R1,R2) +# define CE(R1,D2,X2,B2) RX_(0x79,R1,X2,B2,D2) +# define CD(R1,D2,X2,B2) RX_(0x69,R1,X2,B2,D2) +/* CONVERT FROM FIXED */ +# define CEFR(R1,R2) RRE_(0xB3B4,R1,R2) +# define CDFR(R1,R2) RRE_(0xB3B5,R1,R2) +# define CXFR(R1,R2) RRE_(0xB3B6,R1,R2) +# define CEGR(R1,R2) RRE_(0xB3C4,R1,R2) +# define CDGR(R1,R2) RRE_(0xB3C5,R1,R2) +# define CXGR(R1,R2) RRE_(0xB3C6,R1,R2) +/* CONVERT TO FIXED */ +# define CFER(R1,R2) RRE_(0xB3B8,R1,R2) +# define CFDR(R1,R2) RRE_(0xB3B9,R1,R2) +# define CFXR(R1,R2) RRE_(0xB3BA,R1,R2) +# define CGER(R1,R2) RRE_(0xB3C8,R1,R2) +# define CGDR(R1,R2) RRE_(0xB3C9,R1,R2) +# define CGXR(R1,R2) RRE_(0xB3CA,R1,R2) +/* DIVIDE */ +# define DER(R1,R2) RR_(0x3D,R1,R2) +# define DDR(R1,R2) RR_(0x2D,R1,R2) +# define DXR(R1,R2) RRE_(0xB22D,R1,R2) +# define DE(R1,D2,X2,B2) RX_(0x7D,R1,X2,B2,D2) +# define DD(R1,D2,X2,B2) RX_(0x6D,R1,X2,B2,D2) +/* HALVE */ +# define HER(R1,R2) RR_(0x34,R1,R2) +# define HDR(R1,R2) RR_(0x24,R1,R2) +/* LOAD AND TEST */ +# define LTER(R1,R2) RR_(0x32,R1,R2) +# define LTDR(R1,R2) RR_(0x22,R1,R2) +# define LTXR(R1,R2) RRE_(0xB362,R1,R2) +/* LOAD COMPLEMENT */ +# define LCER(R1,R2) RR_(0x33,R1,R2) +# define LCDR(R1,R2) RR_(0x23,R1,R2) +# define LCXR(R1,R2) RRE_(0xB363,R1,R2) +/* LOAD FP INTEGER */ +# define FIER(R1,R2) RRE_(0xB377,R1,R2) +# define FIDR(R1,R2) RRE_(0xB37F,R1,R2) +# define FIXR(R1,R2) RRE_(0xB367,R1,R2) +/* LOAD LENGTHENED */ +# define LDER(R1,R2) RRE_(0xB324,R1,R2) +# define LXDR(R1,R2) RRE_(0xB325,R1,R2) +# define LXER(R1,R2) RRE_(0xB326,R1,R2) +# define LDE(R1,D2,X2,B2) RXE_(0xED,R1,X2,B2,D2,0x24) +# define LXD(R1,D2,X2,B2) RXE_(0xED,R1,X2,B2,D2,0x25) +# define LXE(R1,D2,X2,B2) RXE_(0xED,R1,X2,B2,D2,0x26) +/* LOAD NEGATIVE */ +# define LNER(R1,R2) RR_(0x31,R1,R2) +# define LNDR(R1,R2) RR_(0x21,R1,R2) +# define LNXR(R1,R2) RRE_(0xB361,R1,R2) +/* LOAD POSITIVE */ +# define LPER(R1,R2) RR_(0x30,R1,R2) +# define LPDR(R1,R2) RR_(0x20,R1,R2) +# define LPXR(R1,R2) RRE_(0xB360,R1,R2) +/* LOAD ROUNDED */ 
+# define LEDR(R1,R2) RR_(0x35,R1,R2) +# define LDXR(R1,R2) RR_(0x25,R1,R2) +# define LRER(R1,R2) LEDR(R1,R2) +# define LRDR(R1,R2) LDXR(R1,R2) +# define LRXR(R1,R2) RRE_(0xB366,R1,R2) +/* MULTIPLY */ +# define MEER(R1,R2) RRE_(0xB337,R1,R2) +# define MDR(R1,R2) RR_(0x2C,R1,R2) +# define MXR(R1,R2) RR_(0x26,R1,R2) +# define MDER(R1,R2) RR_(0x3C,R1,R2) +# define MXDR(R1,R2) RR_(0x27,R1,R2) +# define MER(R1,R2) MDER(R1,R2) +# define MEE(R1,D2,X2,B2) RXE_(0xED,R1,X2,B2,D2,0x37) +# define MD(R1,D2,X2,B2) RX_(0x6C,R1,X2,B2,D2) +# define MDE(R1,D2,X2,B2) RX_(0x7C,R1,X2,B2,D2) +# define MXD(R1,D2,X2,B2) RX_(0x67,R1,X2,B2,D2) +# define ME(R1,D2,X2,B2) MDE(R1,D2,X2,B2) +/* MULTIPLY AND ADD */ +# define MAER(R1,R3,R2) RRF_(0xB32E,R1,0,R3,R2) +# define MADR(R1,R3,R2) RRF_(0xB33E,R1,0,R3,R2) +# define MAE(R1,R3,D2,X2,B2) RXF_(0xED,R3,X2,B2,D2,R1,0x2E) +# define MAD(R1,R3,D2,X2,B2) RXF_(0xED,R3,X2,B2,D2,R1,0x3E) +/* MULTIPLY AND SUBTRACT */ +# define MSER(R1,R3,R2) RRF_(0xB32F,R1,0,R3,R2) +# define MSDR(R1,R3,R2) RRF_(0xB33F,R1,0,R3,R2) +# define MSE(R1,R3,D2,X2,B2) RXF_(0xED,R3,X2,B2,D2,R1,0x2F) +# define MSD(R1,R3,D2,X2,B2) RXF_(0xED,R3,X2,B2,D2,R1,0x3F) +/* SQUARE ROOT */ +# define SQER(R1,R2) RRE_(0xB245,R1,R2) +# define SQDR(R1,R2) RRE_(0xB244,R1,R2) +# define SQXR(R1,R2) RRE_(0xB336,R1,R2) +# define SQE(R1,D2,X2,B2) RXE_(0xED,R1,X2,B2,D2,0x34) +# define SQD(R1,D2,X2,B2) RXE_(0xED,R1,X2,B2,D2,0x35) +/* SUBTRACT NORMALIZED */ +# define SER(R1,R2) RR_(0x3B,R1,R2) +# define SDR(R1,R2) RR_(0x2B,R1,R2) +# define SXR(R1,R2) RR_(0x37,R1,R2) +# define SE(R1,D2,X2,B2) RX_(0x7B,R1,X2,B2,D2) +# define SD(R1,D2,X2,B2) RX_(0x6B,R1,X2,B2,D2) +/* SUBTRACT UNNORMALIZED */ +# define SUR(R1,R2) RR_(0x3F,R1,R2) +# define SWR(R1,R2) RR_(0x2F,R1,R2) +# define SU(R1,D2,X2,B2) RX_(0x7F,R1,X2,B2,D2) +# define SW(R1,D2,X2,B2) RX_(0x6F,R1,X2,B2,D2) +/**************************************************************** + * Binary Floating Point Instructions * + 
****************************************************************/ +/* ADD */ +# define AEBR(R1,R2) RRE_(0xB30A,R1,R2) +# define ADBR(R1,R2) RRE_(0xB31A,R1,R2) +# define AXBR(R1,R2) RRE_(0xB34A,R1,R2) +# define AEB(R1,D2,X2,B2) RXE_(0xED,R1,X2,B2,D2,0x0A) +# define ADB(R1,D2,X2,B2) RXE_(0xED,R1,X2,B2,D2,0x1A) +/* COMPARE */ +# define CEBR(R1,R2) RRE_(0xB309,R1,R2) +# define CDBR(R1,R2) RRE_(0xB319,R1,R2) +# define CXBR(R1,R2) RRE_(0xB349,R1,R2) +# define CEB(R1,D2,X2,B2) RXE_(0xED,R1,X2,B2,D2,0x09) +# define CDB(R1,D2,X2,B2) RXE_(0xED,R1,X2,B2,D2,0x19) +/* COMPARE AND SIGNAL */ +# define KEBR(R1,R2) RRE_(0xB308,R1,R2) +# define KDBR(R1,R2) RRE_(0xB318,R1,R2) +# define KXBR(R1,R2) RRE_(0xB348,R1,R2) +# define KEB(R1,D2,X2,B2) RXE_(0xED,R1,X2,B2,D2,0x08) +# define KDB(R1,D2,X2,B2) RXE_(0xED,R1,X2,B2,D2,0x18) +/* CONVERT FROM FIXED */ +# define CEFBR(R1,R2) RRE_(0xB394,R1,R2) +# define CDFBR(R1,R2) RRE_(0xB395,R1,R2) +# define CXFBR(R1,R2) RRE_(0xB396,R1,R2) +# define CEGBR(R1,R2) RRE_(0xB3A4,R1,R2) +# define CDGBR(R1,R2) RRE_(0xB3A5,R1,R2) +# define CXGBR(R1,R2) RRE_(0xB3A6,R1,R2) +/* CONVERT TO FIXED */ +# define CFEBR(R1,M3,R2) RRF_(0xB398,M3,0,R1,R2) +# define CFDBR(R1,M3,R2) RRF_(0xB399,M3,0,R1,R2) +# define CFXBR(R1,M3,R2) RRF_(0xB39A,M3,0,R1,R2) +# define CGEBR(R1,M3,R2) RRF_(0xB3A8,M3,0,R1,R2) +# define CGDBR(R1,M3,R2) RRF_(0xB3A9,M3,0,R1,R2) +# define CGXBR(R1,M3,R2) RRF_(0xB3AA,M3,0,R1,R2) +/* DIVIDE */ +# define DEBR(R1,R2) RRE_(0xB30D,R1,R2) +# define DDBR(R1,R2) RRE_(0xB31D,R1,R2) +# define DXBR(R1,R2) RRE_(0xB34D,R1,R2) +# define DEB(R1,D2,X2,B2) RXE_(0xED,R1,X2,B2,D2,0x0D) +# define DDB(R1,D2,X2,B2) RXE_(0xED,R1,X2,B2,D2,0x1D) +/* DIVIDE TO INTEGER */ +# define DIEBR(R1,R3,R2,M4) RRF_(0xB353,R3,M4,R1,R2) +# define DIDBR(R1,R3,R2,M4) RRF_(0xB35B,R3,M4,R1,R2) +/* EXTRACT FPC */ +# define EFPC(R1) RRE_(0xB38C,R1,0) +/* LOAD AND TEST */ +# define LTEBR(R1,R2) RRE_(0xB302,R1,R2) +# define LTDBR(R1,R2) RRE_(0xB312,R1,R2) +# define LTXBR(R1,R2) 
RRE_(0xB342,R1,R2) +/* LOAD COMPLEMENT */ +# define LCEBR(R1,R2) RRE_(0xB303,R1,R2) +# define LCDBR(R1,R2) RRE_(0xB313,R1,R2) +# define LCXBR(R1,R2) RRE_(0xB343,R1,R2) +/* LOAD FP INTEGER */ +# define FIEBR(R1,M3,R2) RRF_(0xB357,M3,0,R1,R2) +# define FIDBR(R1,M3,R2) RRF_(0xB35F,M3,0,R1,R2) +# define FIXBR(R1,M3,R2) RRF_(0xB347,M3,0,R1,R2) +/* LOAD FPC */ +# define LFPC(D2,B2) S_(0xB29D,B2,D2) +/* LOAD LENGTHENED */ +# define LDEBR(R1,R2) RRE_(0xB304,R1,R2) +# define LXDBR(R1,R2) RRE_(0xB305,R1,R2) +# define LXEBR(R1,R2) RRE_(0xB306,R1,R2) +# define LDEB(R1,D2,X2,B2) RXE_(0xED,R1,X2,B2,D2,0x04) +# define LXDB(R1,D2,X2,B2) RXE_(0xED,R1,X2,B2,D2,0x05) +# define LXEB(R1,D2,X2,B2) RXE_(0xED,R1,X2,B2,D2,0x06) +/* LOAD NEGATIVE */ +# define LNEBR(R1,R2) RRE_(0xB301,R1,R2) +# define LNDBR(R1,R2) RRE_(0xB311,R1,R2) +# define LNXBR(R1,R2) RRE_(0xB341,R1,R2) +/* LOAD POSITIVE */ +# define LPEBR(R1,R2) RRE_(0xB300,R1,R2) +# define LPDBR(R1,R2) RRE_(0xB310,R1,R2) +# define LPXBR(R1,R2) RRE_(0xB340,R1,R2) +/* LOAD ROUNDED */ +# define LEDBR(R1,R2) RRE_(0xB344,R1,R2) +# define LDXBR(R1,R2) RRE_(0xB345,R1,R2) +# define LEXBR(R1,R2) RRE_(0xB346,R1,R2) +/* MULTIPLY */ +# define MEEBR(R1,R2) RRE_(0xB317,R1,R2) +# define MDBR(R1,R2) RRE_(0xB31C,R1,R2) +# define MXBR(R1,R2) RRE_(0xB34C,R1,R2) +# define MDEBR(R1,R2) RRE_(0xB30C,R1,R2) +# define MXDBR(R1,R2) RRE_(0xB307,R1,R2) +# define MEEB(R1,D2,X2,B2) RXE_(0xED,R1,X2,B2,D2,0x17) +# define MDB(R1,D2,X2,B2) RXE_(0xED,R1,X2,B2,D2,0x1C) +# define MDEB(R1,D2,X2,B2) RXE_(0xED,R1,X2,B2,D2,0x0C) +# define MXDB(R1,D2,X2,B2) RXE_(0xED,R1,X2,B2,D2,0x07) +/* MULTIPLY AND ADD */ +# define MAEBR(R1,R3,R2) RRF_(0xB30E,R1,0,R3,R2) +# define MADBR(R1,R3,R2) RRF_(0xB31E,R1,0,R3,R2) +# define MAEB(R1,R3,D2,X2,B2) RXF_(0xED,R3,X2,B2,D2,R1,0x0E) +# define MADB(R1,R3,D2,X2,B2) RXF_(0xED,R3,X2,B2,D2,R1,0x1E) +/* MULTIPLY AND SUBTRACT */ +# define MSEBR(R1,R3,R2) RRF_(0xB30F,R1,0,R3,R2) +# define MSDBR(R1,R3,R2) RRF_(0xB31F,R1,0,R3,R2) +# define 
MSEB(R1,R3,D2,X2,B2) RXF_(0xED,R3,X2,B2,D2,R1,0x0F) +# define MSDB(R1,R3,D2,X2,B2) RXF_(0xED,R3,X2,B2,D2,R1,0x1F) +/* SET FPC */ +# define SFPC(R1) RRE_(0xB384,R1,0) +/* SET ROUNDING MODE */ +# define SRNM(D2,B2) S_(0xB299,B2,D2) +/* SQUARE ROOT */ +# define SQEBR(R1,R2) RRE_(0xB314,R1,R2) +# define SQDBR(R1,R2) RRE_(0xB315,R1,R2) +# define SQXBR(R1,R2) RRE_(0xB316,R1,R2) +/* STORE FPC */ +# define STFPC(D2,B2) S_(0xB29C,B2,D2) +/* SUBTRACT */ +# define SEBR(R1,R2) RRE_(0xB30B,R1,R2) +# define SDBR(R1,R2) RRE_(0xB31B,R1,R2) +# define SXBR(R1,R2) RRE_(0xB34B,R1,R2) +# define SEB(R1,D2,X2,B2) RXE_(0xED,R1,X2,B2,D2,0x0B) +# define SDB(R1,D2,X2,B2) RXE_(0xED,R1,X2,B2,D2,0x1B) +/* TEST DATA CLASS */ +# define TCEB(R1,D2,X2,B2) RXE_(0xED,R1,X2,B2,D2,0x10) +# define TCDB(R1,D2,X2,B2) RXE_(0xED,R1,X2,B2,D2,0x11) +# define TCXB(R1,D2,X2,B2) RXE_(0xED,R1,X2,B2,D2,0x12) +# define fp(code,r0,r1,i0) _fp(_jit,jit_code_##code##i_f,r0,r1,i0) +static void _fp(jit_state_t*,jit_code_t, + int32_t,int32_t,jit_float32_t*); +# define dp(code,r0,r1,i0) _dp(_jit,jit_code_##code##i_d,r0,r1,i0) +static void _dp(jit_state_t*,jit_code_t, + int32_t,int32_t,jit_float64_t*); +# define fr(cc,r0,r1,r2) _fr(_jit,cc,r0,r1,r2) +static void _fr(jit_state_t*,int32_t, + int32_t,int32_t,int32_t); +# define dr(cc,r0,r1,r2) _dr(_jit,cc,r0,r1,r2) +static void _dr(jit_state_t*,int32_t, + int32_t,int32_t,int32_t); +# define fi(cc,r0,r1,i0) _fi(_jit,cc,r0,r1,i0) +static void _fi(jit_state_t*,int32_t, + int32_t,int32_t,jit_float32_t*); +# define di(cc,r0,r1,i0) _di(_jit,cc,r0,r1,i0) +static void _di(jit_state_t*,int32_t, + int32_t,int32_t,jit_float64_t*); +# define bfr(cc,i0,r0,r1) _bfr(_jit,cc,i0,r0,r1) +static void _bfr(jit_state_t*,int32_t, + jit_word_t,int32_t,int32_t); +# define bdr(cc,i0,r0,r1) _bdr(_jit,cc,i0,r0,r1) +static void _bdr(jit_state_t*,int32_t, + jit_word_t,int32_t,int32_t); +# define bfr_p(cc,i0,r0,r1) _bfr_p(_jit,cc,i0,r0,r1) +static jit_word_t _bfr_p(jit_state_t*,int32_t, + 
jit_word_t,int32_t,int32_t); +# define bdr_p(cc,i0,r0,r1) _bdr_p(_jit,cc,i0,r0,r1) +static jit_word_t _bdr_p(jit_state_t*,int32_t, + jit_word_t,int32_t,int32_t); +# define bfi(cc,i0,r0,i1) _bfi(_jit,cc,i0,r0,i1) +static void _bfi(jit_state_t*,int32_t, + jit_word_t,int32_t,jit_float32_t*); +# define bdi(cc,i0,r0,i1) _bdi(_jit,cc,i0,r0,i1) +static void _bdi(jit_state_t*,int32_t, + jit_word_t,int32_t,jit_float64_t*); +# define bfi_p(cc,i0,r0,i1) _bfi_p(_jit,cc,i0,r0,i1) +static jit_word_t _bfi_p(jit_state_t*,int32_t, + jit_word_t,int32_t,jit_float32_t*); +# define bdi_p(cc,i0,r0,i1) _bdi_p(_jit,cc,i0,r0,i1) +static jit_word_t _bdi_p(jit_state_t*,int32_t, + jit_word_t,int32_t,jit_float64_t*); +# define buneqr(db,i0,r0,r1) _buneqr(_jit,db,i0,r0,r1) +static jit_word_t _buneqr(jit_state_t*,int32_t, + jit_word_t,int32_t,int32_t); +# define buneqi(db,i0,r0,i1) _buneqi(_jit,db,i0,r0,(jit_word_t)i1) +static jit_word_t _buneqi(jit_state_t*,int32_t, + jit_word_t,int32_t,jit_word_t); +# define bltgtr(db,i0,r0,r1) _bltgtr(_jit,db,i0,r0,r1) +static jit_word_t _bltgtr(jit_state_t*,int32_t, + jit_word_t,int32_t,int32_t); +# define bltgti(db,i0,r0,i1) _bltgti(_jit,db,i0,r0,(jit_word_t)i1) +static jit_word_t _bltgti(jit_state_t*,int32_t, + jit_word_t,int32_t,jit_word_t); +# define movr_f(r0,r1) _movr_f(_jit,r0,r1) +static void _movr_f(jit_state_t*,int32_t,int32_t); +# define movi_f(r0,i0) _movi_f(_jit,r0,i0) +static void _movi_f(jit_state_t*,int32_t,jit_float32_t*); +# define movr_d(r0,r1) _movr_d(_jit,r0,r1) +static void _movr_d(jit_state_t*,int32_t,int32_t); +# define movi_d(r0,i0) _movi_d(_jit,r0,i0) +static void _movi_d(jit_state_t*,int32_t,jit_float64_t*); +# define absr_f(r0,r1) LPEBR(r0,r1) +# define absr_d(r0,r1) LPDBR(r0,r1) +# define negr_f(r0,r1) LCEBR(r0,r1) +# define negr_d(r0,r1) LCDBR(r0,r1) +# define sqrtr_f(r0,r1) SQEBR(r0,r1) +# define sqrtr_d(r0,r1) SQDBR(r0,r1) +# define truncr_f_i(r0,r1) CFEBR(r0,RND_ZERO,r1) +# define truncr_d_i(r0,r1) CFDBR(r0,RND_ZERO,r1) +# if 
__WORDSIZE == 64 +# define truncr_f_l(r0,r1) CGEBR(r0,RND_ZERO,r1) +# define truncr_d_l(r0,r1) CGDBR(r0,RND_ZERO,r1) +# endif +# if __WORDSIZE == 32 +# define extr_f(r0,r1) CEFBR(r0,r1) +# define extr_d(r0,r1) CDFBR(r0,r1) +# else +# define extr_f(r0,r1) CEGBR(r0,r1) +# define extr_d(r0,r1) CDGBR(r0,r1) +# endif +# define extr_d_f(r0,r1) LEDBR(r0,r1) +# define extr_f_d(r0,r1) LDEBR(r0,r1) +# define addr_f(r0,r1,r2) _addr_f(_jit,r0,r1,r2) +static void _addr_f(jit_state_t*,int32_t,int32_t,int32_t); +# define addi_f(r0,r1,i0) fp(add,r0,r1,i0) +# define addr_d(r0,r1,r2) _addr_d(_jit,r0,r1,r2) +static void _addr_d(jit_state_t*,int32_t,int32_t,int32_t); +# define addi_d(r0,r1,i0) dp(add,r0,r1,i0) +# define subr_f(r0,r1,r2) _subr_f(_jit,r0,r1,r2) +static void _subr_f(jit_state_t*,int32_t,int32_t,int32_t); +# define subi_f(r0,r1,i0) fp(sub,r0,r1,i0) +# define subr_d(r0,r1,r2) _subr_d(_jit,r0,r1,r2) +static void _subr_d(jit_state_t*,int32_t,int32_t,int32_t); +# define subi_d(r0,r1,i0) dp(sub,r0,r1,i0) +# define rsbr_f(r0,r1,r2) subr_f(r0,r2,r1) +# define rsbi_f(r0,r1,i0) fp(rsb,r0,r1,i0) +# define rsbr_d(r0,r1,r2) subr_d(r0,r2,r1) +# define rsbi_d(r0,r1,i0) dp(rsb,r0,r1,i0) +# define mulr_f(r0,r1,r2) _mulr_f(_jit,r0,r1,r2) +static void _mulr_f(jit_state_t*,int32_t,int32_t,int32_t); +# define muli_f(r0,r1,i0) fp(mul,r0,r1,i0) +# define mulr_d(r0,r1,r2) _mulr_d(_jit,r0,r1,r2) +static void _mulr_d(jit_state_t*,int32_t,int32_t,int32_t); +# define muli_d(r0,r1,i0) dp(mul,r0,r1,i0) +# define divr_f(r0,r1,r2) _divr_f(_jit,r0,r1,r2) +static void _divr_f(jit_state_t*,int32_t,int32_t,int32_t); +# define divi_f(r0,r1,i0) fp(div,r0,r1,i0) +# define divr_d(r0,r1,r2) _divr_d(_jit,r0,r1,r2) +static void _divr_d(jit_state_t*,int32_t,int32_t,int32_t); +# define divi_d(r0,r1,i0) dp(div,r0,r1,i0) +# define ldr_f(r0,r1) LE(r0,0,0,r1) +# define ldr_d(r0,r1) LD(r0,0,0,r1) +# define ldi_f(r0,i0) _ldi_f(_jit,r0,i0) +static void _ldi_f(jit_state_t*,int32_t,jit_word_t); +# define ldi_d(r0,i0) 
_ldi_d(_jit,r0,i0) +static void _ldi_d(jit_state_t*,int32_t,jit_word_t); +# define ldxr_f(r0,r1,r2) _ldxr_f(_jit,r0,r1,r2) +static void _ldxr_f(jit_state_t*,int32_t,int32_t,int32_t); +# define ldxr_d(r0,r1,r2) _ldxr_d(_jit,r0,r1,r2) +static void _ldxr_d(jit_state_t*,int32_t,int32_t,int32_t); +# define ldxi_f(r0,r1,i0) _ldxi_f(_jit,r0,r1,i0) +static void _ldxi_f(jit_state_t*,int32_t,int32_t,jit_word_t); +# define ldxi_d(r0,r1,i0) _ldxi_d(_jit,r0,r1,i0) +static void _ldxi_d(jit_state_t*,int32_t,int32_t,jit_word_t); +# define str_f(r0,r1) STE(r1,0,0,r0) +# define str_d(r0,r1) STD(r1,0,0,r0) +# define sti_f(i0,r0) _sti_f(_jit,i0,r0) +static void _sti_f(jit_state_t*,jit_word_t,int32_t); +# define sti_d(i0,r0) _sti_d(_jit,i0,r0) +static void _sti_d(jit_state_t*,jit_word_t,int32_t); +# define stxr_f(r0,r1,r2) _stxr_f(_jit,r0,r1,r2) +static void _stxr_f(jit_state_t*,int32_t,int32_t,int32_t); +# define stxr_d(r0,r1,r2) _stxr_d(_jit,r0,r1,r2) +static void _stxr_d(jit_state_t*,int32_t,int32_t,int32_t); +# define stxi_f(i0,r0,r1) _stxi_f(_jit,i0,r0,r1) +static void _stxi_f(jit_state_t*,jit_word_t,int32_t,int32_t); +# define stxi_d(i0,r0,r1) _stxi_d(_jit,i0,r0,r1) +static void _stxi_d(jit_state_t*,jit_word_t,int32_t,int32_t); +# define ltr_f(r0,r1,r2) fr(CC_L,r0,r1,r2) +# define ltr_d(r0,r1,r2) dr(CC_L,r0,r1,r2) +# define lti_f(r0,r1,i0) fi(CC_L,r0,r1,i0) +# define lti_d(r0,r1,i0) di(CC_L,r0,r1,i0) +# define ler_f(r0,r1,r2) fr(CC_LE,r0,r1,r2) +# define ler_d(r0,r1,r2) dr(CC_LE,r0,r1,r2) +# define lei_f(r0,r1,i0) fi(CC_LE,r0,r1,i0) +# define lei_d(r0,r1,i0) di(CC_LE,r0,r1,i0) +# define eqr_f(r0,r1,r2) fr(CC_E,r0,r1,r2) +# define eqr_d(r0,r1,r2) dr(CC_E,r0,r1,r2) +# define eqi_f(r0,r1,i0) fi(CC_E,r0,r1,i0) +# define eqi_d(r0,r1,i0) di(CC_E,r0,r1,i0) +# define ger_f(r0,r1,r2) fr(CC_HE,r0,r1,r2) +# define ger_d(r0,r1,r2) dr(CC_HE,r0,r1,r2) +# define gei_f(r0,r1,i0) fi(CC_HE,r0,r1,i0) +# define gei_d(r0,r1,i0) di(CC_HE,r0,r1,i0) +# define gtr_f(r0,r1,r2) fr(CC_H,r0,r1,r2) +# define 
gtr_d(r0,r1,r2) dr(CC_H,r0,r1,r2) +# define gti_f(r0,r1,i0) fi(CC_H,r0,r1,i0) +# define gti_d(r0,r1,i0) di(CC_H,r0,r1,i0) +# define ner_f(r0,r1,r2) fr(CC_NE,r0,r1,r2) +# define ner_d(r0,r1,r2) dr(CC_NE,r0,r1,r2) +# define nei_f(r0,r1,i0) fi(CC_NE,r0,r1,i0) +# define nei_d(r0,r1,i0) di(CC_NE,r0,r1,i0) +# define unltr_f(r0,r1,r2) fr(CC_NHE,r0,r1,r2) +# define unltr_d(r0,r1,r2) dr(CC_NHE,r0,r1,r2) +# define unlti_f(r0,r1,i0) fi(CC_NHE,r0,r1,i0) +# define unlti_d(r0,r1,i0) di(CC_NHE,r0,r1,i0) +# define unler_f(r0,r1,r2) fr(CC_NH,r0,r1,r2) +# define unler_d(r0,r1,r2) dr(CC_NH,r0,r1,r2) +# define unlei_f(r0,r1,i0) fi(CC_NH,r0,r1,i0) +# define unlei_d(r0,r1,i0) di(CC_NH,r0,r1,i0) +# define uneqr_f(r0,r1,r2) _uneqr_f(_jit,r0,r1,r2) +static void _uneqr_f(jit_state_t*,int32_t,int32_t,int32_t); +# define uneqr_d(r0,r1,r2) _uneqr_d(_jit,r0,r1,r2) +static void _uneqr_d(jit_state_t*,int32_t,int32_t,int32_t); +# define uneqi_f(r0,r1,i0) fp(uneq,r0,r1,i0) +# define uneqi_d(r0,r1,i0) dp(uneq,r0,r1,i0) +# define unger_f(r0,r1,r2) fr(CC_NL,r0,r1,r2) +# define unger_d(r0,r1,r2) dr(CC_NL,r0,r1,r2) +# define ungei_f(r0,r1,i0) fi(CC_NL,r0,r1,i0) +# define ungei_d(r0,r1,i0) di(CC_NL,r0,r1,i0) +# define ungtr_f(r0,r1,r2) fr(CC_NLE,r0,r1,r2) +# define ungtr_d(r0,r1,r2) dr(CC_NLE,r0,r1,r2) +# define ungti_f(r0,r1,i0) fi(CC_NLE,r0,r1,i0) +# define ungti_d(r0,r1,i0) di(CC_NLE,r0,r1,i0) +# define ltgtr_f(r0,r1,r2) _ltgtr_f(_jit,r0,r1,r2) +static void _ltgtr_f(jit_state_t*,int32_t,int32_t,int32_t); +# define ltgtr_d(r0,r1,r2) _ltgtr_d(_jit,r0,r1,r2) +static void _ltgtr_d(jit_state_t*,int32_t,int32_t,int32_t); +# define ltgti_f(r0,r1,i0) fp(ltgt,r0,r1,i0) +# define ltgti_d(r0,r1,i0) dp(ltgt,r0,r1,i0) +# define ordr_f(r0,r1,r2) fr(CC_NO,r0,r1,r2) +# define ordr_d(r0,r1,r2) dr(CC_NO,r0,r1,r2) +# define ordi_f(r0,r1,i0) fi(CC_NO,r0,r1,i0) +# define ordi_d(r0,r1,i0) di(CC_NO,r0,r1,i0) +# define unordr_f(r0,r1,r2) fr(CC_O,r0,r1,r2) +# define unordr_d(r0,r1,r2) dr(CC_O,r0,r1,r2) +# define 
unordi_f(r0,r1,i0) fi(CC_O,r0,r1,i0) +# define unordi_d(r0,r1,i0) di(CC_O,r0,r1,i0) +# define bltr_f(i0,r0,r1) bfr(CC_L,i0,r0,r1) +# define bltr_d(i0,r0,r1) bdr(CC_L,i0,r0,r1) +# define blti_f(i0,r0,i1) bfi(CC_L,i0,r0,i1) +# define blti_d(i0,r0,i1) bdi(CC_L,i0,r0,i1) +# define bltr_f_p(i0,r0,r1) bfr_p(CC_L,i0,r0,r1) +# define bltr_d_p(i0,r0,r1) bdr_p(CC_L,i0,r0,r1) +# define blti_f_p(i0,r0,i1) bfi_p(CC_L,i0,r0,i1) +# define blti_d_p(i0,r0,i1) bdi_p(CC_L,i0,r0,i1) +# define bler_f(i0,r0,r1) bfr(CC_LE,i0,r0,r1) +# define bler_d(i0,r0,r1) bdr(CC_LE,i0,r0,r1) +# define blei_f(i0,r0,i1) bfi(CC_LE,i0,r0,i1) +# define blei_d(i0,r0,i1) bdi(CC_LE,i0,r0,i1) +# define bler_f_p(i0,r0,r1) bfr_p(CC_LE,i0,r0,r1) +# define bler_d_p(i0,r0,r1) bdr_p(CC_LE,i0,r0,r1) +# define blei_f_p(i0,r0,i1) bfi_p(CC_LE,i0,r0,i1) +# define blei_d_p(i0,r0,i1) bdi_p(CC_LE,i0,r0,i1) +# define beqr_f(i0,r0,r1) bfr(CC_E,i0,r0,r1) +# define beqr_d(i0,r0,r1) bdr(CC_E,i0,r0,r1) +# define beqi_f(i0,r0,i1) bfi(CC_E,i0,r0,i1) +# define beqi_d(i0,r0,i1) bdi(CC_E,i0,r0,i1) +# define beqr_f_p(i0,r0,r1) bfr_p(CC_E,i0,r0,r1) +# define beqr_d_p(i0,r0,r1) bdr_p(CC_E,i0,r0,r1) +# define beqi_f_p(i0,r0,i1) bfi_p(CC_E,i0,r0,i1) +# define beqi_d_p(i0,r0,i1) bdi_p(CC_E,i0,r0,i1) +# define bger_f(i0,r0,r1) bfr(CC_HE,i0,r0,r1) +# define bger_d(i0,r0,r1) bdr(CC_HE,i0,r0,r1) +# define bgei_f(i0,r0,i1) bfi(CC_HE,i0,r0,i1) +# define bgei_d(i0,r0,i1) bdi(CC_HE,i0,r0,i1) +# define bger_f_p(i0,r0,r1) bfr_p(CC_HE,i0,r0,r1) +# define bger_d_p(i0,r0,r1) bdr_p(CC_HE,i0,r0,r1) +# define bgei_f_p(i0,r0,i1) bfi_p(CC_HE,i0,r0,i1) +# define bgei_d_p(i0,r0,i1) bdi_p(CC_HE,i0,r0,i1) +# define bgtr_f(i0,r0,r1) bfr(CC_H,i0,r0,r1) +# define bgtr_d(i0,r0,r1) bdr(CC_H,i0,r0,r1) +# define bgti_f(i0,r0,i1) bfi(CC_H,i0,r0,i1) +# define bgti_d(i0,r0,i1) bdi(CC_H,i0,r0,i1) +# define bgtr_f_p(i0,r0,r1) bfr_p(CC_H,i0,r0,r1) +# define bgtr_d_p(i0,r0,r1) bdr_p(CC_H,i0,r0,r1) +# define bgti_f_p(i0,r0,i1) bfi_p(CC_H,i0,r0,i1) +# define bgti_d_p(i0,r0,i1) 
bdi_p(CC_H,i0,r0,i1) +# define bner_f(i0,r0,r1) bfr(CC_NE,i0,r0,r1) +# define bner_d(i0,r0,r1) bdr(CC_NE,i0,r0,r1) +# define bnei_f(i0,r0,i1) bfi(CC_NE,i0,r0,i1) +# define bnei_d(i0,r0,i1) bdi(CC_NE,i0,r0,i1) +# define bner_f_p(i0,r0,r1) bfr_p(CC_NE,i0,r0,r1) +# define bner_d_p(i0,r0,r1) bdr_p(CC_NE,i0,r0,r1) +# define bnei_f_p(i0,r0,i1) bfi_p(CC_NE,i0,r0,i1) +# define bnei_d_p(i0,r0,i1) bdi_p(CC_NE,i0,r0,i1) +# define bunltr_f(i0,r0,r1) bfr(CC_NHE,i0,r0,r1) +# define bunltr_d(i0,r0,r1) bdr(CC_NHE,i0,r0,r1) +# define bunlti_f(i0,r0,i1) bfi(CC_NHE,i0,r0,i1) +# define bunlti_d(i0,r0,i1) bdi(CC_NHE,i0,r0,i1) +# define bunltr_f_p(i0,r0,r1) bfr_p(CC_NHE,i0,r0,r1) +# define bunltr_d_p(i0,r0,r1) bdr_p(CC_NHE,i0,r0,r1) +# define bunlti_f_p(i0,r0,i1) bfi_p(CC_NHE,i0,r0,i1) +# define bunlti_d_p(i0,r0,i1) bdi_p(CC_NHE,i0,r0,i1) +# define bunler_f(i0,r0,r1) bfr(CC_NH,i0,r0,r1) +# define bunler_d(i0,r0,r1) bdr(CC_NH,i0,r0,r1) +# define bunlei_f(i0,r0,i1) bfi(CC_NH,i0,r0,i1) +# define bunlei_d(i0,r0,i1) bdi(CC_NH,i0,r0,i1) +# define bunler_f_p(i0,r0,r1) bfr_p(CC_NH,i0,r0,r1) +# define bunler_d_p(i0,r0,r1) bdr_p(CC_NH,i0,r0,r1) +# define bunlei_f_p(i0,r0,i1) bfi_p(CC_NH,i0,r0,i1) +# define bunlei_d_p(i0,r0,i1) bdi_p(CC_NH,i0,r0,i1) +# define buneqr_f(i0,r0,r1) buneqr(0,i0,r0,r1) +# define buneqr_d(i0,r0,r1) buneqr(1,i0,r0,r1) +# define buneqi_f(i0,r0,i1) buneqi(0,i0,r0,i1) +# define buneqi_d(i0,r0,i1) buneqi(1,i0,r0,i1) +# define buneqr_f_p(i0,r0,r1) buneqr(0,i0,r0,r1) +# define buneqr_d_p(i0,r0,r1) buneqr(1,i0,r0,r1) +# define buneqi_f_p(i0,r0,i1) buneqi(0,i0,r0,i1) +# define buneqi_d_p(i0,r0,i1) buneqi(1,i0,r0,i1) +# define bunger_f(i0,r0,r1) bfr(CC_NL,i0,r0,r1) +# define bunger_d(i0,r0,r1) bdr(CC_NL,i0,r0,r1) +# define bungei_f(i0,r0,i1) bfi(CC_NL,i0,r0,i1) +# define bungei_d(i0,r0,i1) bdi(CC_NL,i0,r0,i1) +# define bunger_f_p(i0,r0,r1) bfr_p(CC_NL,i0,r0,r1) +# define bunger_d_p(i0,r0,r1) bdr_p(CC_NL,i0,r0,r1) +# define bungei_f_p(i0,r0,i1) bfi_p(CC_NL,i0,r0,i1) +# define 
bungei_d_p(i0,r0,i1) bdi_p(CC_NL,i0,r0,i1) +# define bungtr_f(i0,r0,r1) bfr(CC_NLE,i0,r0,r1) +# define bungtr_d(i0,r0,r1) bdr(CC_NLE,i0,r0,r1) +# define bungti_f(i0,r0,i1) bfi(CC_NLE,i0,r0,i1) +# define bungti_d(i0,r0,i1) bdi(CC_NLE,i0,r0,i1) +# define bungtr_f_p(i0,r0,r1) bfr_p(CC_NLE,i0,r0,r1) +# define bungtr_d_p(i0,r0,r1) bdr_p(CC_NLE,i0,r0,r1) +# define bungti_f_p(i0,r0,i1) bfi_p(CC_NLE,i0,r0,i1) +# define bungti_d_p(i0,r0,i1) bdi_p(CC_NLE,i0,r0,i1) +# define bltgtr_f(i0,r0,r1) bltgtr(0,i0,r0,r1) +# define bltgtr_d(i0,r0,r1) bltgtr(1,i0,r0,r1) +# define bltgti_f(i0,r0,i1) bltgti(0,i0,r0,i1) +# define bltgti_d(i0,r0,i1) bltgti(1,i0,r0,i1) +# define bltgtr_f_p(i0,r0,r1) bltgtr(0,i0,r0,r1) +# define bltgtr_d_p(i0,r0,r1) bltgtr(1,i0,r0,r1) +# define bltgti_f_p(i0,r0,i1) bltgti(0,i0,r0,i1) +# define bltgti_d_p(i0,r0,i1) bltgti(1,i0,r0,i1) +# define bordr_f(i0,r0,r1) bfr(CC_NO,i0,r0,r1) +# define bordr_d(i0,r0,r1) bdr(CC_NO,i0,r0,r1) +# define bordi_f(i0,r0,i1) bfi(CC_NO,i0,r0,i1) +# define bordi_d(i0,r0,i1) bdi(CC_NO,i0,r0,i1) +# define bordr_f_p(i0,r0,r1) bfr_p(CC_NO,i0,r0,r1) +# define bordr_d_p(i0,r0,r1) bdr_p(CC_NO,i0,r0,r1) +# define bordi_f_p(i0,r0,i1) bfi_p(CC_NO,i0,r0,i1) +# define bordi_d_p(i0,r0,i1) bdi_p(CC_NO,i0,r0,i1) +# define bunordr_f(i0,r0,r1) bfr(CC_O,i0,r0,r1) +# define bunordr_d(i0,r0,r1) bdr(CC_O,i0,r0,r1) +# define bunordi_f(i0,r0,i1) bfi(CC_O,i0,r0,i1) +# define bunordi_d(i0,r0,i1) bdi(CC_O,i0,r0,i1) +# define bunordr_f_p(i0,r0,r1) bfr_p(CC_O,i0,r0,r1) +# define bunordr_d_p(i0,r0,r1) bdr_p(CC_O,i0,r0,r1) +# define bunordi_f_p(i0,r0,i1) bfi_p(CC_O,i0,r0,i1) +# define bunordi_d_p(i0,r0,i1) bdi_p(CC_O,i0,r0,i1) +# define vaarg_d(r0, r1) _vaarg_d(_jit, r0, r1) +static void _vaarg_d(jit_state_t*, int32_t, int32_t); +#endif + +#if CODE +static void +_fp(jit_state_t *_jit, jit_code_t code, + int32_t r0, int32_t r1, jit_float32_t *i0) +{ + int32_t reg; + reg = jit_get_reg(jit_class_fpr); + movi_f(rn(reg), i0); + switch (code) { + case 
jit_code_addi_f: addr_f(r0, r1, rn(reg)); break; + case jit_code_subi_f: subr_f(r0, r1, rn(reg)); break; + case jit_code_rsbi_f: rsbr_f(r0, r1, rn(reg)); break; + case jit_code_muli_f: mulr_f(r0, r1, rn(reg)); break; + case jit_code_divi_f: divr_f(r0, r1, rn(reg)); break; + case jit_code_uneqi_f: uneqr_f(r0, r1, rn(reg)); break; + case jit_code_ltgti_f: ltgtr_f(r0, r1, rn(reg)); break; + default: abort(); + } + jit_unget_reg(reg); +} + +static void +_dp(jit_state_t *_jit, jit_code_t code, + int32_t r0, int32_t r1, jit_float64_t *i0) +{ + int32_t reg; + reg = jit_get_reg(jit_class_fpr); + movi_d(rn(reg), i0); + switch (code) { + case jit_code_addi_d: addr_d(r0, r1, rn(reg)); break; + case jit_code_subi_d: subr_d(r0, r1, rn(reg)); break; + case jit_code_rsbi_d: rsbr_d(r0, r1, rn(reg)); break; + case jit_code_muli_d: mulr_d(r0, r1, rn(reg)); break; + case jit_code_divi_d: divr_d(r0, r1, rn(reg)); break; + case jit_code_uneqi_d: uneqr_d(r0, r1, rn(reg)); break; + case jit_code_ltgti_d: ltgtr_d(r0, r1, rn(reg)); break; + default: abort(); + } + jit_unget_reg(reg); +} + +static void +_fr(jit_state_t *_jit, int32_t cc, + int32_t r0, int32_t r1, int32_t r2) +{ + jit_word_t w; + LGHI(r0, 1); + CEBR(r1, r2); + w = _jit->pc.w; + BRC(cc, 0); + LGHI(r0, 0); + patch_at(w, _jit->pc.w); +} + +static void +_dr(jit_state_t *_jit, int32_t cc, + int32_t r0, int32_t r1, int32_t r2) +{ + jit_word_t w; + LGHI(r0, 1); + CDBR(r1, r2); + w = _jit->pc.w; + BRC(cc, 0); + LGHI(r0, 0); + patch_at(w, _jit->pc.w); +} + +static void +_fi(jit_state_t *_jit, int32_t cc, + int32_t r0, int32_t r1, jit_float32_t *i0) +{ + int32_t reg; + reg = jit_get_reg(jit_class_fpr|jit_class_nospill); + movi_f(rn(reg), i0); + fr(cc, r0, r1, rn(reg)); + jit_unget_reg(reg); +} + +static void +_di(jit_state_t *_jit, int32_t cc, + int32_t r0, int32_t r1, jit_float64_t *i0) +{ + int32_t reg; + reg = jit_get_reg(jit_class_fpr|jit_class_nospill); + movi_d(rn(reg), i0); + dr(cc, r0, r1, rn(reg)); + jit_unget_reg(reg); +} + 
+ +static void +_bfr(jit_state_t *_jit, int32_t cc, + jit_word_t i0, int32_t r0, int32_t r1) +{ + jit_word_t d; + CEBR(r0, r1); + d = (i0 - _jit->pc.w) >> 1; + if (s16_p(d)) + BRC(cc, x16(d)); + else { + assert(s32_p(d)); + BRCL(cc, d); + } +} + +static void +_bdr(jit_state_t *_jit, int32_t cc, + jit_word_t i0, int32_t r0, int32_t r1) +{ + jit_word_t d; + CDBR(r0, r1); + d = (i0 - _jit->pc.w) >> 1; + if (s16_p(d)) + BRC(cc, x16(d)); + else { + assert(s32_p(d)); + BRCL(cc, d); + } +} + +static jit_word_t +_bfr_p(jit_state_t *_jit, int32_t cc, + jit_word_t i0, int32_t r0, int32_t r1) +{ + jit_word_t w; + CEBR(r0, r1); + w = _jit->pc.w; + BRCL(cc, 0); + return (w); +} + +static jit_word_t +_bdr_p(jit_state_t *_jit, int32_t cc, + jit_word_t i0, int32_t r0, int32_t r1) +{ + jit_word_t w; + CDBR(r0, r1); + w = _jit->pc.w; + BRCL(cc, 0); + return (w); +} + +static void +_bfi(jit_state_t *_jit, int32_t cc, + jit_word_t i0, int32_t r0, jit_float32_t *i1) +{ + int32_t reg; + reg = jit_get_reg(jit_class_gpr|jit_class_nospill); + movi_f(rn(reg), i1); + bfr(cc, i0, r0, rn(reg)); + jit_unget_reg(reg); +} + +static void +_bdi(jit_state_t *_jit, int32_t cc, + jit_word_t i0, int32_t r0, jit_float64_t *i1) +{ + int32_t reg; + reg = jit_get_reg(jit_class_gpr|jit_class_nospill); + movi_d(rn(reg), i1); + bdr(cc, i0, r0, rn(reg)); + jit_unget_reg(reg); +} + +static jit_word_t +_bfi_p(jit_state_t *_jit, int32_t cc, + jit_word_t i0, int32_t r0, jit_float32_t *i1) +{ + jit_word_t w; + int32_t reg; + reg = jit_get_reg(jit_class_gpr|jit_class_nospill); + movi_f(rn(reg), i1); + w = bfr_p(cc, i0, r0, rn(reg)); + jit_unget_reg(reg); + return (w); +} + +static jit_word_t +_bdi_p(jit_state_t *_jit, int32_t cc, + jit_word_t i0, int32_t r0, jit_float64_t *i1) +{ + jit_word_t w; + int32_t reg; + reg = jit_get_reg(jit_class_gpr|jit_class_nospill); + movi_d(rn(reg), i1); + w = bdr_p(cc, i0, r0, rn(reg)); + jit_unget_reg(reg); + return (w); +} + +static jit_word_t +_buneqr(jit_state_t *_jit, int32_t 
db, + jit_word_t i0, int32_t r0, int32_t r1) +{ + jit_word_t unord, ne, w; + if (db) CDBR(r0, r1); + else CEBR(r0, r1); + unord = _jit->pc.w; + BRC(CC_O, 0); /* unord satisfies condition */ + ne = _jit->pc.w; + BRC(CC_NE, 0); /* ne does not satisfy condition */ + patch_at(unord, _jit->pc.w); + w = _jit->pc.w; + BRCL(CC_AL, (i0 - _jit->pc.w) >> 1); + patch_at(ne, _jit->pc.w); + return (w); +} + +static jit_word_t +_buneqi(jit_state_t *_jit, int32_t db, + jit_word_t i0, int32_t r0, jit_word_t i1) +{ + jit_word_t w; + int32_t reg; + reg = jit_get_reg(jit_class_fpr|jit_class_nospill); + if (db) + movi_d(rn(reg), (jit_float64_t *)i1); + else + movi_f(rn(reg), (jit_float32_t *)i1); + w = buneqr(db, i0, r0, rn(reg)); + jit_unget_reg(reg); + return (w); +} + +static jit_word_t +_bltgtr(jit_state_t *_jit, int32_t db, + jit_word_t i0, int32_t r0, int32_t r1) +{ + jit_word_t unord, eq, w; + if (db) CDBR(r0, r1); + else CEBR(r0, r1); + unord = _jit->pc.w; + BRC(CC_O, 0); /* unord does not satisfy condition */ + eq = _jit->pc.w; + BRC(CC_E, 0); /* eq does not satisfy condition */ + w = _jit->pc.w; + BRCL(CC_AL, (i0 - _jit->pc.w) >> 1); + patch_at(unord, _jit->pc.w); + patch_at(eq, _jit->pc.w); + return (w); +} + +static jit_word_t +_bltgti(jit_state_t *_jit, int32_t db, + jit_word_t i0, int32_t r0, jit_word_t i1) +{ + jit_word_t w; + int32_t reg; + reg = jit_get_reg(jit_class_fpr|jit_class_nospill); + if (db) + movi_d(rn(reg), (jit_float64_t *)i1); + else + movi_f(rn(reg), (jit_float32_t *)i1); + w = bltgtr(db, i0, r0, rn(reg)); + jit_unget_reg(reg); + return (w); +} + +static void +_movr_f(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + if (r0 != r1) + LER(r0, r1); +} + +static void +_movi_f(jit_state_t *_jit, int32_t r0, jit_float32_t *i0) +{ + union { + int32_t i; + jit_float32_t f; + } data; + int32_t reg; + + if (*(int32_t *)i0 == 0) + LZER(r0); + else if (_jitc->no_data) { + data.f = *i0; + reg = jit_get_reg_but_zero(0); + movi(rn(reg), data.i & 0xffffffff); + stxi_i(-4, 
_FP_REGNO, rn(reg)); + jit_unget_reg_but_zero(reg); + ldxi_f(r0, _FP_REGNO, -4); + } + else + ldi_f(r0, (jit_word_t)i0); +} + +static void +_movr_d(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + if (r0 != r1) + LDR(r0, r1); +} + +static void +_movi_d(jit_state_t *_jit, int32_t r0, jit_float64_t *i0) +{ + union { +#if __WORDSIZE == 32 + int32_t i[2]; +#else + int64_t l; +#endif + jit_float64_t d; + } data; + int32_t reg; + + if (*(int64_t *)i0 == 0) + LZDR(r0); + else if (_jitc->no_data) { + data.d = *i0; + reg = jit_get_reg_but_zero(0); +#if __WORDSIZE == 32 + movi(rn(reg), data.i[0]); + stxi_i(-8, _FP_REGNO, rn(reg)); + movi(rn(reg), data.i[1]); + stxi_i(-4, _FP_REGNO, rn(reg)); +#else + movi(rn(reg), data.l); + stxi_l(-8, _FP_REGNO, rn(reg)); +#endif + jit_unget_reg_but_zero(reg); + ldxi_d(r0, _FP_REGNO, -8); + } + else + ldi_d(r0, (jit_word_t)i0); +} + +static void +_addr_f(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2) +{ + if (r0 == r2) + AEBR(r0, r1); + else { + movr_f(r0, r1); + AEBR(r0, r2); + } +} + +static void +_addr_d(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2) +{ + if (r0 == r2) + ADBR(r0, r1); + else { + movr_d(r0, r1); + ADBR(r0, r2); + } +} + +static void +_subr_f(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2) +{ + int32_t reg; + if (r0 == r2) { + reg = jit_get_reg(jit_class_fpr); + movr_f(rn(reg), r2); + movr_f(r0, r1); + SEBR(r0, rn(reg)); + jit_unget_reg(reg); + } + else { + movr_f(r0, r1); + SEBR(r0, r2); + } +} + +static void +_subr_d(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2) +{ + int32_t reg; + if (r0 == r2) { + reg = jit_get_reg(jit_class_fpr); + movr_d(rn(reg), r2); + movr_d(r0, r1); + SDBR(r0, rn(reg)); + jit_unget_reg(reg); + } + else { + movr_d(r0, r1); + SDBR(r0, r2); + } +} + +static void +_mulr_f(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2) +{ + if (r0 == r2) + MEEBR(r0, r1); + else { + movr_f(r0, r1); + MEEBR(r0, r2); + } +} + +static void +_mulr_d(jit_state_t *_jit, int32_t r0, 
int32_t r1, int32_t r2) +{ + if (r0 == r2) + MDBR(r0, r1); + else { + movr_d(r0, r1); + MDBR(r0, r2); + } +} + +static void +_divr_f(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2) +{ + int32_t reg; + if (r0 == r2) { + reg = jit_get_reg(jit_class_fpr); + movr_f(rn(reg), r2); + movr_f(r0, r1); + DEBR(r0, rn(reg)); + jit_unget_reg(reg); + } + else { + movr_f(r0, r1); + DEBR(r0, r2); + } +} + +static void +_divr_d(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2) +{ + int32_t reg; + if (r0 == r2) { + reg = jit_get_reg(jit_class_fpr); + movr_d(rn(reg), r2); + movr_d(r0, r1); + DDBR(r0, rn(reg)); + jit_unget_reg(reg); + } + else { + movr_d(r0, r1); + DDBR(r0, r2); + } +} + +static void +_ldi_f(jit_state_t *_jit, int32_t r0, jit_word_t i0) +{ + int32_t reg; + reg = jit_get_reg_but_zero(0); + movi(rn(reg), i0); + ldr_f(r0, rn(reg)); + jit_unget_reg_but_zero(reg); +} + +static void +_ldi_d(jit_state_t *_jit, int32_t r0, jit_word_t i0) +{ + int32_t reg; + reg = jit_get_reg_but_zero(0); + movi(rn(reg), i0); + ldr_d(r0, rn(reg)); + jit_unget_reg_but_zero(reg); +} + +static void +_ldxr_f(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2) +{ + int32_t reg; + reg = jit_get_reg_but_zero(0); + movr(rn(reg), r1); + addr(rn(reg), rn(reg), r2); + ldr_f(r0, rn(reg)); + jit_unget_reg_but_zero(reg); +} + +static void +_ldxr_d(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2) +{ + int32_t reg; + reg = jit_get_reg_but_zero(0); + movr(rn(reg), r1); + addr(rn(reg), rn(reg), r2); + ldr_d(r0, rn(reg)); + jit_unget_reg_but_zero(reg); +} + +static void +_ldxi_f(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0) +{ + int32_t reg; + if (u12_p(i0)) + LE(r0, i0, 0, r1); + else if (s20_p(i0)) + LEY(r0, x20(i0), 0, r1); + else { + reg = jit_get_reg_but_zero(0); + movi(rn(reg), i0); + addr(rn(reg), rn(reg), r1); + ldr_f(r0, rn(reg)); + jit_unget_reg_but_zero(reg); + } +} + +static void +_ldxi_d(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0) +{ + int32_t reg; + if 
(u12_p(i0)) + LD(r0, i0, 0, r1); + else if (s20_p(i0)) + LDY(r0, x20(i0), 0, r1); + else { + reg = jit_get_reg_but_zero(0); + movi(rn(reg), i0); + addr(rn(reg), rn(reg), r1); + ldr_d(r0, rn(reg)); + jit_unget_reg_but_zero(reg); + } +} + +static void +_sti_f(jit_state_t *_jit, jit_word_t i0, int32_t r0) +{ + int32_t reg; + reg = jit_get_reg_but_zero(0); + movi(rn(reg), i0); + str_f(rn(reg), r0); + jit_unget_reg_but_zero(reg); +} + +static void +_sti_d(jit_state_t *_jit, jit_word_t i0, int32_t r0) +{ + int32_t reg; + reg = jit_get_reg_but_zero(0); + movi(rn(reg), i0); + str_d(rn(reg), r0); + jit_unget_reg_but_zero(reg); +} + +static void +_stxr_f(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2) +{ + int32_t reg; + reg = jit_get_reg_but_zero(0); + movr(rn(reg), r0); + addr(rn(reg), rn(reg), r1); + str_f(rn(reg), r2); + jit_unget_reg_but_zero(reg); +} + +static void +_stxr_d(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2) +{ + int32_t reg; + reg = jit_get_reg_but_zero(0); + movr(rn(reg), r0); + addr(rn(reg), rn(reg), r1); + str_d(rn(reg), r2); + jit_unget_reg_but_zero(reg); +} + +static void +_stxi_f(jit_state_t *_jit, jit_word_t i0, int32_t r0, int32_t r1) +{ + int32_t reg; + if (u12_p(i0)) + STE(r1, i0, 0, r0); + else if (s20_p(i0)) + STEY(r1, x20(i0), 0, r0); + else { + reg = jit_get_reg_but_zero(0); + movi(rn(reg), i0); + addr(rn(reg), rn(reg), r0); + str_f(rn(reg), r1); + jit_unget_reg_but_zero(reg); + } +} + +static void +_stxi_d(jit_state_t *_jit, jit_word_t i0, int32_t r0, int32_t r1) +{ + int32_t reg; + if (u12_p(i0)) + STD(r1, i0, 0, r0); + else if (s20_p(i0)) + STDY(r1, x20(i0), 0, r0); + else { + reg = jit_get_reg_but_zero(0); + movi(rn(reg), i0); + addr(rn(reg), rn(reg), r0); + str_d(rn(reg), r1); + jit_unget_reg_but_zero(reg); + } +} + +static void +_uneqr_f(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2) +{ + jit_word_t unord, eq; + movi(r0, 1); /* set to one */ + CEBR(r1, r2); + unord = _jit->pc.w; /* keep set to one if unord */ + 
BRC(CC_O, 0); + eq = _jit->pc.w; + BRC(CC_E, 0); /* keep set to one if eq */ + movi(r0, 0); /* set to zero */ + patch_at(unord, _jit->pc.w); + patch_at(eq, _jit->pc.w); +} + +static void +_uneqr_d(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2) +{ + jit_word_t unord, eq; + movi(r0, 1); /* set to one */ + CDBR(r1, r2); + unord = _jit->pc.w; /* keep set to one if unord */ + BRC(CC_O, 0); + eq = _jit->pc.w; + BRC(CC_E, 0); /* keep set to one if eq */ + movi(r0, 0); /* set to zero */ + patch_at(unord, _jit->pc.w); + patch_at(eq, _jit->pc.w); +} + +static void +_ltgtr_f(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2) +{ + jit_word_t unord, eq; + movi(r0, 0); /* set to zero */ + CEBR(r1, r2); + unord = _jit->pc.w; /* keep set to zero if unord */ + BRC(CC_O, 0); + eq = _jit->pc.w; + BRC(CC_E, 0); /* keep set to zero if eq */ + movi(r0, 1); /* set to one */ + patch_at(unord, _jit->pc.w); + patch_at(eq, _jit->pc.w); +} + +static void +_ltgtr_d(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2) +{ + jit_word_t unord, eq; + movi(r0, 0); /* set to zero */ + CDBR(r1, r2); + unord = _jit->pc.w; /* keep set to zero if unord */ + BRC(CC_O, 0); + eq = _jit->pc.w; + BRC(CC_E, 0); /* keep set to zero if eq */ + movi(r0, 1); /* set to one */ + patch_at(unord, _jit->pc.w); + patch_at(eq, _jit->pc.w); +} + +static void +_vaarg_d(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + int32_t rg0; + int32_t rg1; + int32_t rg2; + jit_word_t ge_code; + jit_word_t lt_code; + + assert(_jitc->function->self.call & jit_call_varargs); + + rg0 = jit_get_reg_but_zero(jit_class_gpr); + rg1 = jit_get_reg_but_zero(jit_class_gpr); + + /* Load the fp offset in save area in the first temporary. */ + ldxi(rn(rg0), r1, offsetof(jit_va_list_t, fpoff)); + + /* Jump over if there are no remaining arguments in the save area. */ + ge_code = bgei_p(_jit->pc.w, rn(rg0), NUM_FLOAT_REG_ARGS); + + /* Load the save area pointer in the second temporary. 
*/ + ldxi(rn(rg1), r1, offsetof(jit_va_list_t, save)); + + /* Scale offset. */ + rg2 = jit_get_reg_but_zero(0); + lshi(rn(rg2), rn(rg0), 3); + /* Add offset to saved area */ + addi(rn(rg2), rn(rg2), 16 * sizeof(jit_word_t)); + + /* Load the vararg argument in the first argument. */ + ldxr_d(r0, rn(rg1), rn(rg2)); + jit_unget_reg_but_zero(rg2); + + /* Update the fp offset. */ + addi(rn(rg0), rn(rg0), 1); + stxi(offsetof(jit_va_list_t, fpoff), r1, rn(rg0)); + + /* Will only need one temporary register below. */ + jit_unget_reg_but_zero(rg1); + + /* Jump over overflow code. */ + lt_code = jmpi_p(_jit->pc.w); + + /* Where to land if argument is in overflow area. */ + patch_at(ge_code, _jit->pc.w); + + /* Load overflow pointer. */ + ldxi(rn(rg0), r1, offsetof(jit_va_list_t, over)); + + /* Load argument. */ + ldr_d(r0, rn(rg0)); + + /* Update overflow pointer. */ + addi(rn(rg0), rn(rg0), sizeof(jit_float64_t)); + stxi(offsetof(jit_va_list_t, over), r1, rn(rg0)); + + /* Where to land if argument is in save area. */ + patch_at(lt_code, _jit->pc.w); + + jit_unget_reg_but_zero(rg0); +} +#endif diff --git a/deps/lightening/lightening/s390.c b/deps/lightening/lightening/s390.c new file mode 100644 index 0000000..41e0de4 --- /dev/null +++ b/deps/lightening/lightening/s390.c @@ -0,0 +1,1691 @@ +/* + * Copyright (C) 2013-2019 Free Software Foundation, Inc. + * + * This file is part of GNU lightning. + * + * GNU lightning is free software; you can redistribute it and/or modify it + * under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation; either version 3, or (at your option) + * any later version. + * + * GNU lightning is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public + * License for more details. 
+ * + * Authors: + * Paulo Cesar Pereira de Andrade + */ + +# define JIT_SP _R15 +# define JIT_RET _R2 +# define JIT_FRET _F0 + +#if __WORDSIZE == 32 +# define NUM_FLOAT_REG_ARGS 2 +#else +# define NUM_FLOAT_REG_ARGS 4 +#endif +#define jit_arg_reg_p(i) ((i) >= 0 && (i) < 5) +#define jit_arg_f_reg_p(i) ((i) >= 0 && (i) < NUM_FLOAT_REG_ARGS) + +/* + * Types + */ +typedef struct jit_va_list { + /* The offsets are "1" based, as addresses are fixed in the + * standard stack frame format. */ + jit_word_t gpoff; + jit_word_t fpoff; + + /* Easier when there is an explicitly defined type... +(gdb) ptype ap +type = struct __va_list_tag { + long __gpr; + long __fpr; + void *__overflow_arg_area; + void *__reg_save_area; + + Note that gopff (__gpr) and fpoff (__fpr) are jit_word_t equivalent + and, again, "1" (unit) based, so must be adjusted at va_arg time. + */ + jit_pointer_t over; + jit_pointer_t save; + + /* For variadic functions, always allocate space to save callee + * save fpr registers. + * Note that s390 has a standard stack frame format that lightning + * does not fully comply with, but for variadic functions it must, + * for those (variadic) do not use the "empty" spaces for any + * callee save fpr register, but save them after the va_list + * space; and use the standard stack frame format, as required + * by variadic functions (and have a compatible va_list pointer). 
*/ + jit_float64_t f8; + jit_float64_t f9; + jit_float64_t f10; + jit_float64_t f11; + jit_float64_t f12; + jit_float64_t f13; + jit_float64_t f14; + jit_float64_t f15; +} jit_va_list_t; + +/* + * Prototypes + */ +#define jit_get_reg_pair() _jit_get_reg_pair(_jit) +static int32_t _jit_get_reg_pair(jit_state_t*); +#define jit_unget_reg_pair(regno) _jit_unget_reg_pair(_jit,regno) +static void _jit_unget_reg_pair(jit_state_t*,int32_t); +#define jit_get_reg_but_zero(flags) _jit_get_reg_but_zero(_jit,flags) +static int32_t _jit_get_reg_but_zero(jit_state_t*,int32_t); +#define jit_unget_reg_but_zero(reg) jit_unget_reg(reg) +#define patch(instr, node) _patch(_jit, instr, node) +static void _patch(jit_state_t*,jit_word_t,jit_node_t*); + +/* libgcc */ +extern void __clear_cache(void *, void *); + +#define PROTO 1 +# include "s390-cpu.c" +# include "s390-fpu.c" +#undef PROTO + +/* + * Initialization + */ +static const jit_register_t _rvs[] = { + { rc(gpr) | 0x0, "%r0" }, + { rc(gpr) | 0x1, "%r1" }, + { rc(gpr) | rc(sav) | 0xc, "%r12" }, + { rc(gpr) | rc(sav) | 0xb, "%r11" }, + { rc(gpr) | rc(sav) | 0xa, "%r10" }, + { rc(gpr) | rc(sav) | 0x9, "%r9" }, + { rc(gpr) | rc(sav) | 0x8, "%r8" }, + { rc(gpr) | rc(sav) | 0x7, "%r7" }, + { rc(gpr) | rc(arg) | rc(sav) | 0x6,"%r6" }, + { rc(gpr) | rc(arg) | 0x5, "%r5" }, + { rc(gpr) | rc(arg) | 0x4, "%r4" }, + { rc(gpr) | rc(arg) | 0x3, "%r3" }, + { rc(gpr) | rc(arg) | 0x2, "%r2" }, + { rc(sav) | 0xd, "%r13" }, /* used as JIT_FP */ + { 0xe, "%r14" }, + { rc(sav) | 0xf, "%r15" }, + { rc(fpr) | 0x1, "%f1" }, + { rc(fpr) | 0x3, "%f3" }, + { rc(fpr) | 0x5, "%f5" }, + { rc(fpr) | 0x7, "%f7" }, + { rc(fpr) | rc(sav) | 0xe, "%f14" }, + /* Do not use as temporary to simplify stack layout */ + { 0xf, "%f15" }, + { rc(fpr) | rc(sav) | 0x8, "%f8" }, + { rc(fpr) | rc(sav) | 0x9, "%f9" }, + { rc(fpr) | rc(sav) | 0xa, "%f10" }, + { rc(fpr) | rc(sav) | 0xb, "%f11" }, + { rc(fpr) | rc(sav) | 0xc, "%f12" }, + { rc(fpr) | rc(sav) | 0xd, "%f13" }, + { 
rc(fpr) | rc(arg) | 0x6, "%f6" }, + { rc(fpr) | rc(arg) | 0x4, "%f4" }, + { rc(fpr) | rc(arg) | 0x2, "%f2" }, + { rc(fpr) | rc(arg) | 0x0, "%f0" }, + { _NOREG, "<none>" }, +}; + +/* + * Implementation + */ +void +jit_get_cpu(void) +{ +} + +void +_jit_init(jit_state_t *_jit) +{ + _jitc->reglen = jit_size(_rvs) - 1; +} + +void +_jit_prolog(jit_state_t *_jit) +{ + int32_t offset; + + if (_jitc->function) + jit_epilog(); + assert(jit_regset_cmp_ui(&_jitc->regarg, 0) == 0); + jit_regset_set_ui(&_jitc->regsav, 0); + offset = _jitc->functions.offset; + if (offset >= _jitc->functions.length) { + jit_realloc((jit_pointer_t *)&_jitc->functions.ptr, + _jitc->functions.length * sizeof(jit_function_t), + (_jitc->functions.length + 16) * sizeof(jit_function_t)); + _jitc->functions.length += 16; + } + _jitc->function = _jitc->functions.ptr + _jitc->functions.offset++; + _jitc->function->self.size = stack_framesize; + _jitc->function->self.argi = _jitc->function->self.argf = + _jitc->function->self.aoff = _jitc->function->self.alen = 0; + /* preallocate 8 bytes if not using a constant data buffer */ + if (_jitc->no_data) + _jitc->function->self.aoff = -8; + _jitc->function->self.call = jit_call_default; + jit_alloc((jit_pointer_t *)&_jitc->function->regoff, + _jitc->reglen * sizeof(int32_t)); + + /* _no_link here does not mean the jit_link() call can be removed + * by rewriting as: + * _jitc->function->prolog = jit_new_node(jit_code_prolog); + */ + _jitc->function->prolog = jit_new_node_no_link(jit_code_prolog); + jit_link(_jitc->function->prolog); + _jitc->function->prolog->w.w = offset; + _jitc->function->epilog = jit_new_node_no_link(jit_code_epilog); + /* u: label value + * v: offset in blocks vector + * w: offset in functions vector + */ + _jitc->function->epilog->w.w = offset; + + jit_regset_new(&_jitc->function->regset); +} + +int32_t +_jit_allocai(jit_state_t *_jit, int32_t length) +{ + assert(_jitc->function); + switch (length) { + case 0: case 1: break; + case 2: 
_jitc->function->self.aoff &= -2; break; + case 3: case 4: _jitc->function->self.aoff &= -4; break; + default: _jitc->function->self.aoff &= -8; break; + } + _jitc->function->self.aoff -= length; + if (!_jitc->realize) { + jit_inc_synth_ww(allocai, _jitc->function->self.aoff, length); + jit_dec_synth(); + } + return (_jitc->function->self.aoff); +} + +void +_jit_allocar(jit_state_t *_jit, int32_t u, int32_t v) +{ + int32_t reg; + assert(_jitc->function); + jit_inc_synth_ww(allocar, u, v); + if (!_jitc->function->allocar) { + _jitc->function->aoffoff = jit_allocai(sizeof(int32_t)); + _jitc->function->allocar = 1; + } + reg = jit_get_reg(jit_class_gpr); + jit_negr(reg, v); + jit_andi(reg, reg, -8); + jit_ldxi_i(u, JIT_FP, _jitc->function->aoffoff); + jit_addr(u, u, reg); + jit_addr(JIT_SP, JIT_SP, reg); + jit_stxi_i(_jitc->function->aoffoff, JIT_FP, u); + jit_unget_reg(reg); + jit_dec_synth(); +} + +void +_jit_ret(jit_state_t *_jit) +{ + jit_node_t *instr; + assert(_jitc->function); + jit_inc_synth(ret); + /* jump to epilog */ + instr = jit_jmpi(); + jit_patch_at(instr, _jitc->function->epilog); + jit_dec_synth(); +} + +void +_jit_retr(jit_state_t *_jit, int32_t u) +{ + jit_inc_synth_w(retr, u); + jit_movr(JIT_RET, u); + jit_ret(); + jit_dec_synth(); +} + +void +_jit_reti(jit_state_t *_jit, jit_word_t u) +{ + jit_inc_synth_w(reti, u); + jit_movi(JIT_RET, u); + jit_ret(); + jit_dec_synth(); +} + +void +_jit_retr_f(jit_state_t *_jit, int32_t u) +{ + jit_inc_synth_w(retr_f, u); + jit_movr_f(JIT_FRET, u); + jit_ret(); + jit_dec_synth(); +} + +void +_jit_reti_f(jit_state_t *_jit, jit_float32_t u) +{ + jit_inc_synth_f(reti_f, u); + jit_movi_f(JIT_FRET, u); + jit_ret(); + jit_dec_synth(); +} + +void +_jit_retr_d(jit_state_t *_jit, int32_t u) +{ + jit_inc_synth_w(retr_d, u); + jit_movr_d(JIT_FRET, u); + jit_ret(); + jit_dec_synth(); +} + +void +_jit_reti_d(jit_state_t *_jit, jit_float64_t u) +{ + jit_inc_synth_d(reti_d, u); + jit_movi_d(JIT_FRET, u); + jit_ret(); + 
jit_dec_synth(); +} + /* Finish the function being defined: require an open function whose epilog node has no successor yet (next == NULL), link that node, then clear the current-function pointer. */ +void +_jit_epilog(jit_state_t *_jit) +{ + assert(_jitc->function); + assert(_jitc->function->epilog->next == NULL); + jit_link(_jitc->function->epilog); + _jitc->function = NULL; +} + /* Return whether argument node u was given a register slot: u->u.w is tested with jit_arg_reg_p for jit_code_arg nodes, and with jit_arg_f_reg_p for the (asserted) jit_code_arg_f / jit_code_arg_d nodes. */ +jit_bool_t +_jit_arg_register_p(jit_state_t *_jit, jit_node_t *u) +{ + if (u->code == jit_code_arg) + return (jit_arg_reg_p(u->u.w)); + assert(u->code == jit_code_arg_f || u->code == jit_code_arg_d); + return (jit_arg_f_reg_p(u->u.w)); +} + /* Set jit_call_varargs either on the outgoing call (when _jitc->prepare is set) or on the function being defined; each path asserts the flag was not set before. For a function definition, also reserve a jit_va_list_t with jit_allocai and record the starting gp/fp register indexes for va_arg in vagp/vafp. */ +void +_jit_ellipsis(jit_state_t *_jit) +{ + jit_inc_synth(ellipsis); + if (_jitc->prepare) { /* flag the outgoing call */ + jit_link_prepare(); + assert(!(_jitc->function->call.call & jit_call_varargs)); + _jitc->function->call.call |= jit_call_varargs; + } + else { /* flag the function being defined */ + jit_link_prolog(); + assert(!(_jitc->function->self.call & jit_call_varargs)); + _jitc->function->self.call |= jit_call_varargs; + + /* Allocate va_list like object in the stack. */ + _jitc->function->vaoff = jit_allocai(sizeof(jit_va_list_t)); + + /* Initialize gp offset in save area. */ + if (jit_arg_reg_p(_jitc->function->self.argi)) + _jitc->function->vagp = _jitc->function->self.argi; + else + _jitc->function->vagp = 5; /* argi is outside the range jit_arg_reg_p accepts: use the upper bound */ + + /* Initialize fp offset in save area. 
*/ + if (jit_arg_f_reg_p(_jitc->function->self.argf)) + _jitc->function->vafp = _jitc->function->self.argf; + else + _jitc->function->vafp = NUM_FLOAT_REG_ARGS; + } + jit_dec_synth(); +} + +void +_jit_va_push(jit_state_t *_jit, int32_t u) +{ + jit_inc_synth_w(va_push, u); + jit_pushargr(u); + jit_dec_synth(); +} + +jit_node_t * +_jit_arg(jit_state_t *_jit) +{ + jit_node_t *node; + int32_t offset; + assert(_jitc->function); + if (jit_arg_reg_p(_jitc->function->self.argi)) + offset = _jitc->function->self.argi++; + else { + offset = _jitc->function->self.size; + _jitc->function->self.size += sizeof(jit_word_t); + } + node = jit_new_node_ww(jit_code_arg, offset, + ++_jitc->function->self.argn); + jit_link_prolog(); + return (node); +} + +jit_node_t * +_jit_arg_f(jit_state_t *_jit) +{ + jit_node_t *node; + int32_t offset; + assert(_jitc->function); + if (jit_arg_f_reg_p(_jitc->function->self.argf)) + offset = _jitc->function->self.argf++; + else { + offset = _jitc->function->self.size; + _jitc->function->self.size += sizeof(jit_word_t); + } + node = jit_new_node_ww(jit_code_arg_f, offset, + ++_jitc->function->self.argn); + jit_link_prolog(); + return (node); +} + +jit_node_t * +_jit_arg_d(jit_state_t *_jit) +{ + jit_node_t *node; + int32_t offset; + assert(_jitc->function); + if (jit_arg_f_reg_p(_jitc->function->self.argf)) + offset = _jitc->function->self.argf++; + else { + offset = _jitc->function->self.size; + _jitc->function->self.size += sizeof(jit_float64_t); + } + node = jit_new_node_ww(jit_code_arg_d, offset, + ++_jitc->function->self.argn); + jit_link_prolog(); + return (node); +} + +void +_jit_getarg_c(jit_state_t *_jit, int32_t u, jit_node_t *v) +{ + assert(v->code == jit_code_arg); + jit_inc_synth_wp(getarg_c, u, v); + if (jit_arg_reg_p(v->u.w)) + jit_extr_c(u, _R2 - v->u.w); + else + jit_ldxi_c(u, JIT_FP, + v->u.w + (__WORDSIZE >> 3) - sizeof(int8_t)); + jit_dec_synth(); +} + +void +_jit_getarg_uc(jit_state_t *_jit, int32_t u, jit_node_t *v) +{ + 
assert(v->code == jit_code_arg); + jit_inc_synth_wp(getarg_uc, u, v); + if (jit_arg_reg_p(v->u.w)) + jit_extr_uc(u, _R2 - v->u.w); + else + jit_ldxi_uc(u, JIT_FP, + v->u.w + (__WORDSIZE >> 3) - sizeof(uint8_t)); + jit_dec_synth(); +} + +void +_jit_getarg_s(jit_state_t *_jit, int32_t u, jit_node_t *v) +{ + assert(v->code == jit_code_arg); + jit_inc_synth_wp(getarg_s, u, v); + if (jit_arg_reg_p(v->u.w)) + jit_extr_s(u, _R2 - v->u.w); + else + jit_ldxi_s(u, JIT_FP, + v->u.w + (__WORDSIZE >> 3) - sizeof(int16_t)); + jit_dec_synth(); +} + +void +_jit_getarg_us(jit_state_t *_jit, int32_t u, jit_node_t *v) +{ + assert(v->code == jit_code_arg); + jit_inc_synth_wp(getarg_us, u, v); + if (jit_arg_reg_p(v->u.w)) + jit_extr_us(u, _R2 - v->u.w); + else + jit_ldxi_us(u, JIT_FP, + v->u.w + (__WORDSIZE >> 3) - sizeof(uint16_t)); + jit_dec_synth(); +} + +void +_jit_getarg_i(jit_state_t *_jit, int32_t u, jit_node_t *v) +{ + assert(v->code == jit_code_arg); + jit_inc_synth_wp(getarg_i, u, v); + if (jit_arg_reg_p(v->u.w)) { +#if __WORDSIZE == 32 + jit_movr(u, _R2 - v->u.w); +#else + jit_extr_i(u, _R2 - v->u.w); +#endif + } + else + jit_ldxi_i(u, JIT_FP, + v->u.w + (__WORDSIZE >> 3) - sizeof(int32_t)); + jit_dec_synth(); +} + +#if __WORDSIZE == 64 +void +_jit_getarg_ui(jit_state_t *_jit, int32_t u, jit_node_t *v) +{ + assert(v->code == jit_code_arg); + jit_inc_synth_wp(getarg_ui, u, v); + if (jit_arg_reg_p(v->u.w)) + jit_extr_ui(u, _R2 - v->u.w); + else + jit_ldxi_ui(u, JIT_FP, + v->u.w + (__WORDSIZE >> 3) - sizeof(uint32_t)); + jit_dec_synth(); +} + +void +_jit_getarg_l(jit_state_t *_jit, int32_t u, jit_node_t *v) +{ + assert(v->code == jit_code_arg); + jit_inc_synth_wp(getarg_l, u, v); + if (jit_arg_reg_p(v->u.w)) + jit_movr(u, _R2 - v->u.w); + else + jit_ldxi_l(u, JIT_FP, v->u.w); + jit_dec_synth(); +} +#endif + +void +_jit_putargr(jit_state_t *_jit, int32_t u, jit_node_t *v) +{ + assert(v->code == jit_code_arg); + jit_inc_synth_wp(putargr, u, v); + if (jit_arg_reg_p(v->u.w)) + 
jit_movr(_R2 - v->u.w, u); + else + jit_stxi(v->u.w, JIT_FP, u); + jit_dec_synth(); +} + +void +_jit_putargi(jit_state_t *_jit, jit_word_t u, jit_node_t *v) +{ + int32_t regno; + assert(v->code == jit_code_arg); + jit_inc_synth_wp(putargi, u, v); + if (jit_arg_reg_p(v->u.w)) + jit_movi(_R2 - v->u.w, u); + else { + regno = jit_get_reg(jit_class_gpr); + jit_movi(regno, u); + jit_stxi(v->u.w, JIT_FP, regno); + jit_unget_reg(regno); + } + jit_dec_synth(); +} + +void +_jit_getarg_f(jit_state_t *_jit, int32_t u, jit_node_t *v) +{ + assert(v->code == jit_code_arg_f); + jit_inc_synth_wp(getarg_f, u, v); + if (jit_arg_f_reg_p(v->u.w)) + jit_movr_f(u, _F0 - v->u.w); + else + jit_ldxi_f(u, JIT_FP, + v->u.w +#if __WORDSIZE == 64 + + (__WORDSIZE >> 3) - sizeof(jit_float32_t) +#endif + ); + jit_dec_synth(); +} + +void +_jit_putargr_f(jit_state_t *_jit, int32_t u, jit_node_t *v) +{ + assert(v->code == jit_code_arg_f); + jit_inc_synth_wp(putargr_f, u, v); + if (jit_arg_f_reg_p(v->u.w)) + jit_movr_f(_F0 - v->u.w, u); + else + jit_stxi_f(v->u.w +#if __WORDSIZE == 64 + + (__WORDSIZE >> 3) - sizeof(jit_float32_t) +#endif + , JIT_FP, u); + jit_dec_synth(); +} + +void +_jit_putargi_f(jit_state_t *_jit, jit_float32_t u, jit_node_t *v) +{ + int32_t regno; + assert(v->code == jit_code_arg_f); + jit_inc_synth_fp(putargi_f, u, v); + if (jit_arg_f_reg_p(v->u.w)) + jit_movi_f(_F0 - v->u.w, u); + else { + regno = jit_get_reg(jit_class_fpr); + jit_movi_f(regno, u); + jit_stxi_f(v->u.w +#if __WORDSIZE == 64 + + (__WORDSIZE >> 3) - sizeof(jit_float32_t) +#endif + , JIT_FP, regno); + jit_unget_reg(regno); + } + jit_dec_synth(); +} + +void +_jit_getarg_d(jit_state_t *_jit, int32_t u, jit_node_t *v) +{ + assert(v->code == jit_code_arg_d); + jit_inc_synth_wp(getarg_d, u, v); + if (jit_arg_f_reg_p(v->u.w)) + jit_movr_d(u, _F0 - v->u.w); + else + jit_ldxi_d(u, JIT_FP, v->u.w); + jit_dec_synth(); +} + +void +_jit_putargr_d(jit_state_t *_jit, int32_t u, jit_node_t *v) +{ + assert(v->code == 
jit_code_arg_d); + jit_inc_synth_wp(putargr_d, u, v); + if (jit_arg_f_reg_p(v->u.w)) + jit_movr_d(_F0 - v->u.w, u); + else + jit_stxi_d(v->u.w, JIT_FP, u); + jit_dec_synth(); +} + +void +_jit_putargi_d(jit_state_t *_jit, jit_float64_t u, jit_node_t *v) +{ + int32_t regno; + assert(v->code == jit_code_arg_d); + jit_inc_synth_dp(putargi_d, u, v); + if (jit_arg_f_reg_p(v->u.w)) + jit_movi_d(_F0 - v->u.w, u); + else { + regno = jit_get_reg(jit_class_fpr); + jit_movi_d(regno, u); + jit_stxi_d(v->u.w, JIT_FP, regno); + jit_unget_reg(regno); + } + jit_dec_synth(); +} + +void +_jit_pushargr(jit_state_t *_jit, int32_t u) +{ + assert(_jitc->function); + jit_inc_synth_w(pushargr, u); + jit_link_prepare(); + if (jit_arg_reg_p(_jitc->function->call.argi)) { + jit_movr(_R2 - _jitc->function->call.argi, u); + ++_jitc->function->call.argi; + } + else { + jit_stxi(_jitc->function->call.size + stack_framesize, JIT_SP, u); + _jitc->function->call.size += sizeof(jit_word_t); + } + jit_dec_synth(); +} + +void +_jit_pushargi(jit_state_t *_jit, jit_word_t u) +{ + int32_t regno; + assert(_jitc->function); + jit_inc_synth_w(pushargi, u); + jit_link_prepare(); + if (jit_arg_reg_p(_jitc->function->call.argi)) { + jit_movi(_R2 - _jitc->function->call.argi, u); + ++_jitc->function->call.argi; + } + else { + regno = jit_get_reg(jit_class_gpr); + jit_movi(regno, u); + jit_stxi(_jitc->function->call.size + stack_framesize, JIT_SP, regno); + jit_unget_reg(regno); + _jitc->function->call.size += sizeof(jit_word_t); + } + jit_dec_synth(); +} + +void +_jit_pushargr_f(jit_state_t *_jit, int32_t u) +{ + assert(_jitc->function); + jit_inc_synth_w(pushargr_f, u); + jit_link_prepare(); + if (jit_arg_f_reg_p(_jitc->function->call.argf)) { + jit_movr_f(_F0 - _jitc->function->call.argf, u); + ++_jitc->function->call.argf; + } + else { + jit_stxi_f(_jitc->function->call.size + stack_framesize +#if __WORDSIZE == 64 + + (__WORDSIZE >> 3) - sizeof(jit_float32_t) +#endif + , JIT_SP, u); + 
_jitc->function->call.size += sizeof(jit_word_t); + } + jit_dec_synth(); +} + +void +_jit_pushargi_f(jit_state_t *_jit, jit_float32_t u) +{ + int32_t regno; + assert(_jitc->function); + jit_inc_synth_f(pushargi_f, u); + jit_link_prepare(); + if (jit_arg_f_reg_p(_jitc->function->call.argf)) { + jit_movi_f(_F0 - _jitc->function->call.argf, u); + ++_jitc->function->call.argf; + } + else { + regno = jit_get_reg(jit_class_fpr); + jit_movi_f(regno, u); + jit_stxi_f(_jitc->function->call.size + stack_framesize +#if __WORDSIZE == 64 + + (__WORDSIZE >> 3) - sizeof(jit_float32_t) +#endif + , JIT_SP, regno); + jit_unget_reg(regno); + _jitc->function->call.size += sizeof(jit_word_t); + } + jit_dec_synth(); +} + +void +_jit_pushargr_d(jit_state_t *_jit, int32_t u) +{ + assert(_jitc->function); + jit_inc_synth_w(pushargr_d, u); + jit_link_prepare(); + if (jit_arg_f_reg_p(_jitc->function->call.argf)) { + jit_movr_d(_F0 - _jitc->function->call.argf, u); + ++_jitc->function->call.argf; + } + else { + jit_stxi_d(_jitc->function->call.size + stack_framesize, JIT_SP, u); + _jitc->function->call.size += sizeof(jit_float64_t); + } + jit_dec_synth(); +} + +void +_jit_pushargi_d(jit_state_t *_jit, jit_float64_t u) +{ + int32_t regno; + assert(_jitc->function); + jit_inc_synth_d(pushargi_d, u); + jit_link_prepare(); + if (jit_arg_f_reg_p(_jitc->function->call.argf)) { + jit_movi_d(_F0 - _jitc->function->call.argf, u); + ++_jitc->function->call.argf; + } + else { + regno = jit_get_reg(jit_class_fpr); + jit_movi_d(regno, u); + jit_stxi_d(_jitc->function->call.size + stack_framesize, JIT_SP, regno); + jit_unget_reg(regno); + _jitc->function->call.size += sizeof(jit_float64_t); + } + jit_dec_synth(); +} + /* Return 1 when register regno carries an argument of call node `node`, else 0. node->v.w and node->w.w are the gpr and fpr argument-register counts stored by jit_finishr/jit_finishi. Argument i is passed in _R2 - i (gpr) or _F0 - i (fpr), matching the pusharg functions. The index is kept in its own variable so the fpr test still sees the original regno; reusing regno made the fpr test compare _F0 - (_R2 - regno), which never matches. */ +jit_bool_t +_jit_regarg_p(jit_state_t *_jit, jit_node_t *node, int32_t regno) +{ + int32_t spec, index; + spec = jit_class(_rvs[regno].spec); + if (spec & jit_class_arg) { + index = _R2 - regno; /* position among the gpr arguments; negative for an fpr */ + if (index >= 0 && index < node->v.w) + return (1); + if (spec & jit_class_fpr) { + index = _F0 - 
regno; /* position among the fpr arguments */ + if (index >= 0 && index < node->w.w) + return (1); + } + } + return (0); +} + +void +_jit_finishr(jit_state_t *_jit, int32_t r0) +{ + jit_node_t *call; + assert(_jitc->function); + jit_inc_synth_w(finishr, r0); + if (_jitc->function->self.alen < _jitc->function->call.size) + _jitc->function->self.alen = _jitc->function->call.size; + call = jit_callr(r0); + call->v.w = _jitc->function->call.argi; + call->w.w = _jitc->function->call.argf; + _jitc->function->call.argi = _jitc->function->call.argf = + _jitc->function->call.size = 0; + _jitc->prepare = 0; + jit_dec_synth(); +} + +jit_node_t * +_jit_finishi(jit_state_t *_jit, jit_pointer_t i0) +{ + jit_node_t *node; + assert(_jitc->function); + jit_inc_synth_w(finishi, (jit_word_t)i0); + if (_jitc->function->self.alen < _jitc->function->call.size) + _jitc->function->self.alen = _jitc->function->call.size; + node = jit_calli(i0); + node->v.w = _jitc->function->call.argi; + node->w.w = _jitc->function->call.argf; + _jitc->function->call.argi = _jitc->function->call.argf = + _jitc->function->call.size = 0; + _jitc->prepare = 0; + jit_dec_synth(); + return (node); +} + +void +_jit_retval_c(jit_state_t *_jit, int32_t r0) +{ + jit_inc_synth_w(retval_c, r0); + jit_extr_c(r0, JIT_RET); + jit_dec_synth(); +} + +void +_jit_retval_uc(jit_state_t *_jit, int32_t r0) +{ + jit_inc_synth_w(retval_uc, r0); + jit_extr_uc(r0, JIT_RET); + jit_dec_synth(); +} + +void +_jit_retval_s(jit_state_t *_jit, int32_t r0) +{ + jit_inc_synth_w(retval_s, r0); + jit_extr_s(r0, JIT_RET); + jit_dec_synth(); +} + +void +_jit_retval_us(jit_state_t *_jit, int32_t r0) +{ + jit_inc_synth_w(retval_us, r0); + jit_extr_us(r0, JIT_RET); + jit_dec_synth(); +} + +void +_jit_retval_i(jit_state_t *_jit, int32_t r0) +{ + jit_inc_synth_w(retval_i, r0); +#if __WORDSIZE == 64 + jit_extr_i(r0, JIT_RET); +#else + jit_movr(r0, JIT_RET); +#endif + jit_dec_synth(); +} + +#if __WORDSIZE == 64 +void +_jit_retval_ui(jit_state_t *_jit, int32_t r0) +{ + 
jit_inc_synth_w(retval_ui, r0); + jit_extr_ui(r0, JIT_RET); + jit_dec_synth(); +} + +void +_jit_retval_l(jit_state_t *_jit, int32_t r0) +{ + jit_inc_synth_w(retval_l, r0); + jit_movr(r0, JIT_RET); + jit_dec_synth(); +} +#endif + +void +_jit_retval_f(jit_state_t *_jit, int32_t r0) +{ + jit_inc_synth_w(retval_f, r0); + jit_movr_f(r0, JIT_FRET); + jit_dec_synth(); +} + +void +_jit_retval_d(jit_state_t *_jit, int32_t r0) +{ + jit_inc_synth_w(retval_d, r0); + jit_movr_d(r0, JIT_FRET); + jit_dec_synth(); +} + +jit_pointer_t +_emit_code(jit_state_t *_jit) +{ + jit_node_t *node; + jit_node_t *temp; + jit_word_t word; + int32_t value; + int32_t offset; + struct { + jit_node_t *node; + jit_word_t word; +#if DEVEL_DISASSEMBLER + jit_word_t prevw; +#endif + int32_t patch_offset; + } undo; +#if DEVEL_DISASSEMBLER + jit_word_t prevw; +#endif + + _jitc->function = NULL; + + jit_reglive_setup(); + + undo.word = 0; + undo.node = NULL; + undo.patch_offset = 0; + +#define assert_data(node) /**/ +#define case_rr(name, type) \ + case jit_code_##name##r##type: \ + name##r##type(rn(node->u.w), rn(node->v.w)); \ + break +#define case_rw(name, type) \ + case jit_code_##name##i##type: \ + name##i##type(rn(node->u.w), node->v.w); \ + break +#define case_wr(name, type) \ + case jit_code_##name##i##type: \ + name##i##type(node->u.w, rn(node->v.w)); \ + break +#define case_rrr(name, type) \ + case jit_code_##name##r##type: \ + name##r##type(rn(node->u.w), \ + rn(node->v.w), rn(node->w.w)); \ + break +#define case_rrrr(name, type) \ + case jit_code_##name##r##type: \ + name##r##type(rn(node->u.q.l), rn(node->u.q.h), \ + rn(node->v.w), rn(node->w.w)); \ + break +#define case_rrw(name, type) \ + case jit_code_##name##i##type: \ + name##i##type(rn(node->u.w), rn(node->v.w), node->w.w); \ + break +#define case_rrrw(name, type) \ + case jit_code_##name##i##type: \ + name##i##type(rn(node->u.q.l), rn(node->u.q.h), \ + rn(node->v.w), node->w.w); \ + break +#define case_rrf(name) \ + case 
jit_code_##name##i_f: \ + assert_data(node); \ + name##i_f(rn(node->u.w), rn(node->v.w), \ + (jit_float32_t *)node->w.n->u.w); \ + break +#define case_rrd(name) \ + case jit_code_##name##i_d: \ + assert_data(node); \ + name##i_d(rn(node->u.w), rn(node->v.w), \ + (jit_float64_t *)node->w.n->u.w); \ + break +#define case_wrr(name, type) \ + case jit_code_##name##i##type: \ + name##i##type(node->u.w, rn(node->v.w), rn(node->w.w)); \ + break +#define case_brr(name, type) \ + case jit_code_##name##r##type: \ + temp = node->u.n; \ + assert(temp->code == jit_code_label || \ + temp->code == jit_code_epilog); \ + if (temp->flag & jit_flag_patch) \ + name##r##type(temp->u.w, rn(node->v.w), \ + rn(node->w.w)); \ + else { \ + word = name##r##type##_p(_jit->pc.w, \ + rn(node->v.w), \ + rn(node->w.w)); \ + patch(word, node); \ + } \ + break +#define case_brw(name, type) \ + case jit_code_##name##i##type: \ + temp = node->u.n; \ + assert(temp->code == jit_code_label || \ + temp->code == jit_code_epilog); \ + if (temp->flag & jit_flag_patch) \ + name##i##type(temp->u.w, \ + rn(node->v.w), node->w.w); \ + else { \ + word = name##i##type##_p(_jit->pc.w, \ + rn(node->v.w), node->w.w); \ + patch(word, node); \ + } \ + break; +#define case_brf(name) \ + case jit_code_##name##i_f: \ + temp = node->u.n; \ + assert(temp->code == jit_code_label || \ + temp->code == jit_code_epilog); \ + if (temp->flag & jit_flag_patch) \ + name##i_f(temp->u.w, rn(node->v.w), \ + (jit_float32_t *)node->w.n->u.w); \ + else { \ + word = name##i_f_p(_jit->pc.w, rn(node->v.w), \ + (jit_float32_t *)node->w.n->u.w);\ + patch(word, node); \ + } \ + break +#define case_brd(name) \ + case jit_code_##name##i_d: \ + temp = node->u.n; \ + assert(temp->code == jit_code_label || \ + temp->code == jit_code_epilog); \ + if (temp->flag & jit_flag_patch) \ + name##i_d(temp->u.w, rn(node->v.w), \ + (jit_float64_t *)node->w.n->u.w); \ + else { \ + word = name##i_d_p(_jit->pc.w, rn(node->v.w), \ + (jit_float64_t 
*)node->w.n->u.w);\ + patch(word, node); \ + } \ + break +#if DEVEL_DISASSEMBLER + prevw = _jit->pc.w; +#endif + for (node = _jitc->head; node; node = node->next) { + if (_jit->pc.uc >= _jitc->code.end) + return (NULL); + +#if DEVEL_DISASSEMBLER + node->offset = (jit_uword_t)_jit->pc.w - (jit_uword_t)prevw; + prevw = _jit->pc.w; +#endif + value = jit_classify(node->code); + jit_regarg_set(node, value); + switch (node->code) { + case jit_code_align: + assert(!(node->u.w & (node->u.w - 1)) && + node->u.w <= sizeof(jit_word_t)); + if (node->u.w == sizeof(jit_word_t) && + (word = _jit->pc.w & (sizeof(jit_word_t) - 1))) + nop(sizeof(jit_word_t) - word); + break; + case jit_code_note: case jit_code_name: + node->u.w = _jit->pc.w; + break; + case jit_code_label: + if ((node->link || (node->flag & jit_flag_use)) && + (word = _jit->pc.w & 3)) + nop(4 - word); + /* remember label is defined */ + node->flag |= jit_flag_patch; + node->u.w = _jit->pc.w; + break; + case_rrr(add,); + case_rrw(add,); + case_rrr(addc,); + case_rrw(addc,); + case_rrr(addx,); + case_rrw(addx,); + case_rrr(sub,); + case_rrw(sub,); + case_rrr(subc,); + case_rrw(subc,); + case_rrr(subx,); + case_rrw(subx,); + case_rrw(rsb,); + case_rrr(mul,); + case_rrw(mul,); + case_rrrr(qmul,); + case_rrrw(qmul,); + case_rrrr(qmul, _u); + case_rrrw(qmul, _u); + case_rrr(div,); + case_rrw(div,); + case_rrr(div, _u); + case_rrw(div, _u); + case_rrr(rem,); + case_rrw(rem,); + case_rrr(rem, _u); + case_rrw(rem, _u); + case_rrrr(qdiv,); + case_rrrw(qdiv,); + case_rrrr(qdiv, _u); + case_rrrw(qdiv, _u); + case_rrr(lsh,); + case_rrw(lsh,); + case_rrr(rsh,); + case_rrw(rsh,); + case_rrr(rsh, _u); + case_rrw(rsh, _u); + case_rr(neg,); + case_rr(com,); + case_rrr(and,); + case_rrw(and,); + case_rrr(or,); + case_rrw(or,); + case_rrr(xor,); + case_rrw(xor,); + case_rr(trunc, _f_i); + case_rr(trunc, _d_i); +#if __WORDSIZE == 64 + case_rr(trunc, _f_l); + case_rr(trunc, _d_l); +#endif + case_rr(ld, _c); + case_rw(ld, _c); + 
case_rr(ld, _uc); + case_rw(ld, _uc); + case_rr(ld, _s); + case_rw(ld, _s); + case_rr(ld, _us); + case_rw(ld, _us); + case_rr(ld, _i); + case_rw(ld, _i); +#if __WORDSIZE == 64 + case_rr(ld, _ui); + case_rw(ld, _ui); + case_rr(ld, _l); + case_rw(ld, _l); +#endif + case_rrr(ldx, _c); + case_rrw(ldx, _c); + case_rrr(ldx, _uc); + case_rrw(ldx, _uc); + case_rrr(ldx, _s); + case_rrw(ldx, _s); + case_rrr(ldx, _us); + case_rrw(ldx, _us); + case_rrr(ldx, _i); + case_rrw(ldx, _i); +#if __WORDSIZE == 64 + case_rrr(ldx, _ui); + case_rrw(ldx, _ui); + case_rrr(ldx, _l); + case_rrw(ldx, _l); +#endif + case_rr(st, _c); + case_wr(st, _c); + case_rr(st, _s); + case_wr(st, _s); + case_rr(st, _i); + case_wr(st, _i); +#if __WORDSIZE == 64 + case_rr(st, _l); + case_wr(st, _l); +#endif + case_rrr(stx, _c); + case_wrr(stx, _c); + case_rrr(stx, _s); + case_wrr(stx, _s); + case_rrr(stx, _i); + case_wrr(stx, _i); +#if __WORDSIZE == 64 + case_rrr(stx, _l); + case_wrr(stx, _l); +#endif + case_rr(hton, _us); + case_rr(hton, _ui); +#if __WORDSIZE == 64 + case_rr(hton, _ul); +#endif + case_rr(ext, _c); + case_rr(ext, _uc); + case_rr(ext, _s); + case_rr(ext, _us); +#if __WORDSIZE == 64 + case_rr(ext, _i); + case_rr(ext, _ui); +#endif + case_rr(mov,); + case jit_code_movi: + if (node->flag & jit_flag_node) { + temp = node->v.n; + if (temp->code == jit_code_data || + (temp->code == jit_code_label && + (temp->flag & jit_flag_patch))) + movi(rn(node->u.w), temp->u.w); + else { + assert(temp->code == jit_code_label || + temp->code == jit_code_epilog); + word = movi_p(rn(node->u.w), temp->u.w); + patch(word, node); + } + } + else + movi(rn(node->u.w), node->v.w); + break; + case_rrr(lt,); + case_rrw(lt,); + case_rrr(lt, _u); + case_rrw(lt, _u); + case_rrr(le,); + case_rrw(le,); + case_rrr(le, _u); + case_rrw(le, _u); + case_rrr(eq,); + case_rrw(eq,); + case_rrr(ge,); + case_rrw(ge,); + case_rrr(ge, _u); + case_rrw(ge, _u); + case_rrr(gt,); + case_rrw(gt,); + case_rrr(gt, _u); + case_rrw(gt, _u); + 
case_rrr(ne,); + case_rrw(ne,); + case_brr(blt,); + case_brw(blt,); + case_brr(blt, _u); + case_brw(blt, _u); + case_brr(ble,); + case_brw(ble,); + case_brr(ble, _u); + case_brw(ble, _u); + case_brr(beq,); + case_brw(beq,); + case_brr(bge,); + case_brw(bge,); + case_brr(bge, _u); + case_brw(bge, _u); + case_brr(bgt,); + case_brw(bgt,); + case_brr(bgt, _u); + case_brw(bgt, _u); + case_brr(bne,); + case_brw(bne,); + case_brr(boadd,); + case_brw(boadd,); + case_brr(boadd, _u); + case_brw(boadd, _u); + case_brr(bxadd,); + case_brw(bxadd,); + case_brr(bxadd, _u); + case_brw(bxadd, _u); + case_brr(bosub,); + case_brw(bosub,); + case_brr(bosub, _u); + case_brw(bosub, _u); + case_brr(bxsub,); + case_brw(bxsub,); + case_brr(bxsub, _u); + case_brw(bxsub, _u); + case_brr(bms,); + case_brw(bms,); + case_brr(bmc,); + case_brw(bmc,); + case_rrr(add, _f); + case_rrf(add); + case_rrr(sub, _f); + case_rrf(sub); + case_rrf(rsb); + case_rrr(mul, _f); + case_rrf(mul); + case_rrr(div, _f); + case_rrf(div); + case_rr(abs, _f); + case_rr(neg, _f); + case_rr(sqrt, _f); + case_rr(ext, _f); + case_rr(ld, _f); + case_rw(ld, _f); + case_rrr(ldx, _f); + case_rrw(ldx, _f); + case_rr(st, _f); + case_wr(st, _f); + case_rrr(stx, _f); + case_wrr(stx, _f); + case_rr(mov, _f); + case jit_code_movi_f: + assert_data(node); + movi_f(rn(node->u.w), (jit_float32_t *)node->v.n->u.w); + break; + case_rr(ext, _d_f); + case_rrr(lt, _f); + case_rrf(lt); + case_rrr(le, _f); + case_rrf(le); + case_rrr(eq, _f); + case_rrf(eq); + case_rrr(ge, _f); + case_rrf(ge); + case_rrr(gt, _f); + case_rrf(gt); + case_rrr(ne, _f); + case_rrf(ne); + case_rrr(unlt, _f); + case_rrf(unlt); + case_rrr(unle, _f); + case_rrf(unle); + case_rrr(uneq, _f); + case_rrf(uneq); + case_rrr(unge, _f); + case_rrf(unge); + case_rrr(ungt, _f); + case_rrf(ungt); + case_rrr(ltgt, _f); + case_rrf(ltgt); + case_rrr(ord, _f); + case_rrf(ord); + case_rrr(unord, _f); + case_rrf(unord); + case_brr(blt, _f); + case_brf(blt); + case_brr(ble, _f); + 
case_brf(ble); + case_brr(beq, _f); + case_brf(beq); + case_brr(bge, _f); + case_brf(bge); + case_brr(bgt, _f); + case_brf(bgt); + case_brr(bne, _f); + case_brf(bne); + case_brr(bunlt, _f); + case_brf(bunlt); + case_brr(bunle, _f); + case_brf(bunle); + case_brr(buneq, _f); + case_brf(buneq); + case_brr(bunge, _f); + case_brf(bunge); + case_brr(bungt, _f); + case_brf(bungt); + case_brr(bltgt, _f); + case_brf(bltgt); + case_brr(bord, _f); + case_brf(bord); + case_brr(bunord, _f); + case_brf(bunord); + case_rrr(add, _d); + case_rrd(add); + case_rrr(sub, _d); + case_rrd(sub); + case_rrd(rsb); + case_rrr(mul, _d); + case_rrd(mul); + case_rrr(div, _d); + case_rrd(div); + case_rr(abs, _d); + case_rr(neg, _d); + case_rr(sqrt, _d); + case_rr(ext, _d); + case_rr(ld, _d); + case_rw(ld, _d); + case_rrr(ldx, _d); + case_rrw(ldx, _d); + case_rr(st, _d); + case_wr(st, _d); + case_rrr(stx, _d); + case_wrr(stx, _d); + case_rr(mov, _d); + case jit_code_movi_d: + assert_data(node); + movi_d(rn(node->u.w), (jit_float64_t *)node->v.n->u.w); + break; + case_rr(ext, _f_d); + case_rrr(lt, _d); + case_rrd(lt); + case_rrr(le, _d); + case_rrd(le); + case_rrr(eq, _d); + case_rrd(eq); + case_rrr(ge, _d); + case_rrd(ge); + case_rrr(gt, _d); + case_rrd(gt); + case_rrr(ne, _d); + case_rrd(ne); + case_rrr(unlt, _d); + case_rrd(unlt); + case_rrr(unle, _d); + case_rrd(unle); + case_rrr(uneq, _d); + case_rrd(uneq); + case_rrr(unge, _d); + case_rrd(unge); + case_rrr(ungt, _d); + case_rrd(ungt); + case_rrr(ltgt, _d); + case_rrd(ltgt); + case_rrr(ord, _d); + case_rrd(ord); + case_rrr(unord, _d); + case_rrd(unord); + case_brr(blt, _d); + case_brd(blt); + case_brr(ble, _d); + case_brd(ble); + case_brr(beq, _d); + case_brd(beq); + case_brr(bge, _d); + case_brd(bge); + case_brr(bgt, _d); + case_brd(bgt); + case_brr(bne, _d); + case_brd(bne); + case_brr(bunlt, _d); + case_brd(bunlt); + case_brr(bunle, _d); + case_brd(bunle); + case_brr(buneq, _d); + case_brd(buneq); + case_brr(bunge, _d); + case_brd(bunge); 
+ case_brr(bungt, _d); + case_brd(bungt); + case_brr(bltgt, _d); + case_brd(bltgt); + case_brr(bord, _d); + case_brd(bord); + case_brr(bunord, _d); + case_brd(bunord); + case jit_code_jmpr: + jmpr(rn(node->u.w)); + break; + case jit_code_jmpi: + if (node->flag & jit_flag_node) { + temp = node->u.n; + assert(temp->code == jit_code_label || + temp->code == jit_code_epilog); + if (temp->flag & jit_flag_patch) + jmpi(temp->u.w); + else { + word = jmpi_p(_jit->pc.w); + patch(word, node); + } + } + else + jmpi(node->u.w); + break; + case jit_code_callr: + callr(rn(node->u.w)); + break; + case jit_code_calli: + if (node->flag & jit_flag_node) { + temp = node->u.n; + assert(temp->code == jit_code_label || + temp->code == jit_code_epilog); + if (temp->flag & jit_flag_patch) + calli(temp->u.w); + else { + word = calli_p(_jit->pc.w); + patch(word, node); + } + } + else + calli(node->u.w); + break; + case jit_code_prolog: + _jitc->function = _jitc->functions.ptr + node->w.w; + undo.node = node; + undo.word = _jit->pc.w; +#if DEVEL_DISASSEMBLER + undo.prevw = prevw; +#endif + undo.patch_offset = _jitc->patches.offset; + restart_function: + _jitc->again = 0; + prolog(node); + break; + case jit_code_epilog: + assert(_jitc->function == _jitc->functions.ptr + node->w.w); + if (_jitc->again) { + for (temp = undo.node->next; + temp != node; temp = temp->next) { + if (temp->code == jit_code_label || + temp->code == jit_code_epilog) + temp->flag &= ~jit_flag_patch; + } + temp->flag &= ~jit_flag_patch; + node = undo.node; + _jit->pc.w = undo.word; +#if DEVEL_DISASSEMBLER + prevw = undo.prevw; +#endif + _jitc->patches.offset = undo.patch_offset; + goto restart_function; + } + if (node->link && (word = _jit->pc.w & 3)) + nop(4 - word); + /* remember label is defined */ + node->flag |= jit_flag_patch; + node->u.w = _jit->pc.w; + epilog(node); + _jitc->function = NULL; + break; + case jit_code_va_start: + vastart(rn(node->u.w)); + break; + case jit_code_va_arg: + vaarg(rn(node->u.w), 
rn(node->v.w)); + break; + case jit_code_va_arg_d: + vaarg_d(rn(node->u.w), rn(node->v.w)); + break; + case jit_code_live: case jit_code_ellipsis: + case jit_code_va_push: + case jit_code_allocai: case jit_code_allocar: + case jit_code_arg: + case jit_code_arg_f: case jit_code_arg_d: + case jit_code_va_end: + case jit_code_ret: + case jit_code_retr: case jit_code_reti: + case jit_code_retr_f: case jit_code_reti_f: + case jit_code_retr_d: case jit_code_reti_d: + case jit_code_getarg_c: case jit_code_getarg_uc: + case jit_code_getarg_s: case jit_code_getarg_us: + case jit_code_getarg_i: +#if __WORDSIZE == 64 + case jit_code_getarg_ui: case jit_code_getarg_l: +#endif + case jit_code_getarg_f: case jit_code_getarg_d: + case jit_code_putargr: case jit_code_putargi: + case jit_code_putargr_f: case jit_code_putargi_f: + case jit_code_putargr_d: case jit_code_putargi_d: + case jit_code_pushargr: case jit_code_pushargi: + case jit_code_pushargr_f: case jit_code_pushargi_f: + case jit_code_pushargr_d: case jit_code_pushargi_d: + case jit_code_retval_c: case jit_code_retval_uc: + case jit_code_retval_s: case jit_code_retval_us: + case jit_code_retval_i: +#if __WORDSIZE == 64 + case jit_code_retval_ui: case jit_code_retval_l: +#endif + case jit_code_retval_f: case jit_code_retval_d: + case jit_code_prepare: + case jit_code_finishr: case jit_code_finishi: + break; + default: + abort(); + } + jit_regarg_clr(node, value); + assert(_jitc->regarg == 0 && _jitc->synth == 0); + /* update register live state */ + jit_reglive(node); + } +#undef case_brw +#undef case_brr +#undef case_wrr +#undef case_rrw +#undef case_rrr +#undef case_wr +#undef case_rw +#undef case_rr + + for (offset = 0; offset < _jitc->patches.offset; offset++) { + node = _jitc->patches.ptr[offset].node; + word = node->code == jit_code_movi ? 
node->v.n->u.w : node->u.n->u.w; + patch_at(_jitc->patches.ptr[offset].inst, word); + } + + jit_flush(_jit->code.ptr, _jit->pc.uc); + + return (_jit->code.ptr); +} + +#define CODE 1 +# include "s390-cpu.c" +# include "s390-fpu.c" +#undef CODE + +void +jit_flush(void *fptr, void *tptr) +{ +#if defined(__GNUC__) + jit_word_t f, t, s; + + s = sysconf(_SC_PAGE_SIZE); + f = (jit_word_t)fptr & -s; + t = (((jit_word_t)tptr) + s - 1) & -s; + __clear_cache((void *)f, (void *)t); +#endif +} + +void +_emit_ldxi(jit_state_t *_jit, jit_gpr_t r0, jit_gpr_t r1, jit_word_t i0) +{ + ldxi(rn(r0), rn(r1), i0); +} + +void +_emit_stxi(jit_state_t *_jit, jit_word_t i0, jit_gpr_t r0, jit_gpr_t r1) +{ + stxi(i0, rn(r0), rn(r1)); +} + +void +_emit_ldxi_d(jit_state_t *_jit, jit_fpr_t r0, jit_gpr_t r1, jit_word_t i0) +{ + ldxi_d(rn(r0), rn(r1), i0); +} + +void +_emit_stxi_d(jit_state_t *_jit, jit_word_t i0, jit_gpr_t r0, jit_fpr_t r1) +{ + stxi_d(i0, rn(r0), rn(r1)); +} + +static int32_t +_jit_get_reg_pair(jit_state_t *_jit) +{ + int32_t r1, r2; + /* Try to find a register pair for use with operations that + * require a odd based register pair. Search for the best + * match to avoid spills or at least a valid operation. 
+ */ + + /* Try non callee save first */ + if (jit_reg_free_p(_R0) && jit_reg_free_p(_R1)) + r1 = _R0, r2 = _R1; + else if (jit_reg_free_p(_R2) && jit_reg_free_p(_R3)) + r1 = _R2, r2 = _R3; + else if (jit_reg_free_p(_R4) && jit_reg_free_p(_R5)) + r1 = _R4, r2 = _R5; + /* Try callee save registers */ + else if (jit_reg_free_p(_R10) && jit_reg_free_p(_R11)) + r1 = _R10, r2 = _R11; + else if (jit_reg_free_p(_R8) && jit_reg_free_p(_R9)) + r1 = _R8, r2 = _R9; + else if (jit_reg_free_p(_R6) && jit_reg_free_p(_R7)) + r1 = _R6, r2 = _R7; + + /* We *must* find a register pair */ + else if (jit_reg_free_if_spill_p(_R0) && jit_reg_free_if_spill_p(_R1)) + r1 = _R0, r2 = _R1; + else if (jit_reg_free_if_spill_p(_R2) && jit_reg_free_if_spill_p(_R3)) + r1 = _R2, r2 = _R3; + else if (jit_reg_free_if_spill_p(_R4) && jit_reg_free_if_spill_p(_R5)) + r1 = _R4, r2 = _R5; + else if (jit_reg_free_if_spill_p(_R10) && jit_reg_free_if_spill_p(_R11)) + r1 = _R10, r2 = _R11; + else if (jit_reg_free_if_spill_p(_R8) && jit_reg_free_if_spill_p(_R9)) + r1 = _R8, r2 = _R9; + else if (jit_reg_free_if_spill_p(_R6) && jit_reg_free_if_spill_p(_R7)) + r1 = _R6, r2 = _R7; + else + /* Do not jit_get_reg() all registers to avoid it */ + abort(); + + (void)jit_get_reg(jit_class_gpr|jit_class_named|r1); + (void)jit_get_reg(jit_class_gpr|jit_class_named|r2); + + return (r1); +} + +static void +_jit_unget_reg_pair(jit_state_t *_jit, int32_t reg) +{ + int32_t r1, r2; + r1 = reg; + switch (r1) { + case _R0: r2 = _R1; break; + case _R2: r2 = _R3; break; + case _R4: r2 = _R5; break; + case _R6: r2 = _R7; break; + case _R8: r2 = _R9; break; + case _R10: r2 = _R11; break; + default: abort(); + } + jit_unget_reg(r1); + jit_unget_reg(r2); +} + +static int32_t +_jit_get_reg_but_zero(jit_state_t *_jit, int32_t flags) +{ + int32_t reg; + reg = jit_get_reg(jit_class_gpr); + if (reg == _R0) { + reg = jit_get_reg(jit_class_gpr|flags); + jit_unget_reg(_R0); + } + return (reg); +} + +static void +_patch(jit_state_t *_jit, 
jit_word_t instr, jit_node_t *node) +{ + int32_t flag; + + assert(node->flag & jit_flag_node); + if (node->code == jit_code_movi) + flag = node->v.n->flag; + else + flag = node->u.n->flag; + assert(!(flag & jit_flag_patch)); + if (_jitc->patches.offset >= _jitc->patches.length) { + jit_realloc((jit_pointer_t *)&_jitc->patches.ptr, + _jitc->patches.length * sizeof(jit_patch_t), + (_jitc->patches.length + 1024) * sizeof(jit_patch_t)); + _jitc->patches.length += 1024; + } + _jitc->patches.ptr[_jitc->patches.offset].inst = instr; + _jitc->patches.ptr[_jitc->patches.offset].node = node; + ++_jitc->patches.offset; +} diff --git a/deps/lightening/lightening/s390.h b/deps/lightening/lightening/s390.h new file mode 100644 index 0000000..0e74b2e --- /dev/null +++ b/deps/lightening/lightening/s390.h @@ -0,0 +1,68 @@ +/* + * Copyright (C) 2013-2017 Free Software Foundation, Inc. + * + * This file is part of GNU lightning. + * + * GNU lightning is free software; you can redistribute it and/or modify it + * under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation; either version 3, or (at your option) + * any later version. + * + * GNU lightning is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public + * License for more details. 
+ * + * Authors: + * Paulo Cesar Pereira de Andrade + */ + +#ifndef _jit_s390_h +#define _jit_s390_h + +#define JIT_HASH_CONSTS 1 +#define JIT_NUM_OPERANDS 2 + +/* + * Types + */ +#define JIT_FP _R13 +typedef enum { +#define jit_r(i) (_R12 + ((i) << 1)) +#define jit_r_num() 3 +#define jit_v(i) (_R11 + ((i) << 1)) +#define jit_v_num() 3 +#define jit_f(i) (_F8 + (i)) +#define jit_f_num() 6 +#define JIT_R0 _R12 +#define JIT_R1 _R10 +#define JIT_R2 _R8 +#define JIT_V0 _R11 +#define JIT_V1 _R9 +#define JIT_V2 _R7 + _R0, _R1, /* Volatile */ + _R12, /* Saved, GOT */ + _R11, _R10, _R9, _R8, /* Saved */ + _R7, /* Saved */ + _R6, /* Saved, parameter */ + _R5, _R4, _R3, /* Parameter passing */ + _R2, /* Volatile, parameter and return value */ + _R13, /* Saved, literal pool pointer */ + _R14, /* Volatile, return address */ + _R15, /* Saved, stack pointer */ +#define JIT_F0 _F8 +#define JIT_F1 _F9 +#define JIT_F2 _F10 +#define JIT_F3 _F11 +#define JIT_F4 _F12 +#define JIT_F5 _F13 + _F1, _F3, _F5, _F7, /* Volatile */ + _F14, _F15, _F8, _F9, /* Saved */ + _F10, _F11, _F12, _F13, /* Saved */ + _F6, _F4, _F2, /* Volatile, parameter */ + _F0, /* Volatile, parameter and return value */ + _NOREG, +#define JIT_NOREG _NOREG +} jit_reg_t; + +#endif /* _jit_s390_h */ diff --git a/deps/lightening/lightening/x86-cpu.c b/deps/lightening/lightening/x86-cpu.c new file mode 100644 index 0000000..e9e5299 --- /dev/null +++ b/deps/lightening/lightening/x86-cpu.c @@ -0,0 +1,2789 @@ +/* + * Copyright (C) 2012-2019 Free Software Foundation, Inc. + * + * This file is part of GNU lightning. + * + * GNU lightning is free software; you can redistribute it and/or modify it + * under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation; either version 3, or (at your option) + * any later version. 
+ * GNU lightning is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public + * License for more details. + * + * Authors: + * Paulo Cesar Pereira de Andrade + */ + +/* avoid using it due to partial stalls */ +#define USE_INC_DEC 0 + +#if __X32 +# define WIDE 0 +# define IF_WIDE(wide, narrow) narrow +#else +# define WIDE 1 +# define IF_WIDE(wide, narrow) wide +#endif + +#define _RAX_REGNO 0 +#define _RCX_REGNO 1 +#define _RDX_REGNO 2 +#define _RBX_REGNO 3 +#define _RSP_REGNO 4 +#define _RBP_REGNO 5 +#define _RSI_REGNO 6 +#define _RDI_REGNO 7 +#define _R8_REGNO 8 +#define _R9_REGNO 9 +#define _R10_REGNO 10 +#define _R11_REGNO 11 +#define _R12_REGNO 12 +#define _R13_REGNO 13 +#define _R14_REGNO 14 +#define _R15_REGNO 15 +#define r7(reg) ((reg) & 7) +#define r8(reg) ((reg) & 15) +#if __X32 +# define reg8_p(rn) ((rn) >= _RAX_REGNO && (rn) <= _RBX_REGNO) +#else +# define reg8_p(rn) 1 +#endif + +#define can_sign_extend_int_p(im) \ + IF_WIDE((((im) >= 0 && (long long)(im) <= 0x7fffffffLL) || \ + ((im) < 0 && (long long)(im) > -0x80000000LL)), \ + 1) +#define can_zero_extend_int_p(im) \ + IF_WIDE(((im) >= 0 && (im) < 0x80000000LL), \ + 1) +#define fits_uint32_p(im) \ + IF_WIDE((((im) & 0xffffffff00000000LL) == 0), \ + 1) + +#define _SCL1 0x00 +#define _SCL2 0x01 +#define _SCL4 0x02 +#define _SCL8 0x03 + +/* Two-operand ALU operation selectors, pre-shifted left by 3. The + * expansions are parenthesized so they stay a single operand wherever + * they are substituted. */ +#define X86_ADD 0 +#define X86_OR (1 << 3) +#define X86_ADC (2 << 3) +#define X86_SBB (3 << 3) +#define X86_AND (4 << 3) +#define X86_SUB (5 << 3) +#define X86_XOR (6 << 3) +#define X86_CMP (7 << 3) +/* Rotate and shift operation selectors */ +#define X86_ROL 0 +#define X86_ROR 1 +#define X86_RCL 2 +#define X86_RCR 3 +#define X86_SHL 4 +#define X86_SHR 5 +#define X86_SAR 7 +/* One-operand operation selectors */ +#define X86_NOT 2 +#define X86_NEG 3 +#define X86_MUL 4 +#define X86_IMUL 5 +#define X86_DIV 6 +#define X86_IDIV 7 + +#define FOR_EACH_CC(M) \ + M(o, O, 0x0) \ + M(no, NO, 0x1) \ + M(nae, NAE, 0x2) \ + M(b,
B, 0x2) \ + M(c, C, 0x2) \ + M(ae, AE, 0x3) \ + M(nb, NB, 0x3) \ + M(nc, NC, 0x3) \ + M(e, E, 0x4) \ + M(z, Z, 0x4) \ + M(ne, NE, 0x5) \ + M(nz, NZ, 0x5) \ + M(be, BE, 0x6) \ + M(na, NA, 0x6) \ + M(a, A, 0x7) \ + M(nbe, NBE, 0x7) \ + M(s, S, 0x8) \ + M(ns, NS, 0x9) \ + M(p, P, 0xa) \ + M(pe, PE, 0xa) \ + M(np, NP, 0xb) \ + M(po, PO, 0xb) \ + M(l, L, 0xc) \ + M(nge, NGE, 0xc) \ + M(ge, GE, 0xd) \ + M(nl_, NL, 0xd) \ + M(le, LE, 0xe) \ + M(ng, NG, 0xe) \ + M(g, G, 0xf) \ + M(nle, NLE, 0xf) \ + /* EOL */ + +enum x86_cc +{ +#define DEFINE_ENUM(cc, CC, code) X86_CC_##CC = code, + FOR_EACH_CC(DEFINE_ENUM) +#undef DEFINE_ENUM +}; + +static inline void +mrm(jit_state_t *_jit, uint8_t md, uint8_t r, uint8_t m) +{ + emit_u8(_jit, (md<<6) | (r<<3) | m); +} + +static inline void +sib(jit_state_t *_jit, uint8_t sc, uint8_t i, uint8_t b) +{ + emit_u8(_jit, (sc<<6) | (i<<3) | b); +} + +static inline void +ic(jit_state_t *_jit, uint8_t c) +{ + emit_u8(_jit, c); +} + +static inline void +is(jit_state_t *_jit, uint16_t s) +{ + emit_u16(_jit, s); +} + +static inline void +ii(jit_state_t *_jit, uint32_t i) +{ + emit_u32(_jit, i); +} + +#if __X64 +static inline void +il(jit_state_t *_jit, uint64_t l) +{ + emit_u64(_jit, l); +} +#endif + +static void +rex(jit_state_t *_jit, int32_t l, int32_t w, + int32_t r, int32_t x, int32_t b) +{ +#if __X64 + int32_t v = 0x40 | (w << 3); + + if (r != _NOREG) + v |= (r & 8) >> 1; + if (x != _NOREG) + v |= (x & 8) >> 2; + if (b != _NOREG) + v |= (b & 8) >> 3; + if (l || v != 0x40) + ic(_jit, v); +#endif +} + +static void +rx(jit_state_t *_jit, int32_t rd, int32_t md, + int32_t rb, int32_t ri, int32_t ms) +{ + if (ri == _NOREG) { + if (rb == _NOREG) { +#if __X32 + mrm(_jit, 0x00, r7(rd), 0x05); +#else + mrm(_jit, 0x00, r7(rd), 0x04); + sib(_jit, _SCL1, 0x04, 0x05); +#endif + ii(_jit, md); + } else if (r7(rb) == _RSP_REGNO) { + if (md == 0) { + mrm(_jit, 0x00, r7(rd), 0x04); + sib(_jit, ms, 0x04, 0x04); + } + else if ((int8_t)md == md) { + mrm(_jit, 
0x01, r7(rd), 0x04); + sib(_jit, ms, 0x04, 0x04); + ic(_jit, md); + } else { + mrm(_jit, 0x02, r7(rd), 0x04); + sib(_jit, ms, 0x04, 0x04); + ii(_jit, md); + } + } else { + if (md == 0 && r7(rb) != _RBP_REGNO) + mrm(_jit, 0x00, r7(rd), r7(rb)); + else if ((int8_t)md == md) { + mrm(_jit, 0x01, r7(rd), r7(rb)); + ic(_jit, md); + } else { + mrm(_jit, 0x02, r7(rd), r7(rb)); + ii(_jit, md); + } + } + } + else if (rb == _NOREG) { + mrm(_jit, 0x00, r7(rd), 0x04); + sib(_jit, ms, r7(ri), 0x05); + ii(_jit, md); + } + else if (r8(ri) != _RSP_REGNO) { + if (md == 0 && r7(rb) != _RBP_REGNO) { + mrm(_jit, 0x00, r7(rd), 0x04); + sib(_jit, ms, r7(ri), r7(rb)); + } else if ((int8_t)md == md) { + mrm(_jit, 0x01, r7(rd), 0x04); + sib(_jit, ms, r7(ri), r7(rb)); + ic(_jit, md); + } else { + /* mod=10 announces a 32-bit displacement, so all four bytes + * must be emitted, as the other mod=10 paths above do. */ + mrm(_jit, 0x02, r7(rd), 0x04); + sib(_jit, ms, r7(ri), r7(rb)); + ii(_jit, md); + } + } else { + fprintf(stderr, "illegal index register"); + abort(); + } +} + +/* Emit PUSH r0 and add one word to the tracked frame size. */ +static void +pushr(jit_state_t *_jit, int32_t r0) +{ + _jit->frame_size += __WORDSIZE / 8; + rex(_jit, 0, WIDE, 0, 0, r0); + ic(_jit, 0x50 | r7(r0)); +} + +/* Emit POP r0 and subtract one word from the tracked frame size. */ +static void +popr(jit_state_t *_jit, int32_t r0) +{ + _jit->frame_size -= __WORDSIZE / 8; + rex(_jit, 0, WIDE, 0, 0, r0); + ic(_jit, 0x58 | r7(r0)); +} + +/* Emit `count` bytes of NOP padding; a count outside 0..9 aborts. */ +static void +nop(jit_state_t *_jit, int32_t count) +{ + switch (count) { + case 0: + break; + case 1: /* NOP */ + ic(_jit, 0x90); + break; + case 2: /* 66 NOP */ + ic(_jit, 0x66); ic(_jit, 0x90); + break; + case 3: /* NOP DWORD ptr [EAX] */ + ic(_jit, 0x0f); ic(_jit, 0x1f); ic(_jit, 0x00); + break; + case 4: /* NOP DWORD ptr [EAX + 00H] */ + ic(_jit, 0x0f); ic(_jit, 0x1f); ic(_jit, 0x40); ic(_jit, 0x00); + break; + case 5: /* NOP DWORD ptr [EAX + EAX*1 + 00H] */ + ic(_jit, 0x0f); ic(_jit, 0x1f); ic(_jit, 0x44); ic(_jit, 0x00); + ic(_jit, 0x00); + break; + case 6: /* 66 NOP DWORD ptr [EAX + EAX*1 + 00H] */ + ic(_jit, 0x66); ic(_jit, 0x0f); ic(_jit, 0x1f); ic(_jit, 0x44); + ic(_jit, 0x00); ic(_jit, 0x00); + break; + case 7: /* NOP DWORD
ptr [EAX + 00000000H] */ + ic(_jit, 0x0f); ic(_jit, 0x1f); ic(_jit, 0x80); ii(_jit, 0x0000); + break; + case 8: /* NOP DWORD ptr [EAX + EAX*1 + 00000000H] */ + ic(_jit, 0x0f); ic(_jit, 0x1f); ic(_jit, 0x84); ic(_jit, 0x00); + ii(_jit, 0x0000); + break; + case 9: /* 66 NOP DWORD ptr [EAX + EAX*1 + 00000000H] */ + ic(_jit, 0x66); ic(_jit, 0x0f); ic(_jit, 0x1f); ic(_jit, 0x84); + ic(_jit, 0x00); ii(_jit, 0x0000); + break; + default: + abort(); + } +} + +static void +movr(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + if (r0 != r1) { + rex(_jit, 0, 1, r1, _NOREG, r0); + ic(_jit, 0x89); + ic(_jit, 0xc0 | (r1 << 3) | r7(r0)); + } +} + +static void +movcr(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + rex(_jit, 0, WIDE, r0, _NOREG, r1); + ic(_jit, 0x0f); + ic(_jit, 0xbe); + mrm(_jit, 0x03, r7(r0), r7(r1)); +} + +static void +movcr_u(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + rex(_jit, 0, WIDE, r0, _NOREG, r1); + ic(_jit, 0x0f); + ic(_jit, 0xb6); + mrm(_jit, 0x03, r7(r0), r7(r1)); +} + +static void +movsr(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + rex(_jit, 0, WIDE, r0, _NOREG, r1); + ic(_jit, 0x0f); + ic(_jit, 0xbf); + mrm(_jit, 0x03, r7(r0), r7(r1)); +} + +static void +movsr_u(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + rex(_jit, 0, WIDE, r0, _NOREG, r1); + ic(_jit, 0x0f); + ic(_jit, 0xb7); + mrm(_jit, 0x03, r7(r0), r7(r1)); +} + +#if __X64 +static void +movir(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + rex(_jit, 0, 1, r0, _NOREG, r1); + ic(_jit, 0x63); + mrm(_jit, 0x03, r7(r0), r7(r1)); +} + +static void +movir_u(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + rex(_jit, 0, 0, r1, _NOREG, r0); + ic(_jit, 0x89); + ic(_jit, 0xc0 | (r1 << 3) | r7(r0)); +} +#endif + +static jit_reloc_t +mov_addr(jit_state_t *_jit, int32_t r0) +{ + uint8_t *pc_start = _jit->pc.uc; + rex(_jit, 0, WIDE, _NOREG, _NOREG, r0); + ic(_jit, 0xb8 | r7(r0)); + ptrdiff_t inst_start = _jit->pc.uc - pc_start; + return emit_abs_reloc(_jit, inst_start); +} + +static void 
+imovi(jit_state_t *_jit, int32_t r0, jit_word_t i0) +{ +#if __X64 + if (fits_uint32_p(i0)) { + rex(_jit, 0, 0, _NOREG, _NOREG, r0); + ic(_jit, 0xb8 | r7(r0)); + ii(_jit, i0); + } else { + rex(_jit, 0, 1, _NOREG, _NOREG, r0); + ic(_jit, 0xb8 | r7(r0)); + il(_jit, i0); + } +#else + ic(_jit, 0xb8 | r7(r0)); + ii(_jit, i0); +#endif +} + +static void +alur(jit_state_t *_jit, int32_t code, int32_t r0, int32_t r1) +{ + rex(_jit, 0, WIDE, r1, _NOREG, r0); + ic(_jit, code | 0x01); + mrm(_jit, 0x03, r7(r1), r7(r0)); +} + +static inline void +icmpr(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + return alur(_jit, X86_CMP, r0, r1); +} +static inline void +iaddr(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + return alur(_jit, X86_ADD, r0, r1); +} +static inline void +iaddxr(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + return alur(_jit, X86_ADC, r0, r1); +} +static inline void +isubr(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + return alur(_jit, X86_SUB, r0, r1); +} +static inline void +isubxr(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + return alur(_jit, X86_SBB, r0, r1); +} +static inline void +iandr(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + return alur(_jit, X86_AND, r0, r1); +} +static inline void +iorr(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + return alur(_jit, X86_OR, r0, r1); +} +static inline void +ixorr(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + return alur(_jit, X86_XOR, r0, r1); +} + +static void +movi(jit_state_t *_jit, int32_t r0, jit_word_t i0) +{ + if (i0) + imovi(_jit, r0, i0); + else + ixorr(_jit, r0, r0); +} + +static void +alui(jit_state_t *_jit, int32_t code, int32_t r0, jit_word_t i0) +{ + if (can_sign_extend_int_p(i0)) { + rex(_jit, 0, WIDE, _NOREG, _NOREG, r0); + if ((int8_t)i0 == i0) { + ic(_jit, 0x83); + ic(_jit, 0xc0 | code | r7(r0)); + ic(_jit, i0); + } else { + if (r0 == _RAX_REGNO) { + ic(_jit, code | 0x05); + } else { + ic(_jit, 0x81); + ic(_jit, 0xc0 | code | r7(r0)); + } + ii(_jit, i0); + } + } else { + jit_gpr_t reg = 
get_temp_gpr(_jit); + movi(_jit, jit_gpr_regno(reg), i0); + alur(_jit, code, r0, jit_gpr_regno(reg)); + unget_temp_gpr(_jit); + } +} + +static inline void +icmpi(jit_state_t *_jit, int32_t r0, jit_word_t i0) +{ + return alui(_jit, X86_CMP, r0, i0); +} +static inline void +iaddi(jit_state_t *_jit, int32_t r0, jit_word_t i0) +{ + return alui(_jit, X86_ADD, r0, i0); +} +static inline void +iaddxi(jit_state_t *_jit, int32_t r0, jit_word_t i0) +{ + return alui(_jit, X86_ADC, r0, i0); +} +static inline void +isubi(jit_state_t *_jit, int32_t r0, jit_word_t i0) +{ + return alui(_jit, X86_SUB, r0, i0); +} +static inline void +isubxi(jit_state_t *_jit, int32_t r0, jit_word_t i0) +{ + return alui(_jit, X86_SBB, r0, i0); +} +static inline void +iandi(jit_state_t *_jit, int32_t r0, jit_word_t i0) +{ + return alui(_jit, X86_AND, r0, i0); +} +static inline void +iori(jit_state_t *_jit, int32_t r0, jit_word_t i0) +{ + return alui(_jit, X86_OR, r0, i0); +} +static inline void +ixori(jit_state_t *_jit, int32_t r0, jit_word_t i0) +{ + return alui(_jit, X86_XOR, r0, i0); +} + +static void +unr(jit_state_t *_jit, int32_t code, int32_t r0) +{ + rex(_jit, 0, WIDE, _NOREG, _NOREG, r0); + ic(_jit, 0xf7); + mrm(_jit, 0x03, code, r7(r0)); +} + +static inline void +umulr(jit_state_t *_jit, int32_t r0) +{ + return unr(_jit, X86_IMUL, r0); +} +static inline void +umulr_u(jit_state_t *_jit, int32_t r0) +{ + return unr(_jit, X86_MUL, r0); +} +static inline void +idivr(jit_state_t *_jit, int32_t r0) +{ + return unr(_jit, X86_IDIV, r0); +} +static inline void +idivr_u(jit_state_t *_jit, int32_t r0) +{ + return unr(_jit, X86_DIV, r0); +} +static inline void +inegr(jit_state_t *_jit, int32_t r0) +{ + return unr(_jit, X86_NEG, r0); +} +static inline void +icomr(jit_state_t *_jit, int32_t r0) +{ + return unr(_jit, X86_NOT, r0); +} + +#if USE_INC_DEC +static void +incr(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + movr(_jit, r0, r1); +# if __X64 + rex(_jit, 0, WIDE, _NOREG, _NOREG, r0); + ic(_jit, 
0xff); + ic(_jit, 0xc0 | r7(r0)); +# else + ic(_jit, 0x40 | r7(r0)); +# endif +} + +static void +decr(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + movr(_jit, r0, r1); +# if __X64 + rex(_jit, 0, WIDE, _NOREG, _NOREG, r0); + ic(_jit, 0xff); + ic(_jit, 0xc8 | r7(r0)); +# else + ic(_jit, 0x48 | r7(r0)); +# endif +} +#endif + +static void +lea(jit_state_t *_jit, int32_t md, int32_t rb, + int32_t ri, int32_t ms, int32_t rd) +{ + rex(_jit, 0, WIDE, rd, ri, rb); + ic(_jit, 0x8d); + rx(_jit, rd, md, rb, ri, ms); +} + +static void +xchgr(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + rex(_jit, 0, WIDE, r1, _NOREG, r0); + ic(_jit, 0x87); + mrm(_jit, 0x03, r7(r1), r7(r0)); +} + +static void +xchgrm(jit_state_t *_jit, int32_t val_and_dst, int32_t loc) +{ + rex(_jit, 0, WIDE, val_and_dst, _NOREG, loc); + ic(_jit, 0x87); + rx(_jit, val_and_dst, 0, loc, _NOREG, _SCL1); +} + +static void +lock(jit_state_t *_jit) +{ + ic(_jit, 0xf0); +} + +static void +cmpxchgmr(jit_state_t *_jit, int32_t loc, int32_t desired) +{ + lock(_jit); + rex(_jit, 0, WIDE, desired, _NOREG, loc); + ic(_jit, 0x0f); + ic(_jit, 0xb1); + rx(_jit, desired, 0, loc, _NOREG, _SCL1); +} + +static void +testr(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + rex(_jit, 0, WIDE, r1, _NOREG, r0); + ic(_jit, 0x85); + mrm(_jit, 0x03, r7(r1), r7(r0)); +} + +static void +testi(jit_state_t *_jit, int32_t r0, jit_word_t i0) +{ + rex(_jit, 0, WIDE, _NOREG, _NOREG, r0); + if (r0 == _RAX_REGNO) { + ic(_jit, 0xa9); + } else { + ic(_jit, 0xf7); + mrm(_jit, 0x03, 0x00, r7(r0)); + } + ii(_jit, i0); +} + +static void +negr(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + if (r0 == r1) { + inegr(_jit, r0); + } else { + ixorr(_jit, r0, r0); + isubr(_jit, r0, r1); + } +} + +static void +addr(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2) +{ + if (r0 == r1) + iaddr(_jit, r0, r2); + else if (r0 == r2) + iaddr(_jit, r0, r1); + else + lea(_jit, 0, r1, r2, _SCL1, r0); +} + +static void +addi(jit_state_t *_jit, int32_t r0, int32_t r1, 
jit_word_t i0) +{ + if (i0 == 0) + movr(_jit, r0, r1); +#if USE_INC_DEC + else if (i0 == 1) + incr(_jit, r0, r1); + else if (i0 == -1) + decr(_jit, r0, r1); +#endif + else if (can_sign_extend_int_p(i0)) { + if (r0 == r1) + iaddi(_jit, r0, i0); + else + lea(_jit, i0, r1, _NOREG, _SCL1, r0); + } + else if (r0 != r1) { + movi(_jit, r0, i0); + iaddr(_jit, r0, r1); + } else { + jit_gpr_t reg = get_temp_gpr(_jit); + movi(_jit, jit_gpr_regno(reg), i0); + iaddr(_jit, r0, jit_gpr_regno(reg)); + unget_temp_gpr(_jit); + } +} + +static void +addcr(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2) +{ + if (r0 == r2) { + iaddr(_jit, r0, r1); + } else { + movr(_jit, r0, r1); + iaddr(_jit, r0, r2); + } +} + +static void +addci(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0) +{ + if (can_sign_extend_int_p(i0)) { + movr(_jit, r0, r1); + iaddi(_jit, r0, i0); + } + else if (r0 == r1) { + jit_gpr_t reg = get_temp_gpr(_jit); + movi(_jit, jit_gpr_regno(reg), i0); + iaddr(_jit, r0, jit_gpr_regno(reg)); + unget_temp_gpr(_jit); + } else { + movi(_jit, r0, i0); + iaddr(_jit, r0, r1); + } +} + +static void +addxr(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2) +{ + if (r0 == r2) { + iaddxr(_jit, r0, r1); + } else { + movr(_jit, r0, r1); + iaddxr(_jit, r0, r2); + } +} + +static void +addxi(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0) +{ + if (can_sign_extend_int_p(i0)) { + movr(_jit, r0, r1); + iaddxi(_jit, r0, i0); + } + else if (r0 == r1) { + jit_gpr_t reg = get_temp_gpr(_jit); + movi(_jit, jit_gpr_regno(reg), i0); + iaddxr(_jit, r0, jit_gpr_regno(reg)); + unget_temp_gpr(_jit); + } else { + movi(_jit, r0, i0); + iaddxr(_jit, r0, r1); + } +} + +static void +subr(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2) +{ + if (r1 == r2) + ixorr(_jit, r0, r0); + else if (r0 == r2) { + isubr(_jit, r0, r1); + inegr(_jit, r0); + } else { + movr(_jit, r0, r1); + isubr(_jit, r0, r2); + } +} + +static void +subi(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t 
i0) +{ + if (i0 == 0) + movr(_jit, r0, r1); +#if USE_INC_DEC + else if (i0 == 1) + decr(_jit, r0, r1); + else if (i0 == -1) + incr(_jit, r0, r1); +#endif + else if (can_sign_extend_int_p(i0)) { + if (r0 == r1) + isubi(_jit, r0, i0); + else + lea(_jit, -i0, r1, _NOREG, _SCL1, r0); + } + else if (r0 != r1) { + movi(_jit, r0, -i0); + iaddr(_jit, r0, r1); + } else { + jit_gpr_t reg = get_temp_gpr(_jit); + movi(_jit, jit_gpr_regno(reg), i0); + isubr(_jit, r0, jit_gpr_regno(reg)); + unget_temp_gpr(_jit); + } +} + +static void +subcr(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2) +{ + if (r0 == r2 && r0 != r1) { + jit_gpr_t reg = get_temp_gpr(_jit); + movr(_jit, jit_gpr_regno(reg), r0); + movr(_jit, r0, r1); + isubr(_jit, r0, jit_gpr_regno(reg)); + unget_temp_gpr(_jit); + } else { + movr(_jit, r0, r1); + isubr(_jit, r0, r2); + } +} + +static void +subci(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0) +{ + movr(_jit, r0, r1); + if (can_sign_extend_int_p(i0)) { + isubi(_jit, r0, i0); + } else { + jit_gpr_t reg = get_temp_gpr(_jit); + movi(_jit, jit_gpr_regno(reg), i0); + isubr(_jit, r0, jit_gpr_regno(reg)); + unget_temp_gpr(_jit); + } +} + +static void +subxr(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2) +{ + if (r0 == r2 && r0 != r1) { + jit_gpr_t reg = get_temp_gpr(_jit); + movr(_jit, jit_gpr_regno(reg), r0); + movr(_jit, r0, r1); + isubxr(_jit, r0, jit_gpr_regno(reg)); + unget_temp_gpr(_jit); + } else { + movr(_jit, r0, r1); + isubxr(_jit, r0, r2); + } +} + +static void +subxi(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0) +{ + movr(_jit, r0, r1); + if (can_sign_extend_int_p(i0)) { + isubxi(_jit, r0, i0); + } else { + jit_gpr_t reg = get_temp_gpr(_jit); + imovi(_jit, jit_gpr_regno(reg), i0); + isubxr(_jit, r0, jit_gpr_regno(reg)); + unget_temp_gpr(_jit); + } +} + +static void +irotshr(jit_state_t *_jit, int32_t code, int32_t r0) +{ + rex(_jit, 0, WIDE, _RCX_REGNO, _NOREG, r0); + ic(_jit, 0xd3); + mrm(_jit, 0x03, code, r7(r0)); +} + 
+static void +rotshr(jit_state_t *_jit, int32_t code, + int32_t r0, int32_t r1, int32_t r2) +{ + if (r0 == _RCX_REGNO) { + jit_gpr_t reg = get_temp_gpr(_jit); + movr(_jit, jit_gpr_regno(reg), r1); + if (r2 != _RCX_REGNO) + movr(_jit, _RCX_REGNO, r2); + irotshr(_jit, code, jit_gpr_regno(reg)); + movr(_jit, _RCX_REGNO, jit_gpr_regno(reg)); + unget_temp_gpr(_jit); + } else if (r2 != _RCX_REGNO) { + /* Already know that R0 isn't RCX. */ + pushr(_jit, _RCX_REGNO); + if (r1 == _RCX_REGNO) { + if (r0 == r2) + xchgr(_jit, r0, _RCX_REGNO); + else { + movr(_jit, r0, r1); + movr(_jit, _RCX_REGNO, r2); + } + } else { + movr(_jit, _RCX_REGNO, r2); + movr(_jit, r0, r1); + } + irotshr(_jit, code, r0); + popr(_jit, _RCX_REGNO); + } else { + movr(_jit, r0, r1); + irotshr(_jit, code, r0); + } +} + +static void +irotshi(jit_state_t *_jit, int32_t code, int32_t r0, jit_word_t i0) +{ + rex(_jit, 0, WIDE, _NOREG, _NOREG, r0); + if (i0 == 1) { + ic(_jit, 0xd1); + mrm(_jit, 0x03, code, r7(r0)); + } else { + ic(_jit, 0xc1); + mrm(_jit, 0x03, code, r7(r0)); + ic(_jit, i0); + } +} + +static void +rotshi(jit_state_t *_jit, int32_t code, + int32_t r0, int32_t r1, jit_word_t i0) +{ + movr(_jit, r0, r1); + if (i0) + irotshi(_jit, code, r0, i0); +} + +static void +lshi(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0) +{ + if (i0 == 0) + movr(_jit, r0, r1); + else if (i0 <= 3) + lea(_jit, 0, _NOREG, r1, i0 == 1 ? _SCL2 : i0 == 2 ? 
_SCL4 : _SCL8, r0); + else + rotshi(_jit, X86_SHL, r0, r1, i0); +} + +static void +lshr(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2) +{ + return rotshr(_jit, X86_SHL, r0, r1, r2); +} + +static void +rshr(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2) +{ + return rotshr(_jit, X86_SAR, r0, r1, r2); +} + +static void +rshi(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t i0) +{ + return rotshi(_jit, X86_SAR, r0, r1, i0); +} + +static void +rshr_u(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2) +{ + return rotshr(_jit, X86_SHR, r0, r1, r2); +} + +static void +rshi_u(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t i0) +{ + return rotshi(_jit, X86_SHR, r0, r1, i0); +} + +/* Emit the two-operand IMUL (0F AF /r): r0 = r0 * r1. */ +static void +imulr(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + rex(_jit, 0, WIDE, r0, _NOREG, r1); + ic(_jit, 0x0f); + ic(_jit, 0xaf); + mrm(_jit, 0x03, r7(r0), r7(r1)); +} + +/* Emit r0 = r1 * i0. Immediates that can be sign-extended use the + * three-operand IMUL forms (imm8 or imm32); anything larger is first + * loaded into a register and multiplied with imulr(). */ +static void +imuli(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0) +{ + if (can_sign_extend_int_p(i0)) { + rex(_jit, 0, WIDE, r0, _NOREG, r1); + if ((int8_t)i0 == i0) { + ic(_jit, 0x6b); + mrm(_jit, 0x03, r7(r0), r7(r1)); + ic(_jit, i0); + } else { + ic(_jit, 0x69); + mrm(_jit, 0x03, r7(r0), r7(r1)); + ii(_jit, i0); + } + } else if (r0 != r1) { + /* r0 is free to hold the immediate: no temporary is needed and + * the source operand r1 is actually multiplied. */ + movi(_jit, r0, i0); + imulr(_jit, r0, r1); + } else { + jit_gpr_t reg = get_temp_gpr(_jit); + movi(_jit, jit_gpr_regno(reg), i0); + imulr(_jit, r0, jit_gpr_regno(reg)); + unget_temp_gpr(_jit); + } +} + +static void +mulr(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2) +{ + if (r0 == r1) + imulr(_jit, r0, r2); + else if (r0 == r2) { + imulr(_jit, r0, r1); + } else { + movr(_jit, r0, r1); + imulr(_jit, r0, r2); + } +} + +/* ffs() for a jit_word_t: when the word is wider than int, the low half + * is scanned first and then the bits above bit 31. */ +static int +ffsw(jit_word_t i) +{ + if (sizeof(int) == sizeof(i)) + return ffs(i); + int bit = ffs((int)i); + if (bit == 0) { + bit = ffs((int)((uint64_t)i >> 32)); + if (bit) + bit += 32; + } + return bit; +} + +static void +muli(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0) +{ + switch (i0) { + case 0: + ixorr(_jit, r0, r0); + break; + case 1: + movr(_jit, r0, r1); + break; +
case -1: + negr(_jit, r0, r1); + break; + case 2: + lea(_jit, 0, _NOREG, r1, _SCL2, r0); + break; + case 4: + lea(_jit, 0, _NOREG, r1, _SCL4, r0); + break; + case 8: + lea(_jit, 0, _NOREG, r1, _SCL8, r0); + break; + default: + if (i0 > 0 && !(i0 & (i0 - 1))) + lshi(_jit, r0, r1, ffsw(i0) - 1); + else if (can_sign_extend_int_p(i0)) + imuli(_jit, r0, r1, i0); + else if (r0 != r1) { + movi(_jit, r0, i0); + imulr(_jit, r0, r1); + } + else + imuli(_jit, r0, r0, i0); + break; + } +} + +static void +iqmulr(jit_state_t *_jit, int32_t r0, int32_t r1, + int32_t r2, int32_t r3, jit_bool_t sign) +{ + if (r0 != _RAX_REGNO && r1 != _RAX_REGNO) + pushr(_jit, _RAX_REGNO); + if (r0 != _RDX_REGNO && r1 != _RDX_REGNO) + pushr(_jit, _RDX_REGNO); + + int32_t mul; + if (r3 == _RAX_REGNO) { + mul = r2; + } else { + mul = r3; + movr(_jit, _RAX_REGNO, r2); + } + if (sign) + umulr(_jit, mul); + else + umulr_u(_jit, mul); + + if (r0 == _RDX_REGNO && r1 == _RAX_REGNO) { + xchgr(_jit, _RAX_REGNO, _RDX_REGNO); + } else { + if (r0 != _RDX_REGNO) + movr(_jit, r0, _RAX_REGNO); + movr(_jit, r1, _RDX_REGNO); + if (r0 == _RDX_REGNO) + movr(_jit, r0, _RAX_REGNO); + } + + if (r0 != _RDX_REGNO && r1 != _RDX_REGNO) + popr(_jit, _RDX_REGNO); + if (r0 != _RAX_REGNO && r1 != _RAX_REGNO) + popr(_jit, _RAX_REGNO); +} + +static void +qmulr(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2, int32_t r3) +{ + return iqmulr(_jit, r0, r1, r2, r3, 1); +} + +static void +qmulr_u(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2, int32_t r3) +{ + return iqmulr(_jit, r0, r1, r2, r3, 0); +} + +static void +iqmuli(jit_state_t *_jit, int32_t r0, int32_t r1, + int32_t r2, jit_word_t i0, jit_bool_t sign) +{ + if (i0 == 0) { + ixorr(_jit, r0, r0); + ixorr(_jit, r1, r1); + } else { + jit_gpr_t reg = get_temp_gpr(_jit); + movi(_jit, jit_gpr_regno(reg), i0); + if (sign) + qmulr(_jit, r0, r1, r2, jit_gpr_regno(reg)); + else + qmulr_u(_jit, r0, r1, r2, jit_gpr_regno(reg)); + unget_temp_gpr(_jit); + } +} + +static void 
+qmuli(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2, jit_word_t i0) +{ + return iqmuli(_jit, r0, r1, r2, i0, 1); +} + +static void +qmuli_u(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2, jit_word_t i0) +{ + return iqmuli(_jit, r0, r1, r2, i0, 0); +} + +static void +sign_extend_rdx_rax(jit_state_t *_jit) +{ + rex(_jit, 0, WIDE, 0, 0, 0); + ic(_jit, 0x99); +} + +static void +divremr(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2, + jit_bool_t sign, jit_bool_t divide) +{ + if (r0 != _RAX_REGNO) + pushr(_jit, _RAX_REGNO); + if (r0 != _RDX_REGNO) + pushr(_jit, _RDX_REGNO); + + int tmp_divisor = 0; + if (r2 == _RAX_REGNO || r2 == _RDX_REGNO) { + jit_gpr_t tmp = get_temp_gpr(_jit); + movr(_jit, jit_gpr_regno(tmp), r2); + r2 = jit_gpr_regno(tmp); + tmp_divisor = 1; + } + + movr(_jit, _RAX_REGNO, r1); + + if (sign) { + sign_extend_rdx_rax(_jit); + idivr(_jit, r2); + } else { + ixorr(_jit, _RDX_REGNO, _RDX_REGNO); + idivr_u(_jit, r2); + } + + if (divide) + movr(_jit, r0, _RAX_REGNO); + else + movr(_jit, r0, _RDX_REGNO); + + if (tmp_divisor) + unget_temp_gpr(_jit); + + if (r0 != _RDX_REGNO) + popr(_jit, _RDX_REGNO); + if (r0 != _RAX_REGNO) + popr(_jit, _RAX_REGNO); +} + +static void +divremi(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0, + jit_bool_t sign, jit_bool_t divide) +{ + jit_gpr_t tmp = get_temp_gpr(_jit); + movi(_jit, jit_gpr_regno(tmp), i0); + + divremr(_jit, r0, r1, jit_gpr_regno(tmp), sign, divide); + unget_temp_gpr(_jit); +} + +static void +divr(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2) +{ + return divremr(_jit, r0, r1, r2, 1, 1); +} + +static void +divi(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0) +{ + return divremi(_jit, r0, r1, i0, 1, 1); +} + +static void +divr_u(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2) +{ + return divremr(_jit, r0, r1, r2, 0, 1); +} + +static void +divi_u(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0) +{ + return divremi(_jit, r0, r1, i0, 0, 1); +} + + 
+static void +remr(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2) +{ + return divremr(_jit, r0, r1, r2, 1, 0); +} + +static void +remi(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0) +{ + return divremi(_jit, r0, r1, i0, 1, 0); +} + +static void +remr_u(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2) +{ + return divremr(_jit, r0, r1, r2, 0, 0); +} + +static void +remi_u(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0) +{ + return divremi(_jit, r0, r1, i0, 0, 0); +} + +static void +iqdivr(jit_state_t *_jit, int32_t r0, int32_t r1, + int32_t r2, int32_t r3, jit_bool_t sign) +{ + if (r0 != _RAX_REGNO && r1 != _RAX_REGNO) + pushr(_jit, _RAX_REGNO); + if (r0 != _RDX_REGNO && r1 != _RDX_REGNO) + pushr(_jit, _RDX_REGNO); + + int tmp_divisor = 0; + if (r3 == _RAX_REGNO || r3 == _RDX_REGNO) { + jit_gpr_t tmp = get_temp_gpr(_jit); + movr(_jit, jit_gpr_regno(tmp), r3); + r3 = jit_gpr_regno(tmp); + tmp_divisor = 1; + } + + movr(_jit, _RAX_REGNO, r2); + + if (sign) { + sign_extend_rdx_rax(_jit); + idivr(_jit, r3); + } else { + ixorr(_jit, _RDX_REGNO, _RDX_REGNO); + idivr_u(_jit, r3); + } + + if (r0 == _RDX_REGNO && r1 == _RAX_REGNO) { + xchgr(_jit, _RAX_REGNO, _RDX_REGNO); + } else { + if (r0 != _RDX_REGNO) + movr(_jit, r0, _RAX_REGNO); + movr(_jit, r1, _RDX_REGNO); + if (r0 == _RDX_REGNO) + movr(_jit, r0, _RAX_REGNO); + } + + if (tmp_divisor) + unget_temp_gpr(_jit); + + if (r0 != _RDX_REGNO && r1 != _RDX_REGNO) + popr(_jit, _RDX_REGNO); + if (r0 != _RAX_REGNO && r1 != _RAX_REGNO) + popr(_jit, _RAX_REGNO); +} + +static void +qdivr(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2, int32_t r3) +{ + return iqdivr(_jit, r0, r1, r2, r3, 1); +} + +static void +qdivr_u(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2, int32_t r3) +{ + return iqdivr(_jit, r0, r1, r2, r3, 0); +} + +static void +iqdivi(jit_state_t *_jit, int32_t r0, int32_t r1, + int32_t r2, jit_word_t i0, jit_bool_t sign) +{ + jit_gpr_t reg = get_temp_gpr(_jit); + movi(_jit, 
jit_gpr_regno(reg), i0); + if (sign) + qdivr(_jit, r0, r1, r2, jit_gpr_regno(reg)); + else + qdivr_u(_jit, r0, r1, r2, jit_gpr_regno(reg)); + unget_temp_gpr(_jit); +} + +static void +qdivi(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2, jit_word_t i0) +{ + return iqdivi(_jit, r0, r1, r2, i0, 1); +} + +static void +qdivi_u(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2, jit_word_t i0) +{ + return iqdivi(_jit, r0, r1, r2, i0, 0); +} + +static void +comr(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + movr(_jit, r0, r1); + icomr(_jit, r0); +} + +static void +andr(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2) +{ + if (r1 == r2) + movr(_jit, r0, r1); + else if (r0 == r1) + iandr(_jit, r0, r2); + else if (r0 == r2) { + iandr(_jit, r0, r1); + } else { + movr(_jit, r0, r1); + iandr(_jit, r0, r2); + } +} + +static void +andi(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0) +{ + + if (i0 == 0) + ixorr(_jit, r0, r0); + else if (i0 == -1) + movr(_jit, r0, r1); + else if (r0 == r1) { + if (can_sign_extend_int_p(i0)) { + iandi(_jit, r0, i0); + } else { + jit_gpr_t reg = get_temp_gpr(_jit); + movi(_jit, jit_gpr_regno(reg), i0); + iandr(_jit, r0, jit_gpr_regno(reg)); + unget_temp_gpr(_jit); + } + } else { + movi(_jit, r0, i0); + iandr(_jit, r0, r1); + } +} + +static void +orr(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2) +{ + if (r1 == r2) { + movr(_jit, r0, r1); + } else if (r0 == r1) { + iorr(_jit, r0, r2); + } else if (r0 == r2) { + iorr(_jit, r0, r1); + } else { + movr(_jit, r0, r1); + iorr(_jit, r0, r2); + } +} + +static void +ori(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0) +{ + if (i0 == 0) + movr(_jit, r0, r1); + else if (i0 == -1) + movi(_jit, r0, -1); + else if (can_sign_extend_int_p(i0)) { + movr(_jit, r0, r1); + iori(_jit, r0, i0); + } + else if (r0 != r1) { + movi(_jit, r0, i0); + iorr(_jit, r0, r1); + } else { + jit_gpr_t reg = get_temp_gpr(_jit); + movi(_jit, jit_gpr_regno(reg), i0); + iorr(_jit, r0, 
jit_gpr_regno(reg)); + unget_temp_gpr(_jit); + } +} + +static void +xorr(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2) +{ + if (r1 == r2) + ixorr(_jit, r0, r0); + else if (r0 == r1) + ixorr(_jit, r0, r2); + else if (r0 == r2) { + ixorr(_jit, r0, r1); + } else { + movr(_jit, r0, r1); + ixorr(_jit, r0, r2); + } +} + +static void +xori(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0) +{ + if (i0 == 0) + movr(_jit, r0, r1); + else if (i0 == -1) + comr(_jit, r0, r1); + else if (can_sign_extend_int_p(i0)) { + movr(_jit, r0, r1); + ixori(_jit, r0, i0); + } + else if (r0 != r1) { + movi(_jit, r0, i0); + ixorr(_jit, r0, r1); + } else { + jit_gpr_t reg = get_temp_gpr(_jit); + movi(_jit, jit_gpr_regno(reg), i0); + ixorr(_jit, r0, jit_gpr_regno(reg)); + unget_temp_gpr(_jit); + } +} + +static void +extr_c(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + if (reg8_p(r1)) { + movcr(_jit, r0, r1); + } else { + jit_gpr_t reg = get_temp_gpr(_jit); + movr(_jit, jit_gpr_regno(reg), r1); + movcr(_jit, r0, jit_gpr_regno(reg)); + unget_temp_gpr(_jit); + } +} + +static void +extr_uc(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + if (reg8_p(r1)) { + movcr_u(_jit, r0, r1); + } else { + jit_gpr_t reg = get_temp_gpr(_jit); + movr(_jit, jit_gpr_regno(reg), r1); + movcr_u(_jit, r0, jit_gpr_regno(reg)); + unget_temp_gpr(_jit); + } +} + +static void +extr_s(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + return movsr(_jit, r0, r1); +} + +static void +extr_us(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + return movsr_u(_jit, r0, r1); +} + +#if __X64 +static void +extr_i(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + return movir(_jit, r0, r1); +} +static void +extr_ui(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + return movir_u(_jit, r0, r1); +} +#endif + +static void +bswapr_us(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + extr_us(_jit, r0, r1); + ic(_jit, 0x66); + rex(_jit, 0, 0, _NOREG, _NOREG, r0); + ic(_jit, 0xc1); + mrm(_jit, 0x03, X86_ROR, r7(r0)); + ic(_jit, 8); +} 
+ +static void +bswapr_ui(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + movr(_jit, r0, r1); + rex(_jit, 0, 0, _NOREG, _NOREG, r0); + ic(_jit, 0x0f); + ic(_jit, 0xc8 | r7(r0)); +} + +#if __X64 +static void +bswapr_ul(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + movr(_jit, r0, r1); + rex(_jit, 0, 1, _NOREG, _NOREG, r0); + ic(_jit, 0x0f); + ic(_jit, 0xc8 | r7(r0)); +} +#endif + +static void +ldr_c(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + rex(_jit, 0, WIDE, r0, _NOREG, r1); + ic(_jit, 0x0f); + ic(_jit, 0xbe); + rx(_jit, r0, 0, r1, _NOREG, _SCL1); +} + +static void +ldi_c(jit_state_t *_jit, int32_t r0, jit_word_t i0) +{ + if (can_sign_extend_int_p(i0)) { + rex(_jit, 0, WIDE, r0, _NOREG, _NOREG); + ic(_jit, 0x0f); + ic(_jit, 0xbe); + rx(_jit, r0, i0, _NOREG, _NOREG, _SCL1); + } else { + jit_gpr_t reg = get_temp_gpr(_jit); + movi(_jit, jit_gpr_regno(reg), i0); + ldr_c(_jit, r0, jit_gpr_regno(reg)); + unget_temp_gpr(_jit); + } +} + +static void +ldr_uc(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + rex(_jit, 0, WIDE, r0, _NOREG, r1); + ic(_jit, 0x0f); + ic(_jit, 0xb6); + rx(_jit, r0, 0, r1, _NOREG, _SCL1); +} + +static void +ldi_uc(jit_state_t *_jit, int32_t r0, jit_word_t i0) +{ + if (can_sign_extend_int_p(i0)) { + rex(_jit, 0, WIDE, r0, _NOREG, _NOREG); + ic(_jit, 0x0f); + ic(_jit, 0xb6); + rx(_jit, r0, i0, _NOREG, _NOREG, _SCL1); + } else { + jit_gpr_t reg = get_temp_gpr(_jit); + movi(_jit, jit_gpr_regno(reg), i0); + ldr_uc(_jit, r0, jit_gpr_regno(reg)); + unget_temp_gpr(_jit); + } +} + +static void +ldr_s(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + rex(_jit, 0, WIDE, r0, _NOREG, r1); + ic(_jit, 0x0f); + ic(_jit, 0xbf); + rx(_jit, r0, 0, r1, _NOREG, _SCL1); +} + +static void +ldi_s(jit_state_t *_jit, int32_t r0, jit_word_t i0) +{ + if (can_sign_extend_int_p(i0)) { + rex(_jit, 0, WIDE, r0, _NOREG, _NOREG); + ic(_jit, 0x0f); + ic(_jit, 0xbf); + rx(_jit, r0, i0, _NOREG, _NOREG, _SCL1); + } else { + jit_gpr_t reg = get_temp_gpr(_jit); + movi(_jit, 
jit_gpr_regno(reg), i0); + ldr_s(_jit, r0, jit_gpr_regno(reg)); + unget_temp_gpr(_jit); + } +} + +static void +ldr_us(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + rex(_jit, 0, WIDE, r0, _NOREG, r1); + ic(_jit, 0x0f); + ic(_jit, 0xb7); + rx(_jit, r0, 0, r1, _NOREG, _SCL1); +} + +static void +ldi_us(jit_state_t *_jit, int32_t r0, jit_word_t i0) +{ + if (can_sign_extend_int_p(i0)) { + rex(_jit, 0, WIDE, r0, _NOREG, _NOREG); + ic(_jit, 0x0f); + ic(_jit, 0xb7); + rx(_jit, r0, i0, _NOREG, _NOREG, _SCL1); + } else { + jit_gpr_t reg = get_temp_gpr(_jit); + movi(_jit, jit_gpr_regno(reg), i0); + ldr_us(_jit, r0, jit_gpr_regno(reg)); + unget_temp_gpr(_jit); + } +} + +static void +ldr_i(jit_state_t *_jit, int32_t r0, int32_t r1) +{ +#if __X64 + rex(_jit, 0, WIDE, r0, _NOREG, r1); + ic(_jit, 0x63); +#else + ic(_jit, 0x8b); +#endif + rx(_jit, r0, 0, r1, _NOREG, _SCL1); +} + +static void +ldi_i(jit_state_t *_jit, int32_t r0, jit_word_t i0) +{ + if (can_sign_extend_int_p(i0)) { +#if __X64 + rex(_jit, 0, WIDE, r0, _NOREG, _NOREG); + ic(_jit, 0x63); +#else + ic(_jit, 0x8b); +#endif + rx(_jit, r0, i0, _NOREG, _NOREG, _SCL1); + } else { + jit_gpr_t reg = get_temp_gpr(_jit); + movi(_jit, jit_gpr_regno(reg), i0); + ldr_i(_jit, r0, jit_gpr_regno(reg)); + unget_temp_gpr(_jit); + } +} + +#if __X64 +static void +ldr_ui(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + rex(_jit, 0, 0, r0, _NOREG, r1); + ic(_jit, 0x63); + rx(_jit, r0, 0, r1, _NOREG, _SCL1); +} + +static void +ldi_ui(jit_state_t *_jit, int32_t r0, jit_word_t i0) +{ + if (can_sign_extend_int_p(i0)) { + rex(_jit, 0, 0, r0, _NOREG, _NOREG); + ic(_jit, 0x63); + rx(_jit, r0, i0, _NOREG, _NOREG, _SCL1); + } else { + jit_gpr_t reg = get_temp_gpr(_jit); + movi(_jit, jit_gpr_regno(reg), i0); + ldr_ui(_jit, r0, jit_gpr_regno(reg)); + unget_temp_gpr(_jit); + } +} + +static void +ldr_l(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + rex(_jit, 0, 1, r0, _NOREG, r1); + ic(_jit, 0x8b); + rx(_jit, r0, 0, r1, _NOREG, _SCL1); +} + +static void 
+ldi_l(jit_state_t *_jit, int32_t r0, jit_word_t i0) +{ + if (can_sign_extend_int_p(i0)) { + rex(_jit, 0, 1, r0, _NOREG, _NOREG); + ic(_jit, 0x8b); + rx(_jit, r0, i0, _NOREG, _NOREG, _SCL1); + } else { + jit_gpr_t reg = get_temp_gpr(_jit); + movi(_jit, jit_gpr_regno(reg), i0); + ldr_l(_jit, r0, jit_gpr_regno(reg)); + unget_temp_gpr(_jit); + } +} +#endif + +static void +ldxr_c(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2) +{ + rex(_jit, 0, WIDE, r0, r1, r2); + ic(_jit, 0x0f); + ic(_jit, 0xbe); + rx(_jit, r0, 0, r2, r1, _SCL1); +} + +static void +ldxi_c(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0) +{ + if (can_sign_extend_int_p(i0)) { + rex(_jit, 0, WIDE, r0, _NOREG, r1); + ic(_jit, 0x0f); + ic(_jit, 0xbe); + rx(_jit, r0, i0, r1, _NOREG, _SCL1); + } else { + jit_gpr_t reg = get_temp_gpr(_jit); + movi(_jit, jit_gpr_regno(reg), i0); + ldxr_c(_jit, r0, r1, jit_gpr_regno(reg)); + unget_temp_gpr(_jit); + } +} + +static void +ldxr_uc(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2) +{ + rex(_jit, 0, WIDE, r0, r1, r2); + ic(_jit, 0x0f); + ic(_jit, 0xb6); + rx(_jit, r0, 0, r2, r1, _SCL1); +} + +static void +ldxi_uc(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0) +{ + if (can_sign_extend_int_p(i0)) { + rex(_jit, 0, WIDE, r0, _NOREG, r1); + ic(_jit, 0x0f); + ic(_jit, 0xb6); + rx(_jit, r0, i0, r1, _NOREG, _SCL1); + } else { + jit_gpr_t reg = get_temp_gpr(_jit); + movi(_jit, jit_gpr_regno(reg), i0); + ldxr_uc(_jit, r0, r1, jit_gpr_regno(reg)); + unget_temp_gpr(_jit); + } +} + +static void +ldxr_s(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2) +{ + rex(_jit, 0, WIDE, r0, r1, r2); + ic(_jit, 0x0f); + ic(_jit, 0xbf); + rx(_jit, r0, 0, r2, r1, _SCL1); +} + +static void +ldxi_s(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0) +{ + if (can_sign_extend_int_p(i0)) { + rex(_jit, 0, WIDE, r0, _NOREG, r1); + ic(_jit, 0x0f); + ic(_jit, 0xbf); + rx(_jit, r0, i0, r1, _NOREG, _SCL1); + } else { + jit_gpr_t reg = get_temp_gpr(_jit); + 
movi(_jit, jit_gpr_regno(reg), i0); + ldxr_s(_jit, r0, r1, jit_gpr_regno(reg)); + unget_temp_gpr(_jit); + } +} + +static void +ldxr_us(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2) +{ + rex(_jit, 0, WIDE, r0, r1, r2); + ic(_jit, 0x0f); + ic(_jit, 0xb7); + rx(_jit, r0, 0, r2, r1, _SCL1); +} + +static void +ldxi_us(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0) +{ + if (can_sign_extend_int_p(i0)) { + rex(_jit, 0, WIDE, r0, _NOREG, r1); + ic(_jit, 0x0f); + ic(_jit, 0xb7); + rx(_jit, r0, i0, r1, _NOREG, _SCL1); + } else { + jit_gpr_t reg = get_temp_gpr(_jit); + movi(_jit, jit_gpr_regno(reg), i0); + ldxr_us(_jit, r0, r1, jit_gpr_regno(reg)); + unget_temp_gpr(_jit); + } +} + +static void +ldxr_i(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2) +{ +#if __X64 + rex(_jit, 0, WIDE, r0, r1, r2); + ic(_jit, 0x63); +#else + ic(_jit, 0x8b); +#endif + rx(_jit, r0, 0, r2, r1, _SCL1); +} + +static void +ldxi_i(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0) +{ + if (can_sign_extend_int_p(i0)) { +#if __X64 + rex(_jit, 0, WIDE, r0, _NOREG, r1); + ic(_jit, 0x63); +#else + ic(_jit, 0x8b); +#endif + rx(_jit, r0, i0, r1, _NOREG, _SCL1); + } else { + jit_gpr_t reg = get_temp_gpr(_jit); + movi(_jit, jit_gpr_regno(reg), i0); + ldxr_i(_jit, r0, r1, jit_gpr_regno(reg)); + unget_temp_gpr(_jit); + } +} + +#if __X64 +static void +ldxr_ui(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2) +{ + rex(_jit, 0, 0, r0, r1, r2); + ic(_jit, 0x8b); + rx(_jit, r0, 0, r2, r1, _SCL1); +} + +static void +ldxi_ui(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0) +{ + if (can_sign_extend_int_p(i0)) { + rex(_jit, 0, 0, r0, _NOREG, r1); + ic(_jit, 0x8b); + rx(_jit, r0, i0, r1, _NOREG, _SCL1); + } else { + jit_gpr_t reg = get_temp_gpr(_jit); + movi(_jit, jit_gpr_regno(reg), i0); + ldxr_ui(_jit, r0, r1, jit_gpr_regno(reg)); + unget_temp_gpr(_jit); + } +} + +static void +ldxr_l(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2) +{ + rex(_jit, 0, 1, r0, r1, r2); + 
ic(_jit, 0x8b); + rx(_jit, r0, 0, r2, r1, _SCL1); +} + +static void +ldxi_l(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0) +{ + if (can_sign_extend_int_p(i0)) { + rex(_jit, 0, 1, r0, _NOREG, r1); + ic(_jit, 0x8b); + rx(_jit, r0, i0, r1, _NOREG, _SCL1); + } else { + jit_gpr_t reg = get_temp_gpr(_jit); + movi(_jit, jit_gpr_regno(reg), i0); + ldxr_l(_jit, r0, r1, jit_gpr_regno(reg)); + unget_temp_gpr(_jit); + } +} +#endif + +static void stxi_c(jit_state_t *_jit, jit_word_t i0, int32_t r0, int32_t r1); + +/* Emit a byte store (opcode 0x88) of r1 through base register r0. + * + * The first rex() argument is 1, the same value stxi_c passes on its + * reg8_p path. NOTE(review): this assumes that argument forces a REX + * prefix on x86-64, which byte operands in register numbers 4-7 need in + * order to select SPL/BPL/SIL/DIL rather than AH/CH/DH/BH -- confirm + * against the rex() definition. */ +static void +str_c(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + if (reg8_p(r1)) { + rex(_jit, 1, 0, r1, _NOREG, r0); + ic(_jit, 0x88); + rx(_jit, r1, 0, r0, _NOREG, _SCL1); + } else { + // See comment in stxi_c. + return stxi_c(_jit, 0, r0, r1); + } +} + +/* Emit a byte store (opcode 0x88) of r0 to the absolute address i0. When + * can_sign_extend_int_p(i0) fails, i0 is loaded into a temporary register + * and the store is delegated to str_c. The reg8_p path passes 1 as the + * first rex() argument for the same reason as str_c. */ +static void +sti_c(jit_state_t *_jit, jit_word_t i0, int32_t r0) +{ + if (can_sign_extend_int_p(i0)) { + if (reg8_p(r0)) { + rex(_jit, 1, 0, r0, _NOREG, _NOREG); + ic(_jit, 0x88); + rx(_jit, r0, i0, _NOREG, _NOREG, _SCL1); + } else { + jit_gpr_t reg = get_temp_gpr(_jit); + movr(_jit, jit_gpr_regno(reg), r0); + rex(_jit, 0, 0, jit_gpr_regno(reg), _NOREG, _NOREG); + ic(_jit, 0x88); + rx(_jit, jit_gpr_regno(reg), i0, _NOREG, _NOREG, _SCL1); + unget_temp_gpr(_jit); + } + } else { + jit_gpr_t reg = get_temp_gpr(_jit); + movi(_jit, jit_gpr_regno(reg), i0); + str_c(_jit, jit_gpr_regno(reg), r0); + unget_temp_gpr(_jit); + } +} + +static void +str_s(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + ic(_jit, 0x66); + rex(_jit, 0, 0, r1, _NOREG, r0); + ic(_jit, 0x89); + rx(_jit, r1, 0, r0, _NOREG, _SCL1); +} + +static void +sti_s(jit_state_t *_jit, jit_word_t i0, int32_t r0) +{ + if (can_sign_extend_int_p(i0)) { + ic(_jit, 0x66); + rex(_jit, 0, 0, r0, _NOREG, _NOREG); + ic(_jit, 0x89); + rx(_jit, r0, i0, _NOREG, _NOREG, _SCL1); + } else { + jit_gpr_t reg = get_temp_gpr(_jit); + movi(_jit, jit_gpr_regno(reg), i0); + str_s(_jit, jit_gpr_regno(reg), r0); + unget_temp_gpr(_jit); + } +} + +static void +str_i(jit_state_t 
*_jit, int32_t r0, int32_t r1) +{ + rex(_jit, 0, 0, r1, _NOREG, r0); + ic(_jit, 0x89); + rx(_jit, r1, 0, r0, _NOREG, _SCL1); +} + +static void +sti_i(jit_state_t *_jit, jit_word_t i0, int32_t r0) +{ + if (can_sign_extend_int_p(i0)) { + rex(_jit, 0, 0, r0, _NOREG, _NOREG); + ic(_jit, 0x89); + rx(_jit, r0, i0, _NOREG, _NOREG, _SCL1); + } else { + jit_gpr_t reg = get_temp_gpr(_jit); + movi(_jit, jit_gpr_regno(reg), i0); + str_i(_jit, jit_gpr_regno(reg), r0); + unget_temp_gpr(_jit); + } +} + +#if __X64 +static void +str_l(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + rex(_jit, 0, 1, r1, _NOREG, r0); + ic(_jit, 0x89); + rx(_jit, r1, 0, r0, _NOREG, _SCL1); +} + +static void +sti_l(jit_state_t *_jit, jit_word_t i0, int32_t r0) +{ + if (can_sign_extend_int_p(i0)) { + rex(_jit, 0, 1, r0, _NOREG, _NOREG); + ic(_jit, 0x89); + rx(_jit, r0, i0, _NOREG, _NOREG, _SCL1); + } else { + jit_gpr_t reg = get_temp_gpr(_jit); + movi(_jit, jit_gpr_regno(reg), i0); + str_l(_jit, jit_gpr_regno(reg), r0); + unget_temp_gpr(_jit); + } +} +#endif + +/* Emit a byte store (opcode 0x88) of r2 to the address formed from r0 and + * r1. The reg8_p path passes 1 as the first rex() argument, matching + * stxi_c. NOTE(review): assumes that argument forces a REX prefix so that + * register numbers 4-7 encode SPL/BPL/SIL/DIL on x86-64 -- confirm against + * the rex() definition. */ +static void +stxr_c(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2) +{ + if (reg8_p(r2)) { + rex(_jit, 1, 0, r2, r1, r0); + ic(_jit, 0x88); + rx(_jit, r2, 0, r0, r1, _SCL1); + } else { + jit_gpr_t reg = get_temp_gpr(_jit); + movr(_jit, jit_gpr_regno(reg), r2); + rex(_jit, 0, 0, jit_gpr_regno(reg), r1, r0); + ic(_jit, 0x88); + rx(_jit, jit_gpr_regno(reg), 0, r0, r1, _SCL1); + unget_temp_gpr(_jit); + } +} + +static void +stxi_c(jit_state_t *_jit, jit_word_t i0, int32_t r0, int32_t r1) +{ + if (can_sign_extend_int_p(i0)) { + if (reg8_p(r1)) { + rex(_jit, 1, 0, r1, _NOREG, r0); + ic(_jit, 0x88); + rx(_jit, r1, i0, r0, _NOREG, _SCL1); + } else { + // Here we have a hack. Normally tmp registers are just for the + // backend's use, but there are cases in which jit_move_operands + // can use a temp register too. In a move of an operand to memory + // this would result in two simultaneous uses of a temp register. 
+ // Oddly this situation only applies on 32-bit x86 with byte + // stores -- this is the only platform on which reg8_p can be + // false -- so we just make a special case here. + ASSERT(r0 != r1); + int32_t tmp = r0 == _RAX_REGNO ? _RCX_REGNO : _RAX_REGNO; + ASSERT(reg8_p(tmp)); + pushr(_jit, tmp); + movr(_jit, tmp, r1); + if (r0 == _RSP_REGNO) + i0 += __WORDSIZE / 8; + rex(_jit, 0, 0, tmp, _NOREG, r0); + ic(_jit, 0x88); + rx(_jit, tmp, i0, r0, _NOREG, _SCL1); + popr(_jit, tmp); + } + } else { + jit_gpr_t reg = get_temp_gpr(_jit); + movi(_jit, jit_gpr_regno(reg), i0); + stxr_c(_jit, jit_gpr_regno(reg), r0, r1); + unget_temp_gpr(_jit); + } +} + +static void +stxr_s(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2) +{ + ic(_jit, 0x66); + rex(_jit, 0, 0, r2, r1, r0); + ic(_jit, 0x89); + rx(_jit, r2, 0, r0, r1, _SCL1); +} + +static void +stxi_s(jit_state_t *_jit, jit_word_t i0, int32_t r0, int32_t r1) +{ + if (can_sign_extend_int_p(i0)) { + ic(_jit, 0x66); + rex(_jit, 0, 0, r1, _NOREG, r0); + ic(_jit, 0x89); + rx(_jit, r1, i0, r0, _NOREG, _SCL1); + } else { + jit_gpr_t reg = get_temp_gpr(_jit); + movi(_jit, jit_gpr_regno(reg), i0); + stxr_s(_jit, jit_gpr_regno(reg), r0, r1); + unget_temp_gpr(_jit); + } +} + +static void +stxr_i(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2) +{ + rex(_jit, 0, 0, r2, r1, r0); + ic(_jit, 0x89); + rx(_jit, r2, 0, r0, r1, _SCL1); +} + +static void +stxi_i(jit_state_t *_jit, jit_word_t i0, int32_t r0, int32_t r1) +{ + if (can_sign_extend_int_p(i0)) { + rex(_jit, 0, 0, r1, _NOREG, r0); + ic(_jit, 0x89); + rx(_jit, r1, i0, r0, _NOREG, _SCL1); + } else { + jit_gpr_t reg = get_temp_gpr(_jit); + movi(_jit, jit_gpr_regno(reg), i0); + stxr_i(_jit, jit_gpr_regno(reg), r0, r1); + unget_temp_gpr(_jit); + } +} + +#if __X64 +static void +stxr_l(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2) +{ + rex(_jit, 0, 1, r2, r1, r0); + ic(_jit, 0x89); + rx(_jit, r2, 0, r0, r1, _SCL1); +} + +static void +stxi_l(jit_state_t *_jit, jit_word_t 
i0, int32_t r0, int32_t r1) +{ + if (can_sign_extend_int_p(i0)) { + rex(_jit, 0, 1, r1, _NOREG, r0); + ic(_jit, 0x89); + rx(_jit, r1, i0, r0, _NOREG, _SCL1); + } else { + jit_gpr_t reg = get_temp_gpr(_jit); + movi(_jit, jit_gpr_regno(reg), i0); + stxr_l(_jit, jit_gpr_regno(reg), r0, r1); + unget_temp_gpr(_jit); + } +} +#endif + +static jit_reloc_t +jccs(jit_state_t *_jit, int32_t code) +{ + ic(_jit, 0x70 | code); + return emit_rel8_reloc(_jit, 1); +} + +static jit_reloc_t +jcc(jit_state_t *_jit, int32_t code) +{ + ic(_jit, 0x0f); + ic(_jit, 0x80 | code); + return emit_rel32_reloc(_jit, 2); +} + +static void +jcci(jit_state_t *_jit, int32_t code, jit_word_t i0) +{ + ptrdiff_t rel8 = i0 - (_jit->pc.w + 1 + 1); + ptrdiff_t rel32 = i0 - (_jit->pc.w + 2 + 4); + if (INT8_MIN <= rel8 && rel8 <= INT8_MAX) + { + ic(_jit, 0x70 | code); + ic(_jit, rel8); + } + else + { + ASSERT(INT32_MIN <= rel32 && rel32 <= INT32_MAX); + ic(_jit, 0x0f); + ic(_jit, 0x80 | code); + ii(_jit, rel32); + } +} + +#define DEFINE_JUMPS(cc, CC, code) \ + static inline jit_reloc_t j##cc(jit_state_t *_jit) \ + { \ + return jcc(_jit, X86_CC_##CC); \ + } \ + static inline jit_reloc_t j##cc##s(jit_state_t *_jit) \ + { \ + return jccs(_jit, X86_CC_##CC); \ + } +FOR_EACH_CC(DEFINE_JUMPS) +#undef DEFINE_JUMPS + +static jit_reloc_t +jcr(jit_state_t *_jit, int32_t code, int32_t r0, int32_t r1) +{ + alur(_jit, X86_CMP, r0, r1); + return jcc(_jit, code); +} + +static jit_reloc_t +jci(jit_state_t *_jit, int32_t code, int32_t r0, jit_word_t i0) +{ + alui(_jit, X86_CMP, r0, i0); + return jcc(_jit, code); +} + +static jit_reloc_t +jci0(jit_state_t *_jit, int32_t code, int32_t r0) +{ + testr(_jit, r0, r0); + return jcc(_jit, code); +} + +static jit_reloc_t +bltr(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + return jcr(_jit, X86_CC_L, r0, r1); +} + +static jit_reloc_t +blti(jit_state_t *_jit, int32_t r0, jit_word_t i1) +{ + if (i1) return jci (_jit, X86_CC_L, r0, i1); + else return jci0(_jit, X86_CC_S, r0); +} + 
+static jit_reloc_t +bltr_u(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + return jcr(_jit, X86_CC_B, r0, r1); +} + +static jit_reloc_t +blti_u(jit_state_t *_jit, int32_t r0, jit_word_t i1) +{ + if (i1) return jci (_jit, X86_CC_B, r0, i1); + else return jci0(_jit, X86_CC_B, r0); +} + +static jit_reloc_t +bler(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + return jcr (_jit, X86_CC_LE, r0, r1); +} + +static jit_reloc_t +blei(jit_state_t *_jit, int32_t r0, jit_word_t i1) +{ + if (i1) return jci (_jit, X86_CC_LE, r0, i1); + else return jci0(_jit, X86_CC_LE, r0); +} + +static jit_reloc_t +bler_u(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + return jcr (_jit, X86_CC_BE, r0, r1); +} + +static jit_reloc_t +blei_u(jit_state_t *_jit, int32_t r0, jit_word_t i1) +{ + if (i1) return jci (_jit, X86_CC_BE, r0, i1); + else return jci0(_jit, X86_CC_BE, r0); +} + +static jit_reloc_t +beqr(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + return jcr (_jit, X86_CC_E, r0, r1); +} + +static jit_reloc_t +beqi(jit_state_t *_jit, int32_t r0, jit_word_t i1) +{ + if (i1) return jci (_jit, X86_CC_E, r0, i1); + else return jci0(_jit, X86_CC_E, r0); +} + +static jit_reloc_t +bger(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + return jcr (_jit, X86_CC_GE, r0, r1); +} + +static jit_reloc_t +bgei(jit_state_t *_jit, int32_t r0, jit_word_t i1) +{ + if (i1) return jci (_jit, X86_CC_GE, r0, i1); + else return jci0(_jit, X86_CC_NS, r0); +} + +static jit_reloc_t +bger_u(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + return jcr (_jit, X86_CC_AE, r0, r1); +} + +static jit_reloc_t +bgei_u(jit_state_t *_jit, int32_t r0, jit_word_t i1) +{ + return jci (_jit, X86_CC_AE, r0, i1); +} + +static jit_reloc_t +bgtr(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + return jcr(_jit, X86_CC_G, r0, r1); +} + +static jit_reloc_t +bgti(jit_state_t *_jit, int32_t r0, jit_word_t i1) +{ + return jci(_jit, X86_CC_G, r0, i1); +} + +static jit_reloc_t +bgtr_u(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + return jcr(_jit, 
X86_CC_A, r0, r1); +} + +static jit_reloc_t +bgti_u(jit_state_t *_jit, int32_t r0, jit_word_t i1) +{ + if (i1) return jci (_jit, X86_CC_A, r0, i1); + else return jci0(_jit, X86_CC_NE, r0); +} + +static jit_reloc_t +bner(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + return jcr(_jit, X86_CC_NE, r0, r1); +} + +static jit_reloc_t +bnei(jit_state_t *_jit, int32_t r0, jit_word_t i1) +{ + if (i1) return jci (_jit, X86_CC_NE, r0, i1); + else return jci0(_jit, X86_CC_NE, r0); +} + +static jit_reloc_t +bmsr(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + testr(_jit, r0, r1); + return jnz(_jit); +} + +static jit_reloc_t +bmsi(jit_state_t *_jit, int32_t r0, jit_word_t i1) +{ + if (can_zero_extend_int_p(i1)) { + testi(_jit, r0, i1); + } else { + jit_gpr_t reg = get_temp_gpr(_jit); + movi(_jit, jit_gpr_regno(reg), i1); + testr(_jit, r0, jit_gpr_regno(reg)); + unget_temp_gpr(_jit); + } + return jnz(_jit); +} + +static jit_reloc_t +bmcr(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + testr(_jit, r0, r1); + return jz(_jit); +} + +static jit_reloc_t +bmci(jit_state_t *_jit, int32_t r0, jit_word_t i1) +{ + if (can_zero_extend_int_p(i1)) { + testi(_jit, r0, i1); + } else { + jit_gpr_t reg = get_temp_gpr(_jit); + movi(_jit, jit_gpr_regno(reg), i1); + testr(_jit, r0, jit_gpr_regno(reg)); + unget_temp_gpr(_jit); + } + return jz(_jit); +} + +static jit_reloc_t +boaddr(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + iaddr(_jit, r0, r1); + return jo(_jit); +} + +/* Immediate form of boaddr: emit r0 += i1 followed by a jo jump, and return + * that jump's relocation. When can_sign_extend_int_p(i1) fails, i1 is first + * loaded into a temporary register; the temporary stays allocated until the + * add that reads it has been emitted, the same order bmsi and bmci use. */ +static jit_reloc_t +boaddi(jit_state_t *_jit, int32_t r0, jit_word_t i1) +{ + if (can_sign_extend_int_p(i1)) { + iaddi(_jit, r0, i1); + return jo(_jit); + } + jit_gpr_t reg = get_temp_gpr(_jit); + movi(_jit, jit_gpr_regno(reg), i1); + iaddr(_jit, r0, jit_gpr_regno(reg)); + unget_temp_gpr(_jit); + return jo(_jit); +} + +static jit_reloc_t +boaddr_u(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + iaddr(_jit, r0, r1); + return jc(_jit); +} + +/* Immediate form of boaddr_u (add, then jc); the temporary is released only + * after the add has been emitted. */ +static jit_reloc_t +boaddi_u(jit_state_t *_jit, int32_t r0, jit_word_t i1) +{ + if 
(can_sign_extend_int_p(i1)) { + iaddi(_jit, r0, i1); + return jc(_jit); + } + jit_gpr_t reg = get_temp_gpr(_jit); + movi(_jit, jit_gpr_regno(reg), i1); + iaddr(_jit, r0, jit_gpr_regno(reg)); + unget_temp_gpr(_jit); + return jc(_jit); +} + +static jit_reloc_t +bxaddr(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + iaddr(_jit, r0, r1); + return jno(_jit); +} + +/* Immediate form of bxaddr (add, then jno); the temporary is released only + * after the add has been emitted. */ +static jit_reloc_t +bxaddi(jit_state_t *_jit, int32_t r0, jit_word_t i1) +{ + if (can_sign_extend_int_p(i1)) { + iaddi(_jit, r0, i1); + return jno(_jit); + } + jit_gpr_t reg = get_temp_gpr(_jit); + movi(_jit, jit_gpr_regno(reg), i1); + iaddr(_jit, r0, jit_gpr_regno(reg)); + unget_temp_gpr(_jit); + return jno(_jit); +} + +static jit_reloc_t +bxaddr_u(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + iaddr(_jit, r0, r1); + return jnc(_jit); +} + +/* Immediate form of bxaddr_u (add, then jnc); the temporary is released only + * after the add has been emitted. */ +static jit_reloc_t +bxaddi_u(jit_state_t *_jit, int32_t r0, jit_word_t i1) +{ + if (can_sign_extend_int_p(i1)) { + iaddi(_jit, r0, i1); + return jnc(_jit); + } + jit_gpr_t reg = get_temp_gpr(_jit); + movi(_jit, jit_gpr_regno(reg), i1); + iaddr(_jit, r0, jit_gpr_regno(reg)); + unget_temp_gpr(_jit); + return jnc(_jit); +} + +static jit_reloc_t +bosubr(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + isubr(_jit, r0, r1); + return jo(_jit); +} + +/* Immediate form of bosubr (subtract, then jo); the temporary is released + * only after the subtract has been emitted. */ +static jit_reloc_t +bosubi(jit_state_t *_jit, int32_t r0, jit_word_t i1) +{ + if (can_sign_extend_int_p(i1)) { + isubi(_jit, r0, i1); + return jo(_jit); + } + jit_gpr_t reg = get_temp_gpr(_jit); + movi(_jit, jit_gpr_regno(reg), i1); + isubr(_jit, r0, jit_gpr_regno(reg)); + unget_temp_gpr(_jit); + return jo(_jit); +} + +static jit_reloc_t +bosubr_u(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + isubr(_jit, r0, r1); + return jc(_jit); +} + +/* Immediate form of bosubr_u (subtract, then jc); the temporary is released + * only after the subtract has been emitted. */ +static jit_reloc_t +bosubi_u(jit_state_t *_jit, int32_t r0, jit_word_t i1) +{ + if (can_sign_extend_int_p(i1)) { + isubi(_jit, r0, i1); + return jc(_jit); + } + jit_gpr_t reg = get_temp_gpr(_jit); + movi(_jit, jit_gpr_regno(reg), i1); + isubr(_jit, r0, jit_gpr_regno(reg)); + unget_temp_gpr(_jit); + return jc(_jit); +} + +static jit_reloc_t 
+bxsubr(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + isubr(_jit, r0, r1); + return jno(_jit); +} + +/* Immediate form of bxsubr (subtract, then jno); the temporary is released + * only after the subtract has been emitted. */ +static jit_reloc_t +bxsubi(jit_state_t *_jit, int32_t r0, jit_word_t i1) +{ + if (can_sign_extend_int_p(i1)) { + isubi(_jit, r0, i1); + return jno(_jit); + } + jit_gpr_t reg = get_temp_gpr(_jit); + movi(_jit, jit_gpr_regno(reg), i1); + isubr(_jit, r0, jit_gpr_regno(reg)); + unget_temp_gpr(_jit); + return jno(_jit); +} + +static jit_reloc_t +bxsubr_u(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + isubr(_jit, r0, r1); + return jnc(_jit); +} + +/* Immediate form of bxsubr_u (subtract, then jnc); the temporary is released + * only after the subtract has been emitted. */ +static jit_reloc_t +bxsubi_u(jit_state_t *_jit, int32_t r0, jit_word_t i1) +{ + if (can_sign_extend_int_p(i1)) { + isubi(_jit, r0, i1); + return jnc(_jit); + } + jit_gpr_t reg = get_temp_gpr(_jit); + movi(_jit, jit_gpr_regno(reg), i1); + isubr(_jit, r0, jit_gpr_regno(reg)); + unget_temp_gpr(_jit); + return jnc(_jit); +} + +static void +callr(jit_state_t *_jit, int32_t r0) +{ + rex(_jit, 0, 0, _NOREG, _NOREG, r0); + ic(_jit, 0xff); + mrm(_jit, 0x03, 0x02, r7(r0)); +} + +static void +calli(jit_state_t *_jit, jit_word_t i0) +{ + ptrdiff_t rel32 = i0 - (_jit->pc.w + 1 + 4); + if (INT32_MIN <= rel32 && rel32 <= INT32_MAX) + { + ic(_jit, 0xe8); + ii(_jit, rel32); + } + else + { + jit_gpr_t reg = get_temp_gpr(_jit); + jit_patch_there(_jit, mov_addr(_jit, jit_gpr_regno(reg)), (void*)i0); + callr(_jit, jit_gpr_regno(reg)); + unget_temp_gpr(_jit); + } +} + +static void +jmpi_with_link(jit_state_t *_jit, jit_word_t i0) +{ + return calli(_jit, i0); +} + +static void +pop_link_register(jit_state_t *_jit) +{ + /* Treat this instruction as having no effect on the stack size; its + * effect is non-local (across functions) and handled manually. */ + + int saved_frame_size = _jit->frame_size; + popr(_jit, jit_gpr_regno (JIT_LR)); + _jit->frame_size = saved_frame_size; +} + +static void +push_link_register(jit_state_t *_jit) +{ + /* See comment in pop_link_register. 
*/ + + int saved_frame_size = _jit->frame_size; + pushr(_jit, jit_gpr_regno (JIT_LR)); + _jit->frame_size = saved_frame_size; +} + +static void +jmpr(jit_state_t *_jit, int32_t r0) +{ + rex(_jit, 0, WIDE, _NOREG, _NOREG, r0); + ic(_jit, 0xff); + mrm(_jit, 0x03, 0x04, r7(r0)); +} + +static void +jmpi(jit_state_t *_jit, jit_word_t i0) +{ + ptrdiff_t rel8 = i0 - (_jit->pc.w + 1 + 1); + ptrdiff_t rel32 = i0 - (_jit->pc.w + 1 + 4); + if (INT8_MIN <= rel8 && rel8 <= INT8_MAX) + { + ic(_jit, 0xeb); + ic(_jit, rel8); + } + else if (INT32_MIN <= rel32 && rel32 <= INT32_MAX) + { + ic(_jit, 0xe9); + ii(_jit, rel32); + } + else + { + jit_gpr_t reg = get_temp_gpr(_jit); + jit_patch_there(_jit, mov_addr(_jit, jit_gpr_regno(reg)), (void*)i0); + jmpr(_jit, jit_gpr_regno(reg)); + unget_temp_gpr(_jit); + } +} + +static jit_reloc_t +jmp(jit_state_t *_jit) +{ + ic(_jit, 0xe9); + return emit_rel32_reloc(_jit, 1); +} + +static void +ret(jit_state_t *_jit) +{ + ic(_jit, 0xc3); +} + +static void +retr(jit_state_t *_jit, int32_t r0) +{ + movr(_jit, _RAX_REGNO, r0); + ret(_jit); +} + +static void +reti(jit_state_t *_jit, jit_word_t i0) +{ + movi(_jit, _RAX_REGNO, i0); + ret(_jit); +} + +static void +retval_c(jit_state_t *_jit, int32_t r0) +{ + extr_c(_jit, r0, _RAX_REGNO); +} + +static void +retval_uc(jit_state_t *_jit, int32_t r0) +{ + extr_uc(_jit, r0, _RAX_REGNO); +} + +static void +retval_s(jit_state_t *_jit, int32_t r0) +{ + extr_s(_jit, r0, _RAX_REGNO); +} + +static void +retval_us(jit_state_t *_jit, int32_t r0) +{ + extr_us(_jit, r0, _RAX_REGNO); +} + +static void +retval_i(jit_state_t *_jit, int32_t r0) +{ +#if __X32 + movr(_jit, r0, _RAX_REGNO); +#else + extr_i(_jit, r0, _RAX_REGNO); +#endif +} + +#if __X64 +static void +retval_ui(jit_state_t *_jit, int32_t r0) +{ + extr_ui(_jit, r0, _RAX_REGNO); +} + +static void +retval_l(jit_state_t *_jit, int32_t r0) +{ + movr(_jit, r0, _RAX_REGNO); +} +#endif + +static void +mfence(jit_state_t *_jit) +{ + ic(_jit, 0x0f); + ic(_jit, 0xae); + 
ic(_jit, 0xf0); +} + +static void +ldr_atomic(jit_state_t *_jit, int32_t dst, int32_t loc) +{ +#if __X64 + ldr_l(_jit, dst, loc); +#else + ldr_i(_jit, dst, loc); +#endif +} + +static void +str_atomic(jit_state_t *_jit, int32_t loc, int32_t val) +{ +#if __X64 + str_l(_jit, loc, val); +#else + str_i(_jit, loc, val); +#endif + mfence(_jit); +} + +static void +swap_atomic(jit_state_t *_jit, int32_t dst, int32_t loc, int32_t val) +{ + if (dst == val) { + xchgrm(_jit, dst, loc); + } else { + int32_t tmp = jit_gpr_regno(get_temp_gpr(_jit)); + movr(_jit, tmp, val); + xchgrm(_jit, tmp, loc); + movr(_jit, dst, tmp); + unget_temp_gpr(_jit); + } +} + +/* Emit a compare-and-swap on the word addressed by loc. + * + * Every path moves expected into RAX before calling cmpxchgmr and copies + * RAX into dst afterwards (presumably cmpxchgmr emits x86 CMPXCHG, whose + * comparand and result are implicitly RAX -- confirm against its + * definition). The branches exist to keep any operand that already lives + * in RAX from being overwritten before it is read, and to restore RAX when + * it is not the destination. + * + * loc must differ from expected and from desired (asserted below); the + * other operands may overlap, including dst == desired. */ +static void +cas_atomic(jit_state_t *_jit, int32_t dst, int32_t loc, int32_t expected, + int32_t desired) +{ + ASSERT(loc != expected); + ASSERT(loc != desired); + + if (dst == jit_gpr_regno(_RAX)) { + if (loc == dst) { + /* The address is in RAX: move it aside before RAX takes expected. */ + int32_t tmp = jit_gpr_regno(get_temp_gpr(_jit)); + movr(_jit, tmp ,loc); + movr(_jit, dst, expected); + cmpxchgmr(_jit, tmp, desired); + unget_temp_gpr(_jit); + } else if (desired == dst) { + /* desired is in RAX, which is about to be overwritten with expected; + * park it in a temporary so cmpxchgmr still sees the original value. */ + int32_t tmp = jit_gpr_regno(get_temp_gpr(_jit)); + movr(_jit, tmp, desired); + movr(_jit, dst, expected); + cmpxchgmr(_jit, loc, tmp); + unget_temp_gpr(_jit); + } else { + movr(_jit, dst, expected); + cmpxchgmr(_jit, loc, desired); + } + } else if (loc == jit_gpr_regno(_RAX)) { + int32_t tmp = jit_gpr_regno(get_temp_gpr(_jit)); + movr(_jit, tmp, loc); + movr(_jit, jit_gpr_regno(_RAX), expected); + cmpxchgmr(_jit, tmp, desired); + movr(_jit, dst, jit_gpr_regno(_RAX)); + movr(_jit, loc, tmp); + unget_temp_gpr(_jit); + } else if (expected == jit_gpr_regno(_RAX)) { + int32_t tmp = jit_gpr_regno(get_temp_gpr(_jit)); + movr(_jit, tmp, expected); + cmpxchgmr(_jit, loc, desired); + movr(_jit, dst, jit_gpr_regno(_RAX)); + movr(_jit, expected, tmp); + unget_temp_gpr(_jit); + } else if (desired == jit_gpr_regno(_RAX)) { + int32_t tmp = jit_gpr_regno(get_temp_gpr(_jit)); + movr(_jit, tmp, desired); + movr(_jit, jit_gpr_regno(_RAX), expected); + cmpxchgmr(_jit, loc, tmp); + movr(_jit, dst, jit_gpr_regno(_RAX)); + movr(_jit, desired, tmp); + unget_temp_gpr(_jit); + } else { + /* RAX is unrelated to the operands: save it, use it, restore it. */ + int32_t tmp = jit_gpr_regno(get_temp_gpr(_jit)); + 
movr(_jit, tmp, jit_gpr_regno(_RAX)); + movr(_jit, jit_gpr_regno(_RAX), expected); + cmpxchgmr(_jit, loc, desired); + movr(_jit, dst, jit_gpr_regno(_RAX)); + movr(_jit, jit_gpr_regno(_RAX), tmp); + unget_temp_gpr(_jit); + } +} + +static void +breakpoint(jit_state_t *_jit) +{ + ic(_jit, 0xcc); +} diff --git a/deps/lightening/lightening/x86-sse.c b/deps/lightening/lightening/x86-sse.c new file mode 100644 index 0000000..ab66dc7 --- /dev/null +++ b/deps/lightening/lightening/x86-sse.c @@ -0,0 +1,1016 @@ +/* + * Copyright (C) 2012-2017, 2019 Free Software Foundation, Inc. + * + * This file is part of GNU lightning. + * + * GNU lightning is free software; you can redistribute it and/or modify it + * under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation; either version 3, or (at your option) + * any later version. + * + * GNU lightning is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public + * License for more details. 
+ * + * Authors: + * Paulo Cesar Pereira de Andrade + */ + +#define _XMM0_REGNO 0 +#define _XMM1_REGNO 1 +#define _XMM2_REGNO 2 +#define _XMM3_REGNO 3 +#define _XMM4_REGNO 4 +#define _XMM5_REGNO 5 +#define _XMM6_REGNO 6 +#define _XMM7_REGNO 7 +#define _XMM8_REGNO 8 +#define _XMM9_REGNO 9 +#define _XMM10_REGNO 10 +#define _XMM11_REGNO 11 +#define _XMM12_REGNO 12 +#define _XMM13_REGNO 13 +#define _XMM14_REGNO 14 +#define _XMM15_REGNO 15 +#define X86_SSE_MOV 0x10 +#define X86_SSE_MOV1 0x11 +#define X86_SSE_MOVLP 0x12 +#define X86_SSE_MOVHP 0x16 +#define X86_SSE_MOVA 0x28 +#define X86_SSE_CVTIS 0x2a +#define X86_SSE_CVTTSI 0x2c +#define X86_SSE_CVTSI 0x2d +#define X86_SSE_UCOMI 0x2e +#define X86_SSE_COMI 0x2f +#define X86_SSE_ROUND 0x3a +#define X86_SSE_SQRT 0x51 +#define X86_SSE_RSQRT 0x52 +#define X86_SSE_RCP 0x53 +#define X86_SSE_AND 0x54 +#define X86_SSE_ANDN 0x55 +#define X86_SSE_OR 0x56 +#define X86_SSE_XOR 0x57 +#define X86_SSE_ADD 0x58 +#define X86_SSE_MUL 0x59 +#define X86_SSE_CVTSD 0x5a +#define X86_SSE_CVTDT 0x5b +#define X86_SSE_SUB 0x5c +#define X86_SSE_MIN 0x5d +#define X86_SSE_DIV 0x5e +#define X86_SSE_MAX 0x5f +#define X86_SSE_X2G 0x6e +#define X86_SSE_EQB 0x74 +#define X86_SSE_EQW 0x75 +#define X86_SSE_EQD 0x76 +#define X86_SSE_G2X 0x7e +#define X86_SSE_MOV2 0xd6 + +static void +sser(jit_state_t *_jit, int32_t c, int32_t r0, int32_t r1) +{ + rex(_jit, 0, 0, r0, 0, r1); + ic(_jit, 0x0f); + ic(_jit, c); + mrm(_jit, 0x03, r7(r0), r7(r1)); +} + +static void +ssexr(jit_state_t *_jit, int32_t p, int32_t c, + int32_t r0, int32_t r1) +{ + ic(_jit, p); + rex(_jit, 0, 0, r0, 0, r1); + ic(_jit, 0x0f); + ic(_jit, c); + mrm(_jit, 0x03, r7(r0), r7(r1)); +} + +static void +ssexi(jit_state_t *_jit, int32_t c, int32_t r0, + int32_t m, int32_t i) +{ + ic(_jit, 0x66); + rex(_jit, 0, 0, 0, 0, r0); + ic(_jit, 0x0f); + ic(_jit, c); + mrm(_jit, 0x03, r7(m), r7(r0)); + ic(_jit, i); +} + +static void +sselxr(jit_state_t *_jit, int32_t p, int32_t c, int32_t r0, int32_t r1) +{ + 
if (__X64) { + ic(_jit, p); + rex(_jit, 0, 1, r0, 0, r1); + ic(_jit, 0x0f); + ic(_jit, c); + mrm(_jit, 0x03, r7(r0), r7(r1)); + } else { + ssexr(_jit, p, c, r0, r1); + } +} + +static void +ssexrx(jit_state_t *_jit, int32_t px, int32_t code, int32_t md, + int32_t rb, int32_t ri, int32_t ms, int32_t rd) +{ + ic(_jit, px); + rex(_jit, 0, 0, rd, ri, rb); + ic(_jit, 0x0f); + ic(_jit, code); + rx(_jit, rd, md, rb, ri, ms); +} + +static void +movdlxr(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + ssexr(_jit, 0x66, X86_SSE_X2G, r0, r1); +} + +static void movdqxr(jit_state_t *_jit, int32_t r0, int32_t r1) maybe_unused; +static void +movdqxr(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + sselxr(_jit, 0x66, X86_SSE_X2G, r0, r1); +} + +static void +movssmr(jit_state_t *_jit, int32_t md, int32_t rb, int32_t ri, int32_t ms, int32_t rd) +{ + ssexrx(_jit, 0xf3, X86_SSE_MOV, md, rb, ri, ms, rd); +} +static void +movsdmr(jit_state_t *_jit, int32_t md, int32_t rb, int32_t ri, int32_t ms, int32_t rd) +{ + ssexrx(_jit, 0xf2, X86_SSE_MOV, md, rb, ri, ms, rd); +} +static void +movssrm(jit_state_t *_jit, int32_t rs, int32_t md, int32_t mb, int32_t mi, int32_t ms) +{ + ssexrx(_jit, 0xf3, X86_SSE_MOV1, md, mb, mi, ms, rs); +} +static void +movsdrm(jit_state_t *_jit, int32_t rs, int32_t md, int32_t mb, int32_t mi, int32_t ms) +{ + ssexrx(_jit, 0xf2, X86_SSE_MOV1, md, mb, mi, ms, rs); +} + +static void +movr_f(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + if (r0 != r1) + ssexr(_jit, 0xf3, X86_SSE_MOV, r0, r1); +} + +static void +movr_d(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + if (r0 != r1) + ssexr(_jit, 0xf2, X86_SSE_MOV, r0, r1); +} + +static void +addssr(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + ssexr(_jit, 0xf3, X86_SSE_ADD, r0, r1); +} +static void +addsdr(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + ssexr(_jit, 0xf2, X86_SSE_ADD, r0, r1); +} +static void +subssr(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + ssexr(_jit, 0xf3, X86_SSE_SUB, r0, r1); +} +static void 
+subsdr(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + ssexr(_jit, 0xf2, X86_SSE_SUB, r0, r1); +} +static void +mulssr(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + ssexr(_jit, 0xf3, X86_SSE_MUL, r0, r1); +} +static void +mulsdr(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + ssexr(_jit, 0xf2, X86_SSE_MUL, r0, r1); +} +static void +divssr(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + ssexr(_jit, 0xf3, X86_SSE_DIV, r0, r1); +} +static void +divsdr(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + ssexr(_jit, 0xf2, X86_SSE_DIV, r0, r1); +} +static void +andpsr(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + sser(_jit, X86_SSE_AND, r0, r1); +} +static void +andpdr(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + ssexr(_jit, 0x66, X86_SSE_AND, r0, r1); +} +static void +truncr_f_i(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + ssexr(_jit, 0xf3, X86_SSE_CVTTSI, r0, r1); +} +static void +truncr_d_i(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + ssexr(_jit, 0xf2, X86_SSE_CVTTSI, r0, r1); +} +#if __X64 +static void +truncr_f_l(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + sselxr(_jit, 0xf3, X86_SSE_CVTTSI, r0, r1); +} +static void +truncr_d_l(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + sselxr(_jit, 0xf2, X86_SSE_CVTTSI, r0, r1); +} +#endif +static void +extr_f(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + sselxr(_jit, 0xf3, X86_SSE_CVTIS, r0, r1); +} +static void +extr_d(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + sselxr(_jit, 0xf2, X86_SSE_CVTIS, r0, r1); +} + +static void +extr_f_d(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + ssexr(_jit, 0xf3, X86_SSE_CVTSD, r0, r1); +} +static void +extr_d_f(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + ssexr(_jit, 0xf2, X86_SSE_CVTSD, r0, r1); +} +static void +ucomissr(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + sser(_jit, X86_SSE_UCOMI, r0, r1); +} +static void +ucomisdr(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + ssexr(_jit, 0x66, X86_SSE_UCOMI, r0, r1); +} +static void +xorpsr(jit_state_t *_jit, int32_t 
r0, int32_t r1) +{ + sser(_jit, X86_SSE_XOR, r0, r1); +} +static void +xorpdr(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + ssexr(_jit, 0x66, X86_SSE_XOR, r0, r1); +} +static void orpdr(jit_state_t *_jit, int32_t r0, int32_t r1) maybe_unused; +static void +orpdr(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + ssexr(_jit, 0x66, X86_SSE_OR, r0, r1); +} +static void +pcmpeqlr(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + ssexr(_jit, 0x66, X86_SSE_EQD, r0, r1); +} +static void +psrl(jit_state_t *_jit, int32_t r0, int32_t i0) +{ + ssexi(_jit, 0x72, r0, 0x02, i0); +} +static void +psrq(jit_state_t *_jit, int32_t r0, int32_t i0) +{ + ssexi(_jit, 0x73, r0, 0x02, i0); +} +static void +pslq(jit_state_t *_jit, int32_t r0, int32_t i0) +{ + ssexi(_jit, 0x73, r0, 0x06, i0); +} +static void +sqrtr_f(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + ssexr(_jit, 0xf3, X86_SSE_SQRT, r0, r1); +} +static void +sqrtr_d(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + ssexr(_jit, 0xf2, X86_SSE_SQRT, r0, r1); +} +static void +ldr_f(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + movssmr(_jit, 0, r1, _NOREG, _SCL1, r0); +} +static void +str_f(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + movssrm(_jit, r1, 0, r0, _NOREG, _SCL1); +} +static void +ldr_d(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + movsdmr(_jit, 0, r1, _NOREG, _SCL1, r0); +} +static void +str_d(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + movsdrm(_jit, r1, 0, r0, _NOREG, _SCL1); +} + +static void +movi_f(jit_state_t *_jit, int32_t r0, jit_float32_t i0) +{ + union { + int32_t i; + jit_float32_t f; + } data; + + data.f = i0; + if (data.f == 0.0 && !(data.i & 0x80000000)) + xorpsr(_jit, r0, r0); + else { + jit_gpr_t reg = get_temp_gpr(_jit); + movi(_jit, jit_gpr_regno(reg), data.i); + movdlxr(_jit, r0, jit_gpr_regno(reg)); + unget_temp_gpr(_jit); + } +} + +static void +movi_d(jit_state_t *_jit, int32_t r0, jit_float64_t i0) +{ + union { + int32_t ii[2]; + jit_word_t w; + jit_float64_t d; + } data; + + data.d = i0; + if 
(data.d == 0.0 && !(data.ii[1] & 0x80000000)) + xorpdr(_jit, r0, r0); + else { + jit_gpr_t ireg = get_temp_gpr(_jit); +#if __X64 + movi(_jit, jit_gpr_regno(ireg), data.w); + movdqxr(_jit, r0, jit_gpr_regno(ireg)); + unget_temp_gpr(_jit); +#else + jit_fpr_t freg = get_temp_fpr(_jit); + movi(_jit, jit_gpr_regno(ireg), data.ii[1]); + movdlxr(_jit, jit_fpr_regno(freg), jit_gpr_regno(ireg)); + pslq(_jit, jit_fpr_regno(freg), 32); + movi(_jit, jit_gpr_regno(ireg), data.ii[0]); + movdlxr(_jit, r0, jit_gpr_regno(ireg)); + orpdr(_jit, r0, jit_fpr_regno(freg)); + unget_temp_fpr(_jit); + unget_temp_gpr(_jit); +#endif + } +} + +#if __X32 +static void +x87rx(jit_state_t *_jit, int32_t code, int32_t md, + int32_t rb, int32_t ri, int32_t ms) +{ + rex(_jit, 0, 1, rb, ri, _NOREG); + ic(_jit, 0xd8 | (code >> 3)); + rx(_jit, (code & 7), md, rb, ri, ms); +} + +static void +fldsm(jit_state_t *_jit, int32_t md, int32_t rb, int32_t ri, int32_t ms) +{ + return x87rx(_jit, 010, md, rb, ri, ms); +} + +static void +fstsm(jit_state_t *_jit, int32_t md, int32_t rb, int32_t ri, int32_t ms) +{ + return x87rx(_jit, 013, md, rb, ri, ms); +} + +static void +fldlm(jit_state_t *_jit, int32_t md, int32_t rb, int32_t ri, int32_t ms) +{ + return x87rx(_jit, 050, md, rb, ri, ms); +} + +static void +fstlm(jit_state_t *_jit, int32_t md, int32_t rb, int32_t ri, int32_t ms) +{ + return x87rx(_jit, 053, md, rb, ri, ms); +} +#endif + +static void +retval_f(jit_state_t *_jit, int32_t r0) +{ +#if __X32 + subi(_jit, _RSP_REGNO, _RSP_REGNO, 4); + fstsm(_jit, 0, _RSP_REGNO, _NOREG, _SCL1); + ldr_f(_jit, r0, _RSP_REGNO); + addi(_jit, _RSP_REGNO, _RSP_REGNO, 4); +#else + movr_f(_jit, r0, _XMM0_REGNO); +#endif +} + +static void +retval_d(jit_state_t *_jit, int32_t r0) +{ +#if __X32 + subi(_jit, _RSP_REGNO, _RSP_REGNO, 8); + fstlm(_jit, 0, _RSP_REGNO, _NOREG, _SCL1); + ldr_d(_jit, r0, _RSP_REGNO); + addi(_jit, _RSP_REGNO, _RSP_REGNO, 8); +#else + movr_d(_jit, r0, _XMM0_REGNO); +#endif +} + +static void 
+retr_f(jit_state_t *_jit, int32_t u) +{ +#if __X32 + subi(_jit, _RSP_REGNO, _RSP_REGNO, 4); + str_f(_jit, _RSP_REGNO, u); + fldsm(_jit, 0, _RSP_REGNO, _NOREG, _SCL1); + addi(_jit, _RSP_REGNO, _RSP_REGNO, 4); +#else + movr_f(_jit, _XMM0_REGNO, u); +#endif + ret(_jit); +} + +static void +retr_d(jit_state_t *_jit, int32_t u) +{ +#if __X32 + subi(_jit, _RSP_REGNO, _RSP_REGNO, 8); + str_d(_jit, _RSP_REGNO, u); + fldlm(_jit, 0, _RSP_REGNO, _NOREG, _SCL1); + addi(_jit, _RSP_REGNO, _RSP_REGNO, 8); +#else + movr_d(_jit, _XMM0_REGNO, u); +#endif + ret(_jit); +} + +static void +addr_f(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2) +{ + if (r0 == r1) + addssr(_jit, r0, r2); + else if (r0 == r2) + addssr(_jit, r0, r1); + else { + movr_f(_jit, r0, r1); + addssr(_jit, r0, r2); + } +} + +static void +addr_d(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2) +{ + if (r0 == r1) + addsdr(_jit, r0, r2); + else if (r0 == r2) + addsdr(_jit, r0, r1); + else { + movr_d(_jit, r0, r1); + addsdr(_jit, r0, r2); + } +} + +static void +subr_f(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2) +{ + if (r0 == r1) + subssr(_jit, r0, r2); + else if (r0 == r2) { + jit_fpr_t reg = get_temp_fpr(_jit); + movr_f(_jit, jit_fpr_regno(reg), r0); + movr_f(_jit, r0, r1); + subssr(_jit, r0, jit_fpr_regno(reg)); + unget_temp_fpr(_jit); + } + else { + movr_f(_jit, r0, r1); + subssr(_jit, r0, r2); + } +} + +static void +subr_d(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2) +{ + if (r0 == r1) + subsdr(_jit, r0, r2); + else if (r0 == r2) { + jit_fpr_t reg = get_temp_fpr(_jit); + movr_d(_jit, jit_fpr_regno(reg), r0); + movr_d(_jit, r0, r1); + subsdr(_jit, r0, jit_fpr_regno(reg)); + unget_temp_fpr(_jit); + } + else { + movr_d(_jit, r0, r1); + subsdr(_jit, r0, r2); + } +} + +static void +mulr_f(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2) +{ + if (r0 == r1) + mulssr(_jit, r0, r2); + else if (r0 == r2) + mulssr(_jit, r0, r1); + else { + movr_f(_jit, r0, r1); + mulssr(_jit, r0, r2); + 
} +} + +static void +mulr_d(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2) +{ + if (r0 == r1) + mulsdr(_jit, r0, r2); + else if (r0 == r2) + mulsdr(_jit, r0, r1); + else { + movr_d(_jit, r0, r1); + mulsdr(_jit, r0, r2); + } +} + +static void +divr_f(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2) +{ + if (r0 == r1) + divssr(_jit, r0, r2); + else if (r0 == r2) { + jit_fpr_t reg = get_temp_fpr(_jit); + movr_f(_jit, jit_fpr_regno(reg), r0); + movr_f(_jit, r0, r1); + divssr(_jit, r0, jit_fpr_regno(reg)); + unget_temp_fpr(_jit); + } + else { + movr_f(_jit, r0, r1); + divssr(_jit, r0, r2); + } +} + +static void +divr_d(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2) +{ + if (r0 == r1) + divsdr(_jit, r0, r2); + else if (r0 == r2) { + jit_fpr_t reg = get_temp_fpr(_jit); + movr_d(_jit, jit_fpr_regno(reg), r0); + movr_d(_jit, r0, r1); + divsdr(_jit, r0, jit_fpr_regno(reg)); + unget_temp_fpr(_jit); + } + else { + movr_d(_jit, r0, r1); + divsdr(_jit, r0, r2); + } +} + +static void +absr_f(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + if (r0 == r1) { + jit_fpr_t reg = get_temp_fpr(_jit); + pcmpeqlr(_jit, jit_fpr_regno(reg), jit_fpr_regno(reg)); + psrl(_jit, jit_fpr_regno(reg), 1); + andpsr(_jit, r0, jit_fpr_regno(reg)); + unget_temp_fpr(_jit); + } + else { + pcmpeqlr(_jit, r0, r0); + psrl(_jit, r0, 1); + andpsr(_jit, r0, r1); + } +} + +static void +absr_d(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + if (r0 == r1) { + jit_fpr_t reg = get_temp_fpr(_jit); + pcmpeqlr(_jit, jit_fpr_regno(reg), jit_fpr_regno(reg)); + psrq(_jit, jit_fpr_regno(reg), 1); + andpdr(_jit, r0, jit_fpr_regno(reg)); + unget_temp_fpr(_jit); + } + else { + pcmpeqlr(_jit, r0, r0); + psrq(_jit, r0, 1); + andpdr(_jit, r0, r1); + } +} + +static void +negr_f(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + jit_gpr_t ireg = get_temp_gpr(_jit); + imovi(_jit, jit_gpr_regno(ireg), 0x80000000); + if (r0 == r1) { + jit_fpr_t freg = get_temp_fpr(_jit); + movdlxr(_jit, jit_fpr_regno(freg), 
jit_gpr_regno(ireg)); + xorpsr(_jit, r0, jit_fpr_regno(freg)); + unget_temp_fpr(_jit); + } else { + movdlxr(_jit, r0, jit_gpr_regno(ireg)); + xorpsr(_jit, r0, r1); + } + unget_temp_gpr(_jit); +} + +static void +negr_d(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + jit_gpr_t ireg = get_temp_gpr(_jit); + imovi(_jit, jit_gpr_regno(ireg), 0x80000000); + if (r0 == r1) { + jit_fpr_t freg = get_temp_fpr(_jit); + movdlxr(_jit, jit_fpr_regno(freg), jit_gpr_regno(ireg)); + pslq(_jit, jit_fpr_regno(freg), 32); + xorpdr(_jit, r0, jit_fpr_regno(freg)); + unget_temp_fpr(_jit); + } else { + movdlxr(_jit, r0, jit_gpr_regno(ireg)); + pslq(_jit, r0, 32); + xorpdr(_jit, r0, r1); + } + unget_temp_gpr(_jit); +} + +static void +ldi_f(jit_state_t *_jit, int32_t r0, jit_word_t i0) +{ + if (can_sign_extend_int_p(i0)) + movssmr(_jit, i0, _NOREG, _NOREG, _SCL1, r0); + else { + jit_gpr_t reg = get_temp_gpr(_jit); + movi(_jit, jit_gpr_regno(reg), i0); + ldr_f(_jit, r0, jit_gpr_regno(reg)); + unget_temp_gpr(_jit); + } +} + +static void +ldxr_f(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2) +{ + movssmr(_jit, 0, r1, r2, _SCL1, r0); +} + +static void +ldxi_f(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0) +{ + if (can_sign_extend_int_p(i0)) + movssmr(_jit, i0, r1, _NOREG, _SCL1, r0); + else { + jit_gpr_t reg = get_temp_gpr(_jit); + movi(_jit, jit_gpr_regno(reg), i0); + ldxr_f(_jit, r0, r1, jit_gpr_regno(reg)); + unget_temp_gpr(_jit); + } +} + +static void +sti_f(jit_state_t *_jit, jit_word_t i0, int32_t r0) +{ + if (can_sign_extend_int_p(i0)) + movssrm(_jit, r0, i0, _NOREG, _NOREG, _SCL1); + else { + jit_gpr_t reg = get_temp_gpr(_jit); + movi(_jit, jit_gpr_regno(reg), i0); + str_f(_jit, jit_gpr_regno(reg), r0); + unget_temp_gpr(_jit); + } +} + +static void +stxr_f(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2) +{ + movssrm(_jit, r2, 0, r0, r1, _SCL1); +} + +static void +stxi_f(jit_state_t *_jit, jit_word_t i0, int32_t r0, int32_t r1) +{ + if (can_sign_extend_int_p(i0)) 
+ movssrm(_jit, r1, i0, r0, _NOREG, _SCL1); + else { + jit_gpr_t reg = get_temp_gpr(_jit); + movi(_jit, jit_gpr_regno(reg), i0); + stxr_f(_jit, jit_gpr_regno(reg), r0, r1); + unget_temp_gpr(_jit); + } +} + +static jit_reloc_t +bltr_f(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + ucomissr(_jit, r1, r0); + return ja(_jit); +} + +static jit_reloc_t +bler_f(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + ucomissr(_jit, r1, r0); + return jae(_jit); +} + +static jit_reloc_t +beqr_f(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + ucomissr(_jit, r0, r1); + jit_reloc_t pos = jps(_jit); + jit_reloc_t ret = je(_jit); + jit_patch_here(_jit, pos); + return ret; +} + +static jit_reloc_t +bger_f(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + ucomissr(_jit, r0, r1); + return jae(_jit); +} + +static jit_reloc_t +bgtr_f(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + ucomissr(_jit, r0, r1); + return ja(_jit); +} + +static jit_reloc_t +bner_f(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + ucomissr(_jit, r0, r1); + jit_reloc_t pos = jps(_jit); + jit_reloc_t zero = jzs(_jit); + jit_patch_here(_jit, pos); + jit_reloc_t ret = jmp(_jit); + jit_patch_here(_jit, zero); + return ret; +} + +static jit_reloc_t +bunltr_f(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + ucomissr(_jit, r0, r1); + return jnae(_jit); +} + +static jit_reloc_t +bunler_f(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + ucomissr(_jit, r0, r1); + return jna(_jit); +} + +static jit_reloc_t +buneqr_f(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + ucomissr(_jit, r0, r1); + return je(_jit); +} + +static jit_reloc_t +bunger_f(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + ucomissr(_jit, r1, r0); + return jna(_jit); +} + +static jit_reloc_t +bungtr_f(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + ucomissr(_jit, r1, r0); + return jnae(_jit); +} + +static jit_reloc_t +bltgtr_f(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + ucomissr(_jit, r0, r1); + return jne(_jit); +} + +static jit_reloc_t +bordr_f(jit_state_t *_jit, 
int32_t r0, int32_t r1) +{ + ucomissr(_jit, r0, r1); + return jnp(_jit); +} + +static jit_reloc_t +bunordr_f(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + ucomissr(_jit, r0, r1); + return jp(_jit); +} + +static void +ldi_d(jit_state_t *_jit, int32_t r0, jit_word_t i0) +{ + if (can_sign_extend_int_p(i0)) + movsdmr(_jit, i0, _NOREG, _NOREG, _SCL1, r0); + else { + jit_gpr_t reg = get_temp_gpr(_jit); + movi(_jit, jit_gpr_regno(reg), i0); + ldr_d(_jit, r0, jit_gpr_regno(reg)); + unget_temp_gpr(_jit); + } +} + +static void +ldxr_d(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2) +{ + movsdmr(_jit, 0, r1, r2, _SCL1, r0); +} + +static void +ldxi_d(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0) +{ + if (can_sign_extend_int_p(i0)) + movsdmr(_jit, i0, r1, _NOREG, _SCL1, r0); + else { + jit_gpr_t reg = get_temp_gpr(_jit); + movi(_jit, jit_gpr_regno(reg), i0); + ldxr_d(_jit, r0, r1, jit_gpr_regno(reg)); + unget_temp_gpr(_jit); + } +} + +static void +sti_d(jit_state_t *_jit, jit_word_t i0, int32_t r0) +{ + if (can_sign_extend_int_p(i0)) + movsdrm(_jit, r0, i0, _NOREG, _NOREG, _SCL1); + else { + jit_gpr_t reg = get_temp_gpr(_jit); + movi(_jit, jit_gpr_regno(reg), i0); + str_d(_jit, jit_gpr_regno(reg), r0); + unget_temp_gpr(_jit); + } +} + +static void +stxr_d(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2) +{ + movsdrm(_jit, r2, 0, r0, r1, _SCL1); +} + +static void +stxi_d(jit_state_t *_jit, jit_word_t i0, int32_t r0, int32_t r1) +{ + if (can_sign_extend_int_p(i0)) + movsdrm(_jit, r1, i0, r0, _NOREG, _SCL1); + else { + jit_gpr_t reg = get_temp_gpr(_jit); + movi(_jit, jit_gpr_regno(reg), i0); + stxr_d(_jit, jit_gpr_regno(reg), r0, r1); + unget_temp_gpr(_jit); + } +} + +static jit_reloc_t +bltr_d(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + ucomisdr(_jit, r1, r0); + return ja(_jit); +} + +static jit_reloc_t +bler_d(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + ucomisdr(_jit, r1, r0); + return jae(_jit); +} + +static jit_reloc_t +beqr_d(jit_state_t 
*_jit, int32_t r0, int32_t r1) +{ + ucomisdr(_jit, r0, r1); + jit_reloc_t pos = jps(_jit); + jit_reloc_t ret = je(_jit); + jit_patch_here(_jit, pos); + return ret; +} + +static jit_reloc_t +bger_d(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + ucomisdr(_jit, r0, r1); + return jae(_jit); +} + +static jit_reloc_t +bgtr_d(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + ucomisdr(_jit, r0, r1); + return ja(_jit); +} + +static jit_reloc_t +bner_d(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + ucomisdr(_jit, r0, r1); + jit_reloc_t pos = jps(_jit); + jit_reloc_t zero = jzs(_jit); + jit_patch_here(_jit, pos); + jit_reloc_t ret = jmp(_jit); + jit_patch_here(_jit, zero); + return ret; +} + +static jit_reloc_t +bunltr_d(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + ucomisdr(_jit, r0, r1); + return jnae(_jit); +} + +static jit_reloc_t +bunler_d(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + ucomisdr(_jit, r0, r1); + return jna(_jit); +} + +static jit_reloc_t +buneqr_d(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + ucomisdr(_jit, r0, r1); + return je(_jit); +} + +static jit_reloc_t +bunger_d(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + ucomisdr(_jit, r1, r0); + return jna(_jit); +} + +static jit_reloc_t +bungtr_d(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + ucomisdr(_jit, r1, r0); + return jnae(_jit); +} + +static jit_reloc_t +bltgtr_d(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + ucomisdr(_jit, r0, r1); + return jne(_jit); +} + +static jit_reloc_t +bordr_d(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + ucomisdr(_jit, r0, r1); + return jnp(_jit); +} + +static jit_reloc_t +bunordr_d(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + ucomisdr(_jit, r0, r1); + return jp(_jit); +} diff --git a/deps/lightening/lightening/x86.c b/deps/lightening/lightening/x86.c new file mode 100644 index 0000000..f8ac4b0 --- /dev/null +++ b/deps/lightening/lightening/x86.c @@ -0,0 +1,407 @@ +/* + * Copyright (C) 2012-2020 Free Software Foundation, Inc. 
+ * + * This file is part of GNU lightning. + * + * GNU lightning is free software; you can redistribute it and/or modify it + * under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation; either version 3, or (at your option) + * any later version. + * + * GNU lightning is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public + * License for more details. + * + * Authors: + * Paulo Cesar Pereira de Andrade + */ + +#define _NOREG 0xffff + +typedef struct { + /* x87 present */ + uint32_t fpu : 1; + /* cmpxchg8b instruction */ + uint32_t cmpxchg8b : 1; + /* cmov and fcmov branchless conditional mov */ + uint32_t cmov : 1; + /* mmx registers/instructions available */ + uint32_t mmx : 1; + /* sse registers/instructions available */ + uint32_t sse : 1; + /* sse2 registers/instructions available */ + uint32_t sse2 : 1; + /* sse3 instructions available */ + uint32_t sse3 : 1; + /* pcmulqdq instruction */ + uint32_t pclmulqdq : 1; + /* ssse3 suplemental sse3 instructions available */ + uint32_t ssse3 : 1; + /* fused multiply/add using ymm state */ + uint32_t fma : 1; + /* cmpxchg16b instruction */ + uint32_t cmpxchg16b : 1; + /* sse4.1 instructions available */ + uint32_t sse4_1 : 1; + /* sse4.2 instructions available */ + uint32_t sse4_2 : 1; + /* movbe instruction available */ + uint32_t movbe : 1; + /* popcnt instruction available */ + uint32_t popcnt : 1; + /* aes instructions available */ + uint32_t aes : 1; + /* avx instructions available */ + uint32_t avx : 1; + /* lahf/sahf available in 64 bits mode */ + uint32_t lahf : 1; +} jit_cpu_t; + +static jit_cpu_t jit_cpu; + +static inline jit_reloc_t +emit_rel8_reloc (jit_state_t *_jit, uint8_t inst_start) +{ + uint8_t *loc = _jit->pc.uc; + emit_u8 (_jit, 0); + return jit_reloc(_jit, JIT_RELOC_REL8, inst_start, loc, 
_jit->pc.uc, 0); +} + +static inline jit_reloc_t +emit_rel32_reloc (jit_state_t *_jit, uint8_t inst_start) +{ + uint8_t *loc = _jit->pc.uc; + emit_u32 (_jit, 0); + return jit_reloc(_jit, JIT_RELOC_REL32, inst_start, loc, _jit->pc.uc, 0); +} + +#include "x86-cpu.c" +#include "x86-sse.c" + +jit_bool_t +jit_get_cpu(void) +{ + union { + struct { + uint32_t sse3 : 1; + uint32_t pclmulqdq : 1; + uint32_t dtes64 : 1; /* amd reserved */ + uint32_t monitor : 1; + uint32_t ds_cpl : 1; /* amd reserved */ + uint32_t vmx : 1; /* amd reserved */ + uint32_t smx : 1; /* amd reserved */ + uint32_t est : 1; /* amd reserved */ + uint32_t tm2 : 1; /* amd reserved */ + uint32_t ssse3 : 1; + uint32_t cntx_id : 1; /* amd reserved */ + uint32_t __reserved0 : 1; + uint32_t fma : 1; + uint32_t cmpxchg16b : 1; + uint32_t xtpr : 1; /* amd reserved */ + uint32_t pdcm : 1; /* amd reserved */ + uint32_t __reserved1 : 1; + uint32_t pcid : 1; /* amd reserved */ + uint32_t dca : 1; /* amd reserved */ + uint32_t sse4_1 : 1; + uint32_t sse4_2 : 1; + uint32_t x2apic : 1; /* amd reserved */ + uint32_t movbe : 1; /* amd reserved */ + uint32_t popcnt : 1; + uint32_t tsc : 1; /* amd reserved */ + uint32_t aes : 1; + uint32_t xsave : 1; + uint32_t osxsave : 1; + uint32_t avx : 1; + uint32_t __reserved2 : 1; /* amd F16C */ + uint32_t __reserved3 : 1; + uint32_t __alwayszero : 1; /* amd RAZ */ + } bits; + jit_uword_t cpuid; + } ecx; + union { + struct { + uint32_t fpu : 1; + uint32_t vme : 1; + uint32_t de : 1; + uint32_t pse : 1; + uint32_t tsc : 1; + uint32_t msr : 1; + uint32_t pae : 1; + uint32_t mce : 1; + uint32_t cmpxchg8b : 1; + uint32_t apic : 1; + uint32_t __reserved0 : 1; + uint32_t sep : 1; + uint32_t mtrr : 1; + uint32_t pge : 1; + uint32_t mca : 1; + uint32_t cmov : 1; + uint32_t pat : 1; + uint32_t pse36 : 1; + uint32_t psn : 1; /* amd reserved */ + uint32_t clfsh : 1; + uint32_t __reserved1 : 1; + uint32_t ds : 1; /* amd reserved */ + uint32_t acpi : 1; /* amd reserved */ + uint32_t mmx : 1; 
+ uint32_t fxsr : 1; + uint32_t sse : 1; + uint32_t sse2 : 1; + uint32_t ss : 1; /* amd reserved */ + uint32_t htt : 1; + uint32_t tm : 1; /* amd reserved */ + uint32_t __reserved2 : 1; + uint32_t pbe : 1; /* amd reserved */ + } bits; + jit_uword_t cpuid; + } edx; +#if __X32 + int ac, flags; +#endif + jit_uword_t eax, ebx; + +#if __X32 + /* adapted from glibc __sysconf */ + __asm__ volatile ("pushfl;\n\t" + "popl %0;\n\t" + "movl $0x240000, %1;\n\t" + "xorl %0, %1;\n\t" + "pushl %1;\n\t" + "popfl;\n\t" + "pushfl;\n\t" + "popl %1;\n\t" + "xorl %0, %1;\n\t" + "pushl %0;\n\t" + "popfl" + : "=r" (flags), "=r" (ac)); + + /* i386 or i486 without cpuid */ + if ((ac & (1 << 21)) == 0) + /* probably without x87 as well */ + return 0; +#endif + + /* query %eax = 1 function */ + __asm__ volatile ( +#if __X32 || __X64_32 + "xchgl %%ebx, %1; cpuid; xchgl %%ebx, %1" +#else + "xchgq %%rbx, %1; cpuid; xchgq %%rbx, %1" +#endif + : "=a" (eax), "=r" (ebx), + "=c" (ecx.cpuid), "=d" (edx.cpuid) + : "0" (1)); + + jit_cpu.fpu = edx.bits.fpu; + jit_cpu.cmpxchg8b = edx.bits.cmpxchg8b; + jit_cpu.cmov = edx.bits.cmov; + jit_cpu.mmx = edx.bits.mmx; + jit_cpu.sse = edx.bits.sse; + jit_cpu.sse2 = edx.bits.sse2; + jit_cpu.sse3 = ecx.bits.sse3; + jit_cpu.pclmulqdq = ecx.bits.pclmulqdq; + jit_cpu.ssse3 = ecx.bits.ssse3; + jit_cpu.fma = ecx.bits.fma; + jit_cpu.cmpxchg16b = ecx.bits.cmpxchg16b; + jit_cpu.sse4_1 = ecx.bits.sse4_1; + jit_cpu.sse4_2 = ecx.bits.sse4_2; + jit_cpu.movbe = ecx.bits.movbe; + jit_cpu.popcnt = ecx.bits.popcnt; + jit_cpu.aes = ecx.bits.aes; + jit_cpu.avx = ecx.bits.avx; + + /* query %eax = 0x80000001 function */ +#if __X64 + __asm__ volatile ( +# if __X64_32 + "xchgl %%ebx, %1; cpuid; xchgl %%ebx, %1" +# else + "xchgq %%rbx, %1; cpuid; xchgq %%rbx, %1" +# endif + : "=a" (eax), "=r" (ebx), + "=c" (ecx.cpuid), "=d" (edx.cpuid) + : "0" (0x80000001)); + jit_cpu.lahf = ecx.cpuid & 1; +#endif + + return jit_cpu.sse2; +} + +jit_bool_t +jit_init(jit_state_t *_jit) +{ + return 
jit_cpu.sse2; +} + +static const jit_gpr_t abi_gpr_args[] = { +#if __X32 + /* No GPRs in args. */ +#elif __CYGWIN__ + _RCX, _RDX, _R8, _R9 +#else + _RDI, _RSI, _RDX, _RCX, _R8, _R9 +#endif +}; + +static const jit_fpr_t abi_fpr_args[] = { +#if __X32 + /* No FPRs in args. */ +#elif __CYGWIN__ + _XMM0, _XMM1, _XMM2, _XMM3 +#else + _XMM0, _XMM1, _XMM2, _XMM3, _XMM4, _XMM5, _XMM6, _XMM7 +#endif +}; + +static const int abi_gpr_arg_count = sizeof(abi_gpr_args) / sizeof(abi_gpr_args[0]); +static const int abi_fpr_arg_count = sizeof(abi_fpr_args) / sizeof(abi_fpr_args[0]); + +struct abi_arg_iterator +{ + const jit_operand_t *args; + size_t argc; + + size_t arg_idx; + size_t gpr_idx; + size_t fpr_idx; + size_t stack_size; + size_t stack_padding; +}; + +static size_t +jit_operand_abi_sizeof(enum jit_operand_abi abi) +{ + switch (abi) { + case JIT_OPERAND_ABI_UINT8: + case JIT_OPERAND_ABI_INT8: + return 1; + case JIT_OPERAND_ABI_UINT16: + case JIT_OPERAND_ABI_INT16: + return 2; + case JIT_OPERAND_ABI_UINT32: + case JIT_OPERAND_ABI_INT32: + return 4; + case JIT_OPERAND_ABI_UINT64: + case JIT_OPERAND_ABI_INT64: + return 8; + case JIT_OPERAND_ABI_POINTER: + return CHOOSE_32_64(4, 8); + case JIT_OPERAND_ABI_FLOAT: + return 4; + case JIT_OPERAND_ABI_DOUBLE: + return 8; + default: + abort(); + } +} + +static size_t +round_size_up_to_words(size_t bytes) +{ + size_t word_size = CHOOSE_32_64(4, 8); + size_t words = (bytes + word_size - 1) / word_size; + return words * word_size; +} + +static size_t +jit_initial_frame_size (void) +{ + return __WORDSIZE / 8; // Saved return address is on stack. +} + +static void +reset_abi_arg_iterator(struct abi_arg_iterator *iter, size_t argc, + const jit_operand_t *args) +{ + memset(iter, 0, sizeof *iter); + iter->argc = argc; + iter->args = args; +#if __CYGWIN__ && __X64 + // Reserve slots on the stack for 4 register parameters (8 bytes each). 
+ iter->stack_size = 32; +#endif +} + +static void +next_abi_arg(struct abi_arg_iterator *iter, jit_operand_t *arg) +{ + ASSERT(iter->arg_idx < iter->argc); + enum jit_operand_abi abi = iter->args[iter->arg_idx].abi; + if (is_gpr_arg(abi) && iter->gpr_idx < abi_gpr_arg_count) { + *arg = jit_operand_gpr (abi, abi_gpr_args[iter->gpr_idx++]); +#ifdef __CYGWIN__ + iter->fpr_idx++; +#endif + } else if (is_fpr_arg(abi) && iter->fpr_idx < abi_fpr_arg_count) { + *arg = jit_operand_fpr (abi, abi_fpr_args[iter->fpr_idx++]); +#ifdef __CYGWIN__ + iter->gpr_idx++; +#endif + } else { + *arg = jit_operand_mem (abi, JIT_SP, iter->stack_size); + size_t bytes = jit_operand_abi_sizeof (abi); + iter->stack_size += round_size_up_to_words (bytes); + } + iter->arg_idx++; +} + +static void +jit_flush(void *fptr, void *tptr) +{ +} + +static inline size_t +jit_stack_alignment(void) +{ + return 16; +} + +static void +jit_try_shorten(jit_state_t *_jit, jit_reloc_t reloc, jit_pointer_t addr) +{ + uint8_t *loc = _jit->start + reloc.offset; + uint8_t *start = loc - reloc.inst_start_offset; + uint8_t *end = _jit->pc.uc; + jit_imm_t i0 = (jit_imm_t)addr; + + if (loc == start) + return; + + if (start < (uint8_t*)addr && (uint8_t*)addr <= end) + return; + + switch (reloc.kind) + { + case JIT_RELOC_ABSOLUTE: { + _jit->pc.uc = start; + ASSERT((loc[-1] & ~7) == 0xb8); // MOVI + int32_t r0 = loc[-1] & 7; + if (start != loc - 1) { + ASSERT(start == loc - 2); + r0 |= (loc[-2] & 1) << 3; + } + return movi(_jit, r0, i0); + } + case JIT_RELOC_REL8: + ASSERT((loc[-1] & ~0xf) == 0x70 || loc[-1] == 0xeb); // JCCSI or JMPSI + /* Nothing useful to do. */ + return; + case JIT_RELOC_REL32: + _jit->pc.uc = start; + if (start[0] == 0xe9) { // JMP + return jmpi(_jit, i0); + } + ASSERT(start[0] == 0x0f); // JCC + return jcci(_jit, start[1] & ~0x80, i0); + default: + /* We don't emit other kinds of reloc. 
*/ + abort (); + } +} + +static void* +bless_function_pointer(void *ptr) +{ + return ptr; +} diff --git a/deps/lightening/lightening/x86.h b/deps/lightening/lightening/x86.h new file mode 100644 index 0000000..4eaaf95 --- /dev/null +++ b/deps/lightening/lightening/x86.h @@ -0,0 +1,163 @@ +/* + * Copyright (C) 2012-2019 Free Software Foundation, Inc. + * + * This file is part of GNU lightning. + * + * GNU lightning is free software; you can redistribute it and/or modify it + * under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation; either version 3, or (at your option) + * any later version. + * + * GNU lightning is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public + * License for more details. + * + * Authors: + * Paulo Cesar Pereira de Andrade + */ + +#ifndef _jit_x86_h +#define _jit_x86_h + +#if __WORDSIZE == 32 +# if defined(__x86_64__) +# define __X64 1 +# define __X64_32 1 +# define __X32 0 +# else +# define __X64 0 +# define __X64_32 0 +# define __X32 1 +# endif +#else +# define __X64 1 +# define __X64_32 0 +# define __X32 0 +#endif + +#define _RAX JIT_GPR(0) +#define _RCX JIT_GPR(1) +#define _RDX JIT_GPR(2) +#define _RBX JIT_GPR(3) +#define _RSP JIT_GPR(4) +#define _RBP JIT_GPR(5) +#define _RSI JIT_GPR(6) +#define _RDI JIT_GPR(7) + +#define _XMM0 JIT_FPR(0) +#define _XMM1 JIT_FPR(1) +#define _XMM2 JIT_FPR(2) +#define _XMM3 JIT_FPR(3) +#define _XMM4 JIT_FPR(4) +#define _XMM5 JIT_FPR(5) +#define _XMM6 JIT_FPR(6) +#define _XMM7 JIT_FPR(7) + +#if __X64 +# define _R8 JIT_GPR(8) +# define _R9 JIT_GPR(9) +# define _R10 JIT_GPR(10) +# define _R11 JIT_GPR(11) +# define _R12 JIT_GPR(12) +# define _R13 JIT_GPR(13) +# define _R14 JIT_GPR(14) +# define _R15 JIT_GPR(15) +# define _XMM8 JIT_FPR(8) +# define _XMM9 JIT_FPR(9) +# define _XMM10 JIT_FPR(10) +# define _XMM11 
JIT_FPR(11) +# define _XMM12 JIT_FPR(12) +# define _XMM13 JIT_FPR(13) +# define _XMM14 JIT_FPR(14) +# define _XMM15 JIT_FPR(15) +#endif + +#define JIT_SP _RSP +#define JIT_LR JIT_TMP0 +#if __X32 +# define JIT_R0 _RAX +# define JIT_R1 _RCX +# define JIT_R2 _RDX +# define JIT_V0 _RBP +# define JIT_V1 _RSI +# define JIT_V2 _RDI +# define JIT_TMP0 _RBX +# define JIT_F0 _XMM0 +# define JIT_F1 _XMM1 +# define JIT_F2 _XMM2 +# define JIT_F3 _XMM3 +# define JIT_F4 _XMM4 +# define JIT_F5 _XMM5 +# define JIT_F6 _XMM6 +# define JIT_FTMP _XMM7 +# define JIT_PLATFORM_CALLEE_SAVE_GPRS JIT_TMP0 +#elif __CYGWIN__ +# define JIT_R0 _RAX +# define JIT_R1 _RCX +# define JIT_R2 _RDX +# define JIT_R3 _R8 +# define JIT_R4 _R9 +# define JIT_R5 _R10 +# define JIT_TMP0 _R11 +# define JIT_V0 _RBX +# define JIT_V1 _RSI +# define JIT_V2 _RDI +# define JIT_V3 _R12 +# define JIT_V4 _R13 +# define JIT_V5 _R14 +# define JIT_V6 _R15 +# define JIT_F0 _XMM0 +# define JIT_F1 _XMM1 +# define JIT_F2 _XMM2 +# define JIT_F3 _XMM3 +# define JIT_F4 _XMM4 +# define JIT_FTMP _XMM5 +# define JIT_VF0 _XMM6 +# define JIT_VF1 _XMM7 +# define JIT_VF2 _XMM8 +# define JIT_VF3 _XMM9 +# define JIT_VF4 _XMM10 +# define JIT_VF5 _XMM11 +# define JIT_VF6 _XMM12 +# define JIT_VF7 _XMM13 +# define JIT_VF8 _XMM14 +# define JIT_VF9 _XMM15 +# define JIT_PLATFORM_CALLEE_SAVE_GPRS /**/ +#else +# define JIT_R0 _RAX +# define JIT_R1 _RCX +# define JIT_R2 _RDX +# define JIT_R3 _RSI +# define JIT_R4 _RDI +# define JIT_R5 _R8 +# define JIT_R6 _R9 +# define JIT_R7 _R10 +# define JIT_TMP0 _R11 +# define JIT_V0 _RBX +# define JIT_V1 _R12 +# define JIT_V2 _R13 +# define JIT_V3 _R14 +# define JIT_V4 _R15 +# define JIT_F0 _XMM0 +# define JIT_F1 _XMM1 +# define JIT_F2 _XMM2 +# define JIT_F3 _XMM3 +# define JIT_F4 _XMM4 +# define JIT_F5 _XMM5 +# define JIT_F6 _XMM6 +# define JIT_F7 _XMM7 +# define JIT_F8 _XMM8 +# define JIT_F9 _XMM9 +# define JIT_F10 _XMM10 +# define JIT_F11 _XMM11 +# define JIT_F12 _XMM12 +# define JIT_F13 _XMM13 +# define 
JIT_F14 _XMM14 +# define JIT_FTMP _XMM15 +# define JIT_PLATFORM_CALLEE_SAVE_GPRS /**/ +#endif + +#define JIT_PLATFORM_CALLEE_SAVE_FPRS + +#endif /* _jit_x86_h */ diff --git a/deps/lightening/lightning.texi b/deps/lightening/lightning.texi new file mode 100644 index 0000000..88f397a --- /dev/null +++ b/deps/lightening/lightning.texi @@ -0,0 +1,1760 @@ +\input texinfo.tex @c -*- texinfo -*- +@c %**start of header (This is for running Texinfo on a region.) + +@setfilename lightning.info + +@set TITLE Using @sc{gnu} @i{lightning} +@set TOPIC installing and using + +@settitle @value{TITLE} + +@c --------------------------------------------------------------------- +@c Common macros +@c --------------------------------------------------------------------- + +@macro bulletize{a} +@item +\a\ +@end macro + +@macro rem{a} +@r{@i{\a\}} +@end macro + +@macro gnu{} +@sc{gnu} +@end macro + +@macro lightning{} +@gnu{} @i{lightning} +@end macro + +@c --------------------------------------------------------------------- +@c Macros for Texinfo 3.1/4.0 compatibility +@c --------------------------------------------------------------------- + +@c @hlink (macro), @url and @email are used instead of @uref for Texinfo 3.1 +@c compatibility +@macro hlink{url, link} +\link\ (\url\) +@end macro + +@c ifhtml can only be true in Texinfo 4.0, which has uref +@ifhtml +@unmacro hlink + +@macro hlink{url, link} +@uref{\url\, \link\} +@end macro + +@macro email{mail} +@uref{mailto:\mail\, , \mail\} +@end macro + +@macro url{url} +@uref{\url\} +@end macro +@end ifhtml + +@c --------------------------------------------------------------------- +@c References to the other half of the manual +@c --------------------------------------------------------------------- + +@macro usingref{node, name} +@ref{\node\, , \name\} +@end macro + +@c --------------------------------------------------------------------- +@c End of macro section +@c --------------------------------------------------------------------- 
+ +@set UPDATED 18 June 2018 +@set UPDATED-MONTH June 2018 +@set EDITION 2.1.2 +@set VERSION 2.1.2 + +@ifnottex +@dircategory Software development +@direntry +* lightning: (lightning). Library for dynamic code generation. +@end direntry +@end ifnottex + +@ifnottex +@node Top +@top @lightning{} + +@iftex +@macro comma +@verbatim{|,|} +@end macro +@end iftex + +@ifnottex +@macro comma +@verb{|,|} +@end macro +@end ifnottex + +This document describes @value{TOPIC} the @lightning{} library for +dynamic code generation. + +@menu +* Overview:: What GNU lightning is +* Installation:: Configuring and installing GNU lightning +* The instruction set:: The RISC instruction set used in GNU lightning +* GNU lightning examples:: GNU lightning's examples +* Reentrancy:: Re-entrant usage of GNU lightning +* Customizations:: Advanced code generation customizations +* Acknowledgements:: Acknowledgements for GNU lightning +@end menu +@end ifnottex + +@node Overview +@chapter Introduction to @lightning{} + +@iftex +This document describes @value{TOPIC} the @lightning{} library for +dynamic code generation. +@end iftex + +Dynamic code generation is the generation of machine code +at runtime. It is typically used to strip a layer of interpretation +by allowing compilation to occur at runtime. One of the most +well-known applications of dynamic code generation is perhaps that +of interpreters that compile source code to an intermediate bytecode +form, which is then recompiled to machine code at run-time: this +approach effectively combines the portability of bytecode +representations with the speed of machine code. Another common +application of dynamic code generation is in the field of hardware +simulators and binary emulators, which can use the same techniques +to translate simulated instructions to the instructions of the +underlying machine. + +Yet other applications come to mind: for example, windowing +@dfn{bitblt} operations, matrix manipulations, and network packet +filters. 
Albeit very powerful and relatively well known within the +compiler community, dynamic code generation techniques are rarely +exploited to their full potential and, with the exception of the +two applications described above, have remained curiosities because +of their portability and functionality barriers: binary instructions +are generated, so programs using dynamic code generation must be +retargeted for each machine; in addition, coding a run-time code +generator is a tedious and error-prone task more than a difficult one. + +@lightning{} provides a portable, fast and easily retargetable dynamic +code generation system. + +To be portable, @lightning{} abstracts over current architectures' +quirks and unorthogonalities. The interface that it exposes is that +of a standardized RISC architecture loosely based on the SPARC and MIPS +chips. There are a few general-purpose registers (six, not including +those used to receive and pass parameters between subroutines), and +arithmetic operations involve three operands---either three registers +or two registers and an arbitrarily sized immediate value. + +On one hand, this architecture is general enough that it is possible to +generate pretty efficient code even on CISC architectures such as the +Intel x86 or the Motorola 68k families. On the other hand, it matches +real architectures closely enough that, most of the time, the +compiler's constant folding pass ends up generating code which +assembles machine instructions without further tests. 
+ +@node Installation +@chapter Configuring and installing @lightning{} + +The first thing to do to use @lightning{} is to configure the +program, picking the set of macros to be used on the host +architecture; this configuration is automatically performed by +the @file{configure} shell script; to run it, merely type: +@example + ./configure +@end example + +@lightning{} supports the @code{--enable-disassembler} option, which +enables linking to GNU binutils and optionally printing human readable +disassembly of the jit code. This option can be disabled by the +@code{--disable-disassembler} option. + +Another option that @file{configure} accepts is +@code{--enable-assertions}, which enables several consistency checks in +the run-time assemblers. These are not usually needed, so you can +decide to simply forget about it; also remember that these consistency +checks tend to slow down your code generator. + +After you've configured @lightning{}, run @file{make} as usual. + +@lightning{} has an extensive set of tests to validate it is working +correctly in the build host. To test it run: +@example + make check +@end example + +The next important step is: +@example + make install +@end example + +This ends the process of installing @lightning{}. + +@node The instruction set +@chapter @lightning{}'s instruction set + +@lightning{}'s instruction set was designed by deriving instructions +that closely match those of most existing RISC architectures, or +that can be easily synthesized if absent. Each instruction is composed +of: +@itemize @bullet +@item +an operation, like @code{sub} or @code{mul} + +@item +most times, a register/immediate flag (@code{r} or @code{i}) + +@item +an unsigned modifier (@code{u}), a type identifier or two, when applicable. +@end itemize + +Examples of legal mnemonics are @code{addr} (integer add, with three +register operands) and @code{muli} (integer multiply, with two +register operands and an immediate operand). 
Each instruction takes +two or three operands; in most cases, one of them can be an immediate +value instead of a register. + +Most @lightning{} integer operations are signed wordsize operations, +with the exception of operations that convert types, or load or store +values to/from memory. When applicable, the types and C types are as +follows: + +@example + _c @r{signed char} + _uc @r{unsigned char} + _s @r{short} + _us @r{unsigned short} + _i @r{int} + _ui @r{unsigned int} + _l @r{long} + _f @r{float} + _d @r{double} +@end example + +Most integer operations do not need a type modifier, and when loading or +storing values to memory there is an alias to the proper operation +using wordsize operands, that is, if omitted, the type is @r{int} on +32-bit architectures and @r{long} on 64-bit architectures. Note +that lightning also expects @code{sizeof(void*)} to match the wordsize. + +When an unsigned operation result differs from the equivalent signed +operation, there is the @code{_u} modifier. + +There are at least seven integer registers, of which six are +general-purpose, while the last is used to contain the frame pointer +(@code{FP}). The frame pointer can be used to allocate and access local +variables on the stack, using the @code{allocai} or @code{allocar} +instruction. + +Of the general-purpose registers, at least three are guaranteed to be +preserved across function calls (@code{V0}, @code{V1} and +@code{V2}) and at least three are not (@code{R0}, @code{R1} and +@code{R2}). Six registers are not very much, but this +restriction was forced by the need to target CISC architectures +which, like the x86, are poor in registers; anyway, backends can +specify the actual number of available registers with the calls +@code{JIT_R_NUM} (for caller-save registers) and @code{JIT_V_NUM} +(for callee-save registers). + +There are at least six floating-point registers, named @code{F0} to +@code{F5}. 
These are usually caller-save and are separate from the integer +registers on the supported architectures; on Intel architectures, +in 32 bit mode if SSE2 is not available or use of X87 is forced, +the register stack is mapped to a flat register file. As for the +integer registers, the macro @code{JIT_F_NUM} yields the number of +floating-point registers. + +The complete instruction set follows; as you can see, most non-memory +operations only take integers (either signed or unsigned) as operands; +this was done in order to reduce the instruction set, and because most +architectures only provide word and long word operations on registers. +There are instructions that allow operands to be extended to fit a larger +data type, both in a signed and in an unsigned way. + +@table @b +@item Binary ALU operations +These accept three operands; the last one can be an immediate. +@code{addx} operations must directly follow @code{addc}, and +@code{subx} must follow @code{subc}; otherwise, results are undefined. +Most, if not all, architectures do not support @r{float} or @r{double} +immediate operands; lightning emulates those operations by moving the +immediate to a temporary register and emitting the call with only +register operands. 
+@example +addr _f _d O1 = O2 + O3 +addi _f _d O1 = O2 + O3 +addxr O1 = O2 + (O3 + carry) +addxi O1 = O2 + (O3 + carry) +addcr O1 = O2 + O3, set carry +addci O1 = O2 + O3, set carry +subr _f _d O1 = O2 - O3 +subi _f _d O1 = O2 - O3 +subxr O1 = O2 - (O3 + carry) +subxi O1 = O2 - (O3 + carry) +subcr O1 = O2 - O3, set carry +subci O1 = O2 - O3, set carry +rsbr _f _d O1 = O3 - O2 +rsbi _f _d O1 = O3 - O2 +mulr _f _d O1 = O2 * O3 +muli _f _d O1 = O2 * O3 +divr _u _f _d O1 = O2 / O3 +divi _u _f _d O1 = O2 / O3 +remr _u O1 = O2 % O3 +remi _u O1 = O2 % O3 +andr O1 = O2 & O3 +andi O1 = O2 & O3 +orr O1 = O2 | O3 +ori O1 = O2 | O3 +xorr O1 = O2 ^ O3 +xori O1 = O2 ^ O3 +lshr O1 = O2 << O3 +lshi O1 = O2 << O3 +rshr _u O1 = O2 >> O3@footnote{The sign bit is propagated unless using the @code{_u} modifier.} +rshi _u O1 = O2 >> O3@footnote{The sign bit is propagated unless using the @code{_u} modifier.} +@end example + +@item Four operand binary ALU operations +These accept two result registers, and two operands; the last one can +be an immediate. The first two arguments cannot be the same register. + +@code{qmul} stores the low word of the result in @code{O1} and the +high word in @code{O2}. For unsigned multiplication, @code{O2} zero +means there was no overflow. For signed multiplication, no overflow +check is based on sign, and can be detected if @code{O2} is zero or +minus one. + +@code{qdiv} stores the quotient in @code{O1} and the remainder in +@code{O2}. It can be used as a quick way to check if a division is +exact, in which case the remainder is zero. + +@example +qmulr _u O1 O2 = O3 * O4 +qmuli _u O1 O2 = O3 * O4 +qdivr _u O1 O2 = O3 / O4 +qdivi _u O1 O2 = O3 / O4 +@end example + +@item Unary ALU operations +These accept two operands, both of which must be registers. +@example +negr _f _d O1 = -O2 +comr O1 = ~O2 +@end example + +These unary ALU operations are only defined for float operands. 
+@example +absr _f _d O1 = fabs(O2) +sqrtr O1 = sqrt(O2) +@end example + +Besides requiring the @code{r} modifier, there are no unary operations +with an immediate operand. + +@item Compare instructions +These accept three operands; again, the last can be an immediate. +The last two operands are compared, and the first operand, that must be +an integer register, is set to either 0 or 1, according to whether the +given condition was met or not. + +The conditions given below are for the standard behavior of C, +where the ``unordered'' comparison result is mapped to false. + +@example +ltr _u _f _d O1 = (O2 < O3) +lti _u _f _d O1 = (O2 < O3) +ler _u _f _d O1 = (O2 <= O3) +lei _u _f _d O1 = (O2 <= O3) +gtr _u _f _d O1 = (O2 > O3) +gti _u _f _d O1 = (O2 > O3) +ger _u _f _d O1 = (O2 >= O3) +gei _u _f _d O1 = (O2 >= O3) +eqr _f _d O1 = (O2 == O3) +eqi _f _d O1 = (O2 == O3) +ner _f _d O1 = (O2 != O3) +nei _f _d O1 = (O2 != O3) +unltr _f _d O1 = !(O2 >= O3) +unler _f _d O1 = !(O2 > O3) +ungtr _f _d O1 = !(O2 <= O3) +unger _f _d O1 = !(O2 < O3) +uneqr _f _d O1 = !(O2 < O3) && !(O2 > O3) +ltgtr _f _d O1 = !(O2 >= O3) || !(O2 <= O3) +ordr _f _d O1 = (O2 == O2) && (O3 == O3) +unordr _f _d O1 = (O2 != O2) || (O3 != O3) +@end example + +@item Transfer operations +These accept two operands; for @code{ext} both of them must be +registers, while @code{mov} accepts an immediate value as the second +operand. + +Unlike @code{movr} and @code{movi}, the other instructions are used +to truncate a wordsize operand to a smaller integer data type or to +convert float data types. You can also use @code{extr} to convert an +integer to a floating point value: the usual options are @code{extr_f} +and @code{extr_d}. 
+ +@example +movr _f _d O1 = O2 +movi _f _d O1 = O2 +extr _c _uc _s _us _i _ui _f _d O1 = O2 +truncr _f _d O1 = trunc(O2) +@end example + +In 64-bit architectures it may be required to use @code{truncr_f_i}, +@code{truncr_f_l}, @code{truncr_d_i} and @code{truncr_d_l} to match +the equivalent C code. Only the @code{_i} modifier is available in +32-bit architectures. + +@example +truncr_f_i = <int> O1 = <float> O2 +truncr_f_l = <long>O1 = <float> O2 +truncr_d_i = <int> O1 = <double>O2 +truncr_d_l = <long>O1 = <double>O2 +@end example + +The float conversion operations are @emph{destination first, +source second}, but the order of the types is reversed. This happens +for historical reasons. + +@example +extr_f_d = <double>O1 = <float> O2 +extr_d_f = <float> O1 = <double>O2 +@end example + +@item Network extensions +These accept two operands, both of which must be registers; these +two instructions actually perform the same task, yet they are +assigned to two mnemonics for the sake of convenience and +completeness. As usual, the first operand is the destination and +the second is the source. +The @code{_ul} variant is only available in 64-bit architectures. +@example +htonr _us _ui _ul @r{Host-to-network (big endian) order} +ntohr _us _ui _ul @r{Network-to-host order } +@end example + +@item Load operations +@code{ld} accepts two operands while @code{ldx} accepts three; +in both cases, the last can be either a register or an immediate +value. Values are extended (with or without sign, according to +the data type specification) to fit a whole register. +The @code{_ui} and @code{_l} types are only available in 64-bit +architectures. For convenience, there is a version without a +type modifier for integer or pointer operands that uses the +appropriate wordsize call. 
+@example +ldr _c _uc _s _us _i _ui _l _f _d O1 = *O2 +ldi _c _uc _s _us _i _ui _l _f _d O1 = *O2 +ldxr _c _uc _s _us _i _ui _l _f _d O1 = *(O2+O3) +ldxi _c _uc _s _us _i _ui _l _f _d O1 = *(O2+O3) +@end example + +@item Store operations +@code{st} accepts two operands while @code{stx} accepts three; in +both cases, the first can be either a register or an immediate +value. Values are sign-extended to fit a whole register. +@example +str _c _uc _s _us _i _ui _l _f _d *O1 = O2 +sti _c _uc _s _us _i _ui _l _f _d *O1 = O2 +stxr _c _uc _s _us _i _ui _l _f _d *(O1+O2) = O3 +stxi _c _uc _s _us _i _ui _l _f _d *(O1+O2) = O3 +@end example +As for the load operations, the @code{_ui} and @code{_l} types are +only available in 64-bit architectures, and for convenience, there +is a version without a type modifier for integer or pointer operands +that uses the appropriate wordsize call. + +@item Argument management +These are: +@example +prepare (not specified) +va_start (not specified) +pushargr _f _d +pushargi _f _d +va_push (not specified) +arg _c _uc _s _us _i _ui _l _f _d +getarg _c _uc _s _us _i _ui _l _f _d +va_arg _d +putargr _f _d +putargi _f _d +ret (not specified) +retr _f _d +reti _f _d +va_end (not specified) +retval _c _uc _s _us _i _ui _l _f _d +epilog (not specified) +@end example +As with other operations that use a type modifier, the @code{_ui} and +@code{_l} types are only available in 64-bit architectures, but there +are operations without a type modifier that alias to the appropriate +integer operation with wordsize operands. + +@code{prepare}, @code{pusharg}, and @code{retval} are used by the caller, +while @code{arg}, @code{getarg} and @code{ret} are used by the callee. 
+A code snippet that wants to call another procedure and has to pass +arguments must, in order: use the @code{prepare} instruction and use +the @code{pushargr} or @code{pushargi} to push the arguments @strong{in +left to right order}; and use @code{finish} or @code{call} (explained below) +to perform the actual call. + +@code{va_start} returns a @code{C} compatible @code{va_list}. To fetch +arguments, use @code{va_arg} for integers and @code{va_arg_d} for doubles. +@code{va_push} is required when passing a @code{va_list} to another function, +because not all architectures expect it as a single pointer. A known case +is DEC Alpha, which requires it as a structure passed by value. + +@code{arg}, @code{getarg} and @code{putarg} are used by the callee. +@code{arg} is different from other instructions in that it does not +actually generate any code: instead, it is a function which returns +a value to be passed to @code{getarg} or @code{putarg}. @footnote{``Return +a value'' means that @lightning{} code that compiles these +instructions returns a value when expanded.} You should call +@code{arg} as soon as possible, before any function call or, more +easily, right after the @code{prolog} instruction +(which is treated later). + +@code{getarg} accepts a register argument and a value returned by +@code{arg}, and will move that argument to the register, extending +it (with or without sign, according to the data type specification) +to fit a whole register. These instructions are more intimately +related to the usage of the @lightning{} instruction set in code +that generates other code, so they will be treated more +specifically in @ref{GNU lightning examples, , Generating code at +run-time}. + +@code{putarg} is a mix of @code{getarg} and @code{pusharg} in that +it accepts as first argument a register or immediate, and as +second argument a value returned by @code{arg}. 
It allows changing, +or restoring an argument to the current function, and is a +construct required to implement tail call optimization. Note that +arguments in registers are very cheap, but will be overwritten +at any moment, including on some operations, for example division, +that on several ports is implemented as a function call. + +Finally, the @code{retval} instruction fetches the return value of a +called function in a register. The @code{retval} instruction takes a +register argument and copies the return value of the previously called +function in that register. A function with a return value should use +@code{retr} or @code{reti} to put the return value in the return register +before returning. @xref{Fibonacci, the Fibonacci numbers}, for an example. + +@code{epilog} is an optional call, that marks the end of a function +body. It is automatically generated by @lightning{} if starting a new +function (which should be done after a @code{ret} call) or finishing +generating jit. +It is very important to note that @code{epilog} being +optional may cause a common mistake. Consider this: +@example +fun1: + prolog + ... + ret +fun2: + prolog +@end example +Because @code{epilog} is added when finding a new @code{prolog}, +this will cause the @code{fun2} label to actually be before the +return from @code{fun1}, because @lightning{} will actually +understand it as: +@example +fun1: + prolog + ... + ret +fun2: + epilog + prolog +@end example + +You should observe a few rules when using these macros. First of +all, if calling a varargs function, you should use the @code{ellipsis} +call to mark the position of the ellipsis in the C prototype. + +You should not nest calls to @code{prepare} inside a +@code{prepare/finish} block. Doing this will result in undefined +behavior. Note that for functions with zero arguments you can use +just @code{call}. 
+ +@item Branch instructions +Like @code{arg}, these also return a value which, in this case, +is to be used to compile forward branches as explained in +@ref{Fibonacci, , Fibonacci numbers}. They accept two operands to be +compared; of these, the last can be either a register or an immediate. +They are: +@example +bltr _u _f _d @r{if }(O2 < O3)@r{ goto }O1 +blti _u _f _d @r{if }(O2 < O3)@r{ goto }O1 +bler _u _f _d @r{if }(O2 <= O3)@r{ goto }O1 +blei _u _f _d @r{if }(O2 <= O3)@r{ goto }O1 +bgtr _u _f _d @r{if }(O2 > O3)@r{ goto }O1 +bgti _u _f _d @r{if }(O2 > O3)@r{ goto }O1 +bger _u _f _d @r{if }(O2 >= O3)@r{ goto }O1 +bgei _u _f _d @r{if }(O2 >= O3)@r{ goto }O1 +beqr _f _d @r{if }(O2 == O3)@r{ goto }O1 +beqi _f _d @r{if }(O2 == O3)@r{ goto }O1 +bner _f _d @r{if }(O2 != O3)@r{ goto }O1 +bnei _f _d @r{if }(O2 != O3)@r{ goto }O1 + +bunltr _f _d @r{if }!(O2 >= O3)@r{ goto }O1 +bunler _f _d @r{if }!(O2 > O3)@r{ goto }O1 +bungtr _f _d @r{if }!(O2 <= O3)@r{ goto }O1 +bunger _f _d @r{if }!(O2 < O3)@r{ goto }O1 +buneqr _f _d @r{if }!(O2 < O3) && !(O2 > O3)@r{ goto }O1 +bltgtr _f _d @r{if }!(O2 >= O3) || !(O2 <= O3)@r{ goto }O1 +bordr _f _d @r{if } (O2 == O2) && (O3 == O3)@r{ goto }O1 +bunordr _f _d @r{if } (O2 != O2) || (O3 != O3)@r{ goto }O1 + +bmsr @r{if }O2 & O3@r{ goto }O1 +bmsi @r{if }O2 & O3@r{ goto }O1 +bmcr @r{if }!(O2 & O3)@r{ goto }O1 +bmci @r{if }!(O2 & O3)@r{ goto }O1@footnote{These mnemonics mean, respectively, @dfn{branch if mask set} and @dfn{branch if mask cleared}.} +boaddr _u O2 += O3@r{, goto }O1@r{ if overflow} +boaddi _u O2 += O3@r{, goto }O1@r{ if overflow} +bxaddr _u O2 += O3@r{, goto }O1@r{ if no overflow} +bxaddi _u O2 += O3@r{, goto }O1@r{ if no overflow} +bosubr _u O2 -= O3@r{, goto }O1@r{ if overflow} +bosubi _u O2 -= O3@r{, goto }O1@r{ if overflow} +bxsubr _u O2 -= O3@r{, goto }O1@r{ if no overflow} +bxsubi _u O2 -= O3@r{, goto }O1@r{ if no overflow} +@end example + +@item Jump and return operations +These accept one argument except @code{ret} 
and @code{jmpi} which +have none; the difference between @code{finishi} and @code{calli} +is that the latter does not clean the stack from pushed parameters +(if any) and the former must @strong{always} follow a @code{prepare} +instruction. +@example +callr (not specified) @r{function call to register O1} +calli (not specified) @r{function call to immediate O1} +finishr (not specified) @r{function call to register O1} +finishi (not specified) @r{function call to immediate O1} +jmpr (not specified) @r{unconditional jump to register} +jmpi (not specified) @r{unconditional jump} +ret (not specified) @r{return from subroutine} +retr _c _uc _s _us _i _ui _l _f _d +reti _c _uc _s _us _i _ui _l _f _d +retval _c _uc _s _us _i _ui _l _f _d @r{move return value} + @r{to register} +@end example + +Like branch instructions, @code{jmpi} also returns a value which is to +be used to compile forward branches. @xref{Fibonacci, , Fibonacci +numbers}. + +@item Labels +There are 3 @lightning{} instructions to create labels: +@example +label (not specified) @r{simple label} +forward (not specified) @r{forward label} +indirect (not specified) @r{special simple label} +@end example + +@code{label} is normally used as the @code{patch_at} argument for backward +jumps. + +@example + jit_node_t *jump, *label; +label = jit_label(); + ... + jump = jit_beqr(JIT_R0, JIT_R1); + jit_patch_at(jump, label); +@end example + +@code{forward} is used to patch code generation before the actual +position of the label is known. + +@example + jit_node_t *jump, *label; +label = jit_forward(); + jump = jit_beqr(JIT_R0, JIT_R1); + jit_patch_at(jump, label); + ... + jit_link(label); +@end example + +@code{indirect} is useful when creating jump tables, and tells +@lightning{} to not optimize out a label that is not the target of +any jump, because an indirect jump may land where it is defined. + +@example + jit_node_t *jump, *label; + ... + jmpr(JIT_R0); @rem{/* may jump to label */} + ... 
+label = jit_indirect(); +@end example + +@code{indirect} is a special case of @code{note} and @code{name} +because it is a valid argument to @code{address}. + +Note that the usual idiom to write the previous example is +@example + jit_node_t *addr, *jump; +addr = jit_movi(JIT_R0, 0); @rem{/* immediate is ignored */} + ... + jmpr(JIT_R0); + ... + jit_patch(addr); @rem{/* implicit label added */} +@end example + +that automatically binds the implicit label added by @code{patch} with +the @code{movi}, but under some special conditions it is required to create +an "unbound" label. + +@item Function prolog + +These macros are used to set up a function prolog. The @code{allocai} +call accepts a single integer argument and returns an offset value +for stack storage access. The @code{allocar} accepts two register +arguments: the first is set to the offset for stack access, and the +second is the size in bytes argument. + +@example +prolog (not specified) @r{function prolog} +allocai (not specified) @r{reserve space on the stack} +allocar (not specified) @r{allocate space on the stack} +@end example + +@code{allocai} receives the number of bytes to allocate and returns +the offset from the frame pointer register @code{FP} to the base of +the area. + +@code{allocar} receives two register arguments. The first is where +to store the offset from the frame pointer register @code{FP} to the +base of the area. The second argument is the size in bytes. Note +that @code{allocar} is dynamic allocation, and special attention +should be taken when using it. If called in a loop, every iteration +will allocate stack space. Stack space is aligned from 8 to 64 bytes +depending on backend requirements, even if allocating only one byte. +It is advisable to not use it with @code{frame} and @code{tramp}; it +should work with @code{frame} with special care to call only once, +but is not supported if used in @code{tramp}, even if called only +once. 
+ +As a small appetizer, here is a small function that adds 1 to the input +parameter (an @code{int}). I'm using an assembly-like syntax here which +is a bit different from the one used when writing real subroutines with +@lightning{}; the real syntax will be introduced in @ref{GNU lightning +examples, , Generating code at run-time}. + +@example +incr: + prolog +in = arg @rem{! We have an integer argument} + getarg R0, in @rem{! Move it to R0} + addi R0, R0, 1 @rem{! Add 1} + retr R0 @rem{! And return the result} +@end example + +And here is another function which uses the @code{printf} function from +the standard C library to write a number in hexadecimal notation: + +@example +printhex: + prolog +in = arg @rem{! Same as above} + getarg R0, in + prepare @rem{! Begin call sequence for printf} + pushargi "%x" @rem{! Push format string} + ellipsis @rem{! Varargs start here} + pushargr R0 @rem{! Push second argument} + finishi printf @rem{! Call printf} + ret @rem{! Return to caller} +@end example + +@item Trampolines, continuations and tail call optimization + +Frequently it is required to generate jit code that must jump to +code generated later, possibly from another @code{jit_context_t}. +These require compatible stack frames. + +@lightning{} provides two primitives from which trampolines, +continuations and tail call optimization can be implemented. + +@example +frame (not specified) @r{create stack frame} +tramp (not specified) @r{assume stack frame} +@end example + +@code{frame} receives an integer argument@footnote{It is not +automatically computed because it does not know about the +requirement of later generated code.} that defines the size in +bytes for the stack frame of the current, @code{C} callable, +jit function. To calculate this value, a good formula is the maximum +number of arguments to any called native function times +eight@footnote{Times eight so that it works for double arguments. 
+And would not need conditionals for ports that pass arguments in +the stack.}, plus the sum of the arguments to any call to +@code{jit_allocai}. @lightning{} automatically adjusts this value +for any backend specific stack memory it may need, or any +alignment constraint. + +@code{frame} also instructs @lightning{} to save all callee +save registers in the prolog and reload in the epilog. + +@example +main: @rem{! jit entry point} + prolog @rem{! function prolog} + frame 256 @rem{! save all callee save registers and} + @rem{! reserve at least 256 bytes in stack} +main_loop: + ... + jmpi handler @rem{! jumps to external code} + ... + ret @rem{! return to the caller} +@end example + +@code{tramp} differs from @code{frame} only in that a prolog and epilog +will not be generated. Note that @code{prolog} must still be used. +The code under @code{tramp} must be ready to be entered with a jump +at the prolog position, and instead of a return, it must end with +an unconditional jump. @code{tramp} exists solely because +it allows optimizing out prolog and epilog code that would +never be executed. + +@example +handler: @rem{! handler entry point} + prolog @rem{! function prolog} + tramp 256 @rem{! assumes all callee save registers} + @rem{! are saved and there is at least} + @rem{! 256 bytes in stack} + ... + jmpi main_loop @rem{! return to the main loop} +@end example + +@lightning{} only supports Tail Call Optimization using the +@code{tramp} construct. Any other way is not guaranteed to +work on all ports. + +An example of a simple (recursive) tail call optimization: + +@example +factorial: @rem{! Entry point of the factorial function} + prolog +in = arg @rem{! Receive an integer argument} + getarg R0, in @rem{! Move argument to R0} + prepare + pushargi 1 @rem{! This is the accumulator} + pushargr R0 @rem{! This is the argument} + finishi fact @rem{! Call the tail call optimized function} + retval R0 @rem{! Fetch the result} + retr R0 @rem{! 
Return it} + epilog @rem{! Epilog *before* label before prolog} + +fact: @rem{! Entry point of the helper function} + prolog + frame 16 @rem{! Reserve 16 bytes in the stack} +fact_entry: @rem{! This is the tail call entry point} +ac = arg @rem{! The accumulator is the first argument} +in = arg @rem{! The factorial argument} + getarg R0, ac @rem{! Move the accumulator to R0} + getarg R1, in @rem{! Move the argument to R1} + blei fact_out, R1, 1 @rem{! Done if argument is one or less} + mulr R0, R0, R1 @rem{! accumulator *= argument} + putargr R0, ac @rem{! Update the accumulator} + subi R1, R1, 1 @rem{! argument -= 1} + putargr R1, in @rem{! Update the argument} + jmpi fact_entry @rem{! Tail Call Optimize it!} +fact_out: + retr R0 @rem{! Return the accumulator} +@end example + +@item Predicates +@example +forward_p (not specified) @r{forward label predicate} +indirect_p (not specified) @r{indirect label predicate} +target_p (not specified) @r{used label predicate} +arg_register_p (not specified) @r{argument kind predicate} +callee_save_p (not specified) @r{callee save predicate} +pointer_p (not specified) @r{pointer predicate} +@end example + +@code{forward_p} expects a @code{jit_node_t*} argument, and +returns non zero if it is a forward label reference, that is, +a label returned by @code{forward}, that still needs a +@code{link} call. + +@code{indirect_p} expects a @code{jit_node_t*} argument, and returns +non zero if it is an indirect label reference, that is, a label that +was returned by @code{indirect}. + +@code{target_p} expects a @code{jit_node_t*} argument, that is any +kind of label, and will return non zero if there is at least one +jump or move referencing it. + +@code{arg_register_p} expects a @code{jit_node_t*} argument, that must +have been returned by @code{arg}, @code{arg_f} or @code{arg_d}, and +will return non zero if the argument lives in a register. 
This call +is useful to know the live range of register arguments, as those +are very fast to read and write, but have volatile values. + +@code{callee_save_p} expects a valid @code{JIT_Rn}, @code{JIT_Vn}, or +@code{JIT_Fn}, and will return non zero if the register is callee +save. This call is useful because on several ports, the @code{JIT_Rn} +and @code{JIT_Fn} registers are actually callee save; no need +to save and load the values when making function calls. + +@code{pointer_p} expects a pointer argument, and will return non +zero if the pointer is inside the generated jit code. Must be +called after @code{jit_emit} and before @code{jit_destroy_state}. +@end table + +@node GNU lightning examples +@chapter Generating code at run-time + +To use @lightning{}, you should include the @file{lightning.h} file that +is put in your include directory by the @samp{make install} command. + +Each of the instructions above translates to a macro or function call. +All you have to do is prepend @code{jit_} (lowercase) to opcode names +and @code{JIT_} (uppercase) to register names. Of course, parameters +are to be put between parentheses. + +This small tutorial presents four examples: + +@iftex +@itemize @bullet +@item +The @code{incr} function found in @ref{The instruction set, , +@lightning{}'s instruction set}: + +@item +A simple function call to @code{printf} + +@item +An RPN calculator. 
+ +@item +Fibonacci numbers +@end itemize +@end iftex +@ifnottex +@menu +* incr:: A function which increments a number by one +* printf:: A simple function call to printf +* RPN calculator:: A more complex example, an RPN calculator +* Fibonacci:: Calculating Fibonacci numbers +@end menu +@end ifnottex + +@node incr +@section A function which increments a number by one + +Let's see how to create and use the sample @code{incr} function created +in @ref{The instruction set, , @lightning{}'s instruction set}: + +@example +#include <stdio.h> +#include <lightning.h> + +static jit_state_t *_jit; + +typedef int (*pifi)(int); @rem{/* Pointer to Int Function of Int */} + +int main(int argc, char *argv[]) +@{ + jit_node_t *in; + pifi incr; + + init_jit(argv[0]); + _jit = jit_new_state(); + + jit_prolog(); @rem{/* @t{ prolog } */} + in = jit_arg(); @rem{/* @t{ in = arg } */} + jit_getarg(JIT_R0, in); @rem{/* @t{ getarg R0 } */} + jit_addi(JIT_R0, JIT_R0, 1); @rem{/* @t{ addi R0@comma{} R0@comma{} 1 } */} + jit_retr(JIT_R0); @rem{/* @t{ retr R0 } */} + + incr = jit_emit(); + jit_clear_state(); + + @rem{/* call the generated code@comma{} passing 5 as an argument */} + printf("%d + 1 = %d\n", 5, incr(5)); + + jit_destroy_state(); + finish_jit(); + return 0; +@} +@end example + +Let's examine the code line by line (well, almost@dots{}): + +@table @t +@item #include <lightning.h> +You already know about this. It defines all of @lightning{}'s macros. + +@item static jit_state_t *_jit; +You might wonder about what is @code{jit_state_t}. It is a structure +that stores jit code generation information. The name @code{_jit} is +special, because since multiple jit generators can run at the same +time, you must either @r{#define _jit my_jit_state} or name it +@code{_jit}. + +@item typedef int (*pifi)(int); +Just a handy typedef for a pointer to a function that takes an +@code{int} and returns another. 
+ +@item jit_node_t *in; +Declares a variable to hold an identifier for a function argument. It +is an opaque pointer, that will hold the return of a call to @code{arg} +and be used as argument to @code{getarg}. + +@item pifi incr; +Declares a function pointer variable to a function that receives an +@code{int} and returns an @code{int}. + +@item init_jit(argv[0]); +You must call this function before creating a @code{jit_state_t} +object. This function does global state initialization, and may need +to detect CPU or Operating System features. It receives a string +argument that is later used to read symbols from a shared object using +GNU binutils if disassembly was enabled at configure time. If no +disassembly will be performed a NULL pointer can be used as argument. + +@item _jit = jit_new_state(); +This call initializes a @lightning{} jit state. + +@item jit_prolog(); +Ok, so we start generating code for our beloved function@dots{} + +@item in = jit_arg(); +@itemx jit_getarg(JIT_R0, in); +We retrieve the first (and only) argument, an integer, and store it +into the general-purpose register @code{R0}. + +@item jit_addi(JIT_R0, JIT_R0, 1); +We add one to the content of the register. + +@item jit_retr(JIT_R0); +This instruction generates a standard function epilog that returns +the contents of the @code{R0} register. + +@item incr = jit_emit(); +This instruction is very important. It actually translates the +@lightning{} macros used before to machine code, flushes the generated +code area out of the processor's instruction cache and returns a +pointer to the start of the code. + +@item jit_clear_state(); +This call cleans up any data not required for jit execution. Note +that it must be called after any call to @code{jit_print} or +@code{jit_address}, as this call destroys the @lightning{} +intermediate representation. 
+ +@item printf("%d + 1 = %d", 5, incr(5)); +Calling our function is this simple---it is not distinguishable from +a normal C function call, the only difference being that @code{incr} +is a variable. + +@item jit_destroy_state(); +Releases all memory associated with the jit context. It should be +called once it is known that the jit code will no longer be called. + +@item finish_jit(); +This call cleans up any global state held by @lightning{}; it is +advisable to call it once jit code will no longer be generated. +@end table + +@lightning{} abstracts two phases of dynamic code generation: selecting +instructions that map the standard representation, and emitting binary +code for these instructions. The client program has the responsibility +of describing the code to be generated using the standard @lightning{} +instruction set. + +Let's examine the code generated for @code{incr} on the SPARC and x86_64 +architectures (on the right is the code that an assembly-language +programmer would write): + +@table @b +@item SPARC +@example + save %sp, -112, %sp + mov %i0, %g2 retl + inc %g2 inc %o0 + mov %g2, %i0 + restore + retl + nop +@end example +In this case, @lightning{} introduces overhead to create a register +window (not knowing that the procedure is a leaf procedure) and to +move the argument to the general purpose register @code{R0} (which +maps to @code{%g2} on the SPARC). +@end table + +@table @b +@item x86_64 +@example + sub $0x30,%rsp + mov %rbp,(%rsp) + mov %rsp,%rbp + sub $0x18,%rsp + mov %rdi,%rax mov %rdi, %rax + add $0x1,%rax inc %rax + mov %rbp,%rsp + mov (%rsp),%rbp + add $0x30,%rsp + retq retq +@end example +In this case, the main overhead is due to the function's prolog and +epilog, and stack alignment after reserving stack space for word +to/from float conversions or moving data from/to x87 to/from SSE. +Note that besides allocating space to save callee saved registers, +no registers are saved/restored because @lightning{} notices those +registers are not modified. 
There is currently no logic to detect +if it needs to allocate stack space for type conversions nor +proper leaf function detection, but these are subject to change +(FIXME). +@end table + +@node printf +@section A simple function call to @code{printf} + +Again, here is the code for the example: + +@example +#include <stdio.h> +#include <lightning.h> + +static jit_state_t *_jit; + +typedef void (*pvfi)(int); @rem{/* Pointer to Void Function of Int */} + +int main(int argc, char *argv[]) +@{ + pvfi myFunction; @rem{/* ptr to generated code */} + jit_node_t *start, *end; @rem{/* a couple of labels */} + jit_node_t *in; @rem{/* to get the argument */} + + init_jit(argv[0]); + _jit = jit_new_state(); + + start = jit_note(__FILE__, __LINE__); + jit_prolog(); + in = jit_arg(); + jit_getarg(JIT_R1, in); + jit_prepare(); + jit_pushargi((jit_word_t)"generated %d bytes\n"); + jit_ellipsis(); + jit_pushargr(JIT_R1); + jit_finishi(printf); + jit_ret(); + jit_epilog(); + end = jit_note(__FILE__, __LINE__); + + myFunction = jit_emit(); + + @rem{/* call the generated code@comma{} passing its size as argument */} + myFunction((char*)jit_address(end) - (char*)jit_address(start)); + jit_clear_state(); + + jit_disassemble(); + + jit_destroy_state(); + finish_jit(); + return 0; +@} +@end example + +The function shows how many bytes were generated. Most of the code +is not very interesting, as it resembles very closely the program +presented in @ref{incr, , A function which increments a number by one}. + +For this reason, we're going to concentrate on just a few statements. + +@table @t +@item start = jit_note(__FILE__, __LINE__); +@itemx @r{@dots{}} +@itemx end = jit_note(__FILE__, __LINE__); +These two instructions call the @code{jit_note} macro, which creates +a note in the jit code; arguments to @code{jit_note} usually are a +filename string and line number integer, but using NULL for the +string argument is perfectly valid if you only need to create a simple +marker in the code. 
+ +@item jit_ellipsis(); +@code{ellipsis} usually is only required if calling varargs functions +with double arguments, but it is a good practice to properly describe +the @r{@dots{}} in the call sequence. + +@item jit_pushargi((jit_word_t)"generated %d bytes\n"); +Note the use of the @code{(jit_word_t)} cast, that is used only +to avoid a compiler warning, due to using a pointer where a +wordsize integer type was expected. + +@item jit_prepare(); +@itemx @r{@dots{}} +@itemx jit_finishi(printf); +Once the arguments to @code{printf} have been pushed, which means +moving them to stack or register arguments, the @code{printf} +function is called and the stack cleaned. Note how @lightning{} +abstracts the differences between different architectures and +ABIs -- the client program does not know how parameter passing +works on the host architecture. + +@item jit_epilog(); +Usually it is not required to call @code{epilog}, but because it +is implicitly called when noticing the end of a function, if the +@code{end} variable was set with a @code{note} call after the +@code{ret}, it would not consider the function epilog. + +@item myFunction((char*)jit_address(end) - (char*)jit_address(start)); +This calls the generated jit function passing as argument the offset +difference from the @code{start} and @code{end} notes. The @code{address} +call must be done after the @code{emit} call; otherwise either a fatal error +will happen (if @lightning{} is built with assertions enabled) or an +undefined value will be returned. + +@item jit_clear_state(); +Note that @code{jit_clear_state} was called after executing jit in +this example. It was done because it must be called after any call +to @code{jit_address} or @code{jit_print}. + +@item jit_disassemble(); +@code{disassemble} will dump the generated code to standard output, +unless @lightning{} was built with the disassembler disabled, in which +case no output will be shown. 
+@end table + +@node RPN calculator +@section A more complex example, an RPN calculator + +We create a small stack-based RPN calculator which applies a series +of operators to a given parameter and to other numeric operands. +Unlike previous examples, the code generator is fully parameterized +and is able to compile different formulas to different functions. +Here is the code for the expression compiler; a sample usage will +follow. + +Since @lightning{} does not provide push/pop instructions, this +example uses a stack-allocated area to store the data. Such an +area can be allocated using the macro @code{allocai}, which +receives the number of bytes to allocate and returns the offset +from the frame pointer register @code{FP} to the base of the +area. + +Usually, you will use the @code{ldxi} and @code{stxi} instructions +to access stack-allocated variables. However, it is possible to +use operations such as @code{add} to compute the address of the +variables, and pass the address around. 
+ +@example +#include <stdio.h> +#include <lightning.h> + +typedef int (*pifi)(int); @rem{/* Pointer to Int Function of Int */} + +static jit_state_t *_jit; + +void stack_push(int reg, int *sp) +@{ + jit_stxi_i (*sp, JIT_FP, reg); + *sp += sizeof (int); +@} + +void stack_pop(int reg, int *sp) +@{ + *sp -= sizeof (int); + jit_ldxi_i (reg, JIT_FP, *sp); +@} + +jit_node_t *compile_rpn(char *expr) +@{ + jit_node_t *in, *fn; + int stack_base, stack_ptr; + + fn = jit_note(NULL, 0); + jit_prolog(); + in = jit_arg(); + stack_ptr = stack_base = jit_allocai (32 * sizeof (int)); + + jit_getarg_i(JIT_R2, in); + + while (*expr) @{ + char buf[32]; + int n; + if (sscanf(expr, "%[0-9]%n", buf, &n)) @{ + expr += n - 1; + stack_push(JIT_R0, &stack_ptr); + jit_movi(JIT_R0, atoi(buf)); + @} else if (*expr == 'x') @{ + stack_push(JIT_R0, &stack_ptr); + jit_movr(JIT_R0, JIT_R2); + @} else if (*expr == '+') @{ + stack_pop(JIT_R1, &stack_ptr); + jit_addr(JIT_R0, JIT_R1, JIT_R0); + @} else if (*expr == '-') @{ + stack_pop(JIT_R1, &stack_ptr); + jit_subr(JIT_R0, JIT_R1, JIT_R0); + @} else if (*expr == '*') @{ + stack_pop(JIT_R1, &stack_ptr); + jit_mulr(JIT_R0, JIT_R1, JIT_R0); + @} else if (*expr == '/') @{ + stack_pop(JIT_R1, &stack_ptr); + jit_divr(JIT_R0, JIT_R1, JIT_R0); + @} else @{ + fprintf(stderr, "cannot compile: %s\n", expr); + abort(); + @} + ++expr; + @} + jit_retr(JIT_R0); + jit_epilog(); + return fn; +@} +@end example + +The principle on which the calculator is based is easy: the stack top +is held in R0, while the remaining items of the stack are held in the +memory area that we allocate with @code{allocai}. Compiling a numeric +operand or the argument @code{x} pushes the old stack top onto the +stack and moves the operand into R0; compiling an operator pops the +second operand off the stack into R1, and compiles the operation so +that the result goes into R0, thus becoming the new stack top. + +This example allocates a fixed area for 32 @code{int}s. 
This is not +a problem when the function is a leaf like in this case; in a full-blown +compiler you will want to analyze the input and determine the number +of needed stack slots---a very simple example of register allocation. +The area is then managed like a stack using @code{stack_push} and +@code{stack_pop}. + +Source code for the client (which lies in the same source file) follows: + +@example +int main(int argc, char *argv[]) +@{ + jit_node_t *nc, *nf; + pifi c2f, f2c; + int i; + + init_jit(argv[0]); + _jit = jit_new_state(); + + nc = compile_rpn("32x9*5/+"); + nf = compile_rpn("x32-5*9/"); + (void)jit_emit(); + c2f = (pifi)jit_address(nc); + f2c = (pifi)jit_address(nf); + jit_clear_state(); + + printf("\nC:"); + for (i = 0; i <= 100; i += 10) printf("%3d ", i); + printf("\nF:"); + for (i = 0; i <= 100; i += 10) printf("%3d ", c2f(i)); + printf("\n"); + + printf("\nF:"); + for (i = 32; i <= 212; i += 18) printf("%3d ", i); + printf("\nC:"); + for (i = 32; i <= 212; i += 18) printf("%3d ", f2c(i)); + printf("\n"); + + jit_destroy_state(); + finish_jit(); + return 0; +@} +@end example + +The client displays a conversion table between Celsius and Fahrenheit +degrees (both Celsius-to-Fahrenheit and Fahrenheit-to-Celsius). The +formulas are @math{F(c) = c*9/5+32} and @math{C(f) = (f-32)*5/9}, +respectively. + +Providing the formula as an argument to @code{compile_rpn} effectively +parameterizes code generation, making it possible to use the same code +to compile different functions; this is what makes dynamic code +generation so powerful. + +@node Fibonacci +@section Fibonacci numbers + +The code in this section calculates the Fibonacci sequence. That is +modeled by the recurrence relation: +@display + f(0) = 0 + f(1) = f(2) = 1 + f(n) = f(n-1) + f(n-2) +@end display + +The purpose of this example is to introduce branches. There are two +kinds of branches: backward branches and forward branches. 
We'll +present the calculation in a recursive and an iterative form; the +former only uses forward branches, while the latter uses both. + +@example +#include <stdio.h> +#include <lightning.h> + +static jit_state_t *_jit; + +typedef int (*pifi)(int); @rem{/* Pointer to Int Function of Int */} + +int main(int argc, char *argv[]) +@{ + pifi fib; + jit_node_t *label; + jit_node_t *call; + jit_node_t *in; @rem{/* offset of the argument */} + jit_node_t *ref; @rem{/* to patch the forward reference */} + jit_node_t *zero; @rem{/* to patch the forward reference */} + + init_jit(argv[0]); + _jit = jit_new_state(); + + label = jit_label(); + jit_prolog (); + in = jit_arg (); + jit_getarg (JIT_R0, in); @rem{/* R0 = n */} + zero = jit_beqi (JIT_R0, 0); + jit_movr (JIT_V0, JIT_R0); /* V0 = R0 */ + jit_movi (JIT_R0, 1); + ref = jit_blei (JIT_V0, 2); + jit_subi (JIT_V1, JIT_V0, 1); @rem{/* V1 = n-1 */} + jit_subi (JIT_V2, JIT_V0, 2); @rem{/* V2 = n-2 */} + jit_prepare(); + jit_pushargr(JIT_V1); + call = jit_finishi(NULL); + jit_patch_at(call, label); + jit_retval(JIT_V1); @rem{/* V1 = fib(n-1) */} + jit_prepare(); + jit_pushargr(JIT_V2); + call = jit_finishi(NULL); + jit_patch_at(call, label); + jit_retval(JIT_R0); @rem{/* R0 = fib(n-2) */} + jit_addr(JIT_R0, JIT_R0, JIT_V1); @rem{/* R0 = R0 + V1 */} + + jit_patch(ref); @rem{/* patch jump */} + jit_patch(zero); @rem{/* patch jump */} + jit_retr(JIT_R0); + + @rem{/* call the generated code@comma{} passing 32 as an argument */} + fib = jit_emit(); + jit_clear_state(); + printf("fib(%d) = %d\n", 32, fib(32)); + jit_destroy_state(); + finish_jit(); + return 0; +@} +@end example + +As said above, this is the first example of dynamically compiling +branches. Branch instructions have two operands containing the +values to be compared, and return a @code{jit_node_t *} object +to be patched. 
+ +Because a label's final address is only known after calling @code{emit}, +it is required to call @code{patch} or @code{patch_at}, which +tells @lightning{} that the target to patch is actually a pointer to +a @code{jit_node_t *} object; otherwise, it would assume that it is +a pointer to a C function. Note that conditional branches do not +receive a label argument, so they must be patched. + +You need to call @code{patch_at} on the return value of @code{jmpi}, +@code{finishi}, and @code{calli} if it is actually referencing a label +in the jit code. Branch instructions never receive a label +argument. Note that @code{movi} is a special case, and patching it +is usually done to get the final address of a label, usually to later +call @code{jmpr}. + +Now, here is the iterative version: + +@example +#include <stdio.h> +#include <lightning.h> + +static jit_state_t *_jit; + +typedef int (*pifi)(int); @rem{/* Pointer to Int Function of Int */} + +int main(int argc, char *argv[]) +@{ + pifi fib; + jit_node_t *in; @rem{/* offset of the argument */} + jit_node_t *ref; @rem{/* to patch the forward reference */} + jit_node_t *zero; @rem{/* to patch the forward reference */} + jit_node_t *jump; @rem{/* jump to start of loop */} + jit_node_t *loop; @rem{/* start of the loop */} + + init_jit(argv[0]); + _jit = jit_new_state(); + + jit_prolog (); + in = jit_arg (); + jit_getarg (JIT_R0, in); @rem{/* R0 = n */} + zero = jit_beqi (JIT_R0, 0); + jit_movr (JIT_R1, JIT_R0); + jit_movi (JIT_R0, 1); + ref = jit_blei (JIT_R1, 2); + jit_subi (JIT_R2, JIT_R1, 2); + jit_movr (JIT_R1, JIT_R0); + + loop= jit_label(); + jit_subi (JIT_R2, JIT_R2, 1); @rem{/* decr. 
counter */} + jit_movr (JIT_V0, JIT_R0); /* V0 = R0 */ + jit_addr (JIT_R0, JIT_R0, JIT_R1); /* R0 = R0 + R1 */ + jit_movr (JIT_R1, JIT_V0); /* R1 = V0 */ + jump= jit_bnei (JIT_R2, 0); /* if (R2) goto loop; */ + jit_patch_at(jump, loop); + + jit_patch(ref); @rem{/* patch forward jump */} + jit_patch(zero); @rem{/* patch forward jump */} + jit_retr (JIT_R0); + + @rem{/* call the generated code@comma{} passing 36 as an argument */} + fib = jit_emit(); + jit_clear_state(); + printf("fib(%d) = %d\n", 36, fib(36)); + jit_destroy_state(); + finish_jit(); + return 0; +@} +@end example + +This code calculates the recurrence relation using iteration (a +@code{for} loop in high-level languages). There are no function +calls anymore: instead, there is a backward jump (the @code{bnei} at +the end of the loop). + +Note that the program must remember the address for backward jumps; +for forward jumps it is only required to remember the jump code, +and call @code{patch} for the implicit label. + +@node Reentrancy +@chapter Re-entrant usage of @lightning{} + +@lightning{} uses the special @code{_jit} identifier. To be able +to use multiple jit generation states at the same +time, it is required to use code similar to: + +@example + jit_state_t *lightning; + #define _jit lightning +@end example + +This will cause the symbol defined as @code{_jit} to be passed as +the first argument to the underlying @lightning{} implementation, +that is usually a function with an @code{_} (underscore) prefix +and with an argument named @code{_jit}, in the pattern: + +@example + static void _jit_mnemonic(jit_state_t *, jit_gpr_t, jit_gpr_t); + #define jit_mnemonic(u, v) _jit_mnemonic(_jit, u, v); +@end example + +The reason for this is to use the same syntax as the initial lightning +implementation and to avoid needing the user to keep adding an extra +argument to every call, as multiple jit states generating code in +parallel should be very uncommon. 
+ +@node Registers +@chapter Accessing the whole register file + +As mentioned earlier in this manual, all @lightning{} back-ends are +guaranteed to have at least six general-purpose integer registers and +six floating-point registers, but many back-ends will have more. + +To access the entire register file, you can use the +@code{JIT_R}, @code{JIT_V} and @code{JIT_F} macros. They +accept a parameter that identifies the register number, which +must be strictly less than @code{JIT_R_NUM}, @code{JIT_V_NUM} +and @code{JIT_F_NUM} respectively; the number need not be +constant. Of course, expressions like @code{JIT_R0} and +@code{JIT_R(0)} denote the same register, and likewise for +integer callee-saved, or floating-point, registers. + +@node Customizations +@chapter Customizations + +Frequently it is desirable to have more control over how code is +generated or how memory is used during jit generation or execution. + +@section Memory functions +To aid in complete control of memory allocation and deallocation +@lightning{} provides wrappers that default to standard @code{malloc}, +@code{realloc} and @code{free}. These are loosely based on the +GNU GMP counterparts, with the difference that they use the same +prototype of the system allocation functions, that is, no @code{size} +for @code{free} or @code{old_size} for @code{realloc}. + +@deftypefun void jit_set_memory_functions (@* void *(*@var{alloc_func_ptr}) (size_t), @* void *(*@var{realloc_func_ptr}) (void *, size_t), @* void (*@var{free_func_ptr}) (void *)) +@lightning{} guarantees that memory is only allocated or released +using these wrapped functions, but you must note that if lightning +was linked to GNU binutils, malloc will probably be called multiple +times from there when initializing the disassembler. 
+ +Because @code{init_jit} may call memory functions, if you need to call +@code{jit_set_memory_functions}, it must be called before @code{init_jit}; +otherwise, when calling @code{finish_jit}, a pointer allocated with the +previous or default wrappers will be passed. +@end deftypefun + +@deftypefun void jit_get_memory_functions (@* void *(**@var{alloc_func_ptr}) (size_t), @* void *(**@var{realloc_func_ptr}) (void *, size_t), @* void (**@var{free_func_ptr}) (void *)) +Get the current memory allocation functions. Also, unlike the GNU GMP +counterpart, it is an error to pass @code{NULL} pointers as arguments. +@end deftypefun + +@section Alternate code buffer +To instruct @lightning{} to use an alternate code buffer it is required +to call @code{jit_realize} before @code{jit_emit}, and then query states +and customize as appropriate. + +@deftypefun void jit_realize () +Must be called once, before @code{jit_emit}, to instruct @lightning{} +that no other @code{jit_xyz} call will be made. +@end deftypefun + +@deftypefun jit_pointer_t jit_get_code (jit_word_t *@var{code_size}) +Returns NULL or the previous value set with @code{jit_set_code}, and +sets the @var{code_size} argument to an appropriate value. +If @code{jit_get_code} is called before @code{jit_emit}, the +@var{code_size} argument is set to the expected amount of bytes +required to generate code. +If @code{jit_get_code} is called after @code{jit_emit}, the +@var{code_size} argument is set to the exact amount of bytes used +by the code. +@end deftypefun + +@deftypefun void jit_set_code (jit_pointer_t @var{code}, jit_word_t @var{size}) +Instructs @lightning{} to output to the @var{code} argument and +use @var{size} as a guard to not write to invalid memory. If during +@code{jit_emit} @lightning{} finds out that the code would not fit +in @var{size} bytes, it halts code emission and returns @code{NULL}. 
+@end deftypefun + +A simple example of a loop using an alternate buffer is: + +@example + jit_uint8_t *code; + int (*func)(int); @rem{/* function pointer */} + jit_word_t code_size; + jit_word_t real_code_size; + @rem{...} + jit_realize(); @rem{/* ready to generate code */} + jit_get_code(&code_size); @rem{/* get expected code size */} + code_size = (code_size + 4095) & -4096; + do @{ + code = mmap(NULL, code_size, PROT_EXEC | PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANON, -1, 0); + jit_set_code(code, code_size); + if ((func = jit_emit()) == NULL) @{ + munmap(code, code_size); + code_size += 4096; + @} + @} while (func == NULL); + jit_get_code(&real_code_size); @rem{/* query exact size of the code */} +@end example + +The first call to @code{jit_get_code} should return @code{NULL} and set +the @code{code_size} argument to the expected amount of bytes required +to emit code. +The second call to @code{jit_get_code} is after a successful call to +@code{jit_emit}, and will return the value previously set with +@code{jit_set_code} and set the @code{real_code_size} argument to the +exact amount of bytes used to emit the code. + +@section Alternate data buffer +Sometimes it may be desirable to customize how, or to prevent +@lightning{} from using an extra buffer for constants or debug +annotation. This is usually done when also using an alternate code buffer. + +@deftypefun jit_pointer_t jit_get_data (jit_word_t *@var{data_size}, jit_word_t *@var{note_size}) +Returns @code{NULL} or the previous value set with @code{jit_set_data}, +and sets the @var{data_size} argument to how many bytes are required +for the constants data buffer, and @var{note_size} to how many bytes +are required to store the debug note information. 
+Note that it always preallocates one debug note entry even if +@code{jit_name} or @code{jit_note} are never called, but will return +zero in the @var{data_size} argument if no constant is required; +constants are only used for the @code{float} and @code{double} operations +that have an immediate argument, and not in all @lightning{} ports. +@end deftypefun + +@deftypefun void jit_set_data (jit_pointer_t @var{data}, jit_word_t @var{size}, jit_word_t @var{flags}) + +@var{data} can be NULL if disabling constants and annotations; otherwise, +a valid pointer must be passed. An assertion is done that the data will +fit in @var{size} bytes (but that is a noop if @lightning{} was built +with @code{-DNDEBUG}). + +@var{size} tells the space in bytes available in @var{data}. + +@var{flags} can be zero to tell to just use the alternate data buffer, +or a composition of @code{JIT_DISABLE_DATA} and @code{JIT_DISABLE_NOTE}. + +@table @t +@item JIT_DISABLE_DATA +@cindex JIT_DISABLE_DATA +Instructs @lightning{} to not use a constant table, but to use an +alternate method to synthesize those, usually with a larger code +sequence using stack space to transfer the value from a GPR to an +FPR register. + +@item JIT_DISABLE_NOTE +@cindex JIT_DISABLE_NOTE +Instructs @lightning{} to not store file or function name, and +line numbers in the constant buffer. 
+@end table +@end deftypefun + +A simple example of preventing usage of a data buffer is: + +@example + @rem{...} + jit_realize(); @rem{/* ready to generate code */} + jit_get_data(NULL, NULL); + jit_set_data(NULL, 0, JIT_DISABLE_DATA | JIT_DISABLE_NOTE); + @rem{...} +@end example + +Or to only use a data buffer, if required: + +@example + jit_uint8_t *data; + jit_word_t data_size; + @rem{...} + jit_realize(); @rem{/* ready to generate code */} + jit_get_data(&data_size, NULL); + if (data_size) + data = malloc(data_size); + else + data = NULL; + jit_set_data(data, data_size, JIT_DISABLE_NOTE); + @rem{...} + if (data) + free(data); + @rem{...} +@end example + +@node Acknowledgements +@chapter Acknowledgements + +As far as I know, the first general-purpose portable dynamic code +generator is @sc{dcg}, by Dawson R.@: Engler and T.@: A.@: Proebsting. +Further work by Dawson R.@: Engler resulted in the @sc{vcode} system; +unlike @sc{dcg}, @sc{vcode} used no intermediate representation and +directly inspired @lightning{}. + +Thanks go to Ian Piumarta, who kindly agreed to release his own +program @sc{ccg} under the GNU General Public License, thereby allowing +@lightning{} to use the run-time assemblers he had written for @sc{ccg}. +@sc{ccg} provides a way of dynamically assembling programs written in the +underlying architecture's assembly language. So it is not portable, +yet very interesting. + +I also thank Steve Byrne for writing GNU Smalltalk, since @lightning{} +was first developed as a tool to be used in GNU Smalltalk's dynamic +translator from bytecodes to native code. + +@c %**end of header (This is for running Texinfo on a region.) 
+ +@c *********************************************************************** + +@bye diff --git a/deps/lightening/tests/Makefile b/deps/lightening/tests/Makefile new file mode 100644 index 0000000..793f225 --- /dev/null +++ b/deps/lightening/tests/Makefile @@ -0,0 +1,87 @@ +TESTS ?= $(sort $(basename $(wildcard *.c))) +TARGETS ?= native ia32 aarch64 armv7 mips64el mipsel ppc64le + +# Suitable values of cross-compiler variables for Debian: +# +# make test CC_IA32=i686-linux-gnu-gcc CC_AARCH64=aarch64-linux-gnu-gcc +# +# The relevant packages that you need to run this: +# +# dpkg --add-architecture i386 +# dpkg --add-architecture arm64 +# apt-get update -qq +# apt-get install -y \ +# libc6-dev:amd64 gcc make \ +# qemu binfmt-support qemu-user-static \ +# gcc-i686-linux-gnu libc6-dev-i386-cross libc6:i386 \ +# gcc-aarch64-linux-gnu libc6-dev-arm64-cross libc6:arm64 +# +CC = gcc +CC_IA32=guix environment --pure -s i686-linux --ad-hoc gcc-toolchain -- gcc +CC_AARCH64=guix environment --pure -s aarch64-linux --ad-hoc gcc-toolchain -- gcc +CC_ARMv7=guix environment --pure -s armhf-linux --ad-hoc gcc-toolchain -- gcc +CC_MIPS64EL=guix environment --pure -s mips64el-linux --ad-hoc gcc-toolchain -- gcc +CC_MIPSEL=guix environment --pure -s mipsel-linux --ad-hoc gcc-toolchain -- gcc +CC_PPC64LE=guix environment --pure -s powerpc64le-linux --ad-hoc gcc-toolchain -- gcc +CFLAGS = -Wall -O0 -g $(DEBUG) +LDFLAGS = -lpthread +RUNNER = + +all: $(foreach TARGET,$(TARGETS),$(addprefix test-$(TARGET)-,$(TESTS))) + +check: $(addprefix test-$(TARGET),$(TARGETS)) + +test-vg-%: $(addprefix test-%-,$(TESTS)) + @echo "Running unit tests..." + @set -e; for test in $?; do \ + echo "Testing: $$test"; \ + valgrind -q --error-exitcode=1 ./$$test; \ + done + @echo "Success." + +test-%: $(addprefix test-%-,$(TESTS)) + @echo "Running unit tests..." + @set -e; for test in $?; do \ + echo "Testing: $$test"; \ + ./$$test; \ + done + @echo "Success." 
+ +.PHONY: test check + +lightening-%.o: ../lightening.h ../lightening/*.c ../lightening/*.h + $(CC) $(CFLAGS) $(CPPFLAGS) -I.. -o $@ -c ../lightening/lightening.c + +test-native-%: %.c lightening-native.o test.h + $(CC) $(CFLAGS) $(CPPFLAGS) -I.. -o $@ lightening-native.o $< $(LDFLAGS) + +test-ia32-%: CC = $(CC_IA32) +test-ia32-%: %.c lightening-ia32.o test.h + $(CC) $(CFLAGS) $(CPPFLAGS) -I.. -o $@ lightening-ia32.o $< $(LDFLAGS) + +test-aarch64-%: CC = $(CC_AARCH64) +test-aarch64-%: %.c lightening-aarch64.o test.h + $(CC) $(CFLAGS) $(CPPFLAGS) -I.. -o $@ lightening-aarch64.o $< $(LDFLAGS) + +test-armv7-%: CC = $(CC_ARMv7) +test-armv7-%: %.c lightening-armv7.o test.h + $(CC) $(CFLAGS) $(CPPFLAGS) -I.. -o $@ lightening-armv7.o $< $(LDFLAGS) + +test-mips64el-%: CC = $(CC_MIPS64EL) +test-mips64el-%: %.c lightening-mips64el.o test.h + $(CC) $(CFLAGS) $(CPPFLAGS) -I.. -o $@ lightening-mips64el.o $< $(LDFLAGS) + +test-mipsel-%: CC = $(CC_MIPSEL) +test-mipsel-%: %.c lightening-mipsel.o test.h + $(CC) $(CFLAGS) $(CPPFLAGS) -I.. -o $@ lightening-mipsel.o $< $(LDFLAGS) + +test-ppc64le-%: CC = $(CC_PPC64LE) +test-ppc64le-%: %.c lightening-ppc64le.o test.h + $(CC) $(CFLAGS) $(CPPFLAGS) -I.. 
-o $@ lightening-ppc64le.o $< $(LDFLAGS) + +.PRECIOUS: $(foreach TARGET,$(TARGETS),$(addprefix test-$(TARGET)-,$(TESTS))) +.PRECIOUS: $(foreach TARGET,$(TARGETS),lightening-$(TARGET).o) + +clean: + rm -f $(foreach TARGET,$(TARGETS),$(addprefix test-$(TARGET)-,$(TESTS))) + rm -f $(foreach TARGET,$(TARGETS),lightening-$(TARGET).o) diff --git a/deps/lightening/tests/absr_d.c b/deps/lightening/tests/absr_d.c new file mode 100644 index 0000000..00b8fa4 --- /dev/null +++ b/deps/lightening/tests/absr_d.c @@ -0,0 +1,26 @@ +#include "test.h" + +static void +run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size) +{ + jit_begin(j, arena_base, arena_size); + size_t align = jit_enter_jit_abi(j, 0, 0, 0); + jit_load_args_1(j, jit_operand_fpr (JIT_OPERAND_ABI_DOUBLE, JIT_F0)); + + jit_absr_d(j, JIT_F0, JIT_F0); + jit_leave_jit_abi(j, 0, 0, align); + jit_retr_d(j, JIT_F0); + + double (*f)(double) = jit_end(j, NULL); + + ASSERT(f(0.0) == 0.0); + ASSERT(f(-0.0) == 0.0); + ASSERT(f(0.5) == 0.5); + ASSERT(f(-0.5) == 0.5); +} + +int +main (int argc, char *argv[]) +{ + return main_helper(argc, argv, run_test); +} diff --git a/deps/lightening/tests/absr_f.c b/deps/lightening/tests/absr_f.c new file mode 100644 index 0000000..e019b5f --- /dev/null +++ b/deps/lightening/tests/absr_f.c @@ -0,0 +1,26 @@ +#include "test.h" + +static void +run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size) +{ + jit_begin(j, arena_base, arena_size); + size_t align = jit_enter_jit_abi(j, 0, 0, 0); + jit_load_args_1(j, jit_operand_fpr (JIT_OPERAND_ABI_FLOAT, JIT_F0)); + + jit_absr_f(j, JIT_F0, JIT_F0); + jit_leave_jit_abi(j, 0, 0, align); + jit_retr_f(j, JIT_F0); + + float (*f)(float) = jit_end(j, NULL); + + ASSERT(f(0.0) == 0.0); + ASSERT(f(-0.0) == 0.0); + ASSERT(f(0.5) == 0.5); + ASSERT(f(-0.5) == 0.5); +} + +int +main (int argc, char *argv[]) +{ + return main_helper(argc, argv, run_test); +} diff --git a/deps/lightening/tests/addi.c b/deps/lightening/tests/addi.c new file mode 100644 
index 0000000..756d070 --- /dev/null +++ b/deps/lightening/tests/addi.c @@ -0,0 +1,25 @@ +#include "test.h" + +static void +run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size) +{ + jit_begin(j, arena_base, arena_size); + size_t align = jit_enter_jit_abi(j, 0, 0, 0); + jit_load_args_1(j, jit_operand_gpr (JIT_OPERAND_ABI_WORD, JIT_R0)); + + jit_addi(j, JIT_R0, JIT_R0, 69); + jit_leave_jit_abi(j, 0, 0, align); + jit_retr(j, JIT_R0); + + size_t size = 0; + void* ret = jit_end(j, &size); + + int (*f)(int) = ret; + ASSERT(f(42) == 111); +} + +int +main (int argc, char *argv[]) +{ + return main_helper(argc, argv, run_test); +} diff --git a/deps/lightening/tests/addr.c b/deps/lightening/tests/addr.c new file mode 100644 index 0000000..6ee76e2 --- /dev/null +++ b/deps/lightening/tests/addr.c @@ -0,0 +1,26 @@ +#include "test.h" + +static void +run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size) +{ + jit_begin(j, arena_base, arena_size); + size_t align = jit_enter_jit_abi(j, 0, 0, 0); + jit_load_args_2(j, jit_operand_gpr (JIT_OPERAND_ABI_WORD, JIT_R0), + jit_operand_gpr (JIT_OPERAND_ABI_WORD, JIT_R1)); + + jit_addr(j, JIT_R0, JIT_R0, JIT_R1); + jit_leave_jit_abi(j, 0, 0, align); + jit_retr(j, JIT_R0); + + size_t size = 0; + void* ret = jit_end(j, &size); + + int (*f)(int, int) = ret; + ASSERT(f(42, 69) == 111); +} + +int +main (int argc, char *argv[]) +{ + return main_helper(argc, argv, run_test); +} diff --git a/deps/lightening/tests/addr_d.c b/deps/lightening/tests/addr_d.c new file mode 100644 index 0000000..1121620 --- /dev/null +++ b/deps/lightening/tests/addr_d.c @@ -0,0 +1,27 @@ +#include "test.h" + +static void +run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size) +{ + jit_begin(j, arena_base, arena_size); + size_t align = jit_enter_jit_abi(j, 0, 0, 0); + jit_load_args_2(j, jit_operand_fpr (JIT_OPERAND_ABI_DOUBLE, JIT_F0), + jit_operand_fpr (JIT_OPERAND_ABI_DOUBLE, JIT_F1)); + + jit_addr_d(j, JIT_F0, JIT_F0, JIT_F1); + 
jit_leave_jit_abi(j, 0, 0, align); + jit_retr_d(j, JIT_F0); + + size_t size = 0; + void* ret = jit_end(j, &size); + + double (*f)(double, double) = ret; + ASSERT(f(42., 69.) == 111.); + ASSERT(f(42.5, 69.5) == 112.); +} + +int +main (int argc, char *argv[]) +{ + return main_helper(argc, argv, run_test); +} diff --git a/deps/lightening/tests/addr_f.c b/deps/lightening/tests/addr_f.c new file mode 100644 index 0000000..4317dfe --- /dev/null +++ b/deps/lightening/tests/addr_f.c @@ -0,0 +1,27 @@ +#include "test.h" + +static void +run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size) +{ + jit_begin(j, arena_base, arena_size); + size_t align = jit_enter_jit_abi(j, 0, 0, 0); + jit_load_args_2(j, jit_operand_fpr (JIT_OPERAND_ABI_FLOAT, JIT_F0), + jit_operand_fpr (JIT_OPERAND_ABI_FLOAT, JIT_F1)); + + jit_addr_f(j, JIT_F0, JIT_F0, JIT_F1); + jit_leave_jit_abi(j, 0, 0, align); + jit_retr_f(j, JIT_F0); + + size_t size = 0; + void* ret = jit_end(j, &size); + + float (*f)(float, float) = ret; + ASSERT(f(42.f, 69.f) == 111.f); + ASSERT(f(42.5f, 69.5f) == 112.f); +} + +int +main (int argc, char *argv[]) +{ + return main_helper(argc, argv, run_test); +} diff --git a/deps/lightening/tests/addx.c b/deps/lightening/tests/addx.c new file mode 100644 index 0000000..417cd1a --- /dev/null +++ b/deps/lightening/tests/addx.c @@ -0,0 +1,63 @@ +#include "test.h" + +static void +run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size) +{ + jit_begin(j, arena_base, arena_size); + size_t align = jit_enter_jit_abi(j, 0, 0, 0); + jit_load_args_2(j, jit_operand_gpr (JIT_OPERAND_ABI_WORD, JIT_R0), + jit_operand_gpr (JIT_OPERAND_ABI_WORD, JIT_R1)); + + jit_movi(j, JIT_R2, 0); + jit_addcr(j, JIT_R0, JIT_R0, JIT_R1); + jit_addxi(j, JIT_R2, JIT_R2, 0); /* NOTE(review): presumably R2 = 0 + 0 + carry-out of the addcr above, so f returns only the carry bit -- confirm against the addcr/addxi definitions */ + jit_leave_jit_abi(j, 0, 0, align); + jit_retr(j, JIT_R2); + + size_t size = 0; + void* ret = jit_end(j, &size); + + jit_word_t (*f)(jit_word_t, jit_word_t) = ret; + + ASSERT(f(0, 0) == 0); + +#if __WORDSIZE == 32 + /* carry */ + 
ASSERT(f(0xffffffff, 0xffffffff) == 1); + /* overflow */ + ASSERT(f(0x7fffffff, 1) == 0); + /* overflow */ + ASSERT(f(0x7fffffff, 0x7fffffff) == 0); + /* nothing: sum is 0xffffffff, neither carry nor signed overflow */ + ASSERT(f(0x7fffffff, 0x80000000) == 0); + /* carry+overflow */ + ASSERT(f(0x80000000, 0x80000000) == 1); +#else + /* nothing */ + ASSERT(f(0xffffffff, 0xffffffff) == 0); + /* nothing */ + ASSERT(f(0x7fffffff, 1) == 0); + /* nothing */ + ASSERT(f(0x7fffffff, 0x7fffffff) == 0); + /* nothing */ + ASSERT(f(0x7fffffff, 0x80000000) == 0); + /* nothing */ + ASSERT(f(0x80000000, 0x80000000) == 0); + /* carry */ + ASSERT(f(0xffffffffffffffff, 0xffffffffffffffff) == 1); + /* overflow */ + ASSERT(f(0x7fffffffffffffff, 1) == 0); + /* overflow */ + ASSERT(f(0x7fffffffffffffff, 0x7fffffffffffffff) == 0); + /* nothing: sum is 0xffffffffffffffff, neither carry nor signed overflow */ + ASSERT(f(0x7fffffffffffffff, 0x8000000000000000) == 0); + /* carry+overflow */ + ASSERT(f(0x8000000000000000, 0x8000000000000000) == 1); +#endif +} + +int +main (int argc, char *argv[]) +{ + return main_helper(argc, argv, run_test); +} diff --git a/deps/lightening/tests/andi.c b/deps/lightening/tests/andi.c new file mode 100644 index 0000000..c6f39d7 --- /dev/null +++ b/deps/lightening/tests/andi.c @@ -0,0 +1,31 @@ +#include "test.h" + +static void +run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size) +{ + jit_begin(j, arena_base, arena_size); + size_t align = jit_enter_jit_abi(j, 0, 0, 0); + jit_load_args_1(j, jit_operand_gpr (JIT_OPERAND_ABI_WORD, JIT_R0)); + + jit_andi(j, JIT_R0, JIT_R0, 1); + jit_leave_jit_abi(j, 0, 0, align); + jit_retr(j, JIT_R0); + + size_t size = 0; + void* ret = jit_end(j, &size); + + jit_word_t (*f)(jit_word_t) = ret; + + ASSERT(f(0x7fffffff) == 1); + ASSERT(f(0x80000000) == 0); +#if __WORDSIZE == 64 + ASSERT(f(0x7fffffffffffffff) == 1); + ASSERT(f(0x8000000000000000) == 0); +#endif +} + +int +main (int argc, char *argv[]) +{ + return main_helper(argc, argv, run_test); +} diff --git a/deps/lightening/tests/andr.c b/deps/lightening/tests/andr.c new file mode 
100644 index 0000000..1114ef9 --- /dev/null +++ b/deps/lightening/tests/andr.c @@ -0,0 +1,48 @@ +#include "test.h" + +static void +run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size) +{ + jit_begin(j, arena_base, arena_size); + size_t align = jit_enter_jit_abi(j, 0, 0, 0); + jit_load_args_2(j, jit_operand_gpr (JIT_OPERAND_ABI_WORD, JIT_R0), + jit_operand_gpr (JIT_OPERAND_ABI_WORD, JIT_R1)); + + jit_andr(j, JIT_R0, JIT_R0, JIT_R1); + jit_leave_jit_abi(j, 0, 0, align); + jit_retr(j, JIT_R0); + + size_t size = 0; + void* ret = jit_end(j, &size); + + jit_word_t (*f)(jit_word_t, jit_word_t) = ret; + + ASSERT(f(0x7fffffff, 1) == 1); + ASSERT(f(1, 0x7fffffff) == 1); + ASSERT(f(0x80000000, 1) == 0); + ASSERT(f(1, 0x80000000) == 0); + ASSERT(f(0x7fffffff, 0x80000000) == 0); + ASSERT(f(0x80000000, 0x7fffffff) == 0); + ASSERT(f(0x7fffffff, 0xffffffff) == 0x7fffffff); + ASSERT(f(0xffffffff, 0x7fffffff) == 0x7fffffff); + ASSERT(f(0xffffffff, 0xffffffff) == 0xffffffff); + ASSERT(f(0x7fffffff, 0) == 0); + ASSERT(f(0, 0x7fffffff) == 0); +#if __WORDSIZE == 64 + ASSERT(f(0x7fffffffffffffff, 1) == 1); + ASSERT(f(1, 0x7fffffffffffffff) == 1); + ASSERT(f(0x8000000000000000, 1) == 0); + ASSERT(f(1, 0x8000000000000000) == 0); + ASSERT(f(0x7fffffffffffffff, 0x8000000000000000) == 0); + ASSERT(f(0x8000000000000000, 0x7fffffffffffffff) == 0); + ASSERT(f(0x7fffffffffffffff, 0xffffffffffffffff) == 0x7fffffffffffffff); + ASSERT(f(0xffffffffffffffff, 0x7fffffffffffffff) == 0x7fffffffffffffff); + ASSERT(f(0xffffffffffffffff, 0xffffffffffffffff) == 0xffffffffffffffff); +#endif +} + +int +main (int argc, char *argv[]) +{ + return main_helper(argc, argv, run_test); +} diff --git a/deps/lightening/tests/beqi.c b/deps/lightening/tests/beqi.c new file mode 100644 index 0000000..dcb012f --- /dev/null +++ b/deps/lightening/tests/beqi.c @@ -0,0 +1,32 @@ +#include "test.h" + +static void +run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size) +{ + jit_begin(j, arena_base, 
arena_size); + size_t align = jit_enter_jit_abi(j, 0, 0, 0); + jit_load_args_1(j, jit_operand_gpr (JIT_OPERAND_ABI_WORD, JIT_R0)); + + jit_reloc_t r = jit_beqi(j, JIT_R0, -1); + jit_leave_jit_abi(j, 0, 0, align); + jit_reti(j, 0); + jit_patch_here(j, r); + jit_leave_jit_abi(j, 0, 0, align); + jit_reti(j, 1); + + jit_word_t (*f)(jit_word_t) = jit_end(j, NULL); + + ASSERT(f(0) == 0); + ASSERT(f(1) == 0); + ASSERT(f(-1) == 1); + +#if __WORDSIZE == 64 + ASSERT(f(0xffffffffff) == 0); +#endif +} + +int +main (int argc, char *argv[]) +{ + return main_helper(argc, argv, run_test); +} diff --git a/deps/lightening/tests/beqr.c b/deps/lightening/tests/beqr.c new file mode 100644 index 0000000..0100400 --- /dev/null +++ b/deps/lightening/tests/beqr.c @@ -0,0 +1,36 @@ +#include "test.h" + +static void +run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size) +{ + jit_begin(j, arena_base, arena_size); + size_t align = jit_enter_jit_abi(j, 0, 0, 0); + jit_load_args_2(j, jit_operand_gpr (JIT_OPERAND_ABI_WORD, JIT_R0), + jit_operand_gpr (JIT_OPERAND_ABI_WORD, JIT_R1)); + + jit_reloc_t r = jit_beqr(j, JIT_R0, JIT_R1); + jit_leave_jit_abi(j, 0, 0, align); + jit_reti(j, 0); + jit_patch_here(j, r); + jit_leave_jit_abi(j, 0, 0, align); + jit_reti(j, 1); + + jit_word_t (*f)(jit_word_t, jit_word_t) = jit_end(j, NULL); + + ASSERT(f(0, 0) == 1); + ASSERT(f(0, 1) == 0); + ASSERT(f(1, 0) == 0); + ASSERT(f(-1, 0) == 0); + ASSERT(f(0, -1) == 0); + ASSERT(f(1, 1) == 1); + +#if __WORDSIZE == 64 + ASSERT(f(0xffffffffff, -1) == 0); +#endif +} + +int +main (int argc, char *argv[]) +{ + return main_helper(argc, argv, run_test); +} diff --git a/deps/lightening/tests/beqr_d.c b/deps/lightening/tests/beqr_d.c new file mode 100644 index 0000000..a84b6a7 --- /dev/null +++ b/deps/lightening/tests/beqr_d.c @@ -0,0 +1,35 @@ +#include "test.h" + +static void +run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size) +{ + jit_begin(j, arena_base, arena_size); + size_t align = 
jit_enter_jit_abi(j, 0, 0, 0); + jit_load_args_2(j, jit_operand_fpr (JIT_OPERAND_ABI_DOUBLE, JIT_F0), + jit_operand_fpr (JIT_OPERAND_ABI_DOUBLE, JIT_F1)); + + jit_reloc_t r = jit_beqr_d(j, JIT_F0, JIT_F1); + jit_leave_jit_abi(j, 0, 0, align); + jit_reti(j, 0); + jit_patch_here(j, r); + jit_leave_jit_abi(j, 0, 0, align); + jit_reti(j, 1); + + jit_word_t (*f)(double, double) = jit_end(j, NULL); + + ASSERT(f(0, 0) == 1); + ASSERT(f(0, 1) == 0); + ASSERT(f(1, 0) == 0); + ASSERT(f(-1, 0) == 0); + ASSERT(f(0, -1) == 0); + ASSERT(f(1, 1) == 1); + + ASSERT(f(0, 0.0/0.0) == 0); + ASSERT(f(0.0/0.0, 0) == 0); +} + +int +main (int argc, char *argv[]) +{ + return main_helper(argc, argv, run_test); +} diff --git a/deps/lightening/tests/beqr_f.c b/deps/lightening/tests/beqr_f.c new file mode 100644 index 0000000..7b5cc27 --- /dev/null +++ b/deps/lightening/tests/beqr_f.c @@ -0,0 +1,35 @@ +#include "test.h" + +static void +run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size) +{ + jit_begin(j, arena_base, arena_size); + size_t align = jit_enter_jit_abi(j, 0, 0, 0); + jit_load_args_2(j, jit_operand_fpr (JIT_OPERAND_ABI_FLOAT, JIT_F0), + jit_operand_fpr (JIT_OPERAND_ABI_FLOAT, JIT_F1)); + + jit_reloc_t r = jit_beqr_f(j, JIT_F0, JIT_F1); + jit_leave_jit_abi(j, 0, 0, align); + jit_reti(j, 0); + jit_patch_here(j, r); + jit_leave_jit_abi(j, 0, 0, align); + jit_reti(j, 1); + + jit_word_t (*f)(float, float) = jit_end(j, NULL); + + ASSERT(f(0, 0) == 1); + ASSERT(f(0, 1) == 0); + ASSERT(f(1, 0) == 0); + ASSERT(f(-1, 0) == 0); + ASSERT(f(0, -1) == 0); + ASSERT(f(1, 1) == 1); + + ASSERT(f(0, 0.0/0.0) == 0); + ASSERT(f(0.0/0.0, 0) == 0); +} + +int +main (int argc, char *argv[]) +{ + return main_helper(argc, argv, run_test); +} diff --git a/deps/lightening/tests/bgei.c b/deps/lightening/tests/bgei.c new file mode 100644 index 0000000..f30901e --- /dev/null +++ b/deps/lightening/tests/bgei.c @@ -0,0 +1,32 @@ +#include "test.h" + +static void +run_test(jit_state_t *j, uint8_t 
*arena_base, size_t arena_size) +{ + jit_begin(j, arena_base, arena_size); + size_t align = jit_enter_jit_abi(j, 0, 0, 0); + jit_load_args_1(j, jit_operand_gpr (JIT_OPERAND_ABI_WORD, JIT_R0)); + + jit_reloc_t r = jit_bgei(j, JIT_R0, 0); + jit_leave_jit_abi(j, 0, 0, align); + jit_reti(j, 0); + jit_patch_here(j, r); + jit_leave_jit_abi(j, 0, 0, align); + jit_reti(j, 1); + + jit_word_t (*f)(jit_word_t) = jit_end(j, NULL); + + ASSERT(f(0) == 1); + ASSERT(f(1) == 1); + ASSERT(f(-1) == 0); + +#if __WORDSIZE == 64 + ASSERT(f(0xffffffffff) == 1); +#endif +} + +int +main (int argc, char *argv[]) +{ + return main_helper(argc, argv, run_test); +} diff --git a/deps/lightening/tests/bgei_u.c b/deps/lightening/tests/bgei_u.c new file mode 100644 index 0000000..d61089f --- /dev/null +++ b/deps/lightening/tests/bgei_u.c @@ -0,0 +1,32 @@ +#include "test.h" + +static void +run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size) +{ + jit_begin(j, arena_base, arena_size); + size_t align = jit_enter_jit_abi(j, 0, 0, 0); + jit_load_args_1(j, jit_operand_gpr (JIT_OPERAND_ABI_WORD, JIT_R0)); + + jit_reloc_t r = jit_bgei_u(j, JIT_R0, 1); + jit_leave_jit_abi(j, 0, 0, align); + jit_reti(j, 0); + jit_patch_here(j, r); + jit_leave_jit_abi(j, 0, 0, align); + jit_reti(j, 1); + + jit_word_t (*f)(jit_word_t) = jit_end(j, NULL); + + ASSERT(f(0) == 0); + ASSERT(f(1) == 1); + ASSERT(f(-1) == 1); + +#if __WORDSIZE == 64 + ASSERT(f(0xff00000000) == 1); +#endif +} + +int +main (int argc, char *argv[]) +{ + return main_helper(argc, argv, run_test); +} diff --git a/deps/lightening/tests/bger.c b/deps/lightening/tests/bger.c new file mode 100644 index 0000000..920e820 --- /dev/null +++ b/deps/lightening/tests/bger.c @@ -0,0 +1,34 @@ +#include "test.h" + +static void +run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size) +{ + jit_begin(j, arena_base, arena_size); + size_t align = jit_enter_jit_abi(j, 0, 0, 0); + jit_load_args_2(j, jit_operand_gpr (JIT_OPERAND_ABI_WORD, JIT_R0), + 
jit_operand_gpr (JIT_OPERAND_ABI_WORD, JIT_R1)); + + jit_reloc_t r = jit_bger(j, JIT_R0, JIT_R1); + jit_leave_jit_abi(j, 0, 0, align); + jit_reti(j, 0); + jit_patch_here(j, r); + jit_leave_jit_abi(j, 0, 0, align); + jit_reti(j, 1); + + jit_word_t (*f)(jit_word_t, jit_word_t) = jit_end(j, NULL); + + ASSERT(f(0, 0) == 1); + ASSERT(f(0, 1) == 0); + ASSERT(f(1, 0) == 1); + ASSERT(f(-1, 0) == 0); + ASSERT(f(0, -1) == 1); +#if __WORDSIZE == 64 + ASSERT(f(0xffffffffff, 1) == 1); +#endif +} + +int +main (int argc, char *argv[]) +{ + return main_helper(argc, argv, run_test); +} diff --git a/deps/lightening/tests/bger_d.c b/deps/lightening/tests/bger_d.c new file mode 100644 index 0000000..712b118 --- /dev/null +++ b/deps/lightening/tests/bger_d.c @@ -0,0 +1,34 @@ +#include "test.h" + +static void +run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size) +{ + jit_begin(j, arena_base, arena_size); + size_t align = jit_enter_jit_abi(j, 0, 0, 0); + jit_load_args_2(j, jit_operand_fpr (JIT_OPERAND_ABI_DOUBLE, JIT_F0), + jit_operand_fpr (JIT_OPERAND_ABI_DOUBLE, JIT_F1)); + + jit_reloc_t r = jit_bger_d(j, JIT_F0, JIT_F1); + jit_leave_jit_abi(j, 0, 0, align); + jit_reti(j, 0); + jit_patch_here(j, r); + jit_leave_jit_abi(j, 0, 0, align); + jit_reti(j, 1); + + jit_word_t (*f)(double, double) = jit_end(j, NULL); + + ASSERT(f(0, 0) == 1); + ASSERT(f(0, 1) == 0); + ASSERT(f(1, 0) == 1); + ASSERT(f(-1, 0) == 0); + ASSERT(f(0, -1) == 1); + + ASSERT(f(0, 0.0/0.0) == 0); + ASSERT(f(0.0/0.0, 0) == 0); +} + +int +main (int argc, char *argv[]) +{ + return main_helper(argc, argv, run_test); +} diff --git a/deps/lightening/tests/bger_f.c b/deps/lightening/tests/bger_f.c new file mode 100644 index 0000000..b9d5478 --- /dev/null +++ b/deps/lightening/tests/bger_f.c @@ -0,0 +1,34 @@ +#include "test.h" + +static void +run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size) +{ + jit_begin(j, arena_base, arena_size); + size_t align = jit_enter_jit_abi(j, 0, 0, 0); + jit_load_args_2(j, 
jit_operand_fpr (JIT_OPERAND_ABI_FLOAT, JIT_F0), + jit_operand_fpr (JIT_OPERAND_ABI_FLOAT, JIT_F1)); + + jit_reloc_t r = jit_bger_f(j, JIT_F0, JIT_F1); + jit_leave_jit_abi(j, 0, 0, align); + jit_reti(j, 0); + jit_patch_here(j, r); + jit_leave_jit_abi(j, 0, 0, align); + jit_reti(j, 1); + + jit_word_t (*f)(float, float) = jit_end(j, NULL); + + ASSERT(f(0, 0) == 1); + ASSERT(f(0, 1) == 0); + ASSERT(f(1, 0) == 1); + ASSERT(f(-1, 0) == 0); + ASSERT(f(0, -1) == 1); + + ASSERT(f(0, 0.0/0.0) == 0); + ASSERT(f(0.0/0.0, 0) == 0); +} + +int +main (int argc, char *argv[]) +{ + return main_helper(argc, argv, run_test); +} diff --git a/deps/lightening/tests/bger_u.c b/deps/lightening/tests/bger_u.c new file mode 100644 index 0000000..1978765 --- /dev/null +++ b/deps/lightening/tests/bger_u.c @@ -0,0 +1,35 @@ +#include "test.h" + +static void +run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size) +{ + jit_begin(j, arena_base, arena_size); + size_t align = jit_enter_jit_abi(j, 0, 0, 0); + jit_load_args_2(j, jit_operand_gpr (JIT_OPERAND_ABI_WORD, JIT_R0), + jit_operand_gpr (JIT_OPERAND_ABI_WORD, JIT_R1)); + + jit_reloc_t r = jit_bger_u(j, JIT_R0, JIT_R1); + jit_leave_jit_abi(j, 0, 0, align); + jit_reti(j, 0); + jit_patch_here(j, r); + jit_leave_jit_abi(j, 0, 0, align); + jit_reti(j, 1); + + jit_word_t (*f)(jit_word_t, jit_word_t) = jit_end(j, NULL); + + ASSERT(f(0, 0) == 1); + ASSERT(f(0, 1) == 0); + ASSERT(f(1, 0) == 1); + ASSERT(f(-1, 0) == 1); + ASSERT(f(0, -1) == 0); + +#if __WORDSIZE == 64 + ASSERT(f(0xff00000000, 1) == 1); +#endif +} + +int +main (int argc, char *argv[]) +{ + return main_helper(argc, argv, run_test); +} diff --git a/deps/lightening/tests/bgti.c b/deps/lightening/tests/bgti.c new file mode 100644 index 0000000..89eecae --- /dev/null +++ b/deps/lightening/tests/bgti.c @@ -0,0 +1,33 @@ +#include "test.h" + +static void +run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size) +{ + jit_begin(j, arena_base, arena_size); + size_t align = 
jit_enter_jit_abi(j, 0, 0, 0); + jit_load_args_1(j, jit_operand_gpr (JIT_OPERAND_ABI_WORD, JIT_R0)); + + jit_reloc_t r = jit_bgti(j, JIT_R0, 1); + jit_leave_jit_abi(j, 0, 0, align); + jit_reti(j, 0); + jit_patch_here(j, r); + jit_leave_jit_abi(j, 0, 0, align); + jit_reti(j, 1); + + jit_word_t (*f)(jit_word_t) = jit_end(j, NULL); + + ASSERT(f(0) == 0); + ASSERT(f(1) == 0); + ASSERT(f(2) == 1); + ASSERT(f(-1) == 0); + +#if __WORDSIZE == 64 + ASSERT(f(0xffffffffff) == 1); +#endif +} + +int +main (int argc, char *argv[]) +{ + return main_helper(argc, argv, run_test); +} diff --git a/deps/lightening/tests/bgti_u.c b/deps/lightening/tests/bgti_u.c new file mode 100644 index 0000000..51bb754 --- /dev/null +++ b/deps/lightening/tests/bgti_u.c @@ -0,0 +1,31 @@ +#include "test.h" + +static void +run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size) +{ + jit_begin(j, arena_base, arena_size); + size_t align = jit_enter_jit_abi(j, 0, 0, 0); + jit_load_args_1(j, jit_operand_gpr (JIT_OPERAND_ABI_WORD, JIT_R0)); + + jit_reloc_t r = jit_bgti_u(j, JIT_R0, 1); + jit_leave_jit_abi(j, 0, 0, align); + jit_reti(j, 0); + jit_patch_here(j, r); + jit_leave_jit_abi(j, 0, 0, align); + jit_reti(j, 1); + + jit_word_t (*f)(jit_word_t) = jit_end(j, NULL); + + ASSERT(f(0) == 0); + ASSERT(f(1) == 0); + ASSERT(f(-1) == 1); +#if __WORDSIZE == 64 + ASSERT(f(0xff00000000) == 1); +#endif +} + +int +main (int argc, char *argv[]) +{ + return main_helper(argc, argv, run_test); +} diff --git a/deps/lightening/tests/bgtr.c b/deps/lightening/tests/bgtr.c new file mode 100644 index 0000000..c4dcd51 --- /dev/null +++ b/deps/lightening/tests/bgtr.c @@ -0,0 +1,34 @@ +#include "test.h" + +static void +run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size) +{ + jit_begin(j, arena_base, arena_size); + size_t align = jit_enter_jit_abi(j, 0, 0, 0); + jit_load_args_2(j, jit_operand_gpr (JIT_OPERAND_ABI_WORD, JIT_R0), + jit_operand_gpr (JIT_OPERAND_ABI_WORD, JIT_R1)); + + jit_reloc_t r = jit_bgtr(j, 
JIT_R0, JIT_R1); + jit_leave_jit_abi(j, 0, 0, align); + jit_reti(j, 0); + jit_patch_here(j, r); + jit_leave_jit_abi(j, 0, 0, align); + jit_reti(j, 1); + + jit_word_t (*f)(jit_word_t, jit_word_t) = jit_end(j, NULL); + + ASSERT(f(0, 0) == 0); + ASSERT(f(0, 1) == 0); + ASSERT(f(1, 0) == 1); + ASSERT(f(-1, 0) == 0); + ASSERT(f(0, -1) == 1); +#if __WORDSIZE == 64 + ASSERT(f(0xffffffffff, 1) == 1); +#endif +} + +int +main (int argc, char *argv[]) +{ + return main_helper(argc, argv, run_test); +} diff --git a/deps/lightening/tests/bgtr_d.c b/deps/lightening/tests/bgtr_d.c new file mode 100644 index 0000000..d3c2436 --- /dev/null +++ b/deps/lightening/tests/bgtr_d.c @@ -0,0 +1,34 @@ +#include "test.h" + +static void +run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size) +{ + jit_begin(j, arena_base, arena_size); + size_t align = jit_enter_jit_abi(j, 0, 0, 0); + jit_load_args_2(j, jit_operand_fpr (JIT_OPERAND_ABI_DOUBLE, JIT_F0), + jit_operand_fpr (JIT_OPERAND_ABI_DOUBLE, JIT_F1)); + + jit_reloc_t r = jit_bgtr_d(j, JIT_F0, JIT_F1); + jit_leave_jit_abi(j, 0, 0, align); + jit_reti(j, 0); + jit_patch_here(j, r); + jit_leave_jit_abi(j, 0, 0, align); + jit_reti(j, 1); + + jit_word_t (*f)(double, double) = jit_end(j, NULL); + + ASSERT(f(0, 0) == 0); + ASSERT(f(0, 1) == 0); + ASSERT(f(1, 0) == 1); + ASSERT(f(-1, 0) == 0); + ASSERT(f(0, -1) == 1); + + ASSERT(f(0, 0.0/0.0) == 0); + ASSERT(f(0.0/0.0, 0) == 0); +} + +int +main (int argc, char *argv[]) +{ + return main_helper(argc, argv, run_test); +} diff --git a/deps/lightening/tests/bgtr_f.c b/deps/lightening/tests/bgtr_f.c new file mode 100644 index 0000000..91cb8c0 --- /dev/null +++ b/deps/lightening/tests/bgtr_f.c @@ -0,0 +1,34 @@ +#include "test.h" + +static void +run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size) +{ + jit_begin(j, arena_base, arena_size); + size_t align = jit_enter_jit_abi(j, 0, 0, 0); + jit_load_args_2(j, jit_operand_fpr (JIT_OPERAND_ABI_FLOAT, JIT_F0), + jit_operand_fpr 
(JIT_OPERAND_ABI_FLOAT, JIT_F1)); + + jit_reloc_t r = jit_bgtr_f(j, JIT_F0, JIT_F1); + jit_leave_jit_abi(j, 0, 0, align); + jit_reti(j, 0); + jit_patch_here(j, r); + jit_leave_jit_abi(j, 0, 0, align); + jit_reti(j, 1); + + jit_word_t (*f)(float, float) = jit_end(j, NULL); + + ASSERT(f(0, 0) == 0); + ASSERT(f(0, 1) == 0); + ASSERT(f(1, 0) == 1); + ASSERT(f(-1, 0) == 0); + ASSERT(f(0, -1) == 1); + + ASSERT(f(0, 0.0/0.0) == 0); + ASSERT(f(0.0/0.0, 0) == 0); +} + +int +main (int argc, char *argv[]) +{ + return main_helper(argc, argv, run_test); +} diff --git a/deps/lightening/tests/bgtr_u.c b/deps/lightening/tests/bgtr_u.c new file mode 100644 index 0000000..34ad257 --- /dev/null +++ b/deps/lightening/tests/bgtr_u.c @@ -0,0 +1,34 @@ +#include "test.h" + +static void +run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size) +{ + jit_begin(j, arena_base, arena_size); + size_t align = jit_enter_jit_abi(j, 0, 0, 0); + jit_load_args_2(j, jit_operand_gpr (JIT_OPERAND_ABI_WORD, JIT_R0), + jit_operand_gpr (JIT_OPERAND_ABI_WORD, JIT_R1)); + + jit_reloc_t r = jit_bgtr_u(j, JIT_R0, JIT_R1); + jit_leave_jit_abi(j, 0, 0, align); + jit_reti(j, 0); + jit_patch_here(j, r); + jit_leave_jit_abi(j, 0, 0, align); + jit_reti(j, 1); + + jit_word_t (*f)(jit_word_t, jit_word_t) = jit_end(j, NULL); + + ASSERT(f(0, 0) == 0); + ASSERT(f(0, 1) == 0); + ASSERT(f(1, 0) == 1); + ASSERT(f(-1, 0) == 1); + ASSERT(f(0, -1) == 0); +#if __WORDSIZE == 64 + ASSERT(f(0xff00000000, 1) == 1); +#endif +} + +int +main (int argc, char *argv[]) +{ + return main_helper(argc, argv, run_test); +} diff --git a/deps/lightening/tests/blei.c b/deps/lightening/tests/blei.c new file mode 100644 index 0000000..5725032 --- /dev/null +++ b/deps/lightening/tests/blei.c @@ -0,0 +1,31 @@ +#include "test.h" + +static void +run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size) +{ + jit_begin(j, arena_base, arena_size); + size_t align = jit_enter_jit_abi(j, 0, 0, 0); + jit_load_args_1(j, jit_operand_gpr 
(JIT_OPERAND_ABI_WORD, JIT_R0)); + + jit_reloc_t r = jit_blei(j, JIT_R0, 0); + jit_leave_jit_abi(j, 0, 0, align); + jit_reti(j, 0); + jit_patch_here(j, r); + jit_leave_jit_abi(j, 0, 0, align); + jit_reti(j, 1); + + jit_word_t (*f)(jit_word_t) = jit_end(j, NULL); + + ASSERT(f(0) == 1); + ASSERT(f(1) == 0); + ASSERT(f(-1) == 1); +#if __WORDSIZE == 64 + ASSERT(f(0xffffffffff) == 0); +#endif +} + +int +main (int argc, char *argv[]) +{ + return main_helper(argc, argv, run_test); +} diff --git a/deps/lightening/tests/blei_u.c b/deps/lightening/tests/blei_u.c new file mode 100644 index 0000000..f6862ad --- /dev/null +++ b/deps/lightening/tests/blei_u.c @@ -0,0 +1,31 @@ +#include "test.h" + +static void +run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size) +{ + jit_begin(j, arena_base, arena_size); + size_t align = jit_enter_jit_abi(j, 0, 0, 0); + jit_load_args_1(j, jit_operand_gpr (JIT_OPERAND_ABI_WORD, JIT_R0)); + + jit_reloc_t r = jit_blei_u(j, JIT_R0, 1); + jit_leave_jit_abi(j, 0, 0, align); + jit_reti(j, 0); + jit_patch_here(j, r); + jit_leave_jit_abi(j, 0, 0, align); + jit_reti(j, 1); + + jit_word_t (*f)(jit_word_t) = jit_end(j, NULL); + + ASSERT(f(0) == 1); + ASSERT(f(1) == 1); + ASSERT(f(-1) == 0); +#if __WORDSIZE == 64 + ASSERT(f(0xff00000000) == 0); +#endif +} + +int +main (int argc, char *argv[]) +{ + return main_helper(argc, argv, run_test); +} diff --git a/deps/lightening/tests/bler.c b/deps/lightening/tests/bler.c new file mode 100644 index 0000000..0b37785 --- /dev/null +++ b/deps/lightening/tests/bler.c @@ -0,0 +1,34 @@ +#include "test.h" + +static void +run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size) +{ + jit_begin(j, arena_base, arena_size); + size_t align = jit_enter_jit_abi(j, 0, 0, 0); + jit_load_args_2(j, jit_operand_gpr (JIT_OPERAND_ABI_WORD, JIT_R0), + jit_operand_gpr (JIT_OPERAND_ABI_WORD, JIT_R1)); + + jit_reloc_t r = jit_bler(j, JIT_R0, JIT_R1); + jit_leave_jit_abi(j, 0, 0, align); + jit_reti(j, 0); + 
jit_patch_here(j, r); + jit_leave_jit_abi(j, 0, 0, align); + jit_reti(j, 1); + + jit_word_t (*f)(jit_word_t, jit_word_t) = jit_end(j, NULL); + + ASSERT(f(0, 0) == 1); + ASSERT(f(0, 1) == 1); + ASSERT(f(1, 0) == 0); + ASSERT(f(-1, 0) == 1); + ASSERT(f(0, -1) == 0); +#if __WORDSIZE == 64 + ASSERT(f(0xffffffffff, 1) == 0); +#endif +} + +int +main (int argc, char *argv[]) +{ + return main_helper(argc, argv, run_test); +} diff --git a/deps/lightening/tests/bler_d.c b/deps/lightening/tests/bler_d.c new file mode 100644 index 0000000..507dac5 --- /dev/null +++ b/deps/lightening/tests/bler_d.c @@ -0,0 +1,34 @@ +#include "test.h" + +static void +run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size) +{ + jit_begin(j, arena_base, arena_size); + size_t align = jit_enter_jit_abi(j, 0, 0, 0); + jit_load_args_2(j, jit_operand_fpr (JIT_OPERAND_ABI_DOUBLE, JIT_F0), + jit_operand_fpr (JIT_OPERAND_ABI_DOUBLE, JIT_F1)); + + jit_reloc_t r = jit_bler_d(j, JIT_F0, JIT_F1); + jit_leave_jit_abi(j, 0, 0, align); + jit_reti(j, 0); + jit_patch_here(j, r); + jit_leave_jit_abi(j, 0, 0, align); + jit_reti(j, 1); + + jit_word_t (*f)(double, double) = jit_end(j, NULL); + + ASSERT(f(0, 0) == 1); + ASSERT(f(0, 1) == 1); + ASSERT(f(1, 0) == 0); + ASSERT(f(-1, 0) == 1); + ASSERT(f(0, -1) == 0); + + ASSERT(f(0, 0.0/0.0) == 0); + ASSERT(f(0.0/0.0, 0) == 0); +} + +int +main (int argc, char *argv[]) +{ + return main_helper(argc, argv, run_test); +} diff --git a/deps/lightening/tests/bler_f.c b/deps/lightening/tests/bler_f.c new file mode 100644 index 0000000..191b649 --- /dev/null +++ b/deps/lightening/tests/bler_f.c @@ -0,0 +1,34 @@ +#include "test.h" + +static void +run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size) +{ + jit_begin(j, arena_base, arena_size); + size_t align = jit_enter_jit_abi(j, 0, 0, 0); + jit_load_args_2(j, jit_operand_fpr (JIT_OPERAND_ABI_FLOAT, JIT_F0), + jit_operand_fpr (JIT_OPERAND_ABI_FLOAT, JIT_F1)); + + jit_reloc_t r = jit_bler_f(j, JIT_F0, JIT_F1); + 
jit_leave_jit_abi(j, 0, 0, align); + jit_reti(j, 0); + jit_patch_here(j, r); + jit_leave_jit_abi(j, 0, 0, align); + jit_reti(j, 1); + + jit_word_t (*f)(float, float) = jit_end(j, NULL); + + ASSERT(f(0, 0) == 1); + ASSERT(f(0, 1) == 1); + ASSERT(f(1, 0) == 0); + ASSERT(f(-1, 0) == 1); + ASSERT(f(0, -1) == 0); + + ASSERT(f(0, 0.0/0.0) == 0); + ASSERT(f(0.0/0.0, 0) == 0); +} + +int +main (int argc, char *argv[]) +{ + return main_helper(argc, argv, run_test); +} diff --git a/deps/lightening/tests/bler_u.c b/deps/lightening/tests/bler_u.c new file mode 100644 index 0000000..0830668 --- /dev/null +++ b/deps/lightening/tests/bler_u.c @@ -0,0 +1,34 @@ +#include "test.h" + +static void +run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size) +{ + jit_begin(j, arena_base, arena_size); + size_t align = jit_enter_jit_abi(j, 0, 0, 0); + jit_load_args_2(j, jit_operand_gpr (JIT_OPERAND_ABI_WORD, JIT_R0), + jit_operand_gpr (JIT_OPERAND_ABI_WORD, JIT_R1)); + + jit_reloc_t r = jit_bler_u(j, JIT_R0, JIT_R1); + jit_leave_jit_abi(j, 0, 0, align); + jit_reti(j, 0); + jit_patch_here(j, r); + jit_leave_jit_abi(j, 0, 0, align); + jit_reti(j, 1); + + jit_word_t (*f)(jit_word_t, jit_word_t) = jit_end(j, NULL); + + ASSERT(f(0, 0) == 1); + ASSERT(f(0, 1) == 1); + ASSERT(f(1, 0) == 0); + ASSERT(f(-1, 0) == 0); + ASSERT(f(0, -1) == 1); +#if __WORDSIZE == 64 + ASSERT(f(0xff00000000, 1) == 0); +#endif +} + +int +main (int argc, char *argv[]) +{ + return main_helper(argc, argv, run_test); +} diff --git a/deps/lightening/tests/bltgtr_d.c b/deps/lightening/tests/bltgtr_d.c new file mode 100644 index 0000000..3d8835d --- /dev/null +++ b/deps/lightening/tests/bltgtr_d.c @@ -0,0 +1,36 @@ +#include "test.h" + +static void +run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size) +{ + jit_begin(j, arena_base, arena_size); + size_t align = jit_enter_jit_abi(j, 0, 0, 0); + jit_load_args_2(j, jit_operand_fpr (JIT_OPERAND_ABI_DOUBLE, JIT_F0), + jit_operand_fpr (JIT_OPERAND_ABI_DOUBLE, 
JIT_F1)); + + jit_reloc_t r = jit_bltgtr_d(j, JIT_F0, JIT_F1); + jit_leave_jit_abi(j, 0, 0, align); + jit_reti(j, 0); + jit_patch_here(j, r); + jit_leave_jit_abi(j, 0, 0, align); + jit_reti(j, 1); + + jit_word_t (*f)(double, double) = jit_end(j, NULL); + + ASSERT(f(0, 0) == 0); + ASSERT(f(0, 1) == 1); + ASSERT(f(1, 0) == 1); + ASSERT(f(-1, 0) == 1); + ASSERT(f(0, -1) == 1); + ASSERT(f(1, 1) == 0); + + ASSERT(f(0, 0.0/0.0) == 0); + ASSERT(f(0.0/0.0, 0) == 0); + ASSERT(f(0.0/0.0, 0.0/0.0) == 0); +} + +int +main (int argc, char *argv[]) +{ + return main_helper(argc, argv, run_test); +} diff --git a/deps/lightening/tests/bltgtr_f.c b/deps/lightening/tests/bltgtr_f.c new file mode 100644 index 0000000..fbdbc3b --- /dev/null +++ b/deps/lightening/tests/bltgtr_f.c @@ -0,0 +1,36 @@ +#include "test.h" + +static void +run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size) +{ + jit_begin(j, arena_base, arena_size); + size_t align = jit_enter_jit_abi(j, 0, 0, 0); + jit_load_args_2(j, jit_operand_fpr (JIT_OPERAND_ABI_FLOAT, JIT_F0), + jit_operand_fpr (JIT_OPERAND_ABI_FLOAT, JIT_F1)); + + jit_reloc_t r = jit_bltgtr_f(j, JIT_F0, JIT_F1); + jit_leave_jit_abi(j, 0, 0, align); + jit_reti(j, 0); + jit_patch_here(j, r); + jit_leave_jit_abi(j, 0, 0, align); + jit_reti(j, 1); + + jit_word_t (*f)(float, float) = jit_end(j, NULL); + + ASSERT(f(0, 0) == 0); + ASSERT(f(0, 1) == 1); + ASSERT(f(1, 0) == 1); + ASSERT(f(-1, 0) == 1); + ASSERT(f(0, -1) == 1); + ASSERT(f(1, 1) == 0); + + ASSERT(f(0, 0.0/0.0) == 0); + ASSERT(f(0.0/0.0, 0) == 0); + ASSERT(f(0.0/0.0, 0.0/0.0) == 0); +} + +int +main (int argc, char *argv[]) +{ + return main_helper(argc, argv, run_test); +} diff --git a/deps/lightening/tests/blti.c b/deps/lightening/tests/blti.c new file mode 100644 index 0000000..d073337 --- /dev/null +++ b/deps/lightening/tests/blti.c @@ -0,0 +1,31 @@ +#include "test.h" + +static void +run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size) +{ + jit_begin(j, arena_base, 
arena_size); + size_t align = jit_enter_jit_abi(j, 0, 0, 0); + jit_load_args_1(j, jit_operand_gpr (JIT_OPERAND_ABI_WORD, JIT_R0)); + + jit_reloc_t r = jit_blti(j, JIT_R0, 0); + jit_leave_jit_abi(j, 0, 0, align); + jit_reti(j, 0); + jit_patch_here(j, r); + jit_leave_jit_abi(j, 0, 0, align); + jit_reti(j, 1); + + jit_word_t (*f)(jit_word_t) = jit_end(j, NULL); + + ASSERT(f(0) == 0); + ASSERT(f(1) == 0); + ASSERT(f(-1) == 1); +#if __WORDSIZE == 64 + ASSERT(f(0xffffffffff) == 0); +#endif +} + +int +main (int argc, char *argv[]) +{ + return main_helper(argc, argv, run_test); +} diff --git a/deps/lightening/tests/blti_u.c b/deps/lightening/tests/blti_u.c new file mode 100644 index 0000000..04a7037 --- /dev/null +++ b/deps/lightening/tests/blti_u.c @@ -0,0 +1,31 @@ +#include "test.h" + +static void +run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size) +{ + jit_begin(j, arena_base, arena_size); + size_t align = jit_enter_jit_abi(j, 0, 0, 0); + jit_load_args_1(j, jit_operand_gpr (JIT_OPERAND_ABI_WORD, JIT_R0)); + + jit_reloc_t r = jit_blti_u(j, JIT_R0, 1); + jit_leave_jit_abi(j, 0, 0, align); + jit_reti(j, 0); + jit_patch_here(j, r); + jit_leave_jit_abi(j, 0, 0, align); + jit_reti(j, 1); + + jit_word_t (*f)(jit_word_t) = jit_end(j, NULL); + + ASSERT(f(0) == 1); + ASSERT(f(1) == 0); + ASSERT(f(-1) == 0); +#if __WORDSIZE == 64 + ASSERT(f(0xff00000000) == 0); +#endif +} + +int +main (int argc, char *argv[]) +{ + return main_helper(argc, argv, run_test); +} diff --git a/deps/lightening/tests/bltr.c b/deps/lightening/tests/bltr.c new file mode 100644 index 0000000..a928fab --- /dev/null +++ b/deps/lightening/tests/bltr.c @@ -0,0 +1,34 @@ +#include "test.h" + +static void +run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size) +{ + jit_begin(j, arena_base, arena_size); + size_t align = jit_enter_jit_abi(j, 0, 0, 0); + jit_load_args_2(j, jit_operand_gpr (JIT_OPERAND_ABI_WORD, JIT_R0), + jit_operand_gpr (JIT_OPERAND_ABI_WORD, JIT_R1)); + + jit_reloc_t r = 
jit_bltr(j, JIT_R0, JIT_R1); + jit_leave_jit_abi(j, 0, 0, align); + jit_reti(j, 0); + jit_patch_here(j, r); + jit_leave_jit_abi(j, 0, 0, align); + jit_reti(j, 1); + + jit_word_t (*f)(jit_word_t, jit_word_t) = jit_end(j, NULL); + + ASSERT(f(0, 0) == 0); + ASSERT(f(0, 1) == 1); + ASSERT(f(1, 0) == 0); + ASSERT(f(-1, 0) == 1); + ASSERT(f(0, -1) == 0); +#if __WORDSIZE == 64 + ASSERT(f(0xffffffffffff, 0) == 0); +#endif +} + +int +main (int argc, char *argv[]) +{ + return main_helper(argc, argv, run_test); +} diff --git a/deps/lightening/tests/bltr_d.c b/deps/lightening/tests/bltr_d.c new file mode 100644 index 0000000..2d62609 --- /dev/null +++ b/deps/lightening/tests/bltr_d.c @@ -0,0 +1,34 @@ +#include "test.h" + +static void +run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size) +{ + jit_begin(j, arena_base, arena_size); + size_t align = jit_enter_jit_abi(j, 0, 0, 0); + jit_load_args_2(j, jit_operand_fpr (JIT_OPERAND_ABI_DOUBLE, JIT_F0), + jit_operand_fpr (JIT_OPERAND_ABI_DOUBLE, JIT_F1)); + + jit_reloc_t r = jit_bltr_d(j, JIT_F0, JIT_F1); + jit_leave_jit_abi(j, 0, 0, align); + jit_reti(j, 0); + jit_patch_here(j, r); + jit_leave_jit_abi(j, 0, 0, align); + jit_reti(j, 1); + + jit_word_t (*f)(double, double) = jit_end(j, NULL); + + ASSERT(f(0, 0) == 0); + ASSERT(f(0, 1) == 1); + ASSERT(f(1, 0) == 0); + ASSERT(f(-1, 0) == 1); + ASSERT(f(0, -1) == 0); + + ASSERT(f(0, 0.0/0.0) == 0); + ASSERT(f(0.0/0.0, 0) == 0); +} + +int +main (int argc, char *argv[]) +{ + return main_helper(argc, argv, run_test); +} diff --git a/deps/lightening/tests/bltr_f.c b/deps/lightening/tests/bltr_f.c new file mode 100644 index 0000000..eebd3da --- /dev/null +++ b/deps/lightening/tests/bltr_f.c @@ -0,0 +1,34 @@ +#include "test.h" + +static void +run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size) +{ + jit_begin(j, arena_base, arena_size); + size_t align = jit_enter_jit_abi(j, 0, 0, 0); + jit_load_args_2(j, jit_operand_fpr (JIT_OPERAND_ABI_FLOAT, JIT_F0), + jit_operand_fpr 
(JIT_OPERAND_ABI_FLOAT, JIT_F1)); + + jit_reloc_t r = jit_bltr_f(j, JIT_F0, JIT_F1); + jit_leave_jit_abi(j, 0, 0, align); + jit_reti(j, 0); + jit_patch_here(j, r); + jit_leave_jit_abi(j, 0, 0, align); + jit_reti(j, 1); + + jit_word_t (*f)(float, float) = jit_end(j, NULL); + + ASSERT(f(0, 0) == 0); + ASSERT(f(0, 1) == 1); + ASSERT(f(1, 0) == 0); + ASSERT(f(-1, 0) == 1); + ASSERT(f(0, -1) == 0); + + ASSERT(f(0, 0.0/0.0) == 0); + ASSERT(f(0.0/0.0, 0) == 0); +} + +int +main (int argc, char *argv[]) +{ + return main_helper(argc, argv, run_test); +} diff --git a/deps/lightening/tests/bltr_u.c b/deps/lightening/tests/bltr_u.c new file mode 100644 index 0000000..c66f3d5 --- /dev/null +++ b/deps/lightening/tests/bltr_u.c @@ -0,0 +1,34 @@ +#include "test.h" + +static void +run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size) +{ + jit_begin(j, arena_base, arena_size); + size_t align = jit_enter_jit_abi(j, 0, 0, 0); + jit_load_args_2(j, jit_operand_gpr (JIT_OPERAND_ABI_WORD, JIT_R0), + jit_operand_gpr (JIT_OPERAND_ABI_WORD, JIT_R1)); + + jit_reloc_t r = jit_bltr_u(j, JIT_R0, JIT_R1); + jit_leave_jit_abi(j, 0, 0, align); + jit_reti(j, 0); + jit_patch_here(j, r); + jit_leave_jit_abi(j, 0, 0, align); + jit_reti(j, 1); + + jit_word_t (*f)(jit_word_t, jit_word_t) = jit_end(j, NULL); + + ASSERT(f(0, 0) == 0); + ASSERT(f(0, 1) == 1); + ASSERT(f(1, 0) == 0); + ASSERT(f(-1, 0) == 0); + ASSERT(f(0, -1) == 1); +#if __WORDSIZE == 64 + ASSERT(f(0xff00000000, 1) == 0); +#endif +} + +int +main (int argc, char *argv[]) +{ + return main_helper(argc, argv, run_test); +} diff --git a/deps/lightening/tests/bmci.c b/deps/lightening/tests/bmci.c new file mode 100644 index 0000000..e6c355a --- /dev/null +++ b/deps/lightening/tests/bmci.c @@ -0,0 +1,36 @@ +#include "test.h" + +static void +run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size) +{ + jit_begin(j, arena_base, arena_size); + size_t align = jit_enter_jit_abi(j, 0, 0, 0); + jit_load_args_1(j, jit_operand_gpr 
(JIT_OPERAND_ABI_WORD, JIT_R0)); + +#if __WORDSIZE == 64 + jit_reloc_t r = jit_bmci(j, JIT_R0, 0xff00000001); +#else + jit_reloc_t r = jit_bmci(j, JIT_R0, 1); +#endif + jit_leave_jit_abi(j, 0, 0, align); + jit_reti(j, 0); + jit_patch_here(j, r); + jit_leave_jit_abi(j, 0, 0, align); + jit_reti(j, 1); + + jit_word_t (*f)(jit_word_t) = jit_end(j, NULL); + + ASSERT(f(0) == 1); + ASSERT(f(1) == 0); + ASSERT(f(-1) == 0); + ASSERT(f(2) == 1); +#if __WORDSIZE == 64 + ASSERT(f(0xfffffffff0) == 0); +#endif +} + +int +main (int argc, char *argv[]) +{ + return main_helper(argc, argv, run_test); +} diff --git a/deps/lightening/tests/bmcr.c b/deps/lightening/tests/bmcr.c new file mode 100644 index 0000000..64c1ee6 --- /dev/null +++ b/deps/lightening/tests/bmcr.c @@ -0,0 +1,38 @@ +#include "test.h" + +static void +run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size) +{ + jit_begin(j, arena_base, arena_size); + size_t align = jit_enter_jit_abi(j, 0, 0, 0); + jit_load_args_2(j, jit_operand_gpr (JIT_OPERAND_ABI_WORD, JIT_R0), + jit_operand_gpr (JIT_OPERAND_ABI_WORD, JIT_R1)); + + jit_reloc_t r = jit_bmcr(j, JIT_R0, JIT_R1); + jit_leave_jit_abi(j, 0, 0, align); + jit_reti(j, 0); + jit_patch_here(j, r); + jit_leave_jit_abi(j, 0, 0, align); + jit_reti(j, 1); + + jit_word_t (*f)(jit_word_t, jit_word_t) = jit_end(j, NULL); + + ASSERT(f(0, 0) == 1); + ASSERT(f(0, 1) == 1); + ASSERT(f(1, 0) == 1); + ASSERT(f(-1, 0) == 1); + ASSERT(f(0, -1) == 1); + ASSERT(f(1, 1) == 0); + ASSERT(f(1, -1) == 0); + ASSERT(f(-1, 1) == 0); + ASSERT(f(-1, -1) == 0); +#if __WORDSIZE == 64 + ASSERT(f(0xffffffffff, 0xff00000000) == 0); +#endif +} + +int +main (int argc, char *argv[]) +{ + return main_helper(argc, argv, run_test); +} diff --git a/deps/lightening/tests/bmsi.c b/deps/lightening/tests/bmsi.c new file mode 100644 index 0000000..d0919f7 --- /dev/null +++ b/deps/lightening/tests/bmsi.c @@ -0,0 +1,36 @@ +#include "test.h" + +static void +run_test(jit_state_t *j, uint8_t *arena_base, size_t 
arena_size) +{ + jit_begin(j, arena_base, arena_size); + size_t align = jit_enter_jit_abi(j, 0, 0, 0); + jit_load_args_1(j, jit_operand_gpr (JIT_OPERAND_ABI_WORD, JIT_R0)); + +#if __WORDSIZE == 64 + jit_reloc_t r = jit_bmsi(j, JIT_R0, 0xff00000001); +#else + jit_reloc_t r = jit_bmsi(j, JIT_R0, 1); +#endif + jit_leave_jit_abi(j, 0, 0, align); + jit_reti(j, 0); + jit_patch_here(j, r); + jit_leave_jit_abi(j, 0, 0, align); + jit_reti(j, 1); + + jit_word_t (*f)(jit_word_t) = jit_end(j, NULL); + + ASSERT(f(0) == 0); + ASSERT(f(1) == 1); + ASSERT(f(-1) == 1); + ASSERT(f(2) == 0); +#if __WORDSIZE == 64 + ASSERT(f(0xfffffffff0) == 1); +#endif +} + +int +main (int argc, char *argv[]) +{ + return main_helper(argc, argv, run_test); +} diff --git a/deps/lightening/tests/bmsr.c b/deps/lightening/tests/bmsr.c new file mode 100644 index 0000000..b92eb6e --- /dev/null +++ b/deps/lightening/tests/bmsr.c @@ -0,0 +1,38 @@ +#include "test.h" + +static void +run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size) +{ + jit_begin(j, arena_base, arena_size); + size_t align = jit_enter_jit_abi(j, 0, 0, 0); + jit_load_args_2(j, jit_operand_gpr (JIT_OPERAND_ABI_WORD, JIT_R0), + jit_operand_gpr (JIT_OPERAND_ABI_WORD, JIT_R1)); + + jit_reloc_t r = jit_bmsr(j, JIT_R0, JIT_R1); + jit_leave_jit_abi(j, 0, 0, align); + jit_reti(j, 0); + jit_patch_here(j, r); + jit_leave_jit_abi(j, 0, 0, align); + jit_reti(j, 1); + + jit_word_t (*f)(jit_word_t, jit_word_t) = jit_end(j, NULL); + + ASSERT(f(0, 0) == 0); + ASSERT(f(0, 1) == 0); + ASSERT(f(1, 0) == 0); + ASSERT(f(-1, 0) == 0); + ASSERT(f(0, -1) == 0); + ASSERT(f(1, 1) == 1); + ASSERT(f(1, -1) == 1); + ASSERT(f(-1, 1) == 1); + ASSERT(f(-1, -1) == 1); +#if __WORDSIZE == 64 + ASSERT(f(0xfffffffff0, 0xff00000001) == 1); +#endif +} + +int +main (int argc, char *argv[]) +{ + return main_helper(argc, argv, run_test); +} diff --git a/deps/lightening/tests/bnei.c b/deps/lightening/tests/bnei.c new file mode 100644 index 0000000..ee077e3 --- /dev/null +++ 
b/deps/lightening/tests/bnei.c @@ -0,0 +1,31 @@ +#include "test.h" + +static void +run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size) +{ + jit_begin(j, arena_base, arena_size); + size_t align = jit_enter_jit_abi(j, 0, 0, 0); + jit_load_args_1(j, jit_operand_gpr (JIT_OPERAND_ABI_WORD, JIT_R0)); + + jit_reloc_t r = jit_bnei(j, JIT_R0, 0); + jit_leave_jit_abi(j, 0, 0, align); + jit_reti(j, 0); + jit_patch_here(j, r); + jit_leave_jit_abi(j, 0, 0, align); + jit_reti(j, 1); + + jit_word_t (*f)(jit_word_t) = jit_end(j, NULL); + + ASSERT(f(0) == 0); + ASSERT(f(1) == 1); + ASSERT(f(-1) == 1); +#if __WORDSIZE == 64 + ASSERT(f(0xff00000000) == 1); +#endif +} + +int +main (int argc, char *argv[]) +{ + return main_helper(argc, argv, run_test); +} diff --git a/deps/lightening/tests/bner.c b/deps/lightening/tests/bner.c new file mode 100644 index 0000000..7a8cd0f --- /dev/null +++ b/deps/lightening/tests/bner.c @@ -0,0 +1,34 @@ +#include "test.h" + +static void +run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size) +{ + jit_begin(j, arena_base, arena_size); + size_t align = jit_enter_jit_abi(j, 0, 0, 0); + jit_load_args_2(j, jit_operand_gpr (JIT_OPERAND_ABI_WORD, JIT_R0), + jit_operand_gpr (JIT_OPERAND_ABI_WORD, JIT_R1)); + + jit_reloc_t r = jit_bner(j, JIT_R0, JIT_R1); + jit_leave_jit_abi(j, 0, 0, align); + jit_reti(j, 0); + jit_patch_here(j, r); + jit_leave_jit_abi(j, 0, 0, align); + jit_reti(j, 1); + + jit_word_t (*f)(jit_word_t, jit_word_t) = jit_end(j, NULL); + + ASSERT(f(0, 0) == 0); + ASSERT(f(0, 1) == 1); + ASSERT(f(1, 0) == 1); + ASSERT(f(-1, 0) == 1); + ASSERT(f(0, -1) == 1); +#if __WORDSIZE == 64 + ASSERT(f(0xff00000000, 0x1000000000) == 1); +#endif +} + +int +main (int argc, char *argv[]) +{ + return main_helper(argc, argv, run_test); +} diff --git a/deps/lightening/tests/bner_d.c b/deps/lightening/tests/bner_d.c new file mode 100644 index 0000000..079fda4 --- /dev/null +++ b/deps/lightening/tests/bner_d.c @@ -0,0 +1,36 @@ +#include "test.h" + 
+static void +run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size) +{ + jit_begin(j, arena_base, arena_size); + size_t align = jit_enter_jit_abi(j, 0, 0, 0); + jit_load_args_2(j, jit_operand_fpr (JIT_OPERAND_ABI_DOUBLE, JIT_F0), + jit_operand_fpr (JIT_OPERAND_ABI_DOUBLE, JIT_F1)); + + jit_reloc_t r = jit_bner_d(j, JIT_F0, JIT_F1); + jit_leave_jit_abi(j, 0, 0, align); + jit_reti(j, 0); + jit_patch_here(j, r); + jit_leave_jit_abi(j, 0, 0, align); + jit_reti(j, 1); + + jit_word_t (*f)(double, double) = jit_end(j, NULL); + + ASSERT(f(0, 0) == 0); + ASSERT(f(0, 1) == 1); + ASSERT(f(1, 0) == 1); + ASSERT(f(-1, 0) == 1); + ASSERT(f(0, -1) == 1); + ASSERT(f(1, 1) == 0); + + ASSERT(f(0, 0.0/0.0) == 1); + ASSERT(f(0.0/0.0, 0) == 1); + ASSERT(f(0.0/0.0, 0.0/0.0) == 1); +} + +int +main (int argc, char *argv[]) +{ + return main_helper(argc, argv, run_test); +} diff --git a/deps/lightening/tests/bner_f.c b/deps/lightening/tests/bner_f.c new file mode 100644 index 0000000..011df67 --- /dev/null +++ b/deps/lightening/tests/bner_f.c @@ -0,0 +1,36 @@ +#include "test.h" + +static void +run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size) +{ + jit_begin(j, arena_base, arena_size); + size_t align = jit_enter_jit_abi(j, 0, 0, 0); + jit_load_args_2(j, jit_operand_fpr (JIT_OPERAND_ABI_FLOAT, JIT_F0), + jit_operand_fpr (JIT_OPERAND_ABI_FLOAT, JIT_F1)); + + jit_reloc_t r = jit_bner_f(j, JIT_F0, JIT_F1); + jit_leave_jit_abi(j, 0, 0, align); + jit_reti(j, 0); + jit_patch_here(j, r); + jit_leave_jit_abi(j, 0, 0, align); + jit_reti(j, 1); + + jit_word_t (*f)(float, float) = jit_end(j, NULL); + + ASSERT(f(0, 0) == 0); + ASSERT(f(0, 1) == 1); + ASSERT(f(1, 0) == 1); + ASSERT(f(-1, 0) == 1); + ASSERT(f(0, -1) == 1); + ASSERT(f(1, 1) == 0); + + ASSERT(f(0, 0.0/0.0) == 1); + ASSERT(f(0.0/0.0, 0) == 1); + ASSERT(f(0.0/0.0, 0.0/0.0) == 1); +} + +int +main (int argc, char *argv[]) +{ + return main_helper(argc, argv, run_test); +} diff --git a/deps/lightening/tests/boaddi.c 
b/deps/lightening/tests/boaddi.c new file mode 100644 index 0000000..1e47297 --- /dev/null +++ b/deps/lightening/tests/boaddi.c @@ -0,0 +1,41 @@ +#include "test.h" + +static const jit_word_t overflowed = 0xcabba9e5; + +static void +run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size) +{ + jit_begin(j, arena_base, arena_size); + size_t align = jit_enter_jit_abi(j, 0, 0, 0); + jit_load_args_1(j, jit_operand_gpr (JIT_OPERAND_ABI_WORD, JIT_R0)); + + jit_reloc_t r = jit_boaddi(j, JIT_R0, 1); + jit_leave_jit_abi(j, 0, 0, align); + jit_retr(j, JIT_R0); + jit_patch_here(j, r); + jit_movi(j, JIT_R0, overflowed); + jit_leave_jit_abi(j, 0, 0, align); + jit_retr(j, JIT_R0); + + jit_word_t (*f)(jit_word_t) = jit_end(j, NULL); + + ASSERT(f(-1) == 0); + ASSERT(f(0) == 1); + ASSERT(f(1) == 2); + +#if __WORDSIZE == 32 + ASSERT(f(0x7fffffff) == overflowed); + ASSERT(f(0x80000000) == 0x80000001); + ASSERT(f(0xffffffff) == 0); +#else + ASSERT(f(0x7fffffffffffffff) == overflowed); + ASSERT(f(0x8000000000000000) == 0x8000000000000001); + ASSERT(f(0xffffffffffffffff) == 0); +#endif +} + +int +main (int argc, char *argv[]) +{ + return main_helper(argc, argv, run_test); +} diff --git a/deps/lightening/tests/boaddi_u.c b/deps/lightening/tests/boaddi_u.c new file mode 100644 index 0000000..21c71df --- /dev/null +++ b/deps/lightening/tests/boaddi_u.c @@ -0,0 +1,41 @@ +#include "test.h" + +static const jit_word_t overflowed = 0xcabba9e5; + +static void +run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size) +{ + jit_begin(j, arena_base, arena_size); + size_t align = jit_enter_jit_abi(j, 0, 0, 0); + jit_load_args_1(j, jit_operand_gpr (JIT_OPERAND_ABI_WORD, JIT_R0)); + + jit_reloc_t r = jit_boaddi_u(j, JIT_R0, 1); + jit_leave_jit_abi(j, 0, 0, align); + jit_retr(j, JIT_R0); + jit_patch_here(j, r); + jit_movi(j, JIT_R0, overflowed); + jit_leave_jit_abi(j, 0, 0, align); + jit_retr(j, JIT_R0); + + jit_word_t (*f)(jit_word_t) = jit_end(j, NULL); + + ASSERT(f(-1) == overflowed); 
+ ASSERT(f(0) == 1); + ASSERT(f(1) == 2); + +#if __WORDSIZE == 32 + ASSERT(f(0x7fffffff) == 0x80000000); + ASSERT(f(0x80000000) == 0x80000001); + ASSERT(f(0xffffffff) == overflowed); +#else + ASSERT(f(0x7fffffffffffffff) == 0x8000000000000000); + ASSERT(f(0x8000000000000000) == 0x8000000000000001); + ASSERT(f(0xffffffffffffffff) == overflowed); +#endif +} + +int +main (int argc, char *argv[]) +{ + return main_helper(argc, argv, run_test); +} diff --git a/deps/lightening/tests/boaddr.c b/deps/lightening/tests/boaddr.c new file mode 100644 index 0000000..8bab91e --- /dev/null +++ b/deps/lightening/tests/boaddr.c @@ -0,0 +1,51 @@ +#include "test.h" + +static const jit_word_t overflowed = 0xcabba9e5; + +static void +run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size) +{ + jit_begin(j, arena_base, arena_size); + size_t align = jit_enter_jit_abi(j, 0, 0, 0); + jit_load_args_2(j, jit_operand_gpr (JIT_OPERAND_ABI_WORD, JIT_R0), + jit_operand_gpr (JIT_OPERAND_ABI_WORD, JIT_R1)); + + jit_reloc_t r = jit_boaddr(j, JIT_R0, JIT_R1); + jit_leave_jit_abi(j, 0, 0, align); + jit_retr(j, JIT_R0); + jit_patch_here(j, r); + jit_movi(j, JIT_R0, overflowed); + jit_leave_jit_abi(j, 0, 0, align); + jit_retr(j, JIT_R0); + + jit_word_t (*f)(jit_word_t, jit_word_t) = jit_end(j, NULL); + + ASSERT(f(0, 0) == 0); + ASSERT(f(1, 1) == 2); + +#if __WORDSIZE == 32 + ASSERT(f(0xffffffff, 0xffffffff) == -2); + ASSERT(f(0x7fffffff, 0) == 0x7fffffff); + ASSERT(f(0x7fffffff, 1) == overflowed); + ASSERT(f(0x7fffffff, 0x7fffffff) == overflowed); + ASSERT(f(0x7fffffff, 0x80000000) == -1); + ASSERT(f(0x80000000, 0x80000000) == overflowed); +#else + ASSERT(f(0xffffffff, 0xffffffff) == 0xffffffffull + 0xffffffffull); + ASSERT(f(0x7fffffff, 1) == 0x80000000); + ASSERT(f(0x7fffffff, 0x7fffffff) == 0x7fffffffull + 0x7fffffffull); + ASSERT(f(0x7fffffff, 0x80000000) == 0xffffffff); + ASSERT(f(0x80000000, 0x80000000) == 0x100000000); + ASSERT(f(0xffffffffffffffff, 0xffffffffffffffff) == -2); + 
ASSERT(f(0x7fffffffffffffff, 1) == overflowed); + ASSERT(f(0x7fffffffffffffff, 0x7fffffffffffffff) == overflowed); + ASSERT(f(0x7fffffffffffffff, 0x8000000000000000) == -1); + ASSERT(f(0x8000000000000000, 0x8000000000000000) == overflowed); +#endif +} + +int +main (int argc, char *argv[]) +{ + return main_helper(argc, argv, run_test); +} diff --git a/deps/lightening/tests/boaddr_u.c b/deps/lightening/tests/boaddr_u.c new file mode 100644 index 0000000..f4bacde --- /dev/null +++ b/deps/lightening/tests/boaddr_u.c @@ -0,0 +1,51 @@ +#include "test.h" + +static const jit_word_t overflowed = 0xcabba9e5; + +static void +run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size) +{ + jit_begin(j, arena_base, arena_size); + size_t align = jit_enter_jit_abi(j, 0, 0, 0); + jit_load_args_2(j, jit_operand_gpr (JIT_OPERAND_ABI_WORD, JIT_R0), + jit_operand_gpr (JIT_OPERAND_ABI_WORD, JIT_R1)); + + jit_reloc_t r = jit_boaddr_u(j, JIT_R0, JIT_R1); + jit_leave_jit_abi(j, 0, 0, align); + jit_retr(j, JIT_R0); + jit_patch_here(j, r); + jit_movi(j, JIT_R0, overflowed); + jit_leave_jit_abi(j, 0, 0, align); + jit_retr(j, JIT_R0); + + jit_word_t (*f)(jit_word_t, jit_word_t) = jit_end(j, NULL); + + ASSERT(f(0, 0) == 0); + ASSERT(f(1, 1) == 2); + +#if __WORDSIZE == 32 + ASSERT(f(0xffffffff, 0xffffffff) == overflowed); + ASSERT(f(0x7fffffff, 0) == 0x7fffffff); + ASSERT(f(0x7fffffff, 1) == 0x80000000); + ASSERT(f(0x7fffffff, 0x7fffffff) == 0x7fffffffu + 0x7fffffffu); + ASSERT(f(0x7fffffff, 0x80000000) == 0xffffffff); + ASSERT(f(0x80000000, 0x80000000) == overflowed); +#else + ASSERT(f(0xffffffff, 0xffffffff) == 0xffffffffull + 0xffffffffull); + ASSERT(f(0x7fffffff, 1) == 0x80000000); + ASSERT(f(0x7fffffff, 0x7fffffff) == 0x7fffffffull + 0x7fffffffull); + ASSERT(f(0x7fffffff, 0x80000000) == 0xffffffff); + ASSERT(f(0x80000000, 0x80000000) == 0x100000000); + ASSERT(f(0xffffffffffffffff, 0xffffffffffffffff) == overflowed); + ASSERT(f(0x7fffffffffffffff, 1) == 0x8000000000000000); + 
ASSERT(f(0x7fffffffffffffff, 0x7fffffffffffffff) == -2); + ASSERT(f(0x7fffffffffffffff, 0x8000000000000000) == -1); + ASSERT(f(0x8000000000000000, 0x8000000000000000) == overflowed); +#endif +} + +int +main (int argc, char *argv[]) +{ + return main_helper(argc, argv, run_test); +} diff --git a/deps/lightening/tests/bordr_d.c b/deps/lightening/tests/bordr_d.c new file mode 100644 index 0000000..9227f22 --- /dev/null +++ b/deps/lightening/tests/bordr_d.c @@ -0,0 +1,36 @@ +#include "test.h" + +static void +run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size) +{ + jit_begin(j, arena_base, arena_size); + size_t align = jit_enter_jit_abi(j, 0, 0, 0); + jit_load_args_2(j, jit_operand_fpr (JIT_OPERAND_ABI_DOUBLE, JIT_F0), + jit_operand_fpr (JIT_OPERAND_ABI_DOUBLE, JIT_F1)); + + jit_reloc_t r = jit_bordr_d(j, JIT_F0, JIT_F1); + jit_leave_jit_abi(j, 0, 0, align); + jit_reti(j, 0); + jit_patch_here(j, r); + jit_leave_jit_abi(j, 0, 0, align); + jit_reti(j, 1); + + jit_word_t (*f)(double, double) = jit_end(j, NULL); + + ASSERT(f(0, 0) == 1); + ASSERT(f(0, 1) == 1); + ASSERT(f(1, 0) == 1); + ASSERT(f(-1, 0) == 1); + ASSERT(f(0, -1) == 1); + ASSERT(f(1, 1) == 1); + + ASSERT(f(0, 0.0/0.0) == 0); + ASSERT(f(0.0/0.0, 0) == 0); + ASSERT(f(0.0/0.0, 0.0/0.0) == 0); +} + +int +main (int argc, char *argv[]) +{ + return main_helper(argc, argv, run_test); +} diff --git a/deps/lightening/tests/bordr_f.c b/deps/lightening/tests/bordr_f.c new file mode 100644 index 0000000..25808e5 --- /dev/null +++ b/deps/lightening/tests/bordr_f.c @@ -0,0 +1,36 @@ +#include "test.h" + +static void +run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size) +{ + jit_begin(j, arena_base, arena_size); + size_t align = jit_enter_jit_abi(j, 0, 0, 0); + jit_load_args_2(j, jit_operand_fpr (JIT_OPERAND_ABI_FLOAT, JIT_F0), + jit_operand_fpr (JIT_OPERAND_ABI_FLOAT, JIT_F1)); + + jit_reloc_t r = jit_bordr_f(j, JIT_F0, JIT_F1); + jit_leave_jit_abi(j, 0, 0, align); + jit_reti(j, 0); + jit_patch_here(j, 
r); + jit_leave_jit_abi(j, 0, 0, align); + jit_reti(j, 1); + + jit_word_t (*f)(float, float) = jit_end(j, NULL); + + ASSERT(f(0, 0) == 1); + ASSERT(f(0, 1) == 1); + ASSERT(f(1, 0) == 1); + ASSERT(f(-1, 0) == 1); + ASSERT(f(0, -1) == 1); + ASSERT(f(1, 1) == 1); + + ASSERT(f(0, 0.0/0.0) == 0); + ASSERT(f(0.0/0.0, 0) == 0); + ASSERT(f(0.0/0.0, 0.0/0.0) == 0); +} + +int +main (int argc, char *argv[]) +{ + return main_helper(argc, argv, run_test); +} diff --git a/deps/lightening/tests/bosubi.c b/deps/lightening/tests/bosubi.c new file mode 100644 index 0000000..f10d90a --- /dev/null +++ b/deps/lightening/tests/bosubi.c @@ -0,0 +1,41 @@ +#include "test.h" + +static const jit_word_t overflowed = 0xcabba9e5; + +static void +run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size) +{ + jit_begin(j, arena_base, arena_size); + size_t align = jit_enter_jit_abi(j, 0, 0, 0); + jit_load_args_1(j, jit_operand_gpr (JIT_OPERAND_ABI_WORD, JIT_R0)); + + jit_reloc_t r = jit_bosubi(j, JIT_R0, 1); + jit_leave_jit_abi(j, 0, 0, align); + jit_retr(j, JIT_R0); + jit_patch_here(j, r); + jit_movi(j, JIT_R0, overflowed); + jit_leave_jit_abi(j, 0, 0, align); + jit_retr(j, JIT_R0); + + jit_word_t (*f)(jit_word_t) = jit_end(j, NULL); + + ASSERT(f(-1) == -2); + ASSERT(f(0) == -1); + ASSERT(f(1) == 0); + +#if __WORDSIZE == 32 + ASSERT(f(0x7fffffff) == 0x7ffffffe); + ASSERT(f(0x80000000) == overflowed); + ASSERT(f(0x80000001) == 0x80000000); +#else + ASSERT(f(0x7fffffffffffffff) == 0x7ffffffffffffffe); + ASSERT(f(0x8000000000000000) == overflowed); + ASSERT(f(0x8000000000000001) == 0x8000000000000000); +#endif +} + +int +main (int argc, char *argv[]) +{ + return main_helper(argc, argv, run_test); +} diff --git a/deps/lightening/tests/bosubi_u.c b/deps/lightening/tests/bosubi_u.c new file mode 100644 index 0000000..50af6ad --- /dev/null +++ b/deps/lightening/tests/bosubi_u.c @@ -0,0 +1,37 @@ +#include "test.h" + +static const jit_word_t overflowed = 0xcabba9e5; + +static void 
+run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size) +{ + jit_begin(j, arena_base, arena_size); + size_t align = jit_enter_jit_abi(j, 0, 0, 0); + jit_load_args_1(j, jit_operand_gpr (JIT_OPERAND_ABI_WORD, JIT_R0)); + + jit_reloc_t r = jit_bosubi_u(j, JIT_R0, 1); + jit_leave_jit_abi(j, 0, 0, align); + jit_retr(j, JIT_R0); + jit_patch_here(j, r); + jit_movi(j, JIT_R0, overflowed); + jit_leave_jit_abi(j, 0, 0, align); + jit_retr(j, JIT_R0); + + jit_word_t (*f)(jit_word_t) = jit_end(j, NULL); + + ASSERT(f(-1) == -2); + ASSERT(f(0) == overflowed); + ASSERT(f(1) == 0); + +#if __WORDSIZE == 32 + ASSERT(f(0x80000000) == 0x7fffffff); +#else + ASSERT(f(0x8000000000000000) == 0x7fffffffffffffff); +#endif +} + +int +main (int argc, char *argv[]) +{ + return main_helper(argc, argv, run_test); +} diff --git a/deps/lightening/tests/bosubr.c b/deps/lightening/tests/bosubr.c new file mode 100644 index 0000000..cf68ad6 --- /dev/null +++ b/deps/lightening/tests/bosubr.c @@ -0,0 +1,48 @@ +#include "test.h" + +static const jit_word_t overflowed = 0xcabba9e5; + +static void +run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size) +{ + jit_begin(j, arena_base, arena_size); + size_t align = jit_enter_jit_abi(j, 0, 0, 0); + jit_load_args_2(j, jit_operand_gpr (JIT_OPERAND_ABI_WORD, JIT_R0), + jit_operand_gpr (JIT_OPERAND_ABI_WORD, JIT_R1)); + + jit_reloc_t r = jit_bosubr(j, JIT_R0, JIT_R1); + jit_leave_jit_abi(j, 0, 0, align); + jit_retr(j, JIT_R0); + jit_patch_here(j, r); + jit_movi(j, JIT_R0, overflowed); + jit_leave_jit_abi(j, 0, 0, align); + jit_retr(j, JIT_R0); + + jit_word_t (*f)(jit_word_t, jit_word_t) = jit_end(j, NULL); + + ASSERT(f(0, 0) == 0); + ASSERT(f(0, 1) == -1); + ASSERT(f(1, 1) == 0); + ASSERT(f(1, -1) == 2); + +#if __WORDSIZE == 32 + ASSERT(f(0xffffffff, 0xffffffff) == 0); + ASSERT(f(0x7fffffff, 0) == 0x7fffffff); + ASSERT(f(0x7fffffff, 1) == 0x7ffffffe); + ASSERT(f(0x7fffffff, 0x7fffffff) == 0); + ASSERT(f(0x80000000, 0x7fffffff) == overflowed); + 
ASSERT(f(0x7fffffff, 0x80000000) == overflowed); + ASSERT(f(0x80000000, 0x80000000) == 0); +#else + ASSERT(f(0x7fffffffffffffff, 0x7fffffffffffffff) == 0); + ASSERT(f(0x7fffffffffffffff, 0x8000000000000000) == overflowed); + ASSERT(f(0x8000000000000000, 0x7fffffffffffffff) == overflowed); + ASSERT(f(0x8000000000000000, 0x8000000000000000) == 0); +#endif +} + +int +main (int argc, char *argv[]) +{ + return main_helper(argc, argv, run_test); +} diff --git a/deps/lightening/tests/bosubr_u.c b/deps/lightening/tests/bosubr_u.c new file mode 100644 index 0000000..b5e6b39 --- /dev/null +++ b/deps/lightening/tests/bosubr_u.c @@ -0,0 +1,47 @@ +#include "test.h" + +static const jit_word_t overflowed = 0xcabba9e5; + +static void +run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size) +{ + jit_begin(j, arena_base, arena_size); + size_t align = jit_enter_jit_abi(j, 0, 0, 0); + jit_load_args_2(j, jit_operand_gpr (JIT_OPERAND_ABI_WORD, JIT_R0), + jit_operand_gpr (JIT_OPERAND_ABI_WORD, JIT_R1)); + + jit_reloc_t r = jit_bosubr_u(j, JIT_R0, JIT_R1); + jit_leave_jit_abi(j, 0, 0, align); + jit_retr(j, JIT_R0); + jit_patch_here(j, r); + jit_movi(j, JIT_R0, overflowed); + jit_leave_jit_abi(j, 0, 0, align); + jit_retr(j, JIT_R0); + + jit_word_t (*f)(jit_word_t, jit_word_t) = jit_end(j, NULL); + + ASSERT(f(0, 0) == 0); + ASSERT(f(1, 1) == 0); + ASSERT(f(0, 1) == overflowed); + ASSERT(f(1, 0) == 1); + +#if __WORDSIZE == 32 + ASSERT(f(0xffffffff, 0xffffffff) == 0); + ASSERT(f(0x7fffffff, 0) == 0x7fffffff); + ASSERT(f(0x7fffffff, 1) == 0x7ffffffe); + ASSERT(f(0x7fffffff, 0x7fffffff) == 0); + ASSERT(f(0x7fffffff, 0x80000000) == overflowed); + ASSERT(f(0x80000000, 0x80000000) == 0); +#else + ASSERT(f(0xffffffffffffffff, 0xffffffffffffffff) == 0); + ASSERT(f(0x7fffffffffffffff, 0x7fffffffffffffff) == 0); + ASSERT(f(0x7fffffffffffffff, 0x8000000000000000) == overflowed); + ASSERT(f(0x8000000000000000, 0x8000000000000000) == 0); +#endif +} + +int +main (int argc, char *argv[]) +{ + 
return main_helper(argc, argv, run_test); +} diff --git a/deps/lightening/tests/bswapr_ui.c b/deps/lightening/tests/bswapr_ui.c new file mode 100644 index 0000000..c1eb9fd --- /dev/null +++ b/deps/lightening/tests/bswapr_ui.c @@ -0,0 +1,28 @@ +#include "test.h" + +static void +run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size) +{ + jit_begin(j, arena_base, arena_size); + size_t align = jit_enter_jit_abi(j, 0, 0, 0); + jit_load_args_1(j, jit_operand_gpr (JIT_OPERAND_ABI_WORD, JIT_R1)); + + jit_bswapr_ui(j, JIT_R0, JIT_R1); + jit_leave_jit_abi(j, 0, 0, align); + jit_retr(j, JIT_R0); + + jit_uword_t (*f)(jit_uword_t) = jit_end(j, NULL); + + ASSERT(f(0) == 0); + ASSERT(f(0x12345678) == 0x78563412); +#if __WORDSIZE > 32 + ASSERT(f(0xff12345678) == 0x78563412); + ASSERT(f(0xff00000000) == 0); +#endif +} + +int +main (int argc, char *argv[]) +{ + return main_helper(argc, argv, run_test); +} diff --git a/deps/lightening/tests/bswapr_ul.c b/deps/lightening/tests/bswapr_ul.c new file mode 100644 index 0000000..a3a11b3 --- /dev/null +++ b/deps/lightening/tests/bswapr_ul.c @@ -0,0 +1,27 @@ +#include "test.h" + +static void +run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size) +{ +#if __WORDSIZE > 32 + jit_begin(j, arena_base, arena_size); + size_t align = jit_enter_jit_abi(j, 0, 0, 0); + jit_load_args_1(j, jit_operand_gpr (JIT_OPERAND_ABI_WORD, JIT_R1)); + + jit_bswapr_ul(j, JIT_R0, JIT_R1); + jit_leave_jit_abi(j, 0, 0, align); + jit_retr(j, JIT_R0); + + jit_uword_t (*f)(jit_uword_t) = jit_end(j, NULL); + + ASSERT(f(0) == 0); + ASSERT(f(0x12345678) == 0x7856341200000000); + ASSERT(f(0xff12345678) == 0x78563412ff000000); +#endif +} + +int +main (int argc, char *argv[]) +{ + return main_helper(argc, argv, run_test); +} diff --git a/deps/lightening/tests/bswapr_us.c b/deps/lightening/tests/bswapr_us.c new file mode 100644 index 0000000..0ff777e --- /dev/null +++ b/deps/lightening/tests/bswapr_us.c @@ -0,0 +1,24 @@ +#include "test.h" + +static void 
+run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size) +{ + jit_begin(j, arena_base, arena_size); + size_t align = jit_enter_jit_abi(j, 0, 0, 0); + jit_load_args_1(j, jit_operand_gpr (JIT_OPERAND_ABI_WORD, JIT_R1)); + + jit_bswapr_us(j, JIT_R0, JIT_R1); + jit_leave_jit_abi(j, 0, 0, align); + jit_retr(j, JIT_R0); + + jit_uword_t (*f)(jit_uword_t) = jit_end(j, NULL); + + ASSERT(f(0) == 0); + ASSERT(f(0x12345678) == 0x7856); +} + +int +main (int argc, char *argv[]) +{ + return main_helper(argc, argv, run_test); +} diff --git a/deps/lightening/tests/buneqr_d.c b/deps/lightening/tests/buneqr_d.c new file mode 100644 index 0000000..1d08e32 --- /dev/null +++ b/deps/lightening/tests/buneqr_d.c @@ -0,0 +1,35 @@ +#include "test.h" + +static void +run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size) +{ + jit_begin(j, arena_base, arena_size); + size_t align = jit_enter_jit_abi(j, 0, 0, 0); + jit_load_args_2(j, jit_operand_fpr (JIT_OPERAND_ABI_DOUBLE, JIT_F0), + jit_operand_fpr (JIT_OPERAND_ABI_DOUBLE, JIT_F1)); + + jit_reloc_t r = jit_buneqr_d(j, JIT_F0, JIT_F1); + jit_leave_jit_abi(j, 0, 0, align); + jit_reti(j, 0); + jit_patch_here(j, r); + jit_leave_jit_abi(j, 0, 0, align); + jit_reti(j, 1); + + jit_word_t (*f)(double, double) = jit_end(j, NULL); + + ASSERT(f(0, 0) == 1); + ASSERT(f(0, 1) == 0); + ASSERT(f(1, 0) == 0); + ASSERT(f(-1, 0) == 0); + ASSERT(f(0, -1) == 0); + ASSERT(f(1, 1) == 1); + + ASSERT(f(0, 0.0/0.0) == 1); + ASSERT(f(0.0/0.0, 0) == 1); +} + +int +main (int argc, char *argv[]) +{ + return main_helper(argc, argv, run_test); +} diff --git a/deps/lightening/tests/buneqr_f.c b/deps/lightening/tests/buneqr_f.c new file mode 100644 index 0000000..49d9062 --- /dev/null +++ b/deps/lightening/tests/buneqr_f.c @@ -0,0 +1,35 @@ +#include "test.h" + +static void +run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size) +{ + jit_begin(j, arena_base, arena_size); + size_t align = jit_enter_jit_abi(j, 0, 0, 0); + jit_load_args_2(j, 
jit_operand_fpr (JIT_OPERAND_ABI_FLOAT, JIT_F0), + jit_operand_fpr (JIT_OPERAND_ABI_FLOAT, JIT_F1)); + + jit_reloc_t r = jit_buneqr_f(j, JIT_F0, JIT_F1); + jit_leave_jit_abi(j, 0, 0, align); + jit_reti(j, 0); + jit_patch_here(j, r); + jit_leave_jit_abi(j, 0, 0, align); + jit_reti(j, 1); + + jit_word_t (*f)(float, float) = jit_end(j, NULL); + + ASSERT(f(0, 0) == 1); + ASSERT(f(0, 1) == 0); + ASSERT(f(1, 0) == 0); + ASSERT(f(-1, 0) == 0); + ASSERT(f(0, -1) == 0); + ASSERT(f(1, 1) == 1); + + ASSERT(f(0, 0.0/0.0) == 1); + ASSERT(f(0.0/0.0, 0) == 1); +} + +int +main (int argc, char *argv[]) +{ + return main_helper(argc, argv, run_test); +} diff --git a/deps/lightening/tests/bunger_d.c b/deps/lightening/tests/bunger_d.c new file mode 100644 index 0000000..57888af --- /dev/null +++ b/deps/lightening/tests/bunger_d.c @@ -0,0 +1,34 @@ +#include "test.h" + +static void +run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size) +{ + jit_begin(j, arena_base, arena_size); + size_t align = jit_enter_jit_abi(j, 0, 0, 0); + jit_load_args_2(j, jit_operand_fpr (JIT_OPERAND_ABI_DOUBLE, JIT_F0), + jit_operand_fpr (JIT_OPERAND_ABI_DOUBLE, JIT_F1)); + + jit_reloc_t r = jit_bunger_d(j, JIT_F0, JIT_F1); + jit_leave_jit_abi(j, 0, 0, align); + jit_reti(j, 0); + jit_patch_here(j, r); + jit_leave_jit_abi(j, 0, 0, align); + jit_reti(j, 1); + + jit_word_t (*f)(double, double) = jit_end(j, NULL); + + ASSERT(f(0, 0) == 1); + ASSERT(f(0, 1) == 0); + ASSERT(f(1, 0) == 1); + ASSERT(f(-1, 0) == 0); + ASSERT(f(0, -1) == 1); + + ASSERT(f(0, 0.0/0.0) == 1); + ASSERT(f(0.0/0.0, 0) == 1); +} + +int +main (int argc, char *argv[]) +{ + return main_helper(argc, argv, run_test); +} diff --git a/deps/lightening/tests/bunger_f.c b/deps/lightening/tests/bunger_f.c new file mode 100644 index 0000000..f3103dc --- /dev/null +++ b/deps/lightening/tests/bunger_f.c @@ -0,0 +1,34 @@ +#include "test.h" + +static void +run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size) +{ + jit_begin(j, arena_base, 
arena_size); + size_t align = jit_enter_jit_abi(j, 0, 0, 0); + jit_load_args_2(j, jit_operand_fpr (JIT_OPERAND_ABI_FLOAT, JIT_F0), + jit_operand_fpr (JIT_OPERAND_ABI_FLOAT, JIT_F1)); + + jit_reloc_t r = jit_bunger_f(j, JIT_F0, JIT_F1); + jit_leave_jit_abi(j, 0, 0, align); + jit_reti(j, 0); + jit_patch_here(j, r); + jit_leave_jit_abi(j, 0, 0, align); + jit_reti(j, 1); + + jit_word_t (*f)(float, float) = jit_end(j, NULL); + + ASSERT(f(0, 0) == 1); + ASSERT(f(0, 1) == 0); + ASSERT(f(1, 0) == 1); + ASSERT(f(-1, 0) == 0); + ASSERT(f(0, -1) == 1); + + ASSERT(f(0, 0.0/0.0) == 1); + ASSERT(f(0.0/0.0, 0) == 1); +} + +int +main (int argc, char *argv[]) +{ + return main_helper(argc, argv, run_test); +} diff --git a/deps/lightening/tests/bungtr_d.c b/deps/lightening/tests/bungtr_d.c new file mode 100644 index 0000000..649d61f --- /dev/null +++ b/deps/lightening/tests/bungtr_d.c @@ -0,0 +1,34 @@ +#include "test.h" + +static void +run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size) +{ + jit_begin(j, arena_base, arena_size); + size_t align = jit_enter_jit_abi(j, 0, 0, 0); + jit_load_args_2(j, jit_operand_fpr (JIT_OPERAND_ABI_DOUBLE, JIT_F0), + jit_operand_fpr (JIT_OPERAND_ABI_DOUBLE, JIT_F1)); + + jit_reloc_t r = jit_bungtr_d(j, JIT_F0, JIT_F1); + jit_leave_jit_abi(j, 0, 0, align); + jit_reti(j, 0); + jit_patch_here(j, r); + jit_leave_jit_abi(j, 0, 0, align); + jit_reti(j, 1); + + jit_word_t (*f)(double, double) = jit_end(j, NULL); + + ASSERT(f(0, 0) == 0); + ASSERT(f(0, 1) == 0); + ASSERT(f(1, 0) == 1); + ASSERT(f(-1, 0) == 0); + ASSERT(f(0, -1) == 1); + + ASSERT(f(0, 0.0/0.0) == 1); + ASSERT(f(0.0/0.0, 0) == 1); +} + +int +main (int argc, char *argv[]) +{ + return main_helper(argc, argv, run_test); +} diff --git a/deps/lightening/tests/bungtr_f.c b/deps/lightening/tests/bungtr_f.c new file mode 100644 index 0000000..fea66dc --- /dev/null +++ b/deps/lightening/tests/bungtr_f.c @@ -0,0 +1,34 @@ +#include "test.h" + +static void +run_test(jit_state_t *j, uint8_t 
*arena_base, size_t arena_size) +{ + jit_begin(j, arena_base, arena_size); + size_t align = jit_enter_jit_abi(j, 0, 0, 0); + jit_load_args_2(j, jit_operand_fpr (JIT_OPERAND_ABI_FLOAT, JIT_F0), + jit_operand_fpr (JIT_OPERAND_ABI_FLOAT, JIT_F1)); + + jit_reloc_t r = jit_bungtr_f(j, JIT_F0, JIT_F1); + jit_leave_jit_abi(j, 0, 0, align); + jit_reti(j, 0); + jit_patch_here(j, r); + jit_leave_jit_abi(j, 0, 0, align); + jit_reti(j, 1); + + jit_word_t (*f)(float, float) = jit_end(j, NULL); + + ASSERT(f(0, 0) == 0); + ASSERT(f(0, 1) == 0); + ASSERT(f(1, 0) == 1); + ASSERT(f(-1, 0) == 0); + ASSERT(f(0, -1) == 1); + + ASSERT(f(0, 0.0/0.0) == 1); + ASSERT(f(0.0/0.0, 0) == 1); +} + +int +main (int argc, char *argv[]) +{ + return main_helper(argc, argv, run_test); +} diff --git a/deps/lightening/tests/bunler_d.c b/deps/lightening/tests/bunler_d.c new file mode 100644 index 0000000..e59382c --- /dev/null +++ b/deps/lightening/tests/bunler_d.c @@ -0,0 +1,34 @@ +#include "test.h" + +static void +run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size) +{ + jit_begin(j, arena_base, arena_size); + size_t align = jit_enter_jit_abi(j, 0, 0, 0); + jit_load_args_2(j, jit_operand_fpr (JIT_OPERAND_ABI_DOUBLE, JIT_F0), + jit_operand_fpr (JIT_OPERAND_ABI_DOUBLE, JIT_F1)); + + jit_reloc_t r = jit_bunler_d(j, JIT_F0, JIT_F1); + jit_leave_jit_abi(j, 0, 0, align); + jit_reti(j, 0); + jit_patch_here(j, r); + jit_leave_jit_abi(j, 0, 0, align); + jit_reti(j, 1); + + jit_word_t (*f)(double, double) = jit_end(j, NULL); + + ASSERT(f(0, 0) == 1); + ASSERT(f(0, 1) == 1); + ASSERT(f(1, 0) == 0); + ASSERT(f(-1, 0) == 1); + ASSERT(f(0, -1) == 0); + + ASSERT(f(0, 0.0/0.0) == 1); + ASSERT(f(0.0/0.0, 0) == 1); +} + +int +main (int argc, char *argv[]) +{ + return main_helper(argc, argv, run_test); +} diff --git a/deps/lightening/tests/bunler_f.c b/deps/lightening/tests/bunler_f.c new file mode 100644 index 0000000..fddce6b --- /dev/null +++ b/deps/lightening/tests/bunler_f.c @@ -0,0 +1,34 @@ +#include 
"test.h" + +static void +run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size) +{ + jit_begin(j, arena_base, arena_size); + size_t align = jit_enter_jit_abi(j, 0, 0, 0); + jit_load_args_2(j, jit_operand_fpr (JIT_OPERAND_ABI_FLOAT, JIT_F0), + jit_operand_fpr (JIT_OPERAND_ABI_FLOAT, JIT_F1)); + + jit_reloc_t r = jit_bunler_f(j, JIT_F0, JIT_F1); + jit_leave_jit_abi(j, 0, 0, align); + jit_reti(j, 0); + jit_patch_here(j, r); + jit_leave_jit_abi(j, 0, 0, align); + jit_reti(j, 1); + + jit_word_t (*f)(float, float) = jit_end(j, NULL); + + ASSERT(f(0, 0) == 1); + ASSERT(f(0, 1) == 1); + ASSERT(f(1, 0) == 0); + ASSERT(f(-1, 0) == 1); + ASSERT(f(0, -1) == 0); + + ASSERT(f(0, 0.0/0.0) == 1); + ASSERT(f(0.0/0.0, 0) == 1); +} + +int +main (int argc, char *argv[]) +{ + return main_helper(argc, argv, run_test); +} diff --git a/deps/lightening/tests/bunltr_d.c b/deps/lightening/tests/bunltr_d.c new file mode 100644 index 0000000..2ab0051 --- /dev/null +++ b/deps/lightening/tests/bunltr_d.c @@ -0,0 +1,34 @@ +#include "test.h" + +static void +run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size) +{ + jit_begin(j, arena_base, arena_size); + size_t align = jit_enter_jit_abi(j, 0, 0, 0); + jit_load_args_2(j, jit_operand_fpr (JIT_OPERAND_ABI_DOUBLE, JIT_F0), + jit_operand_fpr (JIT_OPERAND_ABI_DOUBLE, JIT_F1)); + + jit_reloc_t r = jit_bunltr_d(j, JIT_F0, JIT_F1); + jit_leave_jit_abi(j, 0, 0, align); + jit_reti(j, 0); + jit_patch_here(j, r); + jit_leave_jit_abi(j, 0, 0, align); + jit_reti(j, 1); + + jit_word_t (*f)(double, double) = jit_end(j, NULL); + + ASSERT(f(0, 0) == 0); + ASSERT(f(0, 1) == 1); + ASSERT(f(1, 0) == 0); + ASSERT(f(-1, 0) == 1); + ASSERT(f(0, -1) == 0); + + ASSERT(f(0, 0.0/0.0) == 1); + ASSERT(f(0.0/0.0, 0) == 1); +} + +int +main (int argc, char *argv[]) +{ + return main_helper(argc, argv, run_test); +} diff --git a/deps/lightening/tests/bunltr_f.c b/deps/lightening/tests/bunltr_f.c new file mode 100644 index 0000000..ade228b --- /dev/null +++ 
b/deps/lightening/tests/bunltr_f.c @@ -0,0 +1,34 @@ +#include "test.h" + +static void +run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size) +{ + jit_begin(j, arena_base, arena_size); + size_t align = jit_enter_jit_abi(j, 0, 0, 0); + jit_load_args_2(j, jit_operand_fpr (JIT_OPERAND_ABI_FLOAT, JIT_F0), + jit_operand_fpr (JIT_OPERAND_ABI_FLOAT, JIT_F1)); + + jit_reloc_t r = jit_bunltr_f(j, JIT_F0, JIT_F1); + jit_leave_jit_abi(j, 0, 0, align); + jit_reti(j, 0); + jit_patch_here(j, r); + jit_leave_jit_abi(j, 0, 0, align); + jit_reti(j, 1); + + jit_word_t (*f)(float, float) = jit_end(j, NULL); + + ASSERT(f(0, 0) == 0); + ASSERT(f(0, 1) == 1); + ASSERT(f(1, 0) == 0); + ASSERT(f(-1, 0) == 1); + ASSERT(f(0, -1) == 0); + + ASSERT(f(0, 0.0/0.0) == 1); + ASSERT(f(0.0/0.0, 0) == 1); +} + +int +main (int argc, char *argv[]) +{ + return main_helper(argc, argv, run_test); +} diff --git a/deps/lightening/tests/bunordr_d.c b/deps/lightening/tests/bunordr_d.c new file mode 100644 index 0000000..6b04f0e --- /dev/null +++ b/deps/lightening/tests/bunordr_d.c @@ -0,0 +1,36 @@ +#include "test.h" + +static void +run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size) +{ + jit_begin(j, arena_base, arena_size); + size_t align = jit_enter_jit_abi(j, 0, 0, 0); + jit_load_args_2(j, jit_operand_fpr (JIT_OPERAND_ABI_DOUBLE, JIT_F0), + jit_operand_fpr (JIT_OPERAND_ABI_DOUBLE, JIT_F1)); + + jit_reloc_t r = jit_bunordr_d(j, JIT_F0, JIT_F1); + jit_leave_jit_abi(j, 0, 0, align); + jit_reti(j, 0); + jit_patch_here(j, r); + jit_leave_jit_abi(j, 0, 0, align); + jit_reti(j, 1); + + jit_word_t (*f)(double, double) = jit_end(j, NULL); + + ASSERT(f(0, 0) == 0); + ASSERT(f(0, 1) == 0); + ASSERT(f(1, 0) == 0); + ASSERT(f(-1, 0) == 0); + ASSERT(f(0, -1) == 0); + ASSERT(f(1, 1) == 0); + + ASSERT(f(0, 0.0/0.0) == 1); + ASSERT(f(0.0/0.0, 0) == 1); + ASSERT(f(0.0/0.0, 0.0/0.0) == 1); +} + +int +main (int argc, char *argv[]) +{ + return main_helper(argc, argv, run_test); +} diff --git 
a/deps/lightening/tests/bunordr_f.c b/deps/lightening/tests/bunordr_f.c new file mode 100644 index 0000000..ce4fc7b --- /dev/null +++ b/deps/lightening/tests/bunordr_f.c @@ -0,0 +1,36 @@ +#include "test.h" + +static void +run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size) +{ + jit_begin(j, arena_base, arena_size); + size_t align = jit_enter_jit_abi(j, 0, 0, 0); + jit_load_args_2(j, jit_operand_fpr (JIT_OPERAND_ABI_FLOAT, JIT_F0), + jit_operand_fpr (JIT_OPERAND_ABI_FLOAT, JIT_F1)); + + jit_reloc_t r = jit_bunordr_f(j, JIT_F0, JIT_F1); + jit_leave_jit_abi(j, 0, 0, align); + jit_reti(j, 0); + jit_patch_here(j, r); + jit_leave_jit_abi(j, 0, 0, align); + jit_reti(j, 1); + + jit_word_t (*f)(float, float) = jit_end(j, NULL); + + ASSERT(f(0, 0) == 0); + ASSERT(f(0, 1) == 0); + ASSERT(f(1, 0) == 0); + ASSERT(f(-1, 0) == 0); + ASSERT(f(0, -1) == 0); + ASSERT(f(1, 1) == 0); + + ASSERT(f(0, 0.0/0.0) == 1); + ASSERT(f(0.0/0.0, 0) == 1); + ASSERT(f(0.0/0.0, 0.0/0.0) == 1); +} + +int +main (int argc, char *argv[]) +{ + return main_helper(argc, argv, run_test); +} diff --git a/deps/lightening/tests/bxaddi.c b/deps/lightening/tests/bxaddi.c new file mode 100644 index 0000000..6e872da --- /dev/null +++ b/deps/lightening/tests/bxaddi.c @@ -0,0 +1,39 @@ +#include "test.h" + +static const jit_word_t overflowed = 0xcabba9e5; + +static void +run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size) +{ + jit_begin(j, arena_base, arena_size); + size_t align = jit_enter_jit_abi(j, 0, 0, 0); + jit_load_args_1(j, jit_operand_gpr (JIT_OPERAND_ABI_WORD, JIT_R0)); + + jit_reloc_t r = jit_bxaddi(j, JIT_R0, 1); + jit_movi(j, JIT_R0, overflowed); + jit_patch_here(j, r); + jit_leave_jit_abi(j, 0, 0, align); + jit_retr(j, JIT_R0); + + jit_word_t (*f)(jit_word_t) = jit_end(j, NULL); + + ASSERT(f(-1) == 0); + ASSERT(f(0) == 1); + ASSERT(f(1) == 2); + +#if __WORDSIZE == 32 + ASSERT(f(0x7fffffff) == overflowed); + ASSERT(f(0x80000000) == 0x80000001); + ASSERT(f(0xffffffff) == 0); 
+#else + ASSERT(f(0x7fffffffffffffff) == overflowed); + ASSERT(f(0x8000000000000000) == 0x8000000000000001); + ASSERT(f(0xffffffffffffffff) == 0); +#endif +} + +int +main (int argc, char *argv[]) +{ + return main_helper(argc, argv, run_test); +} diff --git a/deps/lightening/tests/bxaddi_u.c b/deps/lightening/tests/bxaddi_u.c new file mode 100644 index 0000000..e71aeb7 --- /dev/null +++ b/deps/lightening/tests/bxaddi_u.c @@ -0,0 +1,39 @@ +#include "test.h" + +static const jit_word_t overflowed = 0xcabba9e5; + +static void +run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size) +{ + jit_begin(j, arena_base, arena_size); + size_t align = jit_enter_jit_abi(j, 0, 0, 0); + jit_load_args_1(j, jit_operand_gpr (JIT_OPERAND_ABI_WORD, JIT_R0)); + + jit_reloc_t r = jit_bxaddi_u(j, JIT_R0, 1); + jit_movi(j, JIT_R0, overflowed); + jit_patch_here(j, r); + jit_leave_jit_abi(j, 0, 0, align); + jit_retr(j, JIT_R0); + + jit_word_t (*f)(jit_word_t) = jit_end(j, NULL); + + ASSERT(f(-1) == overflowed); + ASSERT(f(0) == 1); + ASSERT(f(1) == 2); + +#if __WORDSIZE == 32 + ASSERT(f(0x7fffffff) == 0x80000000); + ASSERT(f(0x80000000) == 0x80000001); + ASSERT(f(0xffffffff) == overflowed); +#else + ASSERT(f(0x7fffffffffffffff) == 0x8000000000000000); + ASSERT(f(0x8000000000000000) == 0x8000000000000001); + ASSERT(f(0xffffffffffffffff) == overflowed); +#endif +} + +int +main (int argc, char *argv[]) +{ + return main_helper(argc, argv, run_test); +} diff --git a/deps/lightening/tests/bxaddr.c b/deps/lightening/tests/bxaddr.c new file mode 100644 index 0000000..c1f6f23 --- /dev/null +++ b/deps/lightening/tests/bxaddr.c @@ -0,0 +1,49 @@ +#include "test.h" + +static const jit_word_t overflowed = 0xcabba9e5; + +static void +run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size) +{ + jit_begin(j, arena_base, arena_size); + size_t align = jit_enter_jit_abi(j, 0, 0, 0); + jit_load_args_2(j, jit_operand_gpr (JIT_OPERAND_ABI_WORD, JIT_R0), + jit_operand_gpr (JIT_OPERAND_ABI_WORD, 
JIT_R1)); + + jit_reloc_t r = jit_bxaddr(j, JIT_R0, JIT_R1); + jit_movi(j, JIT_R0, overflowed); + jit_patch_here(j, r); + jit_leave_jit_abi(j, 0, 0, align); + jit_retr(j, JIT_R0); + + jit_word_t (*f)(jit_word_t, jit_word_t) = jit_end(j, NULL); + + ASSERT(f(0, 0) == 0); + ASSERT(f(1, 1) == 2); + +#if __WORDSIZE == 32 + ASSERT(f(0xffffffff, 0xffffffff) == -2); + ASSERT(f(0x7fffffff, 0) == 0x7fffffff); + ASSERT(f(0x7fffffff, 1) == overflowed); + ASSERT(f(0x7fffffff, 0x7fffffff) == overflowed); + ASSERT(f(0x7fffffff, 0x80000000) == -1); + ASSERT(f(0x80000000, 0x80000000) == overflowed); +#else + ASSERT(f(0xffffffff, 0xffffffff) == 0xffffffffull + 0xffffffffull); + ASSERT(f(0x7fffffff, 1) == 0x80000000); + ASSERT(f(0x7fffffff, 0x7fffffff) == 0x7fffffffull + 0x7fffffffull); + ASSERT(f(0x7fffffff, 0x80000000) == 0xffffffff); + ASSERT(f(0x80000000, 0x80000000) == 0x100000000); + ASSERT(f(0xffffffffffffffff, 0xffffffffffffffff) == -2); + ASSERT(f(0x7fffffffffffffff, 1) == overflowed); + ASSERT(f(0x7fffffffffffffff, 0x7fffffffffffffff) == overflowed); + ASSERT(f(0x7fffffffffffffff, 0x8000000000000000) == -1); + ASSERT(f(0x8000000000000000, 0x8000000000000000) == overflowed); +#endif +} + +int +main (int argc, char *argv[]) +{ + return main_helper(argc, argv, run_test); +} diff --git a/deps/lightening/tests/bxaddr_u.c b/deps/lightening/tests/bxaddr_u.c new file mode 100644 index 0000000..d674f82 --- /dev/null +++ b/deps/lightening/tests/bxaddr_u.c @@ -0,0 +1,49 @@ +#include "test.h" + +static const jit_word_t overflowed = 0xcabba9e5; + +static void +run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size) +{ + jit_begin(j, arena_base, arena_size); + size_t align = jit_enter_jit_abi(j, 0, 0, 0); + jit_load_args_2(j, jit_operand_gpr (JIT_OPERAND_ABI_WORD, JIT_R0), + jit_operand_gpr (JIT_OPERAND_ABI_WORD, JIT_R1)); + + jit_reloc_t r = jit_bxaddr_u(j, JIT_R0, JIT_R1); + jit_movi(j, JIT_R0, overflowed); + jit_patch_here(j, r); + jit_leave_jit_abi(j, 0, 0, align); + 
jit_retr(j, JIT_R0); + + jit_word_t (*f)(jit_word_t, jit_word_t) = jit_end(j, NULL); + + ASSERT(f(0, 0) == 0); + ASSERT(f(1, 1) == 2); + +#if __WORDSIZE == 32 + ASSERT(f(0xffffffff, 0xffffffff) == overflowed); + ASSERT(f(0x7fffffff, 0) == 0x7fffffff); + ASSERT(f(0x7fffffff, 1) == 0x80000000); + ASSERT(f(0x7fffffff, 0x7fffffff) == 0x7fffffffu + 0x7fffffffu); + ASSERT(f(0x7fffffff, 0x80000000) == 0xffffffff); + ASSERT(f(0x80000000, 0x80000000) == overflowed); +#else + ASSERT(f(0xffffffff, 0xffffffff) == 0xffffffffull + 0xffffffffull); + ASSERT(f(0x7fffffff, 1) == 0x80000000); + ASSERT(f(0x7fffffff, 0x7fffffff) == 0x7fffffffull + 0x7fffffffull); + ASSERT(f(0x7fffffff, 0x80000000) == 0xffffffff); + ASSERT(f(0x80000000, 0x80000000) == 0x100000000); + ASSERT(f(0xffffffffffffffff, 0xffffffffffffffff) == overflowed); + ASSERT(f(0x7fffffffffffffff, 1) == 0x8000000000000000); + ASSERT(f(0x7fffffffffffffff, 0x7fffffffffffffff) == -2); + ASSERT(f(0x7fffffffffffffff, 0x8000000000000000) == -1); + ASSERT(f(0x8000000000000000, 0x8000000000000000) == overflowed); +#endif +} + +int +main (int argc, char *argv[]) +{ + return main_helper(argc, argv, run_test); +} diff --git a/deps/lightening/tests/bxsubi.c b/deps/lightening/tests/bxsubi.c new file mode 100644 index 0000000..1b642c7 --- /dev/null +++ b/deps/lightening/tests/bxsubi.c @@ -0,0 +1,39 @@ +#include "test.h" + +static const jit_word_t overflowed = 0xcabba9e5; + +static void +run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size) +{ + jit_begin(j, arena_base, arena_size); + size_t align = jit_enter_jit_abi(j, 0, 0, 0); + jit_load_args_1(j, jit_operand_gpr (JIT_OPERAND_ABI_WORD, JIT_R0)); + + jit_reloc_t r = jit_bxsubi(j, JIT_R0, 1); + jit_movi(j, JIT_R0, overflowed); + jit_patch_here(j, r); + jit_leave_jit_abi(j, 0, 0, align); + jit_retr(j, JIT_R0); + + jit_word_t (*f)(jit_word_t) = jit_end(j, NULL); + + ASSERT(f(-1) == -2); + ASSERT(f(0) == -1); + ASSERT(f(1) == 0); + +#if __WORDSIZE == 32 + ASSERT(f(0x7fffffff) == 
0x7ffffffe); + ASSERT(f(0x80000000) == overflowed); + ASSERT(f(0x80000001) == 0x80000000); +#else + ASSERT(f(0x7fffffffffffffff) == 0x7ffffffffffffffe); + ASSERT(f(0x8000000000000000) == overflowed); + ASSERT(f(0x8000000000000001) == 0x8000000000000000); +#endif +} + +int +main (int argc, char *argv[]) +{ + return main_helper(argc, argv, run_test); +} diff --git a/deps/lightening/tests/bxsubi_u.c b/deps/lightening/tests/bxsubi_u.c new file mode 100644 index 0000000..1345bd2 --- /dev/null +++ b/deps/lightening/tests/bxsubi_u.c @@ -0,0 +1,35 @@ +#include "test.h" + +static const jit_word_t overflowed = 0xcabba9e5; + +static void +run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size) +{ + jit_begin(j, arena_base, arena_size); + size_t align = jit_enter_jit_abi(j, 0, 0, 0); + jit_load_args_1(j, jit_operand_gpr (JIT_OPERAND_ABI_WORD, JIT_R0)); + + jit_reloc_t r = jit_bxsubi_u(j, JIT_R0, 1); + jit_movi(j, JIT_R0, overflowed); + jit_patch_here(j, r); + jit_leave_jit_abi(j, 0, 0, align); + jit_retr(j, JIT_R0); + + jit_word_t (*f)(jit_word_t) = jit_end(j, NULL); + + ASSERT(f(-1) == -2); + ASSERT(f(0) == overflowed); + ASSERT(f(1) == 0); + +#if __WORDSIZE == 32 + ASSERT(f(0x80000000) == 0x7fffffff); +#else + ASSERT(f(0x8000000000000000) == 0x7fffffffffffffff); +#endif +} + +int +main (int argc, char *argv[]) +{ + return main_helper(argc, argv, run_test); +} diff --git a/deps/lightening/tests/bxsubr.c b/deps/lightening/tests/bxsubr.c new file mode 100644 index 0000000..d40d182 --- /dev/null +++ b/deps/lightening/tests/bxsubr.c @@ -0,0 +1,46 @@ +#include "test.h" + +static const jit_word_t overflowed = 0xcabba9e5; + +static void +run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size) +{ + jit_begin(j, arena_base, arena_size); + size_t align = jit_enter_jit_abi(j, 0, 0, 0); + jit_load_args_2(j, jit_operand_gpr (JIT_OPERAND_ABI_WORD, JIT_R0), + jit_operand_gpr (JIT_OPERAND_ABI_WORD, JIT_R1)); + + jit_reloc_t r = jit_bxsubr(j, JIT_R0, JIT_R1); + jit_movi(j, 
JIT_R0, overflowed); + jit_patch_here(j, r); + jit_leave_jit_abi(j, 0, 0, align); + jit_retr(j, JIT_R0); + + jit_word_t (*f)(jit_word_t, jit_word_t) = jit_end(j, NULL); + + ASSERT(f(0, 0) == 0); + ASSERT(f(0, 1) == -1); + ASSERT(f(1, 1) == 0); + ASSERT(f(1, -1) == 2); + +#if __WORDSIZE == 32 + ASSERT(f(0xffffffff, 0xffffffff) == 0); + ASSERT(f(0x7fffffff, 0) == 0x7fffffff); + ASSERT(f(0x7fffffff, 1) == 0x7ffffffe); + ASSERT(f(0x7fffffff, 0x7fffffff) == 0); + ASSERT(f(0x80000000, 0x7fffffff) == overflowed); + ASSERT(f(0x7fffffff, 0x80000000) == overflowed); + ASSERT(f(0x80000000, 0x80000000) == 0); +#else + ASSERT(f(0x7fffffffffffffff, 0x7fffffffffffffff) == 0); + ASSERT(f(0x7fffffffffffffff, 0x8000000000000000) == overflowed); + ASSERT(f(0x8000000000000000, 0x7fffffffffffffff) == overflowed); + ASSERT(f(0x8000000000000000, 0x8000000000000000) == 0); +#endif +} + +int +main (int argc, char *argv[]) +{ + return main_helper(argc, argv, run_test); +} diff --git a/deps/lightening/tests/bxsubr_u.c b/deps/lightening/tests/bxsubr_u.c new file mode 100644 index 0000000..54a8d28 --- /dev/null +++ b/deps/lightening/tests/bxsubr_u.c @@ -0,0 +1,45 @@ +#include "test.h" + +static const jit_word_t overflowed = 0xcabba9e5; + +static void +run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size) +{ + jit_begin(j, arena_base, arena_size); + size_t align = jit_enter_jit_abi(j, 0, 0, 0); + jit_load_args_2(j, jit_operand_gpr (JIT_OPERAND_ABI_WORD, JIT_R0), + jit_operand_gpr (JIT_OPERAND_ABI_WORD, JIT_R1)); + + jit_reloc_t r = jit_bxsubr_u(j, JIT_R0, JIT_R1); + jit_movi(j, JIT_R0, overflowed); + jit_patch_here(j, r); + jit_leave_jit_abi(j, 0, 0, align); + jit_retr(j, JIT_R0); + + jit_word_t (*f)(jit_word_t, jit_word_t) = jit_end(j, NULL); + + ASSERT(f(0, 0) == 0); + ASSERT(f(1, 1) == 0); + ASSERT(f(0, 1) == overflowed); + ASSERT(f(1, 0) == 1); + +#if __WORDSIZE == 32 + ASSERT(f(0xffffffff, 0xffffffff) == 0); + ASSERT(f(0x7fffffff, 0) == 0x7fffffff); + ASSERT(f(0x7fffffff, 1) == 
0x7ffffffe); + ASSERT(f(0x7fffffff, 0x7fffffff) == 0); + ASSERT(f(0x7fffffff, 0x80000000) == overflowed); + ASSERT(f(0x80000000, 0x80000000) == 0); +#else + ASSERT(f(0xffffffffffffffff, 0xffffffffffffffff) == 0); + ASSERT(f(0x7fffffffffffffff, 0x7fffffffffffffff) == 0); + ASSERT(f(0x7fffffffffffffff, 0x8000000000000000) == overflowed); + ASSERT(f(0x8000000000000000, 0x8000000000000000) == 0); +#endif +} + +int +main (int argc, char *argv[]) +{ + return main_helper(argc, argv, run_test); +} diff --git a/deps/lightening/tests/call_10.c b/deps/lightening/tests/call_10.c new file mode 100644 index 0000000..d99bcb8 --- /dev/null +++ b/deps/lightening/tests/call_10.c @@ -0,0 +1,54 @@ +#include "test.h" + +static int32_t f(int32_t a, int32_t b, int32_t c, int32_t d, int32_t e, + int32_t f, int32_t g, int32_t h, int32_t i, int32_t j) { + ASSERT(a == 0); + ASSERT(b == 1); + ASSERT(c == 2); + ASSERT(d == 3); + ASSERT(e == 4); + ASSERT(f == 5); + ASSERT(g == 6); + ASSERT(h == 7); + ASSERT(i == 8); + ASSERT(j == 9); + return 42; +} + +static void +run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size) +{ + jit_begin(j, arena_base, arena_size); + size_t align = jit_enter_jit_abi(j, 0, 0, 0); + jit_load_args_1(j, jit_operand_gpr (JIT_OPERAND_ABI_POINTER, JIT_R0)); + + jit_operand_t args[10] = { + jit_operand_mem(JIT_OPERAND_ABI_INT32, JIT_R0, 0 * sizeof(int32_t)), + jit_operand_mem(JIT_OPERAND_ABI_INT32, JIT_R0, 1 * sizeof(int32_t)), + jit_operand_mem(JIT_OPERAND_ABI_INT32, JIT_R0, 2 * sizeof(int32_t)), + jit_operand_mem(JIT_OPERAND_ABI_INT32, JIT_R0, 3 * sizeof(int32_t)), + jit_operand_mem(JIT_OPERAND_ABI_INT32, JIT_R0, 4 * sizeof(int32_t)), + jit_operand_mem(JIT_OPERAND_ABI_INT32, JIT_R0, 5 * sizeof(int32_t)), + jit_operand_mem(JIT_OPERAND_ABI_INT32, JIT_R0, 6 * sizeof(int32_t)), + jit_operand_mem(JIT_OPERAND_ABI_INT32, JIT_R0, 7 * sizeof(int32_t)), + jit_operand_mem(JIT_OPERAND_ABI_INT32, JIT_R0, 8 * sizeof(int32_t)), + jit_operand_mem(JIT_OPERAND_ABI_INT32, JIT_R0, 
9 * sizeof(int32_t)) + }; + jit_calli(j, f, 10, args); + jit_leave_jit_abi(j, 0, 0, align); + jit_ret(j); + + size_t size = 0; + void* ret = jit_end(j, &size); + + int32_t (*f)(int32_t*) = ret; + + int32_t iargs[10] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9 }; + ASSERT(f(iargs) == 42); +} + +int +main (int argc, char *argv[]) +{ + return main_helper(argc, argv, run_test); +} diff --git a/deps/lightening/tests/call_double.c b/deps/lightening/tests/call_double.c new file mode 100644 index 0000000..2aad1d0 --- /dev/null +++ b/deps/lightening/tests/call_double.c @@ -0,0 +1,38 @@ +#include "test.h" + +static double f(int32_t a, double b) { + return b + a; +} + +static void +run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size) +{ + jit_begin(j, arena_base, arena_size); + size_t align = jit_enter_jit_abi(j, 0, 0, 0); + jit_load_args_2(j, jit_operand_gpr (JIT_OPERAND_ABI_POINTER, JIT_R0), + jit_operand_gpr (JIT_OPERAND_ABI_POINTER, JIT_R1)); + + jit_operand_t args[2] = { + jit_operand_mem(JIT_OPERAND_ABI_INT32, JIT_R0, 0), + jit_operand_mem(JIT_OPERAND_ABI_DOUBLE, JIT_R1, 0) + }; + jit_calli(j, f, 2, args); + jit_retval_d(j, JIT_F0); + jit_leave_jit_abi(j, 0, 0, align); + jit_retr_d(j, JIT_F0); + + size_t size = 0; + void* ret = jit_end(j, &size); + + double (*f)(int32_t*, double*) = ret; + + double d = 22.0f; + int32_t i = 20; + ASSERT(f(&i, &d) == 42.0f); +} + +int +main (int argc, char *argv[]) +{ + return main_helper(argc, argv, run_test); +} diff --git a/deps/lightening/tests/call_float.c b/deps/lightening/tests/call_float.c new file mode 100644 index 0000000..e9bbd71 --- /dev/null +++ b/deps/lightening/tests/call_float.c @@ -0,0 +1,38 @@ +#include "test.h" + +static float f(int32_t a, float b) { + return b + a; +} + +static void +run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size) +{ + jit_begin(j, arena_base, arena_size); + size_t align = jit_enter_jit_abi(j, 0, 0, 0); + jit_load_args_2(j, jit_operand_gpr (JIT_OPERAND_ABI_POINTER, JIT_R0), + 
jit_operand_gpr (JIT_OPERAND_ABI_POINTER, JIT_R1)); + + jit_operand_t args[2] = { + jit_operand_mem(JIT_OPERAND_ABI_INT32, JIT_R0, 0), + jit_operand_mem(JIT_OPERAND_ABI_FLOAT, JIT_R1, 0) + }; + jit_calli(j, f, 2, args); + jit_retval_f(j, JIT_F0); + jit_leave_jit_abi(j, 0, 0, align); + jit_retr_f(j, JIT_F0); + + size_t size = 0; + void* ret = jit_end(j, &size); + + float (*f)(int32_t*, float*) = ret; + + float d = 22.0f; + int32_t i = 20; + ASSERT(f(&i, &d) == 42.0f); +} + +int +main (int argc, char *argv[]) +{ + return main_helper(argc, argv, run_test); +} diff --git a/deps/lightening/tests/callee_9.c b/deps/lightening/tests/callee_9.c new file mode 100644 index 0000000..b7f1a46 --- /dev/null +++ b/deps/lightening/tests/callee_9.c @@ -0,0 +1,68 @@ +#include "test.h" + +struct args +{ + int8_t a; + int16_t b; + int32_t c; + jit_word_t d; + uint16_t e; + float f; + double g; + float h; +}; + +static void +run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size) +{ + jit_begin(j, arena_base, arena_size); + size_t align = jit_enter_jit_abi(j, 3, 0, 0); + + jit_operand_t args[9] = { + jit_operand_gpr(JIT_OPERAND_ABI_POINTER, JIT_R0), + jit_operand_gpr(JIT_OPERAND_ABI_INT8, JIT_R1), + jit_operand_gpr(JIT_OPERAND_ABI_INT16, JIT_R2), + jit_operand_gpr(JIT_OPERAND_ABI_INT32, JIT_V0), + jit_operand_gpr(JIT_OPERAND_ABI_WORD, JIT_V1), + jit_operand_gpr(JIT_OPERAND_ABI_UINT16, JIT_V2), + jit_operand_fpr(JIT_OPERAND_ABI_FLOAT, JIT_F0), + jit_operand_fpr(JIT_OPERAND_ABI_DOUBLE, JIT_F1), + jit_operand_fpr(JIT_OPERAND_ABI_FLOAT, JIT_F2), + }; + jit_load_args(j, 9, args); + jit_stxi_c(j, offsetof(struct args, a), JIT_R0, JIT_R1); // a + jit_stxi_s(j, offsetof(struct args, b), JIT_R0, JIT_R2); // b + jit_stxi_i(j, offsetof(struct args, c), JIT_R0, JIT_V0); // c + jit_stxi(j, offsetof(struct args, d), JIT_R0, JIT_V1); // d + jit_stxi_s(j, offsetof(struct args, e), JIT_R0, JIT_V2); // e + jit_stxi_f(j, offsetof(struct args, f), JIT_R0, JIT_F0); // f + jit_stxi_d(j, 
offsetof(struct args, g), JIT_R0, JIT_F1); // g + jit_stxi_f(j, offsetof(struct args, h), JIT_R0, JIT_F2); // h + + jit_leave_jit_abi(j, 3, 0, align); + jit_retr(j, JIT_R0); + + size_t size = 0; + void* ret = jit_end(j, &size); + + struct args* (*f)(struct args*, int8_t, int16_t, int32_t, jit_word_t, + uint16_t, float, double, float) = ret; + + struct args in = { 0, 1, 2, 3, 4, 5, 6, 7 }; + struct args out; + ASSERT(f(&out, in.a, in.b, in.c, in.d, in.e, in.f, in.g, in.h) == &out); + ASSERT(in.a == out.a); + ASSERT(in.b == out.b); + ASSERT(in.c == out.c); + ASSERT(in.d == out.d); + ASSERT(in.e == out.e); + ASSERT(in.f == out.f); + ASSERT(in.g == out.g); + ASSERT(in.h == out.h); +} + +int +main (int argc, char *argv[]) +{ + return main_helper(argc, argv, run_test); +} diff --git a/deps/lightening/tests/cas_atomic.c b/deps/lightening/tests/cas_atomic.c new file mode 100644 index 0000000..11c9a22 --- /dev/null +++ b/deps/lightening/tests/cas_atomic.c @@ -0,0 +1,33 @@ +#include "test.h" + +static long data[] = { 0x12121212, 0x00000000, 0x34343434 }; + +static void +run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size) +{ + jit_begin(j, arena_base, arena_size); + size_t align = jit_enter_jit_abi(j, 0, 0, 0); + jit_load_args_3(j, jit_operand_gpr (JIT_OPERAND_ABI_POINTER, JIT_R0), + jit_operand_gpr (JIT_OPERAND_ABI_INT32, JIT_R1), + jit_operand_gpr (JIT_OPERAND_ABI_INT32, JIT_R2)); + + jit_cas_atomic(j, JIT_R0, JIT_R0, JIT_R1, JIT_R2); + jit_leave_jit_abi(j, 0, 0, align); + jit_ret(j); + + void (*f)(void*, int32_t, int32_t) = jit_end(j, NULL); + + ASSERT(data[0] == 0x12121212); + ASSERT(data[1] == 0x00); + ASSERT(data[2] == 0x34343434); + f(&data[1], 0, 0x0f0f0f0f); + ASSERT(data[0] == 0x12121212); + ASSERT(data[1] == 0x0f0f0f0f); + ASSERT(data[2] == 0x34343434); +} + +int +main (int argc, char *argv[]) +{ + return main_helper(argc, argv, run_test); +} diff --git a/deps/lightening/tests/comr.c b/deps/lightening/tests/comr.c new file mode 100644 index 
0000000..c2e7d18 --- /dev/null +++ b/deps/lightening/tests/comr.c @@ -0,0 +1,41 @@ +#include "test.h" + +static void +run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size) +{ + jit_begin(j, arena_base, arena_size); + size_t align = jit_enter_jit_abi(j, 0, 0, 0); + jit_load_args_1(j, jit_operand_gpr (JIT_OPERAND_ABI_WORD, JIT_R0)); + + jit_comr(j, JIT_R0, JIT_R0); + jit_leave_jit_abi(j, 0, 0, align); + jit_retr(j, JIT_R0); + + jit_word_t (*f)(jit_word_t) = jit_end(j, NULL); + +#if __WORDSIZE == 32 + ASSERT(f(0) == 0xffffffff); + ASSERT(f(1) == 0xfffffffe); + ASSERT(f(0xffffffff) == 0); + ASSERT(f(0x80000000) == 0x7fffffff); + ASSERT(f(0x7fffffff) == 0x80000000); + ASSERT(f(0x80000001) == 0x7ffffffe); +#else + ASSERT(f(0) == 0xffffffffffffffff); + ASSERT(f(1) == 0xfffffffffffffffe); + ASSERT(f(0xffffffff) == 0xffffffff00000000); + ASSERT(f(0x80000000) == 0xffffffff7fffffff); + ASSERT(f(0x7fffffff) == 0xffffffff80000000); + ASSERT(f(0x80000001) == 0xffffffff7ffffffe); + ASSERT(f(0xffffffffffffffff) == 0); + ASSERT(f(0x8000000000000000) == 0x7fffffffffffffff); + ASSERT(f(0x7fffffffffffffff) == 0x8000000000000000); + ASSERT(f(0x8000000000000001) == 0x7ffffffffffffffe); +#endif +} + +int +main (int argc, char *argv[]) +{ + return main_helper(argc, argv, run_test); +} diff --git a/deps/lightening/tests/divr.c b/deps/lightening/tests/divr.c new file mode 100644 index 0000000..399d70d --- /dev/null +++ b/deps/lightening/tests/divr.c @@ -0,0 +1,60 @@ +#include "test.h" + +static void +run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size) +{ + jit_begin(j, arena_base, arena_size); + size_t align = jit_enter_jit_abi(j, 0, 0, 0); + jit_load_args_2(j, jit_operand_gpr (JIT_OPERAND_ABI_WORD, JIT_R0), + jit_operand_gpr (JIT_OPERAND_ABI_WORD, JIT_R1)); + + jit_divr(j, JIT_R0, JIT_R0, JIT_R1); + jit_leave_jit_abi(j, 0, 0, align); + jit_retr(j, JIT_R0); + + size_t size = 0; + void* ret = jit_end(j, &size); + + jit_word_t (*f)(jit_word_t, jit_word_t) = ret; + + 
ASSERT(f(0x7fffffff, 1) == 0x7fffffff); + ASSERT(f(1, 0x7fffffff) == 0); + ASSERT(f(0x80000000, 1) == 0x80000000); + ASSERT(f(1, 0x80000000) == 0); + ASSERT(f(0x7fffffff, 2) == 0x3fffffff); + ASSERT(f(2, 0x7fffffff) == 0); + ASSERT(f(2, 0x80000000) == 0); + ASSERT(f(0x7fffffff, 0x80000000) == 0); + ASSERT(f(0, 0x7fffffff) == 0); + ASSERT(f(0xffffffff, 0xffffffff) == 1); +#if __WORDSIZE == 32 + ASSERT(f(0x80000000, 2) == 0xc0000000); + ASSERT(f(0x80000000, 0x7fffffff) == 0xffffffff); + ASSERT(f(0x7fffffff, 0xffffffff) == 0x80000001); + ASSERT(f(0xffffffff, 0x7fffffff) == 0); +#else + ASSERT(f(0x80000000, 2) == 0x40000000); + ASSERT(f(0x80000000, 0x7fffffff) == 1); + ASSERT(f(0x7fffffff, 0xffffffff) == 0); + ASSERT(f(0xffffffff, 0x7fffffff) == 2); + ASSERT(f(0x7fffffffffffffff, 1) == 0x7fffffffffffffff); + ASSERT(f(1, 0x7fffffffffffffff) == 0); + ASSERT(f(0x8000000000000000, 1) == 0x8000000000000000); + ASSERT(f(1, 0x8000000000000000) == 0); + ASSERT(f(0x7fffffffffffffff, 2) == 0x3fffffffffffffff); + ASSERT(f(2, 0x7fffffffffffffff) == 0); + ASSERT(f(0x8000000000000000, 2) == 0xc000000000000000); + ASSERT(f(2, 0x8000000000000000) == 0); + ASSERT(f(0x7fffffffffffffff, 0x8000000000000000) == 0); + ASSERT(f(0x8000000000000000, 0x7fffffffffffffff) == 0xffffffffffffffff); + ASSERT(f(0x7fffffffffffffff, 0xffffffffffffffff) == 0x8000000000000001); + ASSERT(f(0xffffffffffffffff, 0x7fffffffffffffff) == 0); + ASSERT(f(0xffffffffffffffff, 0xffffffffffffffff) == 1); +#endif +} + +int +main (int argc, char *argv[]) +{ + return main_helper(argc, argv, run_test); +} diff --git a/deps/lightening/tests/divr_d.c b/deps/lightening/tests/divr_d.c new file mode 100644 index 0000000..9d21cb5 --- /dev/null +++ b/deps/lightening/tests/divr_d.c @@ -0,0 +1,27 @@ +#include "test.h" + +static void +run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size) +{ + jit_begin(j, arena_base, arena_size); + size_t align = jit_enter_jit_abi(j, 0, 0, 0); + jit_load_args_2(j, jit_operand_fpr 
(JIT_OPERAND_ABI_DOUBLE, JIT_F0), + jit_operand_fpr (JIT_OPERAND_ABI_DOUBLE, JIT_F1)); + + jit_divr_d(j, JIT_F0, JIT_F0, JIT_F1); + jit_leave_jit_abi(j, 0, 0, align); + jit_retr_d(j, JIT_F0); + + size_t size = 0; + void* ret = jit_end(j, &size); + + double (*f)(double, double) = ret; + ASSERT(f(-0.5f, 0.5f) == -1.0f); + ASSERT(f(1.25f, 0.5f) == 2.5f); +} + +int +main (int argc, char *argv[]) +{ + return main_helper(argc, argv, run_test); +} diff --git a/deps/lightening/tests/divr_f.c b/deps/lightening/tests/divr_f.c new file mode 100644 index 0000000..de519dc --- /dev/null +++ b/deps/lightening/tests/divr_f.c @@ -0,0 +1,27 @@ +#include "test.h" + +static void +run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size) +{ + jit_begin(j, arena_base, arena_size); + size_t align = jit_enter_jit_abi(j, 0, 0, 0); + jit_load_args_2(j, jit_operand_fpr (JIT_OPERAND_ABI_FLOAT, JIT_F0), + jit_operand_fpr (JIT_OPERAND_ABI_FLOAT, JIT_F1)); + + jit_divr_f(j, JIT_F0, JIT_F0, JIT_F1); + jit_leave_jit_abi(j, 0, 0, align); + jit_retr_f(j, JIT_F0); + + size_t size = 0; + void* ret = jit_end(j, &size); + + float (*f)(float, float) = ret; + ASSERT(f(-0.5f, 0.5f) == -1.0f); + ASSERT(f(1.25f, 0.5f) == 2.5f); +} + +int +main (int argc, char *argv[]) +{ + return main_helper(argc, argv, run_test); +} diff --git a/deps/lightening/tests/divr_u.c b/deps/lightening/tests/divr_u.c new file mode 100644 index 0000000..b8305f7 --- /dev/null +++ b/deps/lightening/tests/divr_u.c @@ -0,0 +1,55 @@ +#include "test.h" + +static void +run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size) +{ + jit_begin(j, arena_base, arena_size); + size_t align = jit_enter_jit_abi(j, 0, 0, 0); + jit_load_args_2(j, jit_operand_gpr (JIT_OPERAND_ABI_WORD, JIT_R0), + jit_operand_gpr (JIT_OPERAND_ABI_WORD, JIT_R1)); + + jit_divr_u(j, JIT_R0, JIT_R0, JIT_R1); + jit_leave_jit_abi(j, 0, 0, align); + jit_retr(j, JIT_R0); + + size_t size = 0; + void* ret = jit_end(j, &size); + + jit_word_t (*f)(jit_word_t, 
jit_word_t) = ret; + + ASSERT(f(0x7fffffff, 1) == 0x7fffffff); + ASSERT(f(1, 0x7fffffff) == 0); + ASSERT(f(0x80000000, 1) == 0x80000000); + ASSERT(f(1, 0x80000000) == 0); + ASSERT(f(0x7fffffff, 2) == 0x3fffffff); + ASSERT(f(2, 0x7fffffff) == 0); + ASSERT(f(0x80000000, 2) == 0x40000000); + ASSERT(f(2, 0x80000000) == 0); + ASSERT(f(0x7fffffff, 0x80000000) == 0); + ASSERT(f(0x80000000, 0x7fffffff) == 1); + ASSERT(f(0, 0x7fffffff) == 0); + ASSERT(f(0x7fffffff, 0xffffffff) == 0); + ASSERT(f(0xffffffff, 0x7fffffff) == 2); + ASSERT(f(0xffffffff, 0xffffffff) == 1); +#if __WORDSIZE != 32 + ASSERT(f(0x7fffffffffffffff, 1) == 0x7fffffffffffffff); + ASSERT(f(1, 0x7fffffffffffffff) == 0); + ASSERT(f(0x8000000000000000, 1) == 0x8000000000000000); + ASSERT(f(1, 0x8000000000000000) == 0); + ASSERT(f(0x7fffffffffffffff, 2) == 0x3fffffffffffffff); + ASSERT(f(2, 0x7fffffffffffffff) == 0); + ASSERT(f(0x8000000000000000, 2) == 0x4000000000000000); + ASSERT(f(2, 0x8000000000000000) == 0); + ASSERT(f(0x7fffffffffffffff, 0x8000000000000000) == 0); + ASSERT(f(0x8000000000000000, 0x7fffffffffffffff) == 1); + ASSERT(f(0x7fffffffffffffff, 0xffffffffffffffff) == 0); + ASSERT(f(0xffffffffffffffff, 0x7fffffffffffffff) == 2); + ASSERT(f(0xffffffffffffffff, 0xffffffffffffffff) == 1); +#endif +} + +int +main (int argc, char *argv[]) +{ + return main_helper(argc, argv, run_test); +} diff --git a/deps/lightening/tests/extr_c.c b/deps/lightening/tests/extr_c.c new file mode 100644 index 0000000..043068d --- /dev/null +++ b/deps/lightening/tests/extr_c.c @@ -0,0 +1,27 @@ +#include "test.h" + +static void +run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size) +{ + jit_begin(j, arena_base, arena_size); + size_t align = jit_enter_jit_abi(j, 0, 0, 0); + jit_load_args_1(j, jit_operand_gpr (JIT_OPERAND_ABI_WORD, JIT_R1)); + + jit_extr_c(j, JIT_R0, JIT_R1); + jit_leave_jit_abi(j, 0, 0, align); + jit_retr(j, JIT_R0); + + jit_uword_t (*f)(jit_uword_t) = jit_end(j, NULL); + + ASSERT(f(0) == 0); + 
ASSERT(f(1) == 1); + ASSERT(f(0xf) == 0xf); + ASSERT(f(0xff) == -1); + ASSERT(f(0xfff) == -1); +} + +int +main (int argc, char *argv[]) +{ + return main_helper(argc, argv, run_test); +} diff --git a/deps/lightening/tests/extr_d.c b/deps/lightening/tests/extr_d.c new file mode 100644 index 0000000..af0fe91 --- /dev/null +++ b/deps/lightening/tests/extr_d.c @@ -0,0 +1,25 @@ +#include "test.h" + +static void +run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size) +{ + jit_begin(j, arena_base, arena_size); + size_t align = jit_enter_jit_abi(j, 0, 0, 0); + jit_load_args_1(j, jit_operand_gpr (JIT_OPERAND_ABI_WORD, JIT_R0)); + + jit_extr_d(j, JIT_F0, JIT_R0); + jit_leave_jit_abi(j, 0, 0, align); + jit_retr_d(j, JIT_F0); + + double (*f)(jit_word_t) = jit_end(j, NULL); + + ASSERT(f(0) == 0.0); + ASSERT(f(1) == 1.0); + ASSERT(f(-100) == -100.0); +} + +int +main (int argc, char *argv[]) +{ + return main_helper(argc, argv, run_test); +} diff --git a/deps/lightening/tests/extr_d_f.c b/deps/lightening/tests/extr_d_f.c new file mode 100644 index 0000000..049eb5f --- /dev/null +++ b/deps/lightening/tests/extr_d_f.c @@ -0,0 +1,26 @@ +#include "test.h" + +static void +run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size) +{ + jit_begin(j, arena_base, arena_size); + size_t align = jit_enter_jit_abi(j, 0, 0, 0); + jit_load_args_1(j, jit_operand_fpr (JIT_OPERAND_ABI_DOUBLE, JIT_F0)); + + jit_extr_d_f(j, JIT_F0, JIT_F0); + jit_leave_jit_abi(j, 0, 0, align); + jit_retr_f(j, JIT_F0); + + float (*f)(double) = jit_end(j, NULL); + + ASSERT(f(0.0) == 0.0f); + ASSERT(f(0.5) == 0.5f); + ASSERT(f(1.0 / 0.0) == 1.0f / 0.0f); + ASSERT(f(1.25) == 1.25f); +} + +int +main (int argc, char *argv[]) +{ + return main_helper(argc, argv, run_test); +} diff --git a/deps/lightening/tests/extr_f.c b/deps/lightening/tests/extr_f.c new file mode 100644 index 0000000..b57830c --- /dev/null +++ b/deps/lightening/tests/extr_f.c @@ -0,0 +1,25 @@ +#include "test.h" + +static void 
+run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size) +{ + jit_begin(j, arena_base, arena_size); + size_t align = jit_enter_jit_abi(j, 0, 0, 0); + jit_load_args_1(j, jit_operand_gpr (JIT_OPERAND_ABI_WORD, JIT_R0)); + + jit_extr_f(j, JIT_F0, JIT_R0); + jit_leave_jit_abi(j, 0, 0, align); + jit_retr_f(j, JIT_F0); + + float (*f)(jit_word_t) = jit_end(j, NULL); + + ASSERT(f(0) == 0.0f); + ASSERT(f(1) == 1.0f); + ASSERT(f(-100) == -100.0f); +} + +int +main (int argc, char *argv[]) +{ + return main_helper(argc, argv, run_test); +} diff --git a/deps/lightening/tests/extr_f_d.c b/deps/lightening/tests/extr_f_d.c new file mode 100644 index 0000000..5fa5007 --- /dev/null +++ b/deps/lightening/tests/extr_f_d.c @@ -0,0 +1,26 @@ +#include "test.h" + +static void +run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size) +{ + jit_begin(j, arena_base, arena_size); + size_t align = jit_enter_jit_abi(j, 0, 0, 0); + jit_load_args_1(j, jit_operand_fpr (JIT_OPERAND_ABI_FLOAT, JIT_F0)); + + jit_extr_f_d(j, JIT_F0, JIT_F0); + jit_leave_jit_abi(j, 0, 0, align); + jit_retr_d(j, JIT_F0); + + double (*f)(float) = jit_end(j, NULL); + + ASSERT(f(0.0f) == 0.0); + ASSERT(f(0.5f) == 0.5); + ASSERT(f(1.0f / 0.0f) == 1.0 / 0.0); + ASSERT(f(1.25f) == 1.25); +} + +int +main (int argc, char *argv[]) +{ + return main_helper(argc, argv, run_test); +} diff --git a/deps/lightening/tests/extr_i.c b/deps/lightening/tests/extr_i.c new file mode 100644 index 0000000..d26a576 --- /dev/null +++ b/deps/lightening/tests/extr_i.c @@ -0,0 +1,30 @@ +#include "test.h" + +static void +run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size) +{ +#if __WORDSIZE > 32 + jit_begin(j, arena_base, arena_size); + size_t align = jit_enter_jit_abi(j, 0, 0, 0); + jit_load_args_1(j, jit_operand_gpr (JIT_OPERAND_ABI_WORD, JIT_R1)); + + jit_extr_i(j, JIT_R0, JIT_R1); + jit_leave_jit_abi(j, 0, 0, align); + jit_retr(j, JIT_R0); + + jit_uword_t (*f)(jit_uword_t) = jit_end(j, NULL); + + ASSERT(f(0) == 0); + 
ASSERT(f(1) == 1); + ASSERT(f(0xfffffff) == 0xfffffff); + ASSERT(f(0xffffffff) == -1); + ASSERT(f(0xfffffffff) == -1); + ASSERT(f(0xf00000000) == 0); +#endif +} + +int +main (int argc, char *argv[]) +{ + return main_helper(argc, argv, run_test); +} diff --git a/deps/lightening/tests/extr_s.c b/deps/lightening/tests/extr_s.c new file mode 100644 index 0000000..5b39af3 --- /dev/null +++ b/deps/lightening/tests/extr_s.c @@ -0,0 +1,28 @@ +#include "test.h" + +static void +run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size) +{ + jit_begin(j, arena_base, arena_size); + size_t align = jit_enter_jit_abi(j, 0, 0, 0); + jit_load_args_1(j, jit_operand_gpr (JIT_OPERAND_ABI_WORD, JIT_R1)); + + jit_extr_s(j, JIT_R0, JIT_R1); + jit_leave_jit_abi(j, 0, 0, align); + jit_retr(j, JIT_R0); + + jit_uword_t (*f)(jit_uword_t) = jit_end(j, NULL); + + ASSERT(f(0) == 0); + ASSERT(f(1) == 1); + ASSERT(f(0xfff) == 0xfff); + ASSERT(f(0xffff) == -1); + ASSERT(f(0xfffff) == -1); + ASSERT(f(0xf0000) == 0); +} + +int +main (int argc, char *argv[]) +{ + return main_helper(argc, argv, run_test); +} diff --git a/deps/lightening/tests/extr_uc.c b/deps/lightening/tests/extr_uc.c new file mode 100644 index 0000000..a42e603 --- /dev/null +++ b/deps/lightening/tests/extr_uc.c @@ -0,0 +1,27 @@ +#include "test.h" + +static void +run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size) +{ + jit_begin(j, arena_base, arena_size); + size_t align = jit_enter_jit_abi(j, 0, 0, 0); + jit_load_args_1(j, jit_operand_gpr (JIT_OPERAND_ABI_WORD, JIT_R1)); + + jit_extr_uc(j, JIT_R0, JIT_R1); + jit_leave_jit_abi(j, 0, 0, align); + jit_retr(j, JIT_R0); + + jit_uword_t (*f)(jit_uword_t) = jit_end(j, NULL); + + ASSERT(f(0) == 0); + ASSERT(f(1) == 1); + ASSERT(f(0xff) == 0xff); + ASSERT(f(0xfff) == 0xff); + ASSERT(f(0xf00) == 0); +} + +int +main (int argc, char *argv[]) +{ + return main_helper(argc, argv, run_test); +} diff --git a/deps/lightening/tests/extr_ui.c b/deps/lightening/tests/extr_ui.c new file 
mode 100644 index 0000000..37964da --- /dev/null +++ b/deps/lightening/tests/extr_ui.c @@ -0,0 +1,29 @@ +#include "test.h" + +static void +run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size) +{ +#if __WORDSIZE > 32 + jit_begin(j, arena_base, arena_size); + size_t align = jit_enter_jit_abi(j, 0, 0, 0); + jit_load_args_1(j, jit_operand_gpr (JIT_OPERAND_ABI_WORD, JIT_R1)); + + jit_extr_ui(j, JIT_R0, JIT_R1); + jit_leave_jit_abi(j, 0, 0, align); + jit_retr(j, JIT_R0); + + jit_uword_t (*f)(jit_uword_t) = jit_end(j, NULL); + + ASSERT(f(0) == 0); + ASSERT(f(1) == 1); + ASSERT(f(0xffffffff) == 0xffffffff); + ASSERT(f(0xfffffffff) == 0xffffffff); + ASSERT(f(0xf00000000) == 0); +#endif +} + +int +main (int argc, char *argv[]) +{ + return main_helper(argc, argv, run_test); +} diff --git a/deps/lightening/tests/extr_us.c b/deps/lightening/tests/extr_us.c new file mode 100644 index 0000000..38a7c39 --- /dev/null +++ b/deps/lightening/tests/extr_us.c @@ -0,0 +1,27 @@ +#include "test.h" + +static void +run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size) +{ + jit_begin(j, arena_base, arena_size); + size_t align = jit_enter_jit_abi(j, 0, 0, 0); + jit_load_args_1(j, jit_operand_gpr (JIT_OPERAND_ABI_WORD, JIT_R1)); + + jit_extr_us(j, JIT_R0, JIT_R1); + jit_leave_jit_abi(j, 0, 0, align); + jit_retr(j, JIT_R0); + + jit_uword_t (*f)(jit_uword_t) = jit_end(j, NULL); + + ASSERT(f(0) == 0); + ASSERT(f(1) == 1); + ASSERT(f(0xffff) == 0xffff); + ASSERT(f(0xfffff) == 0xffff); + ASSERT(f(0xf0000) == 0); +} + +int +main (int argc, char *argv[]) +{ + return main_helper(argc, argv, run_test); +} diff --git a/deps/lightening/tests/jmp0.c b/deps/lightening/tests/jmp0.c new file mode 100644 index 0000000..261a399 --- /dev/null +++ b/deps/lightening/tests/jmp0.c @@ -0,0 +1,24 @@ +#include "test.h" + +static void +run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size) +{ + jit_begin(j, arena_base, arena_size); + size_t align = jit_enter_jit_abi(j, 0, 0, 0); + 
jit_load_args_1(j, jit_operand_gpr (JIT_OPERAND_ABI_WORD, JIT_R0)); + + jit_reloc_t r = jit_jmp(j); + jit_patch_here(j, r); + jit_leave_jit_abi(j, 0, 0, align); + jit_retr(j, JIT_R0); + + jit_word_t (*f)(jit_word_t) = jit_end(j, NULL); + ASSERT(f(42) == 42); + ASSERT(f(-1) == -1); +} + +int +main (int argc, char *argv[]) +{ + return main_helper(argc, argv, run_test); +} diff --git a/deps/lightening/tests/jmp_table.c b/deps/lightening/tests/jmp_table.c new file mode 100644 index 0000000..f90ab16 --- /dev/null +++ b/deps/lightening/tests/jmp_table.c @@ -0,0 +1,61 @@ +#include "test.h" + +#define NTARGETS ((size_t) 4) + +static void +run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size) +{ + jit_begin(j, arena_base, arena_size); + size_t align = jit_enter_jit_abi(j, 0, 0, 0); + jit_load_args_1(j, jit_operand_gpr (JIT_OPERAND_ABI_POINTER, JIT_R0)); + + jit_reloc_t default_target = jit_bgei_u(j, JIT_R0, NTARGETS); + + // FIXME: need ldxr with word stride, then can eliminate lshi. + jit_lshi(j, JIT_R0, JIT_R0, sizeof(intptr_t) == 4 ? 
2 : 3); + jit_reloc_t table = jit_mov_addr(j, JIT_R1); + jit_ldxr(j, JIT_R1, JIT_R1, JIT_R0); + jit_jmpr(j, JIT_R1); + + jit_begin_data (j, (NTARGETS + 1) * sizeof(intptr_t)); + jit_align(j, sizeof(intptr_t)); + jit_patch_here(j, table); + jit_reloc_t targets[NTARGETS]; + jit_reloc_t tails[NTARGETS]; + for (size_t i = 0; i < NTARGETS; i++) { + targets[i] = jit_emit_addr(j); + } + jit_end_data (j); + + for (size_t i = 0; i < NTARGETS; i++) { + jit_patch_here(j, targets[i]); + jit_movi(j, JIT_R0, i * i); + tails[i] = jit_jmp(j); + } + + jit_patch_here(j, default_target); + jit_movi(j, JIT_R0, 42); + for (int i = 0; i < NTARGETS; i++) { + jit_patch_here(j, tails[i]); + } + jit_leave_jit_abi(j, 0, 0, align); + jit_retr(j, JIT_R0); + + jit_word_t (*f)(jit_word_t) = jit_end(j, NULL); + + for (int i = -2; i < ((int) NTARGETS) + 2; i++) { + if (i < 0) { + ASSERT(f(i) == 42); + } else if (i < NTARGETS) { + ASSERT(f(i) == i * i); + } else { + ASSERT(f(i) == 42); + } + } +} + +int +main (int argc, char *argv[]) +{ + return main_helper(argc, argv, run_test); +} diff --git a/deps/lightening/tests/jmpi.c b/deps/lightening/tests/jmpi.c new file mode 100644 index 0000000..e73ace0 --- /dev/null +++ b/deps/lightening/tests/jmpi.c @@ -0,0 +1,41 @@ +#include "test.h" + +void *tail; + +static void *target; + +static void +run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size) +{ + jit_begin(j, arena_base, arena_size); + jit_enter_jit_abi(j, 0, 0, 0); + jit_movi(j, JIT_R0, 42); + jit_jmpi(j, target); + // Unreachable. + jit_breakpoint(j); + int (*f)(void) = jit_end(j, NULL); + ASSERT(f() == 42); +} + +// Make the tail-call target via a separate main_helper because probably the new +// arena will be allocated farther away, forcing nonlocal jumps. 
+static void +make_target(jit_state_t *j, uint8_t *arena_base, size_t arena_size) +{ + jit_begin(j, arena_base, arena_size); + size_t align = jit_enter_jit_abi(j, 0, 0, 0); + // Tail call target assumes tail caller called enter_jit_abi with compatible + // parameters. + target = jit_address(j); + jit_leave_jit_abi(j, 0, 0, align); + jit_retr(j, JIT_R0); + jit_end(j, NULL); + + main_helper(0, NULL, run_test); +} + +int +main (int argc, char *argv[]) +{ + return main_helper(argc, argv, make_target); +} diff --git a/deps/lightening/tests/jmpi_local.c b/deps/lightening/tests/jmpi_local.c new file mode 100644 index 0000000..49e4507 --- /dev/null +++ b/deps/lightening/tests/jmpi_local.c @@ -0,0 +1,25 @@ +#include "test.h" + +static void +run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size) +{ + jit_begin(j, arena_base, arena_size); + + jit_reloc_t r = jit_jmp (j); + jit_reti (j, 0); + jit_pointer_t addr = jit_address (j); + jit_reti (j, 1); + jit_patch_here (j, r); + jit_jmpi (j, addr); + jit_reti (j, 2); + + int (*f)(void) = jit_end(j, NULL); + + ASSERT(f() == 1); +} + +int +main (int argc, char *argv[]) +{ + return main_helper(argc, argv, run_test); +} diff --git a/deps/lightening/tests/jmpr.c b/deps/lightening/tests/jmpr.c new file mode 100644 index 0000000..8840897 --- /dev/null +++ b/deps/lightening/tests/jmpr.c @@ -0,0 +1,23 @@ +#include "test.h" + +static int tail(void) { return 42; } + +static void +run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size) +{ + jit_begin(j, arena_base, arena_size); + size_t align = jit_enter_jit_abi(j, 0, 0, 0); + jit_load_args_1(j, jit_operand_gpr (JIT_OPERAND_ABI_POINTER, JIT_R0)); + jit_leave_jit_abi(j, 0, 0, align); + + jit_jmpr(j, JIT_R0); + + int (*f)(void*) = jit_end(j, NULL); + ASSERT(f(tail) == 42); +} + +int +main (int argc, char *argv[]) +{ + return main_helper(argc, argv, run_test); +} diff --git a/deps/lightening/tests/ldi_c.c b/deps/lightening/tests/ldi_c.c new file mode 100644 index 
0000000..9d5de82 --- /dev/null +++ b/deps/lightening/tests/ldi_c.c @@ -0,0 +1,24 @@ +#include "test.h" + +static uint8_t data[] = { 0xff, 0x00, 0x42 }; + +static void +run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size) +{ + jit_begin(j, arena_base, arena_size); + size_t align = jit_enter_jit_abi(j, 0, 0, 0); + + jit_ldi_c(j, JIT_R0, &data[0]); + jit_leave_jit_abi(j, 0, 0, align); + jit_retr(j, JIT_R0); + + jit_uword_t (*f)(void) = jit_end(j, NULL); + + ASSERT(f() == -1); +} + +int +main (int argc, char *argv[]) +{ + return main_helper(argc, argv, run_test); +} diff --git a/deps/lightening/tests/ldi_d.c b/deps/lightening/tests/ldi_d.c new file mode 100644 index 0000000..b72cdda --- /dev/null +++ b/deps/lightening/tests/ldi_d.c @@ -0,0 +1,24 @@ +#include "test.h" + +static double data = -1.5; + +static void +run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size) +{ + jit_begin(j, arena_base, arena_size); + size_t align = jit_enter_jit_abi(j, 0, 0, 0); + + jit_ldi_d(j, JIT_F0, &data); + jit_leave_jit_abi(j, 0, 0, align); + jit_retr_d(j, JIT_F0); + + double (*f)(void) = jit_end(j, NULL); + + ASSERT(f() == data); +} + +int +main (int argc, char *argv[]) +{ + return main_helper(argc, argv, run_test); +} diff --git a/deps/lightening/tests/ldi_f.c b/deps/lightening/tests/ldi_f.c new file mode 100644 index 0000000..13e5fd4 --- /dev/null +++ b/deps/lightening/tests/ldi_f.c @@ -0,0 +1,24 @@ +#include "test.h" + +static float data = -1.5f; + +static void +run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size) +{ + jit_begin(j, arena_base, arena_size); + size_t align = jit_enter_jit_abi(j, 0, 0, 0); + + jit_ldi_f(j, JIT_F0, &data); + jit_leave_jit_abi(j, 0, 0, align); + jit_retr_f(j, JIT_F0); + + float (*f)(void) = jit_end(j, NULL); + + ASSERT(f() == data); +} + +int +main (int argc, char *argv[]) +{ + return main_helper(argc, argv, run_test); +} diff --git a/deps/lightening/tests/ldi_i.c b/deps/lightening/tests/ldi_i.c new file mode 100644 
index 0000000..e389788 --- /dev/null +++ b/deps/lightening/tests/ldi_i.c @@ -0,0 +1,24 @@ +#include "test.h" + +static uint32_t data = 0xffffffff; + +static void +run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size) +{ + jit_begin(j, arena_base, arena_size); + size_t align = jit_enter_jit_abi(j, 0, 0, 0); + + jit_ldi_i(j, JIT_R0, &data); + jit_leave_jit_abi(j, 0, 0, align); + jit_retr(j, JIT_R0); + + jit_uword_t (*f)(void) = jit_end(j, NULL); + + ASSERT(f() == -1); +} + +int +main (int argc, char *argv[]) +{ + return main_helper(argc, argv, run_test); +} diff --git a/deps/lightening/tests/ldi_l.c b/deps/lightening/tests/ldi_l.c new file mode 100644 index 0000000..f3fa729 --- /dev/null +++ b/deps/lightening/tests/ldi_l.c @@ -0,0 +1,26 @@ +#include "test.h" + +static void +run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size) +{ +#if __WORDSIZE > 32 + static uint64_t data = 0xffffffffffffffff; + + jit_begin(j, arena_base, arena_size); + size_t align = jit_enter_jit_abi(j, 0, 0, 0); + + jit_ldi_l(j, JIT_R0, &data); + jit_leave_jit_abi(j, 0, 0, align); + jit_retr(j, JIT_R0); + + jit_uword_t (*f)(void) = jit_end(j, NULL); + + ASSERT(f() == -1); +#endif +} + +int +main (int argc, char *argv[]) +{ + return main_helper(argc, argv, run_test); +} diff --git a/deps/lightening/tests/ldi_s.c b/deps/lightening/tests/ldi_s.c new file mode 100644 index 0000000..d9d1c47 --- /dev/null +++ b/deps/lightening/tests/ldi_s.c @@ -0,0 +1,24 @@ +#include "test.h" + +static uint16_t data = 0xffff; + +static void +run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size) +{ + jit_begin(j, arena_base, arena_size); + size_t align = jit_enter_jit_abi(j, 0, 0, 0); + + jit_ldi_s(j, JIT_R0, &data); + jit_leave_jit_abi(j, 0, 0, align); + jit_retr(j, JIT_R0); + + jit_uword_t (*f)(void) = jit_end(j, NULL); + + ASSERT(f() == -1); +} + +int +main (int argc, char *argv[]) +{ + return main_helper(argc, argv, run_test); +} diff --git a/deps/lightening/tests/ldi_uc.c 
b/deps/lightening/tests/ldi_uc.c new file mode 100644 index 0000000..12f18bf --- /dev/null +++ b/deps/lightening/tests/ldi_uc.c @@ -0,0 +1,24 @@ +#include "test.h" + +static uint8_t data[] = { 0xff, 0x00, 0x42 }; + +static void +run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size) +{ + jit_begin(j, arena_base, arena_size); + size_t align = jit_enter_jit_abi(j, 0, 0, 0); + + jit_ldi_uc(j, JIT_R0, data); + jit_leave_jit_abi(j, 0, 0, align); + jit_retr(j, JIT_R0); + + jit_uword_t (*f)(void) = jit_end(j, NULL); + + ASSERT(f() == 0xff); +} + +int +main (int argc, char *argv[]) +{ + return main_helper(argc, argv, run_test); +} diff --git a/deps/lightening/tests/ldi_ui.c b/deps/lightening/tests/ldi_ui.c new file mode 100644 index 0000000..d233694 --- /dev/null +++ b/deps/lightening/tests/ldi_ui.c @@ -0,0 +1,26 @@ +#include "test.h" + +static void +run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size) +{ +#if __WORDSIZE > 32 + static uint32_t data[] = { 0xffffffff, 0x00000000, 0x42424242 }; + + jit_begin(j, arena_base, arena_size); + size_t align = jit_enter_jit_abi(j, 0, 0, 0); + + jit_ldi_ui(j, JIT_R0, data); + jit_leave_jit_abi(j, 0, 0, align); + jit_retr(j, JIT_R0); + + jit_uword_t (*f)(void) = jit_end(j, NULL); + + ASSERT(f() == data[0]); +#endif +} + +int +main (int argc, char *argv[]) +{ + return main_helper(argc, argv, run_test); +} diff --git a/deps/lightening/tests/ldi_us.c b/deps/lightening/tests/ldi_us.c new file mode 100644 index 0000000..70eb4a0 --- /dev/null +++ b/deps/lightening/tests/ldi_us.c @@ -0,0 +1,24 @@ +#include "test.h" + +static uint16_t data[] = { 0xffff, 0x0000, 0x4242 }; + +static void +run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size) +{ + jit_begin(j, arena_base, arena_size); + size_t align = jit_enter_jit_abi(j, 0, 0, 0); + + jit_ldi_us(j, JIT_R0, data); + jit_leave_jit_abi(j, 0, 0, align); + jit_retr(j, JIT_R0); + + jit_uword_t (*f)(void) = jit_end(j, NULL); + + ASSERT(f() == data[0]); +} + +int +main 
(int argc, char *argv[]) +{ + return main_helper(argc, argv, run_test); +} diff --git a/deps/lightening/tests/ldr_atomic.c b/deps/lightening/tests/ldr_atomic.c new file mode 100644 index 0000000..73a8c0f --- /dev/null +++ b/deps/lightening/tests/ldr_atomic.c @@ -0,0 +1,28 @@ +#include "test.h" + +static long data[] = { 0x0f0f0f0f, 0x00000000, 0x42424242 }; + +static void +run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size) +{ + jit_begin(j, arena_base, arena_size); + size_t align = jit_enter_jit_abi(j, 0, 0, 0); + jit_load_args_1(j, jit_operand_gpr (JIT_OPERAND_ABI_POINTER, JIT_R1)); + + /* atm does not test for actual atomicity, just that no segfaults etc happen */ + jit_ldr_atomic(j, JIT_R0, JIT_R1); + jit_leave_jit_abi(j, 0, 0, align); + jit_retr(j, JIT_R0); + + jit_uword_t (*f)(void*) = jit_end(j, NULL); + + ASSERT(f(&data[0]) == 0x0f0f0f0f); + ASSERT(f(&data[1]) == 0); + ASSERT(f(&data[2]) == 0x42424242); +} + +int +main (int argc, char *argv[]) +{ + return main_helper(argc, argv, run_test); +} diff --git a/deps/lightening/tests/ldr_c.c b/deps/lightening/tests/ldr_c.c new file mode 100644 index 0000000..07a5931 --- /dev/null +++ b/deps/lightening/tests/ldr_c.c @@ -0,0 +1,27 @@ +#include "test.h" + +static uint8_t data[] = { 0xff, 0x00, 0x42 }; + +static void +run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size) +{ + jit_begin(j, arena_base, arena_size); + size_t align = jit_enter_jit_abi(j, 0, 0, 0); + jit_load_args_1(j, jit_operand_gpr (JIT_OPERAND_ABI_POINTER, JIT_R1)); + + jit_ldr_c(j, JIT_R0, JIT_R1); + jit_leave_jit_abi(j, 0, 0, align); + jit_retr(j, JIT_R0); + + jit_uword_t (*f)(void*) = jit_end(j, NULL); + + ASSERT(f(&data[0]) == -1); + ASSERT(f(&data[1]) == 0); + ASSERT(f(&data[2]) == 0x42); +} + +int +main (int argc, char *argv[]) +{ + return main_helper(argc, argv, run_test); +} diff --git a/deps/lightening/tests/ldr_d.c b/deps/lightening/tests/ldr_d.c new file mode 100644 index 0000000..37c75f0 --- /dev/null +++ 
b/deps/lightening/tests/ldr_d.c @@ -0,0 +1,27 @@ +#include "test.h" + +static double data[] = { -1.0, 0.0, 0.5 }; + +static void +run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size) +{ + jit_begin(j, arena_base, arena_size); + size_t align = jit_enter_jit_abi(j, 0, 0, 0); + jit_load_args_1(j, jit_operand_gpr (JIT_OPERAND_ABI_POINTER, JIT_R1)); + + jit_ldr_d(j, JIT_F0, JIT_R1); + jit_leave_jit_abi(j, 0, 0, align); + jit_retr_d(j, JIT_F0); + + double (*f)(void*) = jit_end(j, NULL); + + ASSERT(f(&data[0]) == data[0]); + ASSERT(f(&data[1]) == data[1]); + ASSERT(f(&data[2]) == data[2]); +} + +int +main (int argc, char *argv[]) +{ + return main_helper(argc, argv, run_test); +} diff --git a/deps/lightening/tests/ldr_f.c b/deps/lightening/tests/ldr_f.c new file mode 100644 index 0000000..bb68278 --- /dev/null +++ b/deps/lightening/tests/ldr_f.c @@ -0,0 +1,27 @@ +#include "test.h" + +static float data[] = { -1.0, 0.0, 0.5 }; + +static void +run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size) +{ + jit_begin(j, arena_base, arena_size); + size_t align = jit_enter_jit_abi(j, 0, 0, 0); + jit_load_args_1(j, jit_operand_gpr (JIT_OPERAND_ABI_POINTER, JIT_R1)); + + jit_ldr_f(j, JIT_F0, JIT_R1); + jit_leave_jit_abi(j, 0, 0, align); + jit_retr_f(j, JIT_F0); + + float (*f)(void*) = jit_end(j, NULL); + + ASSERT(f(&data[0]) == data[0]); + ASSERT(f(&data[1]) == data[1]); + ASSERT(f(&data[2]) == data[2]); +} + +int +main (int argc, char *argv[]) +{ + return main_helper(argc, argv, run_test); +} diff --git a/deps/lightening/tests/ldr_i.c b/deps/lightening/tests/ldr_i.c new file mode 100644 index 0000000..3de9e5f --- /dev/null +++ b/deps/lightening/tests/ldr_i.c @@ -0,0 +1,27 @@ +#include "test.h" + +static uint32_t data[] = { 0xffffffff, 0x00000000, 0x42424242 }; + +static void +run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size) +{ + jit_begin(j, arena_base, arena_size); + size_t align = jit_enter_jit_abi(j, 0, 0, 0); + jit_load_args_1(j, 
jit_operand_gpr (JIT_OPERAND_ABI_POINTER, JIT_R1)); + + jit_ldr_i(j, JIT_R0, JIT_R1); + jit_leave_jit_abi(j, 0, 0, align); + jit_retr(j, JIT_R0); + + jit_uword_t (*f)(void*) = jit_end(j, NULL); + + ASSERT(f(&data[0]) == -1); + ASSERT(f(&data[1]) == 0); + ASSERT(f(&data[2]) == 0x42424242); +} + +int +main (int argc, char *argv[]) +{ + return main_helper(argc, argv, run_test); +} diff --git a/deps/lightening/tests/ldr_l.c b/deps/lightening/tests/ldr_l.c new file mode 100644 index 0000000..15f0080 --- /dev/null +++ b/deps/lightening/tests/ldr_l.c @@ -0,0 +1,29 @@ +#include "test.h" + +static void +run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size) +{ +#if __WORDSIZE > 32 + static uint64_t data[] = { 0xffffffffffffffff, 0, 0x4242424212345678 }; + + jit_begin(j, arena_base, arena_size); + size_t align = jit_enter_jit_abi(j, 0, 0, 0); + jit_load_args_1(j, jit_operand_gpr (JIT_OPERAND_ABI_POINTER, JIT_R1)); + + jit_ldr_l(j, JIT_R0, JIT_R1); + jit_leave_jit_abi(j, 0, 0, align); + jit_retr(j, JIT_R0); + + jit_uword_t (*f)(void*) = jit_end(j, NULL); + + ASSERT(f(&data[0]) == -1); + ASSERT(f(&data[1]) == 0); + ASSERT(f(&data[2]) == data[2]); +#endif +} + +int +main (int argc, char *argv[]) +{ + return main_helper(argc, argv, run_test); +} diff --git a/deps/lightening/tests/ldr_s.c b/deps/lightening/tests/ldr_s.c new file mode 100644 index 0000000..cf668d5 --- /dev/null +++ b/deps/lightening/tests/ldr_s.c @@ -0,0 +1,27 @@ +#include "test.h" + +static uint16_t data[] = { 0xffff, 0x0000, 0x4242 }; + +static void +run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size) +{ + jit_begin(j, arena_base, arena_size); + size_t align = jit_enter_jit_abi(j, 0, 0, 0); + jit_load_args_1(j, jit_operand_gpr (JIT_OPERAND_ABI_POINTER, JIT_R1)); + + jit_ldr_s(j, JIT_R0, JIT_R1); + jit_leave_jit_abi(j, 0, 0, align); + jit_retr(j, JIT_R0); + + jit_uword_t (*f)(void*) = jit_end(j, NULL); + + ASSERT(f(&data[0]) == -1); + ASSERT(f(&data[1]) == 0); + ASSERT(f(&data[2]) == 
0x4242); +} + +int +main (int argc, char *argv[]) +{ + return main_helper(argc, argv, run_test); +} diff --git a/deps/lightening/tests/ldr_uc.c b/deps/lightening/tests/ldr_uc.c new file mode 100644 index 0000000..a48f370 --- /dev/null +++ b/deps/lightening/tests/ldr_uc.c @@ -0,0 +1,27 @@ +#include "test.h" + +static uint8_t data[] = { 0xff, 0x00, 0x42 }; + +static void +run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size) +{ + jit_begin(j, arena_base, arena_size); + size_t align = jit_enter_jit_abi(j, 0, 0, 0); + jit_load_args_1(j, jit_operand_gpr (JIT_OPERAND_ABI_POINTER, JIT_R1)); + + jit_ldr_uc(j, JIT_R0, JIT_R1); + jit_leave_jit_abi(j, 0, 0, align); + jit_retr(j, JIT_R0); + + jit_uword_t (*f)(void*) = jit_end(j, NULL); + + ASSERT(f(&data[0]) == 0xff); + ASSERT(f(&data[1]) == 0); + ASSERT(f(&data[2]) == 0x42); +} + +int +main (int argc, char *argv[]) +{ + return main_helper(argc, argv, run_test); +} diff --git a/deps/lightening/tests/ldr_ui.c b/deps/lightening/tests/ldr_ui.c new file mode 100644 index 0000000..7668778 --- /dev/null +++ b/deps/lightening/tests/ldr_ui.c @@ -0,0 +1,29 @@ +#include "test.h" + +static void +run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size) +{ +#if __WORDSIZE > 32 + static uint32_t data[] = { 0xffffffff, 0x00000000, 0x42424242 }; + + jit_begin(j, arena_base, arena_size); + size_t align = jit_enter_jit_abi(j, 0, 0, 0); + jit_load_args_1(j, jit_operand_gpr (JIT_OPERAND_ABI_POINTER, JIT_R1)); + + jit_ldr_ui(j, JIT_R0, JIT_R1); + jit_leave_jit_abi(j, 0, 0, align); + jit_retr(j, JIT_R0); + + jit_uword_t (*f)(void*) = jit_end(j, NULL); + + ASSERT(f(&data[0]) == data[0]); + ASSERT(f(&data[1]) == data[1]); + ASSERT(f(&data[2]) == data[2]); +#endif +} + +int +main (int argc, char *argv[]) +{ + return main_helper(argc, argv, run_test); +} diff --git a/deps/lightening/tests/ldr_us.c b/deps/lightening/tests/ldr_us.c new file mode 100644 index 0000000..bb9928b --- /dev/null +++ b/deps/lightening/tests/ldr_us.c @@ -0,0 
+1,27 @@ +#include "test.h" + +static uint16_t data[] = { 0xffff, 0x0000, 0x4242 }; + +static void +run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size) +{ + jit_begin(j, arena_base, arena_size); + size_t align = jit_enter_jit_abi(j, 0, 0, 0); + jit_load_args_1(j, jit_operand_gpr (JIT_OPERAND_ABI_POINTER, JIT_R1)); + + jit_ldr_us(j, JIT_R0, JIT_R1); + jit_leave_jit_abi(j, 0, 0, align); + jit_retr(j, JIT_R0); + + jit_uword_t (*f)(void*) = jit_end(j, NULL); + + ASSERT(f(&data[0]) == data[0]); + ASSERT(f(&data[1]) == data[1]); + ASSERT(f(&data[2]) == data[2]); +} + +int +main (int argc, char *argv[]) +{ + return main_helper(argc, argv, run_test); +} diff --git a/deps/lightening/tests/ldxi_c.c b/deps/lightening/tests/ldxi_c.c new file mode 100644 index 0000000..4271f97 --- /dev/null +++ b/deps/lightening/tests/ldxi_c.c @@ -0,0 +1,27 @@ +#include "test.h" + +static uint8_t data[] = { 0xff, 0x00, 0x42 }; + +static void +run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size) +{ + jit_begin(j, arena_base, arena_size); + size_t align = jit_enter_jit_abi(j, 0, 0, 0); + jit_load_args_1(j, jit_operand_gpr (JIT_OPERAND_ABI_WORD, JIT_R0)); + + jit_ldxi_c(j, JIT_R0, JIT_R0, (uintptr_t)&data); + jit_leave_jit_abi(j, 0, 0, align); + jit_retr(j, JIT_R0); + + jit_uword_t (*f)(jit_uword_t) = jit_end(j, NULL); + + ASSERT(f(0) == -1); + ASSERT(f(1) == 0); + ASSERT(f(2) == 0x42); +} + +int +main (int argc, char *argv[]) +{ + return main_helper(argc, argv, run_test); +} diff --git a/deps/lightening/tests/ldxi_d.c b/deps/lightening/tests/ldxi_d.c new file mode 100644 index 0000000..6bcf632 --- /dev/null +++ b/deps/lightening/tests/ldxi_d.c @@ -0,0 +1,27 @@ +#include "test.h" + +static double data[] = { -1.0, 0.0, 0.5 }; + +static void +run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size) +{ + jit_begin(j, arena_base, arena_size); + size_t align = jit_enter_jit_abi(j, 0, 0, 0); + jit_load_args_1(j, jit_operand_gpr (JIT_OPERAND_ABI_WORD, JIT_R0)); + + 
jit_ldxi_d(j, JIT_F0, JIT_R0, (uintptr_t)data); + jit_leave_jit_abi(j, 0, 0, align); + jit_retr_d(j, JIT_F0); + + double (*f)(jit_uword_t) = jit_end(j, NULL); + + ASSERT(f(0) == data[0]); + ASSERT(f(8) == data[1]); + ASSERT(f(16) == data[2]); +} + +int +main (int argc, char *argv[]) +{ + return main_helper(argc, argv, run_test); +} diff --git a/deps/lightening/tests/ldxi_f.c b/deps/lightening/tests/ldxi_f.c new file mode 100644 index 0000000..9e65321 --- /dev/null +++ b/deps/lightening/tests/ldxi_f.c @@ -0,0 +1,27 @@ +#include "test.h" + +static float data[] = { -1.0, 0.0, 0.5 }; + +static void +run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size) +{ + jit_begin(j, arena_base, arena_size); + size_t align = jit_enter_jit_abi(j, 0, 0, 0); + jit_load_args_1(j, jit_operand_gpr (JIT_OPERAND_ABI_WORD, JIT_R0)); + + jit_ldxi_f(j, JIT_F0, JIT_R0, (uintptr_t)data); + jit_leave_jit_abi(j, 0, 0, align); + jit_retr_f(j, JIT_F0); + + float (*f)(jit_uword_t) = jit_end(j, NULL); + + ASSERT(f(0) == data[0]); + ASSERT(f(4) == data[1]); + ASSERT(f(8) == data[2]); +} + +int +main (int argc, char *argv[]) +{ + return main_helper(argc, argv, run_test); +} diff --git a/deps/lightening/tests/ldxi_i.c b/deps/lightening/tests/ldxi_i.c new file mode 100644 index 0000000..d1f7b56 --- /dev/null +++ b/deps/lightening/tests/ldxi_i.c @@ -0,0 +1,27 @@ +#include "test.h" + +static uint32_t data[] = { 0xffffffff, 0x00000000, 0x42424242 }; + +static void +run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size) +{ + jit_begin(j, arena_base, arena_size); + size_t align = jit_enter_jit_abi(j, 0, 0, 0); + jit_load_args_1(j, jit_operand_gpr (JIT_OPERAND_ABI_POINTER, JIT_R0)); + + jit_ldxi_i(j, JIT_R0, JIT_R0, (uintptr_t)data); + jit_leave_jit_abi(j, 0, 0, align); + jit_retr(j, JIT_R0); + + jit_uword_t (*f)(jit_uword_t) = jit_end(j, NULL); + + ASSERT(f(0) == -1); + ASSERT(f(4) == 0); + ASSERT(f(8) == 0x42424242); +} + +int +main (int argc, char *argv[]) +{ + return main_helper(argc, 
argv, run_test); +} diff --git a/deps/lightening/tests/ldxi_l.c b/deps/lightening/tests/ldxi_l.c new file mode 100644 index 0000000..bb1a8b2 --- /dev/null +++ b/deps/lightening/tests/ldxi_l.c @@ -0,0 +1,29 @@ +#include "test.h" + +static void +run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size) +{ +#if __WORDSIZE > 32 + static uint64_t data[] = { 0xffffffffffffffff, 0, 0x4242424212345678 }; + + jit_begin(j, arena_base, arena_size); + size_t align = jit_enter_jit_abi(j, 0, 0, 0); + jit_load_args_1(j, jit_operand_gpr (JIT_OPERAND_ABI_WORD, JIT_R0)); + + jit_ldxi_l(j, JIT_R0, JIT_R0, (uintptr_t)data); + jit_leave_jit_abi(j, 0, 0, align); + jit_retr(j, JIT_R0); + + jit_uword_t (*f)(jit_uword_t) = jit_end(j, NULL); + + ASSERT(f(0) == -1); + ASSERT(f(8) == 0); + ASSERT(f(16) == data[2]); +#endif +} + +int +main (int argc, char *argv[]) +{ + return main_helper(argc, argv, run_test); +} diff --git a/deps/lightening/tests/ldxi_s.c b/deps/lightening/tests/ldxi_s.c new file mode 100644 index 0000000..c9376d0 --- /dev/null +++ b/deps/lightening/tests/ldxi_s.c @@ -0,0 +1,27 @@ +#include "test.h" + +static uint16_t data[] = { 0xffff, 0x0000, 0x4242 }; + +static void +run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size) +{ + jit_begin(j, arena_base, arena_size); + size_t align = jit_enter_jit_abi(j, 0, 0, 0); + jit_load_args_1(j, jit_operand_gpr (JIT_OPERAND_ABI_WORD, JIT_R0)); + + jit_ldxi_s(j, JIT_R0, JIT_R0, (uintptr_t)data); + jit_leave_jit_abi(j, 0, 0, align); + jit_retr(j, JIT_R0); + + jit_uword_t (*f)(jit_uword_t) = jit_end(j, NULL); + + ASSERT(f(0) == -1); + ASSERT(f(2) == 0); + ASSERT(f(4) == 0x4242); +} + +int +main (int argc, char *argv[]) +{ + return main_helper(argc, argv, run_test); +} diff --git a/deps/lightening/tests/ldxi_uc.c b/deps/lightening/tests/ldxi_uc.c new file mode 100644 index 0000000..31d7b73 --- /dev/null +++ b/deps/lightening/tests/ldxi_uc.c @@ -0,0 +1,27 @@ +#include "test.h" + +static uint8_t data[] = { 0xff, 0x00, 0x42 }; 
+ +static void +run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size) +{ + jit_begin(j, arena_base, arena_size); + size_t align = jit_enter_jit_abi(j, 0, 0, 0); + jit_load_args_1(j, jit_operand_gpr (JIT_OPERAND_ABI_WORD, JIT_R0)); + + jit_ldxi_uc(j, JIT_R0, JIT_R0, (uintptr_t)data); + jit_leave_jit_abi(j, 0, 0, align); + jit_retr(j, JIT_R0); + + jit_uword_t (*f)(jit_uword_t) = jit_end(j, NULL); + + ASSERT(f(0) == 0xff); + ASSERT(f(1) == 0); + ASSERT(f(2) == 0x42); +} + +int +main (int argc, char *argv[]) +{ + return main_helper(argc, argv, run_test); +} diff --git a/deps/lightening/tests/ldxi_ui.c b/deps/lightening/tests/ldxi_ui.c new file mode 100644 index 0000000..4f7e304 --- /dev/null +++ b/deps/lightening/tests/ldxi_ui.c @@ -0,0 +1,29 @@ +#include "test.h" + +static void +run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size) +{ +#if __WORDSIZE > 32 + static uint32_t data[] = { 0xffffffff, 0x00000000, 0x42424242 }; + + jit_begin(j, arena_base, arena_size); + size_t align = jit_enter_jit_abi(j, 0, 0, 0); + jit_load_args_1(j, jit_operand_gpr (JIT_OPERAND_ABI_WORD, JIT_R0)); + + jit_ldxi_ui(j, JIT_R0, JIT_R0, (uintptr_t)data); + jit_leave_jit_abi(j, 0, 0, align); + jit_retr(j, JIT_R0); + + jit_uword_t (*f)(jit_uword_t) = jit_end(j, NULL); + + ASSERT(f(0) == data[0]); + ASSERT(f(4) == data[1]); + ASSERT(f(8) == data[2]); +#endif +} + +int +main (int argc, char *argv[]) +{ + return main_helper(argc, argv, run_test); +} diff --git a/deps/lightening/tests/ldxi_us.c b/deps/lightening/tests/ldxi_us.c new file mode 100644 index 0000000..81c984f --- /dev/null +++ b/deps/lightening/tests/ldxi_us.c @@ -0,0 +1,27 @@ +#include "test.h" + +static uint16_t data[] = { 0xffff, 0x0000, 0x4242 }; + +static void +run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size) +{ + jit_begin(j, arena_base, arena_size); + size_t align = jit_enter_jit_abi(j, 0, 0, 0); + jit_load_args_1(j, jit_operand_gpr (JIT_OPERAND_ABI_WORD, JIT_R0)); + + jit_ldxi_us(j, JIT_R0, 
JIT_R0, (uintptr_t)data); + jit_leave_jit_abi(j, 0, 0, align); + jit_retr(j, JIT_R0); + + jit_uword_t (*f)(jit_uword_t) = jit_end(j, NULL); + + ASSERT(f(0) == data[0]); + ASSERT(f(2) == data[1]); + ASSERT(f(4) == data[2]); +} + +int +main (int argc, char *argv[]) +{ + return main_helper(argc, argv, run_test); +} diff --git a/deps/lightening/tests/ldxr_c.c b/deps/lightening/tests/ldxr_c.c new file mode 100644 index 0000000..366f5b2 --- /dev/null +++ b/deps/lightening/tests/ldxr_c.c @@ -0,0 +1,28 @@ +#include "test.h" + +static uint8_t data[] = { 0xff, 0x00, 0x42 }; + +static void +run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size) +{ + jit_begin(j, arena_base, arena_size); + size_t align = jit_enter_jit_abi(j, 0, 0, 0); + jit_load_args_2(j, jit_operand_gpr (JIT_OPERAND_ABI_POINTER, JIT_R0), + jit_operand_gpr (JIT_OPERAND_ABI_WORD, JIT_R1)); + + jit_ldxr_c(j, JIT_R0, JIT_R0, JIT_R1); + jit_leave_jit_abi(j, 0, 0, align); + jit_retr(j, JIT_R0); + + jit_uword_t (*f)(void*, jit_uword_t) = jit_end(j, NULL); + + ASSERT(f(data, 0) == -1); + ASSERT(f(data, 1) == 0); + ASSERT(f(data, 2) == 0x42); +} + +int +main (int argc, char *argv[]) +{ + return main_helper(argc, argv, run_test); +} diff --git a/deps/lightening/tests/ldxr_d.c b/deps/lightening/tests/ldxr_d.c new file mode 100644 index 0000000..38a12fd --- /dev/null +++ b/deps/lightening/tests/ldxr_d.c @@ -0,0 +1,28 @@ +#include "test.h" + +static double data[] = { -1.0, 0.0, 0.5 }; + +static void +run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size) +{ + jit_begin(j, arena_base, arena_size); + size_t align = jit_enter_jit_abi(j, 0, 0, 0); + jit_load_args_2(j, jit_operand_gpr (JIT_OPERAND_ABI_POINTER, JIT_R0), + jit_operand_gpr (JIT_OPERAND_ABI_WORD, JIT_R1)); + + jit_ldxr_d(j, JIT_F0, JIT_R0, JIT_R1); + jit_leave_jit_abi(j, 0, 0, align); + jit_retr_d(j, JIT_F0); + + double (*f)(void*, jit_uword_t) = jit_end(j, NULL); + + ASSERT(f(data, 0) == data[0]); + ASSERT(f(data, 8) == data[1]); + 
ASSERT(f(data, 16) == data[2]); +} + +int +main (int argc, char *argv[]) +{ + return main_helper(argc, argv, run_test); +} diff --git a/deps/lightening/tests/ldxr_f.c b/deps/lightening/tests/ldxr_f.c new file mode 100644 index 0000000..c48b11f --- /dev/null +++ b/deps/lightening/tests/ldxr_f.c @@ -0,0 +1,28 @@ +#include "test.h" + +static float data[] = { -1.0, 0.0, 0.5 }; + +static void +run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size) +{ + jit_begin(j, arena_base, arena_size); + size_t align = jit_enter_jit_abi(j, 0, 0, 0); + jit_load_args_2(j, jit_operand_gpr (JIT_OPERAND_ABI_POINTER, JIT_R0), + jit_operand_gpr (JIT_OPERAND_ABI_WORD, JIT_R1)); + + jit_ldxr_f(j, JIT_F0, JIT_R0, JIT_R1); + jit_leave_jit_abi(j, 0, 0, align); + jit_retr_f(j, JIT_F0); + + float (*f)(void*, jit_uword_t) = jit_end(j, NULL); + + ASSERT(f(data, 0) == data[0]); + ASSERT(f(data, 4) == data[1]); + ASSERT(f(data, 8) == data[2]); +} + +int +main (int argc, char *argv[]) +{ + return main_helper(argc, argv, run_test); +} diff --git a/deps/lightening/tests/ldxr_i.c b/deps/lightening/tests/ldxr_i.c new file mode 100644 index 0000000..e4149aa --- /dev/null +++ b/deps/lightening/tests/ldxr_i.c @@ -0,0 +1,28 @@ +#include "test.h" + +static uint32_t data[] = { 0xffffffff, 0x00000000, 0x42424242 }; + +static void +run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size) +{ + jit_begin(j, arena_base, arena_size); + size_t align = jit_enter_jit_abi(j, 0, 0, 0); + jit_load_args_2(j, jit_operand_gpr (JIT_OPERAND_ABI_POINTER, JIT_R0), + jit_operand_gpr (JIT_OPERAND_ABI_WORD, JIT_R1)); + + jit_ldxr_i(j, JIT_R0, JIT_R0, JIT_R1); + jit_leave_jit_abi(j, 0, 0, align); + jit_retr(j, JIT_R0); + + jit_uword_t (*f)(void*, jit_uword_t) = jit_end(j, NULL); + + ASSERT(f(data, 0) == -1); + ASSERT(f(data, 4) == 0); + ASSERT(f(data, 8) == 0x42424242); +} + +int +main (int argc, char *argv[]) +{ + return main_helper(argc, argv, run_test); +} diff --git a/deps/lightening/tests/ldxr_l.c 
b/deps/lightening/tests/ldxr_l.c new file mode 100644 index 0000000..ee9f156 --- /dev/null +++ b/deps/lightening/tests/ldxr_l.c @@ -0,0 +1,30 @@ +#include "test.h" + +static void +run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size) +{ +#if __WORDSIZE > 32 + static uint64_t data[] = { 0xffffffffffffffff, 0, 0x4242424212345678 }; + + jit_begin(j, arena_base, arena_size); + size_t align = jit_enter_jit_abi(j, 0, 0, 0); + jit_load_args_2(j, jit_operand_gpr (JIT_OPERAND_ABI_POINTER, JIT_R0), + jit_operand_gpr (JIT_OPERAND_ABI_WORD, JIT_R1)); + + jit_ldxr_l(j, JIT_R0, JIT_R0, JIT_R1); + jit_leave_jit_abi(j, 0, 0, align); + jit_retr(j, JIT_R0); + + jit_uword_t (*f)(void*, jit_uword_t) = jit_end(j, NULL); + + ASSERT(f(data, 0) == -1); + ASSERT(f(data, 8) == 0); + ASSERT(f(data, 16) == data[2]); +#endif +} + +int +main (int argc, char *argv[]) +{ + return main_helper(argc, argv, run_test); +} diff --git a/deps/lightening/tests/ldxr_s.c b/deps/lightening/tests/ldxr_s.c new file mode 100644 index 0000000..fbb5c09 --- /dev/null +++ b/deps/lightening/tests/ldxr_s.c @@ -0,0 +1,28 @@ +#include "test.h" + +static uint16_t data[] = { 0xffff, 0x0000, 0x4242 }; + +static void +run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size) +{ + jit_begin(j, arena_base, arena_size); + size_t align = jit_enter_jit_abi(j, 0, 0, 0); + jit_load_args_2(j, jit_operand_gpr (JIT_OPERAND_ABI_POINTER, JIT_R0), + jit_operand_gpr (JIT_OPERAND_ABI_WORD, JIT_R1)); + + jit_ldxr_s(j, JIT_R0, JIT_R0, JIT_R1); + jit_leave_jit_abi(j, 0, 0, align); + jit_retr(j, JIT_R0); + + jit_uword_t (*f)(void*, jit_uword_t) = jit_end(j, NULL); + + ASSERT(f(data, 0) == -1); + ASSERT(f(data, 2) == 0); + ASSERT(f(data, 4) == 0x4242); +} + +int +main (int argc, char *argv[]) +{ + return main_helper(argc, argv, run_test); +} diff --git a/deps/lightening/tests/ldxr_uc.c b/deps/lightening/tests/ldxr_uc.c new file mode 100644 index 0000000..846c552 --- /dev/null +++ b/deps/lightening/tests/ldxr_uc.c @@ -0,0 
+1,28 @@ +#include "test.h" + +static uint8_t data[] = { 0xff, 0x00, 0x42 }; + +static void +run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size) +{ + jit_begin(j, arena_base, arena_size); + size_t align = jit_enter_jit_abi(j, 0, 0, 0); + jit_load_args_2(j, jit_operand_gpr (JIT_OPERAND_ABI_POINTER, JIT_R0), + jit_operand_gpr (JIT_OPERAND_ABI_WORD, JIT_R1)); + + jit_ldxr_uc(j, JIT_R0, JIT_R0, JIT_R1); + jit_leave_jit_abi(j, 0, 0, align); + jit_retr(j, JIT_R0); + + jit_uword_t (*f)(void*, jit_uword_t) = jit_end(j, NULL); + + ASSERT(f(data, 0) == 0xff); + ASSERT(f(data, 1) == 0); + ASSERT(f(data, 2) == 0x42); +} + +int +main (int argc, char *argv[]) +{ + return main_helper(argc, argv, run_test); +} diff --git a/deps/lightening/tests/ldxr_ui.c b/deps/lightening/tests/ldxr_ui.c new file mode 100644 index 0000000..cd774d3 --- /dev/null +++ b/deps/lightening/tests/ldxr_ui.c @@ -0,0 +1,30 @@ +#include "test.h" + +static void +run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size) +{ +#if __WORDSIZE > 32 + static uint32_t data[] = { 0xffffffff, 0x00000000, 0x42424242 }; + + jit_begin(j, arena_base, arena_size); + size_t align = jit_enter_jit_abi(j, 0, 0, 0); + jit_load_args_2(j, jit_operand_gpr (JIT_OPERAND_ABI_POINTER, JIT_R0), + jit_operand_gpr (JIT_OPERAND_ABI_WORD, JIT_R1)); + + jit_ldxr_ui(j, JIT_R0, JIT_R0, JIT_R1); + jit_leave_jit_abi(j, 0, 0, align); + jit_retr(j, JIT_R0); + + jit_uword_t (*f)(void*, jit_uword_t) = jit_end(j, NULL); + + ASSERT(f(data, 0) == data[0]); + ASSERT(f(data, 4) == data[1]); + ASSERT(f(data, 8) == data[2]); +#endif +} + +int +main (int argc, char *argv[]) +{ + return main_helper(argc, argv, run_test); +} diff --git a/deps/lightening/tests/ldxr_us.c b/deps/lightening/tests/ldxr_us.c new file mode 100644 index 0000000..b7e408b --- /dev/null +++ b/deps/lightening/tests/ldxr_us.c @@ -0,0 +1,28 @@ +#include "test.h" + +static uint16_t data[] = { 0xffff, 0x0000, 0x4242 }; + +static void +run_test(jit_state_t *j, uint8_t 
*arena_base, size_t arena_size) +{ + jit_begin(j, arena_base, arena_size); + size_t align = jit_enter_jit_abi(j, 0, 0, 0); + jit_load_args_2(j, jit_operand_gpr (JIT_OPERAND_ABI_POINTER, JIT_R0), + jit_operand_gpr (JIT_OPERAND_ABI_WORD, JIT_R1)); + + jit_ldxr_us(j, JIT_R0, JIT_R0, JIT_R1); + jit_leave_jit_abi(j, 0, 0, align); + jit_retr(j, JIT_R0); + + jit_uword_t (*f)(void*, jit_uword_t) = jit_end(j, NULL); + + ASSERT(f(data, 0) == data[0]); + ASSERT(f(data, 2) == data[1]); + ASSERT(f(data, 4) == data[2]); +} + +int +main (int argc, char *argv[]) +{ + return main_helper(argc, argv, run_test); +} diff --git a/deps/lightening/tests/link-register.c b/deps/lightening/tests/link-register.c new file mode 100644 index 0000000..96ee959 --- /dev/null +++ b/deps/lightening/tests/link-register.c @@ -0,0 +1,35 @@ +#include "test.h" + +static void +run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size) +{ + jit_begin(j, arena_base, arena_size); + size_t align = jit_enter_jit_abi(j, 0, 0, 0); + jit_load_args_1(j, jit_operand_gpr (JIT_OPERAND_ABI_POINTER, JIT_R0)); + + jit_reloc_t call_tramp = jit_jmp (j); + + void *tramp = jit_address (j); + jit_pop_link_register (j); + jit_movr (j, JIT_R0, JIT_LR); + jit_leave_jit_abi(j, 0, 0, align); + jit_retr (j, JIT_R0); + + jit_patch_here (j, call_tramp); + jit_jmpi_with_link (j, tramp); + + void *expected_link = jit_address_to_function_pointer (jit_address (j)); + + size_t size = 0; + void* ret = jit_end(j, &size); + + void* (*f)(void) = ret; + + ASSERT(f() == expected_link); +} + +int +main (int argc, char *argv[]) +{ + return main_helper(argc, argv, run_test); +} diff --git a/deps/lightening/tests/lshi.c b/deps/lightening/tests/lshi.c new file mode 100644 index 0000000..e721af5 --- /dev/null +++ b/deps/lightening/tests/lshi.c @@ -0,0 +1,27 @@ +#include "test.h" + +static void +run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size) +{ + jit_begin(j, arena_base, arena_size); + size_t align = jit_enter_jit_abi(j, 0, 0, 
0); + jit_load_args_1(j, jit_operand_gpr (JIT_OPERAND_ABI_WORD, JIT_R0)); + + jit_lshi(j, JIT_R0, JIT_R0, 31); + jit_leave_jit_abi(j, 0, 0, align); + jit_retr(j, JIT_R0); + + jit_word_t (*f)(jit_word_t) = jit_end(j, NULL); + +#if __WORDSIZE == 32 + ASSERT(f(-0x7f) == 0x80000000); +#else + ASSERT(f(-0x7f) == 0xffffffc080000000); +#endif +} + +int +main (int argc, char *argv[]) +{ + return main_helper(argc, argv, run_test); +} diff --git a/deps/lightening/tests/lshr.c b/deps/lightening/tests/lshr.c new file mode 100644 index 0000000..f81aa69 --- /dev/null +++ b/deps/lightening/tests/lshr.c @@ -0,0 +1,69 @@ +#include "test.h" + +static void +run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size) +{ + jit_begin(j, arena_base, arena_size); + size_t align = jit_enter_jit_abi(j, 0, 0, 0); + jit_load_args_2(j, jit_operand_gpr (JIT_OPERAND_ABI_WORD, JIT_R0), + jit_operand_gpr (JIT_OPERAND_ABI_WORD, JIT_R1)); + + jit_lshr(j, JIT_R0, JIT_R0, JIT_R1); + jit_leave_jit_abi(j, 0, 0, align); + jit_retr(j, JIT_R0); + + size_t size = 0; + void* ret = jit_end(j, &size); + + jit_word_t (*f)(jit_word_t, jit_word_t) = ret; + + ASSERT(f(0x7f, 1) == 0xfe); + ASSERT(f(0x7fff, 2) == 0x1fffc); + ASSERT(f(0x81, 16) == 0x810000); + ASSERT(f(0xff, 15) == 0x7f8000); + ASSERT(f(0x7fffffff, 0) == 0x7fffffff); +#if __WORDSIZE == 32 + ASSERT(f(0xffffffff, 8) == 0xffffff00); + ASSERT(f(0x7fffffff, 3) == 0xfffffff8); + ASSERT(f(-0x7f, 31) == 0x80000000); + ASSERT(f(-0x7fff, 30) == 0x40000000); + ASSERT(f(-0x7fffffff, 29) == 0x20000000); + ASSERT(f(0x80000001, 28) == 0x10000000); + ASSERT(f(0x8001, 17) == 0x20000); + ASSERT(f(0x80000001, 18) == 0x40000); + ASSERT(f(-0xffff, 24) == 0x1000000); +#else + ASSERT(f(0xffffffff, 8) == 0xffffffff00); + ASSERT(f(0x7fffffff, 3) == 0x3fffffff8); + ASSERT(f(-0x7f, 31) == 0xffffffc080000000); + ASSERT(f(-0x7fff, 30) == 0xffffe00040000000); + ASSERT(f(-0x7fffffff, 29) == 0xf000000020000000); + ASSERT(f(0x80000001, 28) == 0x800000010000000); + 
ASSERT(f(0x8001, 17) == 0x100020000); + ASSERT(f(0x80000001, 18) == 0x2000000040000); + ASSERT(f(-0xffff, 24) == 0xffffff0001000000); + ASSERT(f(0x7f, 33) == 0xfe00000000); + ASSERT(f(0x7ffff, 34) == 0x1ffffc00000000); + ASSERT(f(0x7fffffff, 35) == 0xfffffff800000000); + ASSERT(f(-0x7f, 63) == 0x8000000000000000); + ASSERT(f(-0x7fff, 62) == 0x4000000000000000); + ASSERT(f(-0x7fffffff, 61) == 0x2000000000000000); + ASSERT(f(0x80000001, 60) == 0x1000000000000000); + ASSERT(f(0x81, 48) == 0x81000000000000); + ASSERT(f(0x8001, 49) == 0x2000000000000); + ASSERT(f(0x80000001, 40) == 0x10000000000); + ASSERT(f(0xff, 47) == 0x7f800000000000); + ASSERT(f(0xffff0001, 56) == 0x100000000000000); + ASSERT(f(0xffffffff, 40) == 0xffffff0000000000); + ASSERT(f(0x7fffffffff, 33) == 0xfffffffe00000000); + ASSERT(f(-0x7fffffffff, 63) == 0x8000000000000000); + ASSERT(f(0x8000000001, 48) == 0x1000000000000); + ASSERT(f(0xffffffffff, 47) == 0xffff800000000000); +#endif +} + +int +main (int argc, char *argv[]) +{ + return main_helper(argc, argv, run_test); +} diff --git a/deps/lightening/tests/mov_addr.c b/deps/lightening/tests/mov_addr.c new file mode 100644 index 0000000..b4a9aaa --- /dev/null +++ b/deps/lightening/tests/mov_addr.c @@ -0,0 +1,25 @@ +#include "test.h" + +static uint64_t thing = 0x123456789abcdef0; + +static void +run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size) +{ + jit_begin(j, arena_base, arena_size); + size_t align = jit_enter_jit_abi(j, 0, 0, 0); + + jit_patch_there(j, jit_mov_addr(j, JIT_R0), &thing); + jit_leave_jit_abi(j, 0, 0, align); + jit_retr(j, JIT_R0); + + void* (*f)(void) = jit_end(j, NULL); + + ASSERT(f() == &thing); + ASSERT(*(uint64_t*)f() == thing); +} + +int +main (int argc, char *argv[]) +{ + return main_helper(argc, argv, run_test); +} diff --git a/deps/lightening/tests/movi.c b/deps/lightening/tests/movi.c new file mode 100644 index 0000000..fcdd656 --- /dev/null +++ b/deps/lightening/tests/movi.c @@ -0,0 +1,22 @@ +#include "test.h" 
+ +static void +run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size) +{ + jit_begin(j, arena_base, arena_size); + size_t align = jit_enter_jit_abi(j, 0, 0, 0); + + jit_movi(j, JIT_R0, 0xa500a500); + jit_leave_jit_abi(j, 0, 0, align); + jit_retr(j, JIT_R0); + + jit_uword_t (*f)(void) = jit_end(j, NULL); + + ASSERT(f() == 0xa500a500); +} + +int +main (int argc, char *argv[]) +{ + return main_helper(argc, argv, run_test); +} diff --git a/deps/lightening/tests/movi_d.c b/deps/lightening/tests/movi_d.c new file mode 100644 index 0000000..cb9e63d --- /dev/null +++ b/deps/lightening/tests/movi_d.c @@ -0,0 +1,22 @@ +#include "test.h" + +static void +run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size) +{ + jit_begin(j, arena_base, arena_size); + size_t align = jit_enter_jit_abi(j, 0, 0, 0); + + jit_movi_d(j, JIT_F0, 3.14159); + jit_leave_jit_abi(j, 0, 0, align); + jit_retr_d(j, JIT_F0); + + double (*f)(void) = jit_end(j, NULL); + + ASSERT(f() == 3.14159); +} + +int +main (int argc, char *argv[]) +{ + return main_helper(argc, argv, run_test); +} diff --git a/deps/lightening/tests/movi_f.c b/deps/lightening/tests/movi_f.c new file mode 100644 index 0000000..944f615 --- /dev/null +++ b/deps/lightening/tests/movi_f.c @@ -0,0 +1,22 @@ +#include "test.h" + +static void +run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size) +{ + jit_begin(j, arena_base, arena_size); + size_t align = jit_enter_jit_abi(j, 0, 0, 0); + + jit_movi_f(j, JIT_F0, 3.14159f); + jit_leave_jit_abi(j, 0, 0, align); + jit_retr_f(j, JIT_F0); + + float (*f)(void) = jit_end(j, NULL); + + ASSERT(f() == 3.14159f); +} + +int +main (int argc, char *argv[]) +{ + return main_helper(argc, argv, run_test); +} diff --git a/deps/lightening/tests/mulr.c b/deps/lightening/tests/mulr.c new file mode 100644 index 0000000..452e35d --- /dev/null +++ b/deps/lightening/tests/mulr.c @@ -0,0 +1,64 @@ +#include "test.h" + +static void +run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size) 
+{ + jit_begin(j, arena_base, arena_size); + size_t align = jit_enter_jit_abi(j, 0, 0, 0); + jit_load_args_2(j, jit_operand_gpr (JIT_OPERAND_ABI_WORD, JIT_R0), + jit_operand_gpr (JIT_OPERAND_ABI_WORD, JIT_R1)); + + jit_mulr(j, JIT_R0, JIT_R0, JIT_R1); + jit_leave_jit_abi(j, 0, 0, align); + jit_retr(j, JIT_R0); + + size_t size = 0; + void* ret = jit_end(j, &size); + + jit_word_t (*f)(jit_word_t, jit_word_t) = ret; + + ASSERT(f(0x7fffffff, 1) == 0x7fffffff); + ASSERT(f(1, 0x7fffffff) == 0x7fffffff); + ASSERT(f(0x80000000, 1) == 0x80000000); + ASSERT(f(1, 0x80000000) == 0x80000000); + ASSERT(f(0x7fffffff, 2) == 0xfffffffe); + ASSERT(f(2, 0x7fffffff) == 0xfffffffe); + ASSERT(f(0x7fffffff, 0) == 0); + ASSERT(f(0, 0x7fffffff) == 0); +#if __WORDSIZE == 32 + ASSERT(f(0x80000000, 2) == 0); + ASSERT(f(2, 0x80000000) == 0); + ASSERT(f(0x7fffffff, 0x80000000) == 0x80000000); + ASSERT(f(0x80000000, 0x7fffffff) == 0x80000000); + ASSERT(f(0x7fffffff, 0xffffffff) == 0x80000001); + ASSERT(f(0xffffffff, 0x7fffffff) == 0x80000001); + ASSERT(f(0xffffffff, 0xffffffff) == 1); +#else + ASSERT(f(0x80000000, 2) == 0x100000000); + ASSERT(f(2, 0x80000000) == 0x100000000); + ASSERT(f(0x7fffffff, 0x80000000) == 0x3fffffff80000000); + ASSERT(f(0x80000000, 0x7fffffff) == 0x3fffffff80000000); + ASSERT(f(0x7fffffff, 0xffffffff) == 0x7ffffffe80000001); + ASSERT(f(0xffffffff, 0x7fffffff) == 0x7ffffffe80000001); + ASSERT(f(0xffffffff, 0xffffffff) == 0xfffffffe00000001); + ASSERT(f(0x7fffffffffffffff, 1) == 0x7fffffffffffffff); + ASSERT(f(1, 0x7fffffffffffffff) == 0x7fffffffffffffff); + ASSERT(f(0x8000000000000000, 1) == 0x8000000000000000); + ASSERT(f(1, 0x8000000000000000) == 0x8000000000000000); + ASSERT(f(0x7fffffffffffffff, 2) == 0xfffffffffffffffe); + ASSERT(f(2, 0x7fffffffffffffff) == 0xfffffffffffffffe); + ASSERT(f(0x8000000000000000, 2) == 0); + ASSERT(f(2, 0x8000000000000000) == 0); + ASSERT(f(0x7fffffffffffffff, 0x8000000000000000) == 0x8000000000000000); + ASSERT(f(0x8000000000000000, 
0x7fffffffffffffff) == 0x8000000000000000); + ASSERT(f(0x7fffffffffffffff, 0xffffffffffffffff) == 0x8000000000000001); + ASSERT(f(0xffffffffffffffff, 0x7fffffffffffffff) == 0x8000000000000001); + ASSERT(f(0xffffffffffffffff, 0xffffffffffffffff) == 1); +#endif +} + +int +main (int argc, char *argv[]) +{ + return main_helper(argc, argv, run_test); +} diff --git a/deps/lightening/tests/mulr_d.c b/deps/lightening/tests/mulr_d.c new file mode 100644 index 0000000..945f152 --- /dev/null +++ b/deps/lightening/tests/mulr_d.c @@ -0,0 +1,27 @@ +#include "test.h" + +static void +run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size) +{ + jit_begin(j, arena_base, arena_size); + size_t align = jit_enter_jit_abi(j, 0, 0, 0); + jit_load_args_2(j, jit_operand_fpr (JIT_OPERAND_ABI_DOUBLE, JIT_F0), + jit_operand_fpr (JIT_OPERAND_ABI_DOUBLE, JIT_F1)); + + jit_mulr_d(j, JIT_F0, JIT_F0, JIT_F1); + jit_leave_jit_abi(j, 0, 0, align); + jit_retr_d(j, JIT_F0); + + size_t size = 0; + void* ret = jit_end(j, &size); + + double (*f)(double, double) = ret; + ASSERT(f(-0.5, 0.5) == -0.25); + ASSERT(f(0.25, 0.75) == 0.1875); +} + +int +main (int argc, char *argv[]) +{ + return main_helper(argc, argv, run_test); +} diff --git a/deps/lightening/tests/mulr_f.c b/deps/lightening/tests/mulr_f.c new file mode 100644 index 0000000..2d0dd4f --- /dev/null +++ b/deps/lightening/tests/mulr_f.c @@ -0,0 +1,27 @@ +#include "test.h" + +static void +run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size) +{ + jit_begin(j, arena_base, arena_size); + size_t align = jit_enter_jit_abi(j, 0, 0, 0); + jit_load_args_2(j, jit_operand_fpr (JIT_OPERAND_ABI_FLOAT, JIT_F0), + jit_operand_fpr (JIT_OPERAND_ABI_FLOAT, JIT_F1)); + + jit_mulr_f(j, JIT_F0, JIT_F0, JIT_F1); + jit_leave_jit_abi(j, 0, 0, align); + jit_retr_f(j, JIT_F0); + + size_t size = 0; + void* ret = jit_end(j, &size); + + float (*f)(float, float) = ret; + ASSERT(f(-0.5f, 0.5f) == -0.25f); + ASSERT(f(0.25f, 0.75f) == 0.1875f); +} + +int +main 
(int argc, char *argv[]) +{ + return main_helper(argc, argv, run_test); +} diff --git a/deps/lightening/tests/negr.c b/deps/lightening/tests/negr.c new file mode 100644 index 0000000..18e27cb --- /dev/null +++ b/deps/lightening/tests/negr.c @@ -0,0 +1,39 @@ +#include "test.h" + +static void +run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size) +{ + jit_begin(j, arena_base, arena_size); + size_t align = jit_enter_jit_abi(j, 0, 0, 0); + jit_load_args_1(j, jit_operand_gpr (JIT_OPERAND_ABI_WORD, JIT_R0)); + + jit_negr(j, JIT_R0, JIT_R0); + jit_leave_jit_abi(j, 0, 0, align); + jit_retr(j, JIT_R0); + + jit_word_t (*f)(jit_word_t) = jit_end(j, NULL); + + ASSERT(f(0) == 0); +#if __WORDSIZE == 32 + ASSERT(f(1) == 0xffffffff); + ASSERT(f(0xffffffff) == 1); + ASSERT(f(0x80000000) == 0x80000000); + ASSERT(f(0x7fffffff) == 0x80000001); + ASSERT(f(0x80000001) == 0x7fffffff); +#else + ASSERT(f(1) == 0xffffffffffffffff); + ASSERT(f(0xffffffff) == 0xffffffff00000001); + ASSERT(f(0x80000000) == 0xffffffff80000000); + ASSERT(f(0x7fffffff) == 0xffffffff80000001); + ASSERT(f(0x80000001) == 0xffffffff7fffffff); + ASSERT(f(0xffffffffffffffff) == 1); + ASSERT(f(0x8000000000000000) == 0x8000000000000000); + ASSERT(f(0x7fffffffffffffff) == 0x8000000000000001); +#endif +} + +int +main (int argc, char *argv[]) +{ + return main_helper(argc, argv, run_test); +} diff --git a/deps/lightening/tests/negr_d.c b/deps/lightening/tests/negr_d.c new file mode 100644 index 0000000..d0e168b --- /dev/null +++ b/deps/lightening/tests/negr_d.c @@ -0,0 +1,26 @@ +#include "test.h" + +static void +run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size) +{ + jit_begin(j, arena_base, arena_size); + size_t align = jit_enter_jit_abi(j, 0, 0, 0); + jit_load_args_1(j, jit_operand_fpr (JIT_OPERAND_ABI_DOUBLE, JIT_F0)); + + jit_negr_d(j, JIT_F0, JIT_F0); + jit_leave_jit_abi(j, 0, 0, align); + jit_retr_d(j, JIT_F0); + + double (*f)(double) = jit_end(j, NULL); + + ASSERT(f(0.0) == -0.0); + 
ASSERT(f(0.5) == -0.5); + ASSERT(f(1.0 / 0.0) == -1.0 / 0.0); + ASSERT(f(-1.25) == 1.25); +} + +int +main (int argc, char *argv[]) +{ + return main_helper(argc, argv, run_test); +} diff --git a/deps/lightening/tests/negr_f.c b/deps/lightening/tests/negr_f.c new file mode 100644 index 0000000..26110d5 --- /dev/null +++ b/deps/lightening/tests/negr_f.c @@ -0,0 +1,26 @@ +#include "test.h" + +static void +run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size) +{ + jit_begin(j, arena_base, arena_size); + size_t align = jit_enter_jit_abi(j, 0, 0, 0); + jit_load_args_1(j, jit_operand_fpr (JIT_OPERAND_ABI_FLOAT, JIT_F0)); + + jit_negr_f(j, JIT_F0, JIT_F0); + jit_leave_jit_abi(j, 0, 0, align); + jit_retr_f(j, JIT_F0); + + float (*f)(float) = jit_end(j, NULL); + + ASSERT(f(0.0f) == -0.0f); + ASSERT(f(0.5f) == -0.5f); + ASSERT(f(1.0f / 0.0f) == -1.0f / 0.0f); + ASSERT(f(-1.25f) == 1.25f); +} + +int +main (int argc, char *argv[]) +{ + return main_helper(argc, argv, run_test); +} diff --git a/deps/lightening/tests/ori.c b/deps/lightening/tests/ori.c new file mode 100644 index 0000000..6310185 --- /dev/null +++ b/deps/lightening/tests/ori.c @@ -0,0 +1,31 @@ +#include "test.h" + +static void +run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size) +{ + jit_begin(j, arena_base, arena_size); + size_t align = jit_enter_jit_abi(j, 0, 0, 0); + jit_load_args_1(j, jit_operand_gpr (JIT_OPERAND_ABI_WORD, JIT_R0)); + + jit_ori(j, JIT_R0, JIT_R0, 1); + jit_leave_jit_abi(j, 0, 0, align); + jit_retr(j, JIT_R0); + + size_t size = 0; + void* ret = jit_end(j, &size); + + jit_word_t (*f)(jit_word_t) = ret; + + ASSERT(f(0x7fffffff) == 0x7fffffff); + ASSERT(f(0x80000000) == 0x80000001); +#if __WORDSIZE == 64 + ASSERT(f(0x7fffffffffffffff) == 0x7fffffffffffffff); + ASSERT(f(0x8000000000000000) == 0x8000000000000001); +#endif +} + +int +main (int argc, char *argv[]) +{ + return main_helper(argc, argv, run_test); +} diff --git a/deps/lightening/tests/orr.c 
b/deps/lightening/tests/orr.c new file mode 100644 index 0000000..5a9087a --- /dev/null +++ b/deps/lightening/tests/orr.c @@ -0,0 +1,48 @@ +#include "test.h" + +static void +run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size) +{ + jit_begin(j, arena_base, arena_size); + size_t align = jit_enter_jit_abi(j, 0, 0, 0); + jit_load_args_2(j, jit_operand_gpr (JIT_OPERAND_ABI_WORD, JIT_R0), + jit_operand_gpr (JIT_OPERAND_ABI_WORD, JIT_R1)); + + jit_orr(j, JIT_R0, JIT_R0, JIT_R1); + jit_leave_jit_abi(j, 0, 0, align); + jit_retr(j, JIT_R0); + + size_t size = 0; + void* ret = jit_end(j, &size); + + jit_word_t (*f)(jit_word_t, jit_word_t) = ret; + + ASSERT(f(0x7fffffff, 1) == 0x7fffffff); + ASSERT(f(1, 0x7fffffff) == 0x7fffffff); + ASSERT(f(0x80000000, 1) == 0x80000001); + ASSERT(f(1, 0x80000000) == 0x80000001); + ASSERT(f(0x7fffffff, 0x80000000) == 0xffffffff); + ASSERT(f(0x80000000, 0x7fffffff) == 0xffffffff); + ASSERT(f(0x7fffffff, 0xffffffff) == 0xffffffff); + ASSERT(f(0xffffffff, 0x7fffffff) == 0xffffffff); + ASSERT(f(0xffffffff, 0xffffffff) == 0xffffffff); + ASSERT(f(0x7fffffff, 0) == 0x7fffffff); + ASSERT(f(0, 0x7fffffff) == 0x7fffffff); +#if __WORDSIZE == 64 + ASSERT(f(0x7fffffffffffffff, 1) == 0x7fffffffffffffff); + ASSERT(f(1, 0x7fffffffffffffff) == 0x7fffffffffffffff); + ASSERT(f(0x8000000000000000, 1) == 0x8000000000000001); + ASSERT(f(1, 0x8000000000000000) == 0x8000000000000001); + ASSERT(f(0x7fffffffffffffff, 0x8000000000000000) == 0xffffffffffffffff); + ASSERT(f(0x8000000000000000, 0x7fffffffffffffff) == 0xffffffffffffffff); + ASSERT(f(0x7fffffffffffffff, 0xffffffffffffffff) == 0xffffffffffffffff); + ASSERT(f(0xffffffffffffffff, 0x7fffffffffffffff) == 0xffffffffffffffff); + ASSERT(f(0xffffffffffffffff, 0xffffffffffffffff) == 0xffffffffffffffff); +#endif +} + +int +main (int argc, char *argv[]) +{ + return main_helper(argc, argv, run_test); +} diff --git a/deps/lightening/tests/qdivr.c b/deps/lightening/tests/qdivr.c new file mode 100644 index 
0000000..665053c --- /dev/null +++ b/deps/lightening/tests/qdivr.c @@ -0,0 +1,44 @@ +#include "test.h" + +static void +run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size) +{ + jit_begin(j, arena_base, arena_size); + size_t align = jit_enter_jit_abi(j, 3, 0, 0); + + jit_operand_t args[] = + { jit_operand_gpr (JIT_OPERAND_ABI_POINTER, JIT_R0), + jit_operand_gpr (JIT_OPERAND_ABI_POINTER, JIT_R1), + jit_operand_gpr (JIT_OPERAND_ABI_WORD, JIT_R2), + jit_operand_gpr (JIT_OPERAND_ABI_WORD, JIT_V0) }; + jit_load_args(j, 4, args); + + jit_qdivr(j, JIT_V1, JIT_V2, JIT_R2, JIT_V0); + jit_str(j, JIT_R0, JIT_V1); + jit_str(j, JIT_R1, JIT_V2); + + jit_leave_jit_abi(j, 3, 0, align); + + jit_ret(j); + + size_t size = 0; + void* ret = jit_end(j, &size); + + void (*f)(jit_word_t*, jit_word_t*, jit_word_t, jit_word_t) = ret; + +#define QDIV(a, b, c, d) \ + do { \ + jit_word_t C = 0, D = 0; f(&C, &D, a, b); ASSERT(C == c); ASSERT(D == d); \ + } while (0) + + QDIV(10, 3, 3, 1); + QDIV(-33, 9, -3, -6); + QDIV(-41, -7, 5, -6); + QDIV(65536, 4096, 16, 0); +} + +int +main (int argc, char *argv[]) +{ + return main_helper(argc, argv, run_test); +} diff --git a/deps/lightening/tests/qdivr_u.c b/deps/lightening/tests/qdivr_u.c new file mode 100644 index 0000000..e260193 --- /dev/null +++ b/deps/lightening/tests/qdivr_u.c @@ -0,0 +1,42 @@ +#include "test.h" + +static void +run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size) +{ + jit_begin(j, arena_base, arena_size); + + size_t align = jit_enter_jit_abi(j, 3, 0, 0); + + jit_operand_t args[] = + { jit_operand_gpr (JIT_OPERAND_ABI_POINTER, JIT_R0), + jit_operand_gpr (JIT_OPERAND_ABI_POINTER, JIT_R1), + jit_operand_gpr (JIT_OPERAND_ABI_WORD, JIT_R2), + jit_operand_gpr (JIT_OPERAND_ABI_WORD, JIT_V0) }; + jit_load_args(j, 4, args); + + jit_qdivr_u(j, JIT_V1, JIT_V2, JIT_R2, JIT_V0); + jit_str(j, JIT_R0, JIT_V1); + jit_str(j, JIT_R1, JIT_V2); + + jit_leave_jit_abi(j, 3, 0, align); + + jit_ret(j); + + size_t size = 0; + void* 
ret = jit_end(j, &size); + + void (*f)(jit_word_t*, jit_word_t*, jit_word_t, jit_word_t) = ret; +#define QDIV(a, b, c, d) \ + do { \ + jit_word_t C = 0, D = 0; f(&C, &D, a, b); ASSERT(C == c); ASSERT(D == d); \ + } while (0) + + QDIV(-1, -2, 1, 1); + QDIV(-2, -5, 1, 3); +} + +int +main (int argc, char *argv[]) +{ + return main_helper(argc, argv, run_test); +} diff --git a/deps/lightening/tests/qmulr.c b/deps/lightening/tests/qmulr.c new file mode 100644 index 0000000..1645f5a --- /dev/null +++ b/deps/lightening/tests/qmulr.c @@ -0,0 +1,58 @@ +#include "test.h" + +static void +run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size) +{ + jit_begin(j, arena_base, arena_size); + + size_t align = jit_enter_jit_abi(j, 3, 0, 0); + + jit_operand_t args[] = + { jit_operand_gpr (JIT_OPERAND_ABI_POINTER, JIT_R0), + jit_operand_gpr (JIT_OPERAND_ABI_POINTER, JIT_R1), + jit_operand_gpr (JIT_OPERAND_ABI_WORD, JIT_R2), + jit_operand_gpr (JIT_OPERAND_ABI_WORD, JIT_V0) }; + jit_load_args(j, 4, args); + + jit_qmulr(j, JIT_V1, JIT_V2, JIT_R2, JIT_V0); + jit_str(j, JIT_R0, JIT_V1); + jit_str(j, JIT_R1, JIT_V2); + + jit_leave_jit_abi(j, 3, 0, align); + + jit_ret(j); + + size_t size = 0; + void* ret = jit_end(j, &size); + + void (*f)(jit_word_t*, jit_word_t*, jit_word_t, jit_word_t) = ret; + +#define QMUL(a, b, c, d) \ + do { \ + jit_word_t C = 0, D = 0; f(&C, &D, a, b); ASSERT(C == c); ASSERT(D == d); \ + } while (0) + + QMUL(-2, -1, 2, 0); + QMUL(0, -1, 0, 0); + QMUL(-1, 0, 0, 0); + QMUL(1, -1, -1, -1); +#if __WORDSIZE == 32 + QMUL(0x7ffff, 0x7ffff, 0xfff00001, 0x3f); + QMUL(0x80000000, -2, 0, 1); + QMUL(0x80000000, 2, 0, -1); + QMUL(0x80000001, 3, 0x80000003, -2); + QMUL(0x80000001, -3, 0x7ffffffd, 1); +#else + QMUL(0x7ffffffff, 0x7ffffffff, 0xfffffff000000001, 0x3f); + QMUL(0x8000000000000000, -2, 0, 1); + QMUL(0x8000000000000000, 2, 0, -1); + QMUL(0x8000000000000001, 3, 0x8000000000000003, -2); + QMUL(0x8000000000000001, -3, 0x7ffffffffffffffd, 1); +#endif +} + +int +main 
(int argc, char *argv[]) +{ + return main_helper(argc, argv, run_test); +} diff --git a/deps/lightening/tests/qmulr_u.c b/deps/lightening/tests/qmulr_u.c new file mode 100644 index 0000000..bb1d50d --- /dev/null +++ b/deps/lightening/tests/qmulr_u.c @@ -0,0 +1,46 @@ +#include "test.h" + +static void +run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size) +{ + jit_begin(j, arena_base, arena_size); + + size_t align = jit_enter_jit_abi(j, 3, 0, 0); + + jit_operand_t args[] = + { jit_operand_gpr (JIT_OPERAND_ABI_POINTER, JIT_R0), + jit_operand_gpr (JIT_OPERAND_ABI_POINTER, JIT_R1), + jit_operand_gpr (JIT_OPERAND_ABI_WORD, JIT_R2), + jit_operand_gpr (JIT_OPERAND_ABI_WORD, JIT_V0) }; + jit_load_args(j, 4, args); + + jit_qmulr_u(j, JIT_V1, JIT_V2, JIT_R2, JIT_V0); + jit_str(j, JIT_R0, JIT_V1); + jit_str(j, JIT_R1, JIT_V2); + + jit_leave_jit_abi(j, 3, 0, align); + + jit_ret(j); + + size_t size = 0; + void* ret = jit_end(j, &size); + + void (*f)(jit_word_t*, jit_word_t*, jit_word_t, jit_word_t) = ret; + +#define UQMUL(a, b, c, d) \ + do { \ + jit_word_t C = 0, D = 0; f(&C, &D, a, b); ASSERT(C == c); ASSERT(D == d); \ + } while (0) + +#if __WORDSIZE == 32 + UQMUL(0xffffff, 0xffffff, 0xfe000001, 0xffff); +#else + UQMUL(0xffffffffff, 0xffffffffff, 0xfffffe0000000001, 0xffff); +#endif +} + +int +main (int argc, char *argv[]) +{ + return main_helper(argc, argv, run_test); +} diff --git a/deps/lightening/tests/remr.c b/deps/lightening/tests/remr.c new file mode 100644 index 0000000..805d6fb --- /dev/null +++ b/deps/lightening/tests/remr.c @@ -0,0 +1,60 @@ +#include "test.h" + +static void +run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size) +{ + jit_begin(j, arena_base, arena_size); + size_t align = jit_enter_jit_abi(j, 0, 0, 0); + jit_load_args_2(j, jit_operand_gpr (JIT_OPERAND_ABI_WORD, JIT_R0), + jit_operand_gpr (JIT_OPERAND_ABI_WORD, JIT_R1)); + + jit_remr(j, JIT_R0, JIT_R0, JIT_R1); + jit_leave_jit_abi(j, 0, 0, align); + jit_retr(j, JIT_R0); + + size_t 
size = 0; + void* ret = jit_end(j, &size); + + jit_word_t (*f)(jit_word_t, jit_word_t) = ret; + + ASSERT(f(0x7fffffff, 1) == 0); + ASSERT(f(1, 0x7fffffff) == 1); + ASSERT(f(0x80000000, 1) == 0); + ASSERT(f(1, 0x80000000) == 1); + ASSERT(f(0x7fffffff, 2) == 1); + ASSERT(f(2, 0x7fffffff) == 2); + ASSERT(f(0x80000000, 2) == 0); + ASSERT(f(2, 0x80000000) == 2); + ASSERT(f(0x7fffffff, 0x80000000) == 0x7fffffff); + ASSERT(f(0, 0x7fffffff) == 0); + ASSERT(f(0xffffffff, 0xffffffff) == 0); + +#if __WORDSIZE == 32 + ASSERT(f(0x80000000, 0x7fffffff) == 0xffffffff); + ASSERT(f(0x7fffffff, 0xffffffff) == 0); + ASSERT(f(0xffffffff, 0x7fffffff) == 0xffffffff); +#else + ASSERT(f(0x80000000, 0x7fffffff) == 1); + ASSERT(f(0x7fffffff, 0xffffffff) == 0x7fffffff); + ASSERT(f(0xffffffff, 0x7fffffff) == 1); + ASSERT(f(0x7fffffffffffffff, 1) == 0); + ASSERT(f(1, 0x7fffffffffffffff) == 1); + ASSERT(f(0x8000000000000000, 1) == 0); + ASSERT(f(1, 0x8000000000000000) == 1); + ASSERT(f(0x7fffffffffffffff, 2) == 1); + ASSERT(f(2, 0x7fffffffffffffff) == 2); + ASSERT(f(0x8000000000000000, 2) == 0); + ASSERT(f(2, 0x8000000000000000) == 2); + ASSERT(f(0x7fffffffffffffff, 0x8000000000000000) == 0x7fffffffffffffff); + ASSERT(f(0x8000000000000000, 0x7fffffffffffffff) == 0xffffffffffffffff); + ASSERT(f(0x7fffffffffffffff, 0xffffffffffffffff) == 0); + ASSERT(f(0xffffffffffffffff, 0x7fffffffffffffff) == 0xffffffffffffffff); + ASSERT(f(0xffffffffffffffff, 0xffffffffffffffff) == 0); +#endif +} + +int +main (int argc, char *argv[]) +{ + return main_helper(argc, argv, run_test); +} diff --git a/deps/lightening/tests/remr_u.c b/deps/lightening/tests/remr_u.c new file mode 100644 index 0000000..a9a0178 --- /dev/null +++ b/deps/lightening/tests/remr_u.c @@ -0,0 +1,56 @@ +#include "test.h" + +static void +run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size) +{ + jit_begin(j, arena_base, arena_size); + size_t align = jit_enter_jit_abi(j, 0, 0, 0); + jit_load_args_2(j, jit_operand_gpr 
(JIT_OPERAND_ABI_WORD, JIT_R0), + jit_operand_gpr (JIT_OPERAND_ABI_WORD, JIT_R1)); + + jit_remr_u(j, JIT_R0, JIT_R0, JIT_R1); + jit_leave_jit_abi(j, 0, 0, align); + jit_retr(j, JIT_R0); + + size_t size = 0; + void* ret = jit_end(j, &size); + + jit_word_t (*f)(jit_word_t, jit_word_t) = ret; + + ASSERT(f(0x7fffffff, 1) == 0); + ASSERT(f(1, 0x7fffffff) == 1); + ASSERT(f(0x80000000, 1) == 0); + ASSERT(f(1, 0x80000000) == 1); + ASSERT(f(0x7fffffff, 2) == 1); + ASSERT(f(2, 0x7fffffff) == 2); + ASSERT(f(0x80000000, 2) == 0); + ASSERT(f(2, 0x80000000) == 2); + ASSERT(f(0x7fffffff, 0x80000000) == 0x7fffffff); + ASSERT(f(0x80000000, 0x7fffffff) == 1); + ASSERT(f(0, 0x7fffffff) == 0); + ASSERT(f(0x7fffffff, 0xffffffff) == 0x7fffffff); + ASSERT(f(0xffffffff, 0x7fffffff) == 1); + ASSERT(f(0xffffffff, 0xffffffff) == 0); + +#if __WORDSIZE != 32 + ASSERT(f(0x7fffffffffffffff, 1) == 0); + ASSERT(f(1, 0x7fffffffffffffff) == 1); + ASSERT(f(0x8000000000000000, 1) == 0); + ASSERT(f(1, 0x8000000000000000) == 1); + ASSERT(f(0x7fffffffffffffff, 2) == 1); + ASSERT(f(2, 0x7fffffffffffffff) == 2); + ASSERT(f(0x8000000000000000, 2) == 0); + ASSERT(f(2, 0x8000000000000000) == 2); + ASSERT(f(0x7fffffffffffffff, 0x8000000000000000) == 0x7fffffffffffffff); + ASSERT(f(0x8000000000000000, 0x7fffffffffffffff) == 1); + ASSERT(f(0x7fffffffffffffff, 0xffffffffffffffff) == 0x7fffffffffffffff); + ASSERT(f(0xffffffffffffffff, 0x7fffffffffffffff) == 1); + ASSERT(f(0xffffffffffffffff, 0xffffffffffffffff) == 0); +#endif +} + +int +main (int argc, char *argv[]) +{ + return main_helper(argc, argv, run_test); +} diff --git a/deps/lightening/tests/rshi.c b/deps/lightening/tests/rshi.c new file mode 100644 index 0000000..c536055 --- /dev/null +++ b/deps/lightening/tests/rshi.c @@ -0,0 +1,28 @@ +#include "test.h" + +static void +run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size) +{ + jit_begin(j, arena_base, arena_size); + size_t align = jit_enter_jit_abi(j, 0, 0, 0); + jit_load_args_1(j, 
jit_operand_gpr (JIT_OPERAND_ABI_WORD, JIT_R0)); + + jit_rshi(j, JIT_R0, JIT_R0, 31); + jit_leave_jit_abi(j, 0, 0, align); + jit_retr(j, JIT_R0); + + jit_word_t (*f)(jit_word_t) = jit_end(j, NULL); + +#if __WORDSIZE == 32 + ASSERT(f(0x80000000) == -1); +#else + ASSERT(f(0x80000000) == 1); + ASSERT(f(0x8000000000000000) == 0xffffffff00000000); +#endif +} + +int +main (int argc, char *argv[]) +{ + return main_helper(argc, argv, run_test); +} diff --git a/deps/lightening/tests/rshi_u.c b/deps/lightening/tests/rshi_u.c new file mode 100644 index 0000000..8f6dbd4 --- /dev/null +++ b/deps/lightening/tests/rshi_u.c @@ -0,0 +1,28 @@ +#include "test.h" + +static void +run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size) +{ + jit_begin(j, arena_base, arena_size); + size_t align = jit_enter_jit_abi(j, 0, 0, 0); + jit_load_args_1(j, jit_operand_gpr (JIT_OPERAND_ABI_WORD, JIT_R0)); + + jit_rshi_u(j, JIT_R0, JIT_R0, 31); + jit_leave_jit_abi(j, 0, 0, align); + jit_retr(j, JIT_R0); + + jit_word_t (*f)(jit_word_t) = jit_end(j, NULL); + +#if __WORDSIZE == 32 + ASSERT(f(0x80000000) == 1); +#else + ASSERT(f(0x80000000) == 1); + ASSERT(f(0x8000000000000000) == 0x100000000); +#endif +} + +int +main (int argc, char *argv[]) +{ + return main_helper(argc, argv, run_test); +} diff --git a/deps/lightening/tests/rshr.c b/deps/lightening/tests/rshr.c new file mode 100644 index 0000000..b4b5689 --- /dev/null +++ b/deps/lightening/tests/rshr.c @@ -0,0 +1,63 @@ +#include "test.h" + +static void +run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size) +{ + jit_begin(j, arena_base, arena_size); + size_t align = jit_enter_jit_abi(j, 0, 0, 0); + jit_load_args_2(j, jit_operand_gpr (JIT_OPERAND_ABI_WORD, JIT_R0), + jit_operand_gpr (JIT_OPERAND_ABI_WORD, JIT_R1)); + + jit_rshr(j, JIT_R0, JIT_R0, JIT_R1); + jit_leave_jit_abi(j, 0, 0, align); + jit_retr(j, JIT_R0); + + size_t size = 0; + void* ret = jit_end(j, &size); + + jit_word_t (*f)(jit_word_t, jit_word_t) = ret; + + 
ASSERT(f(0xfe, 1) == 0x7f); + ASSERT(f(0x1fffc, 2) == 0x7fff); + ASSERT(f(0x40000000, 30) == 1); + ASSERT(f(0x20000000, 29) == 1); + ASSERT(f(0x10000000, 28) == 1); + ASSERT(f(0x810000, 16) == 0x81); + ASSERT(f(0x20000, 17) == 1); + ASSERT(f(0x40000, 18) == 1); + ASSERT(f(0x7f8000, 15) == 0xff); + ASSERT(f(0x1000000, 24) == 1); + ASSERT(f(0x7fffffff, 0) == 0x7fffffff); +#if __WORDSIZE == 32 + ASSERT(f(0xfffffff8, 3) == 0xffffffff); + ASSERT(f(0x80000000, 31) == 0xffffffff); + ASSERT(f(0xffffff00, 8) == 0xffffffff); +#else + ASSERT(f(0x3fffffff8, 3) == 0x7fffffff); + ASSERT(f(0xffffffc080000000, 31) == 0xffffffffffffff81); + ASSERT(f(0xffffff00, 8) == 0xffffff); + ASSERT(f(0xfe00000000, 33) == 0x7f); + ASSERT(f(0x1ffffc00000000, 34) == 0x7ffff); + ASSERT(f(0xfffffff800000000, 29) == 0xffffffffffffffc0); + ASSERT(f(0x8000000000000000, 63) == 0xffffffffffffffff); + ASSERT(f(0x4000000000000000, 62) == 1); + ASSERT(f(0x2000000000000000, 61) == 1); + ASSERT(f(0x1000000000000000, 60) == 1); + ASSERT(f(0x81000000000000, 48) == 0x81); + ASSERT(f(0x2000000000000, 49) == 1); + ASSERT(f(0x10000000000, 40) == 1); + ASSERT(f(0x7f800000000000, 47) == 0xff); + ASSERT(f(0x100000000000000, 56) == 1); + ASSERT(f(0xffffff0000000000, 40) == 0xffffffffffffffff); + ASSERT(f(0xfffffffe00000000, 33) == 0xffffffffffffffff); + ASSERT(f(0x8000000000000001, 63) == 0xffffffffffffffff); + ASSERT(f(0x1000000000000, 48) == 1); + ASSERT(f(0xffff800000000000, 47) == 0xffffffffffffffff); +#endif +} + +int +main (int argc, char *argv[]) +{ + return main_helper(argc, argv, run_test); +} diff --git a/deps/lightening/tests/rshr_u.c b/deps/lightening/tests/rshr_u.c new file mode 100644 index 0000000..64c59fd --- /dev/null +++ b/deps/lightening/tests/rshr_u.c @@ -0,0 +1,62 @@ +#include "test.h" + +static void +run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size) +{ + jit_begin(j, arena_base, arena_size); + size_t align = jit_enter_jit_abi(j, 0, 0, 0); + jit_load_args_2(j, jit_operand_gpr 
(JIT_OPERAND_ABI_WORD, JIT_R0), + jit_operand_gpr (JIT_OPERAND_ABI_WORD, JIT_R1)); + + jit_rshr_u(j, JIT_R0, JIT_R0, JIT_R1); + jit_leave_jit_abi(j, 0, 0, align); + jit_retr(j, JIT_R0); + + size_t size = 0; + void* ret = jit_end(j, &size); + + jit_word_t (*f)(jit_word_t, jit_word_t) = ret; + + ASSERT(f(0xfe, 1) == 0x7f); + ASSERT(f(0x1fffc, 2) == 0x7fff); + ASSERT(f(0x80000000, 31) == 1); + ASSERT(f(0x40000000, 30) == 1); + ASSERT(f(0x20000000, 29) == 1); + ASSERT(f(0x10000000, 28) == 1); + ASSERT(f(0x810000, 16) == 0x81); + ASSERT(f(0x20000, 17) == 1); + ASSERT(f(0x40000, 18) == 1); + ASSERT(f(0x7f8000, 15) == 0xff); + ASSERT(f(0x1000000, 24) == 1); + ASSERT(f(0xffffff00, 8) == 0xffffff); + ASSERT(f(0x7fffffff, 0) == 0x7fffffff); +#if __WORDSIZE == 32 + ASSERT(f(0xfffffff8, 3) == 0x1fffffff); +#else + ASSERT(f(0x3fffffff8, 3) == 0x7fffffff); + ASSERT(f(0xffffffc080000000, 31) == 0x1ffffff81); + ASSERT(f(0xfe00000000, 33) == 0x7f); + ASSERT(f(0x1ffffc00000000, 34) == 0x7ffff); + ASSERT(f(0xfffffff800000000, 29) == 0x7ffffffc0); + ASSERT(f(0x8000000000000000, 63) == 1); + ASSERT(f(0x4000000000000000, 62) == 1); + ASSERT(f(0x2000000000000000, 61) == 1); + ASSERT(f(0x1000000000000000, 60) == 1); + ASSERT(f(0x81000000000000, 48) == 0x81); + ASSERT(f(0x2000000000000, 49) == 1); + ASSERT(f(0x10000000000, 40) == 1); + ASSERT(f(0x7f800000000000, 47) == 0xff); + ASSERT(f(0x100000000000000, 56) == 1); + ASSERT(f(0xffffff0000000000, 40) == 0xffffff); + ASSERT(f(0xfffffffe00000000, 33) == 0x7fffffff); + ASSERT(f(0x8000000000000001, 63) == 1); + ASSERT(f(0x1000000000000, 48) == 1); + ASSERT(f(0xffff800000000000, 47) == 0x1ffff); +#endif +} + +int +main (int argc, char *argv[]) +{ + return main_helper(argc, argv, run_test); +} diff --git a/deps/lightening/tests/sqrtr_d.c b/deps/lightening/tests/sqrtr_d.c new file mode 100644 index 0000000..873deb9 --- /dev/null +++ b/deps/lightening/tests/sqrtr_d.c @@ -0,0 +1,25 @@ +#include "test.h" + +static void +run_test(jit_state_t *j, 
uint8_t *arena_base, size_t arena_size) +{ + jit_begin(j, arena_base, arena_size); + size_t align = jit_enter_jit_abi(j, 0, 0, 0); + jit_load_args_1(j, jit_operand_fpr (JIT_OPERAND_ABI_DOUBLE, JIT_F0)); + + jit_sqrtr_d(j, JIT_F0, JIT_F0); + jit_leave_jit_abi(j, 0, 0, align); + jit_retr_d(j, JIT_F0); + + double (*f)(double) = jit_end(j, NULL); + + ASSERT(f(0.0) == 0.0); + ASSERT(f(4.0) == 2.0); + ASSERT(f(-4.0) != f(-4.0)); // nan +} + +int +main (int argc, char *argv[]) +{ + return main_helper(argc, argv, run_test); +} diff --git a/deps/lightening/tests/sqrtr_f.c b/deps/lightening/tests/sqrtr_f.c new file mode 100644 index 0000000..66db831 --- /dev/null +++ b/deps/lightening/tests/sqrtr_f.c @@ -0,0 +1,25 @@ +#include "test.h" + +static void +run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size) +{ + jit_begin(j, arena_base, arena_size); + size_t align = jit_enter_jit_abi(j, 0, 0, 0); + jit_load_args_1(j, jit_operand_fpr (JIT_OPERAND_ABI_FLOAT, JIT_F0)); + + jit_sqrtr_f(j, JIT_F0, JIT_F0); + jit_leave_jit_abi(j, 0, 0, align); + jit_retr_f(j, JIT_F0); + + float (*f)(float) = jit_end(j, NULL); + + ASSERT(f(0.0) == 0.0); + ASSERT(f(4.0) == 2.0); + ASSERT(f(-4.0) != f(-4.0)); // nan +} + +int +main (int argc, char *argv[]) +{ + return main_helper(argc, argv, run_test); +} diff --git a/deps/lightening/tests/sti_c.c b/deps/lightening/tests/sti_c.c new file mode 100644 index 0000000..ff6e6d5 --- /dev/null +++ b/deps/lightening/tests/sti_c.c @@ -0,0 +1,31 @@ +#include "test.h" + +static uint8_t data[] = { 0x12, 0x00, 0x34 }; + +static void +run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size) +{ + jit_begin(j, arena_base, arena_size); + size_t align = jit_enter_jit_abi(j, 0, 0, 0); + jit_load_args_1(j, jit_operand_gpr (JIT_OPERAND_ABI_INT8, JIT_R1)); + + jit_sti_c(j, &data[1], JIT_R1); + jit_leave_jit_abi(j, 0, 0, align); + jit_ret(j); + + void (*f)(int8_t) = jit_end(j, NULL); + + ASSERT(data[0] == 0x12); + ASSERT(data[1] == 0x00); + ASSERT(data[2] == 
0x34); + f(-1); + ASSERT(data[0] == 0x12); + ASSERT(data[1] == 0xff); + ASSERT(data[2] == 0x34); +} + +int +main (int argc, char *argv[]) +{ + return main_helper(argc, argv, run_test); +} diff --git a/deps/lightening/tests/sti_d.c b/deps/lightening/tests/sti_d.c new file mode 100644 index 0000000..8a703e6 --- /dev/null +++ b/deps/lightening/tests/sti_d.c @@ -0,0 +1,31 @@ +#include "test.h" + +static double data[] = { -1.0, 0.0, 0.5 }; + +static void +run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size) +{ + jit_begin(j, arena_base, arena_size); + size_t align = jit_enter_jit_abi(j, 0, 0, 0); + jit_load_args_1(j, jit_operand_fpr (JIT_OPERAND_ABI_DOUBLE, JIT_F0)); + + jit_sti_d(j, &data[1], JIT_F0); + jit_leave_jit_abi(j, 0, 0, align); + jit_ret(j); + + void (*f)(double) = jit_end(j, NULL); + + ASSERT(data[0] == -1.0); + ASSERT(data[1] == 0.0); + ASSERT(data[2] == 0.5); + f(42.5); + ASSERT(data[0] == -1.0); + ASSERT(data[1] == 42.5); + ASSERT(data[2] == 0.5); +} + +int +main (int argc, char *argv[]) +{ + return main_helper(argc, argv, run_test); +} diff --git a/deps/lightening/tests/sti_f.c b/deps/lightening/tests/sti_f.c new file mode 100644 index 0000000..e027192 --- /dev/null +++ b/deps/lightening/tests/sti_f.c @@ -0,0 +1,31 @@ +#include "test.h" + +static float data[] = { -1.0, 0.0, 0.5 }; + +static void +run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size) +{ + jit_begin(j, arena_base, arena_size); + size_t align = jit_enter_jit_abi(j, 0, 0, 0); + jit_load_args_1(j, jit_operand_fpr (JIT_OPERAND_ABI_FLOAT, JIT_F0)); + + jit_sti_f(j, &data[1], JIT_F0); + jit_leave_jit_abi(j, 0, 0, align); + jit_ret(j); + + void (*f)(float) = jit_end(j, NULL); + + ASSERT(data[0] == -1.0f); + ASSERT(data[1] == 0.0f); + ASSERT(data[2] == 0.5f); + f(42.5f); + ASSERT(data[0] == -1.0f); + ASSERT(data[1] == 42.5f); + ASSERT(data[2] == 0.5f); +} + +int +main (int argc, char *argv[]) +{ + return main_helper(argc, argv, run_test); +} diff --git 
a/deps/lightening/tests/sti_i.c b/deps/lightening/tests/sti_i.c new file mode 100644 index 0000000..4a233c6 --- /dev/null +++ b/deps/lightening/tests/sti_i.c @@ -0,0 +1,31 @@ +#include "test.h" + +static uint32_t data[] = { 0x12121212, 0x00000000, 0x34343434 }; + +static void +run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size) +{ + jit_begin(j, arena_base, arena_size); + size_t align = jit_enter_jit_abi(j, 0, 0, 0); + jit_load_args_1(j, jit_operand_gpr (JIT_OPERAND_ABI_INT32, JIT_R1)); + + jit_sti_i(j, &data[1], JIT_R1); + jit_leave_jit_abi(j, 0, 0, align); + jit_ret(j); + + void (*f)(int32_t) = jit_end(j, NULL); + + ASSERT(data[0] == 0x12121212); + ASSERT(data[1] == 0x00); + ASSERT(data[2] == 0x34343434); + f(-1); + ASSERT(data[0] == 0x12121212); + ASSERT(data[1] == 0xffffffff); + ASSERT(data[2] == 0x34343434); +} + +int +main (int argc, char *argv[]) +{ + return main_helper(argc, argv, run_test); +} diff --git a/deps/lightening/tests/sti_l.c b/deps/lightening/tests/sti_l.c new file mode 100644 index 0000000..fce9180 --- /dev/null +++ b/deps/lightening/tests/sti_l.c @@ -0,0 +1,33 @@ +#include "test.h" + +static void +run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size) +{ +#if __WORDSIZE > 32 + static uint64_t data[] = { 0x1212121212121212, 0, 0x3434343434343434 }; + + jit_begin(j, arena_base, arena_size); + size_t align = jit_enter_jit_abi(j, 0, 0, 0); + jit_load_args_1(j, jit_operand_gpr (JIT_OPERAND_ABI_INT64, JIT_R1)); + + jit_sti_l(j, &data[1], JIT_R1); + jit_leave_jit_abi(j, 0, 0, align); + jit_ret(j); + + void (*f)(int64_t) = jit_end(j, NULL); + + ASSERT(data[0] == 0x1212121212121212); + ASSERT(data[1] == 0x00); + ASSERT(data[2] == 0x3434343434343434); + f(-1); + ASSERT(data[0] == 0x1212121212121212); + ASSERT(data[1] == 0xffffffffffffffff); + ASSERT(data[2] == 0x3434343434343434); +#endif +} + +int +main (int argc, char *argv[]) +{ + return main_helper(argc, argv, run_test); +} diff --git a/deps/lightening/tests/sti_s.c 
b/deps/lightening/tests/sti_s.c new file mode 100644 index 0000000..daab0bd --- /dev/null +++ b/deps/lightening/tests/sti_s.c @@ -0,0 +1,31 @@ +#include "test.h" + +static uint16_t data[] = { 0x1212, 0x0000, 0x3434 }; + +static void +run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size) +{ + jit_begin(j, arena_base, arena_size); + size_t align = jit_enter_jit_abi(j, 0, 0, 0); + jit_load_args_1(j, jit_operand_gpr (JIT_OPERAND_ABI_INT16, JIT_R1)); + + jit_sti_s(j, &data[1], JIT_R1); + jit_leave_jit_abi(j, 0, 0, align); + jit_ret(j); + + void (*f)(int16_t) = jit_end(j, NULL); + + ASSERT(data[0] == 0x1212); + ASSERT(data[1] == 0); + ASSERT(data[2] == 0x3434); + f(-1); + ASSERT(data[0] == 0x1212); + ASSERT(data[1] == 0xffff); + ASSERT(data[2] == 0x3434); +} + +int +main (int argc, char *argv[]) +{ + return main_helper(argc, argv, run_test); +} diff --git a/deps/lightening/tests/str_atomic.c b/deps/lightening/tests/str_atomic.c new file mode 100644 index 0000000..9098c2a --- /dev/null +++ b/deps/lightening/tests/str_atomic.c @@ -0,0 +1,32 @@ +#include "test.h" + +static long data[] = { 0x12121212, 0x00000000, 0x34343434 }; + +static void +run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size) +{ + jit_begin(j, arena_base, arena_size); + size_t align = jit_enter_jit_abi(j, 0, 0, 0); + jit_load_args_2(j, jit_operand_gpr (JIT_OPERAND_ABI_POINTER, JIT_R0), + jit_operand_gpr (JIT_OPERAND_ABI_INT32, JIT_R1)); + + jit_str_atomic(j, JIT_R0, JIT_R1); + jit_leave_jit_abi(j, 0, 0, align); + jit_ret(j); + + void (*f)(void*, int32_t) = jit_end(j, NULL); + + ASSERT(data[0] == 0x12121212); + ASSERT(data[1] == 0x00); + ASSERT(data[2] == 0x34343434); + f(&data[1], 0x0f0f0f0f); + ASSERT(data[0] == 0x12121212); + ASSERT(data[1] == 0x0f0f0f0f); + ASSERT(data[2] == 0x34343434); +} + +int +main (int argc, char *argv[]) +{ + return main_helper(argc, argv, run_test); +} diff --git a/deps/lightening/tests/str_c.c b/deps/lightening/tests/str_c.c new file mode 100644 index 
0000000..b894b82 --- /dev/null +++ b/deps/lightening/tests/str_c.c @@ -0,0 +1,32 @@ +#include "test.h" + +static uint8_t data[] = { 0x12, 0x00, 0x34 }; + +static void +run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size) +{ + jit_begin(j, arena_base, arena_size); + size_t align = jit_enter_jit_abi(j, 0, 0, 0); + jit_load_args_2(j, jit_operand_gpr (JIT_OPERAND_ABI_POINTER, JIT_R0), + jit_operand_gpr (JIT_OPERAND_ABI_INT8, JIT_R1)); + + jit_str_c(j, JIT_R0, JIT_R1); + jit_leave_jit_abi(j, 0, 0, align); + jit_ret(j); + + void (*f)(void*, int8_t) = jit_end(j, NULL); + + ASSERT(data[0] == 0x12); + ASSERT(data[1] == 0x00); + ASSERT(data[2] == 0x34); + f(&data[1], -1); + ASSERT(data[0] == 0x12); + ASSERT(data[1] == 0xff); + ASSERT(data[2] == 0x34); +} + +int +main (int argc, char *argv[]) +{ + return main_helper(argc, argv, run_test); +} diff --git a/deps/lightening/tests/str_d.c b/deps/lightening/tests/str_d.c new file mode 100644 index 0000000..2f992a6 --- /dev/null +++ b/deps/lightening/tests/str_d.c @@ -0,0 +1,32 @@ +#include "test.h" + +static double data[] = { -1.0, 0.0, 0.5 }; + +static void +run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size) +{ + jit_begin(j, arena_base, arena_size); + size_t align = jit_enter_jit_abi(j, 0, 0, 0); + jit_load_args_2(j, jit_operand_gpr (JIT_OPERAND_ABI_POINTER, JIT_R0), + jit_operand_fpr (JIT_OPERAND_ABI_DOUBLE, JIT_F0)); + + jit_str_d(j, JIT_R0, JIT_F0); + jit_leave_jit_abi(j, 0, 0, align); + jit_ret(j); + + void (*f)(void*, double) = jit_end(j, NULL); + + ASSERT(data[0] == -1.0); + ASSERT(data[1] == 0.0); + ASSERT(data[2] == 0.5); + f(&data[1], 42.5); + ASSERT(data[0] == -1.0); + ASSERT(data[1] == 42.5); + ASSERT(data[2] == 0.5); +} + +int +main (int argc, char *argv[]) +{ + return main_helper(argc, argv, run_test); +} diff --git a/deps/lightening/tests/str_f.c b/deps/lightening/tests/str_f.c new file mode 100644 index 0000000..fdad3c2 --- /dev/null +++ b/deps/lightening/tests/str_f.c @@ -0,0 +1,32 @@ 
+#include "test.h" + +static float data[] = { -1.0, 0.0, 0.5 }; + +static void +run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size) +{ + jit_begin(j, arena_base, arena_size); + size_t align = jit_enter_jit_abi(j, 0, 0, 0); + jit_load_args_2(j, jit_operand_gpr (JIT_OPERAND_ABI_POINTER, JIT_R0), + jit_operand_fpr (JIT_OPERAND_ABI_FLOAT, JIT_F0)); + + jit_str_f(j, JIT_R0, JIT_F0); + jit_leave_jit_abi(j, 0, 0, align); + jit_ret(j); + + void (*f)(void*, float) = jit_end(j, NULL); + + ASSERT(data[0] == -1.0f); + ASSERT(data[1] == 0.0f); + ASSERT(data[2] == 0.5f); + f(&data[1], 42.5f); + ASSERT(data[0] == -1.0f); + ASSERT(data[1] == 42.5f); + ASSERT(data[2] == 0.5f); +} + +int +main (int argc, char *argv[]) +{ + return main_helper(argc, argv, run_test); +} diff --git a/deps/lightening/tests/str_i.c b/deps/lightening/tests/str_i.c new file mode 100644 index 0000000..968f0ce --- /dev/null +++ b/deps/lightening/tests/str_i.c @@ -0,0 +1,32 @@ +#include "test.h" + +static uint32_t data[] = { 0x12121212, 0x00000000, 0x34343434 }; + +static void +run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size) +{ + jit_begin(j, arena_base, arena_size); + size_t align = jit_enter_jit_abi(j, 0, 0, 0); + jit_load_args_2(j, jit_operand_gpr (JIT_OPERAND_ABI_POINTER, JIT_R0), + jit_operand_gpr (JIT_OPERAND_ABI_INT32, JIT_R1)); + + jit_str_i(j, JIT_R0, JIT_R1); + jit_leave_jit_abi(j, 0, 0, align); + jit_ret(j); + + void (*f)(void*, int32_t) = jit_end(j, NULL); + + ASSERT(data[0] == 0x12121212); + ASSERT(data[1] == 0x00); + ASSERT(data[2] == 0x34343434); + f(&data[1], -1); + ASSERT(data[0] == 0x12121212); + ASSERT(data[1] == 0xffffffff); + ASSERT(data[2] == 0x34343434); +} + +int +main (int argc, char *argv[]) +{ + return main_helper(argc, argv, run_test); +} diff --git a/deps/lightening/tests/str_l.c b/deps/lightening/tests/str_l.c new file mode 100644 index 0000000..450885b --- /dev/null +++ b/deps/lightening/tests/str_l.c @@ -0,0 +1,34 @@ +#include "test.h" + +static 
void +run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size) +{ +#if __WORDSIZE > 32 + static uint64_t data[] = { 0x1212121212121212, 0, 0x3434343434343434 }; + + jit_begin(j, arena_base, arena_size); + size_t align = jit_enter_jit_abi(j, 0, 0, 0); + jit_load_args_2(j, jit_operand_gpr (JIT_OPERAND_ABI_POINTER, JIT_R0), + jit_operand_gpr (JIT_OPERAND_ABI_INT64, JIT_R1)); + + jit_str_l(j, JIT_R0, JIT_R1); + jit_leave_jit_abi(j, 0, 0, align); + jit_ret(j); + + void (*f)(void*, int64_t) = jit_end(j, NULL); + + ASSERT(data[0] == 0x1212121212121212); + ASSERT(data[1] == 0x00); + ASSERT(data[2] == 0x3434343434343434); + f(&data[1], -1); + ASSERT(data[0] == 0x1212121212121212); + ASSERT(data[1] == 0xffffffffffffffff); + ASSERT(data[2] == 0x3434343434343434); +#endif +} + +int +main (int argc, char *argv[]) +{ + return main_helper(argc, argv, run_test); +} diff --git a/deps/lightening/tests/str_s.c b/deps/lightening/tests/str_s.c new file mode 100644 index 0000000..3e228ed --- /dev/null +++ b/deps/lightening/tests/str_s.c @@ -0,0 +1,32 @@ +#include "test.h" + +static uint16_t data[] = { 0x1212, 0x0000, 0x3434 }; + +static void +run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size) +{ + jit_begin(j, arena_base, arena_size); + size_t align = jit_enter_jit_abi(j, 0, 0, 0); + jit_load_args_2(j, jit_operand_gpr (JIT_OPERAND_ABI_POINTER, JIT_R0), + jit_operand_gpr (JIT_OPERAND_ABI_INT16, JIT_R1)); + + jit_str_s(j, JIT_R0, JIT_R1); + jit_leave_jit_abi(j, 0, 0, align); + jit_ret(j); + + void (*f)(void*, int16_t) = jit_end(j, NULL); + + ASSERT(data[0] == 0x1212); + ASSERT(data[1] == 0); + ASSERT(data[2] == 0x3434); + f(&data[1], -1); + ASSERT(data[0] == 0x1212); + ASSERT(data[1] == 0xffff); + ASSERT(data[2] == 0x3434); +} + +int +main (int argc, char *argv[]) +{ + return main_helper(argc, argv, run_test); +} diff --git a/deps/lightening/tests/stxi_c.c b/deps/lightening/tests/stxi_c.c new file mode 100644 index 0000000..d76d814 --- /dev/null +++ 
b/deps/lightening/tests/stxi_c.c @@ -0,0 +1,32 @@ +#include "test.h" + +static uint8_t data[] = { 0x12, 0x00, 0x34 }; + +static void +run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size) +{ + jit_begin(j, arena_base, arena_size); + size_t align = jit_enter_jit_abi(j, 0, 0, 0); + jit_load_args_2(j, jit_operand_gpr (JIT_OPERAND_ABI_WORD, JIT_R2), + jit_operand_gpr (JIT_OPERAND_ABI_INT8, JIT_R1)); + + jit_stxi_c(j, (uintptr_t)data, JIT_R2, JIT_R1); + jit_leave_jit_abi(j, 0, 0, align); + jit_ret(j); + + void (*f)(jit_word_t, int8_t) = jit_end(j, NULL); + + ASSERT(data[0] == 0x12); + ASSERT(data[1] == 0x00); + ASSERT(data[2] == 0x34); + f(1, -1); + ASSERT(data[0] == 0x12); + ASSERT(data[1] == 0xff); + ASSERT(data[2] == 0x34); +} + +int +main (int argc, char *argv[]) +{ + return main_helper(argc, argv, run_test); +} diff --git a/deps/lightening/tests/stxi_d.c b/deps/lightening/tests/stxi_d.c new file mode 100644 index 0000000..3933c56 --- /dev/null +++ b/deps/lightening/tests/stxi_d.c @@ -0,0 +1,32 @@ +#include "test.h" + +static double data[] = { -1.0, 0.0, 0.5 }; + +static void +run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size) +{ + jit_begin(j, arena_base, arena_size); + size_t align = jit_enter_jit_abi(j, 0, 0, 0); + jit_load_args_2(j, jit_operand_gpr (JIT_OPERAND_ABI_WORD, JIT_R2), + jit_operand_fpr (JIT_OPERAND_ABI_DOUBLE, JIT_F0)); + + jit_stxi_d(j, (uintptr_t)data, JIT_R2, JIT_F0); + jit_leave_jit_abi(j, 0, 0, align); + jit_ret(j); + + void (*f)(jit_word_t, double) = jit_end(j, NULL); + + ASSERT(data[0] == -1.0); + ASSERT(data[1] == 0.0); + ASSERT(data[2] == 0.5); + f(8, 42.5); + ASSERT(data[0] == -1.0); + ASSERT(data[1] == 42.5); + ASSERT(data[2] == 0.5); +} + +int +main (int argc, char *argv[]) +{ + return main_helper(argc, argv, run_test); +} diff --git a/deps/lightening/tests/stxi_f.c b/deps/lightening/tests/stxi_f.c new file mode 100644 index 0000000..aea6756 --- /dev/null +++ b/deps/lightening/tests/stxi_f.c @@ -0,0 +1,32 @@ 
+#include "test.h" + +static float data[] = { -1.0, 0.0, 0.5 }; + +static void +run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size) +{ + jit_begin(j, arena_base, arena_size); + size_t align = jit_enter_jit_abi(j, 0, 0, 0); + jit_load_args_2(j, jit_operand_gpr (JIT_OPERAND_ABI_WORD, JIT_R2), + jit_operand_fpr (JIT_OPERAND_ABI_FLOAT, JIT_F0)); + + jit_stxi_f(j, (uintptr_t)data, JIT_R2, JIT_F0); + jit_leave_jit_abi(j, 0, 0, align); + jit_ret(j); + + void (*f)(jit_word_t, float) = jit_end(j, NULL); + + ASSERT(data[0] == -1.0f); + ASSERT(data[1] == 0.0f); + ASSERT(data[2] == 0.5f); + f(4, 42.5f); + ASSERT(data[0] == -1.0f); + ASSERT(data[1] == 42.5f); + ASSERT(data[2] == 0.5f); +} + +int +main (int argc, char *argv[]) +{ + return main_helper(argc, argv, run_test); +} diff --git a/deps/lightening/tests/stxi_i.c b/deps/lightening/tests/stxi_i.c new file mode 100644 index 0000000..79dab03 --- /dev/null +++ b/deps/lightening/tests/stxi_i.c @@ -0,0 +1,32 @@ +#include "test.h" + +static uint32_t data[] = { 0x12121212, 0x00000000, 0x34343434 }; + +static void +run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size) +{ + jit_begin(j, arena_base, arena_size); + size_t align = jit_enter_jit_abi(j, 0, 0, 0); + jit_load_args_2(j, jit_operand_gpr (JIT_OPERAND_ABI_WORD, JIT_R2), + jit_operand_gpr (JIT_OPERAND_ABI_INT32, JIT_R1)); + + jit_stxi_i(j, (uintptr_t)data, JIT_R2, JIT_R1); + jit_leave_jit_abi(j, 0, 0, align); + jit_ret(j); + + void (*f)(jit_word_t, int32_t) = jit_end(j, NULL); + + ASSERT(data[0] == 0x12121212); + ASSERT(data[1] == 0x00); + ASSERT(data[2] == 0x34343434); + f(4, -1); + ASSERT(data[0] == 0x12121212); + ASSERT(data[1] == 0xffffffff); + ASSERT(data[2] == 0x34343434); +} + +int +main (int argc, char *argv[]) +{ + return main_helper(argc, argv, run_test); +} diff --git a/deps/lightening/tests/stxi_l.c b/deps/lightening/tests/stxi_l.c new file mode 100644 index 0000000..8a68241 --- /dev/null +++ b/deps/lightening/tests/stxi_l.c @@ -0,0 +1,34 @@ 
+#include "test.h" + +static void +run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size) +{ +#if __WORDSIZE > 32 + static uint64_t data[] = { 0x1212121212121212, 0, 0x3434343434343434 }; + + jit_begin(j, arena_base, arena_size); + size_t align = jit_enter_jit_abi(j, 0, 0, 0); + jit_load_args_2(j, jit_operand_gpr (JIT_OPERAND_ABI_WORD, JIT_R2), + jit_operand_gpr (JIT_OPERAND_ABI_INT64, JIT_R1)); + + jit_stxi_l(j, (uintptr_t)data, JIT_R2, JIT_R1); + jit_leave_jit_abi(j, 0, 0, align); + jit_ret(j); + + void (*f)(jit_word_t, int64_t) = jit_end(j, NULL); + + ASSERT(data[0] == 0x1212121212121212); + ASSERT(data[1] == 0x00); + ASSERT(data[2] == 0x3434343434343434); + f(8, -1); + ASSERT(data[0] == 0x1212121212121212); + ASSERT(data[1] == 0xffffffffffffffff); + ASSERT(data[2] == 0x3434343434343434); +#endif +} + +int +main (int argc, char *argv[]) +{ + return main_helper(argc, argv, run_test); +} diff --git a/deps/lightening/tests/stxi_s.c b/deps/lightening/tests/stxi_s.c new file mode 100644 index 0000000..64bda5d --- /dev/null +++ b/deps/lightening/tests/stxi_s.c @@ -0,0 +1,32 @@ +#include "test.h" + +static uint16_t data[] = { 0x1212, 0x0000, 0x3434 }; + +static void +run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size) +{ + jit_begin(j, arena_base, arena_size); + size_t align = jit_enter_jit_abi(j, 0, 0, 0); + jit_load_args_2(j, jit_operand_gpr (JIT_OPERAND_ABI_WORD, JIT_R2), + jit_operand_gpr (JIT_OPERAND_ABI_INT16, JIT_R1)); + + jit_stxi_s(j, (uintptr_t)data, JIT_R2, JIT_R1); + jit_leave_jit_abi(j, 0, 0, align); + jit_ret(j); + + void (*f)(jit_word_t, int16_t) = jit_end(j, NULL); + + ASSERT(data[0] == 0x1212); + ASSERT(data[1] == 0); + ASSERT(data[2] == 0x3434); + f(2, -1); + ASSERT(data[0] == 0x1212); + ASSERT(data[1] == 0xffff); + ASSERT(data[2] == 0x3434); +} + +int +main (int argc, char *argv[]) +{ + return main_helper(argc, argv, run_test); +} diff --git a/deps/lightening/tests/stxr_c.c b/deps/lightening/tests/stxr_c.c new file mode 100644 
index 0000000..8876855 --- /dev/null +++ b/deps/lightening/tests/stxr_c.c @@ -0,0 +1,33 @@ +#include "test.h" + +static uint8_t data[] = { 0x12, 0x00, 0x34 }; + +static void +run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size) +{ + jit_begin(j, arena_base, arena_size); + size_t align = jit_enter_jit_abi(j, 0, 0, 0); + jit_load_args_3(j, jit_operand_gpr (JIT_OPERAND_ABI_POINTER, JIT_R0), + jit_operand_gpr (JIT_OPERAND_ABI_WORD, JIT_R2), + jit_operand_gpr (JIT_OPERAND_ABI_INT8, JIT_R1)); + + jit_stxr_c(j, JIT_R0, JIT_R2, JIT_R1); + jit_leave_jit_abi(j, 0, 0, align); + jit_ret(j); + + void (*f)(void*, jit_word_t, int8_t) = jit_end(j, NULL); + + ASSERT(data[0] == 0x12); + ASSERT(data[1] == 0x00); + ASSERT(data[2] == 0x34); + f(data, 1, -1); + ASSERT(data[0] == 0x12); + ASSERT(data[1] == 0xff); + ASSERT(data[2] == 0x34); +} + +int +main (int argc, char *argv[]) +{ + return main_helper(argc, argv, run_test); +} diff --git a/deps/lightening/tests/stxr_d.c b/deps/lightening/tests/stxr_d.c new file mode 100644 index 0000000..e87688a --- /dev/null +++ b/deps/lightening/tests/stxr_d.c @@ -0,0 +1,33 @@ +#include "test.h" + +static double data[] = { -1.0, 0.0, 0.5 }; + +static void +run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size) +{ + jit_begin(j, arena_base, arena_size); + size_t align = jit_enter_jit_abi(j, 0, 0, 0); + jit_load_args_3(j, jit_operand_gpr (JIT_OPERAND_ABI_POINTER, JIT_R0), + jit_operand_gpr (JIT_OPERAND_ABI_WORD, JIT_R2), + jit_operand_fpr (JIT_OPERAND_ABI_DOUBLE, JIT_F0)); + + jit_stxr_d(j, JIT_R0, JIT_R2, JIT_F0); + jit_leave_jit_abi(j, 0, 0, align); + jit_ret(j); + + void (*f)(void*, jit_word_t, double) = jit_end(j, NULL); + + ASSERT(data[0] == -1.0); + ASSERT(data[1] == 0.0); + ASSERT(data[2] == 0.5); + f(data, 8, 42.5); + ASSERT(data[0] == -1.0); + ASSERT(data[1] == 42.5); + ASSERT(data[2] == 0.5); +} + +int +main (int argc, char *argv[]) +{ + return main_helper(argc, argv, run_test); +} diff --git 
a/deps/lightening/tests/stxr_f.c b/deps/lightening/tests/stxr_f.c new file mode 100644 index 0000000..bf0c476 --- /dev/null +++ b/deps/lightening/tests/stxr_f.c @@ -0,0 +1,33 @@ +#include "test.h" + +static float data[] = { -1.0, 0.0, 0.5 }; + +static void +run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size) +{ + jit_begin(j, arena_base, arena_size); + size_t align = jit_enter_jit_abi(j, 0, 0, 0); + jit_load_args_3(j, jit_operand_gpr (JIT_OPERAND_ABI_POINTER, JIT_R0), + jit_operand_gpr (JIT_OPERAND_ABI_WORD, JIT_R2), + jit_operand_fpr (JIT_OPERAND_ABI_FLOAT, JIT_F0)); + + jit_stxr_f(j, JIT_R0, JIT_R2, JIT_F0); + jit_leave_jit_abi(j, 0, 0, align); + jit_ret(j); + + void (*f)(void*, jit_word_t, float) = jit_end(j, NULL); + + ASSERT(data[0] == -1.0f); + ASSERT(data[1] == 0.0f); + ASSERT(data[2] == 0.5f); + f(data, 4, 42.5f); + ASSERT(data[0] == -1.0f); + ASSERT(data[1] == 42.5f); + ASSERT(data[2] == 0.5f); +} + +int +main (int argc, char *argv[]) +{ + return main_helper(argc, argv, run_test); +} diff --git a/deps/lightening/tests/stxr_i.c b/deps/lightening/tests/stxr_i.c new file mode 100644 index 0000000..8260462 --- /dev/null +++ b/deps/lightening/tests/stxr_i.c @@ -0,0 +1,33 @@ +#include "test.h" + +static uint32_t data[] = { 0x12121212, 0x00000000, 0x34343434 }; + +static void +run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size) +{ + jit_begin(j, arena_base, arena_size); + size_t align = jit_enter_jit_abi(j, 0, 0, 0); + jit_load_args_3(j, jit_operand_gpr (JIT_OPERAND_ABI_POINTER, JIT_R0), + jit_operand_gpr (JIT_OPERAND_ABI_WORD, JIT_R2), + jit_operand_gpr (JIT_OPERAND_ABI_INT32, JIT_R1)); + + jit_stxr_i(j, JIT_R0, JIT_R2, JIT_R1); + jit_leave_jit_abi(j, 0, 0, align); + jit_ret(j); + + void (*f)(void*, jit_word_t, int32_t) = jit_end(j, NULL); + + ASSERT(data[0] == 0x12121212); + ASSERT(data[1] == 0x00); + ASSERT(data[2] == 0x34343434); + f(data, 4, -1); + ASSERT(data[0] == 0x12121212); + ASSERT(data[1] == 0xffffffff); + ASSERT(data[2] == 
0x34343434); +} + +int +main (int argc, char *argv[]) +{ + return main_helper(argc, argv, run_test); +} diff --git a/deps/lightening/tests/stxr_l.c b/deps/lightening/tests/stxr_l.c new file mode 100644 index 0000000..fa6bb1f --- /dev/null +++ b/deps/lightening/tests/stxr_l.c @@ -0,0 +1,35 @@ +#include "test.h" + +static void +run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size) +{ +#if __WORDSIZE > 32 + static uint64_t data[] = { 0x1212121212121212, 0, 0x3434343434343434 }; + + jit_begin(j, arena_base, arena_size); + size_t align = jit_enter_jit_abi(j, 0, 0, 0); + jit_load_args_3(j, jit_operand_gpr (JIT_OPERAND_ABI_POINTER, JIT_R0), + jit_operand_gpr (JIT_OPERAND_ABI_WORD, JIT_R2), + jit_operand_gpr (JIT_OPERAND_ABI_INT64, JIT_R1)); + + jit_stxr_l(j, JIT_R0, JIT_R2, JIT_R1); + jit_leave_jit_abi(j, 0, 0, align); + jit_ret(j); + + void (*f)(void*, jit_word_t, int64_t) = jit_end(j, NULL); + + ASSERT(data[0] == 0x1212121212121212); + ASSERT(data[1] == 0x00); + ASSERT(data[2] == 0x3434343434343434); + f(data, 8, -1); + ASSERT(data[0] == 0x1212121212121212); + ASSERT(data[1] == 0xffffffffffffffff); + ASSERT(data[2] == 0x3434343434343434); +#endif +} + +int +main (int argc, char *argv[]) +{ + return main_helper(argc, argv, run_test); +} diff --git a/deps/lightening/tests/stxr_s.c b/deps/lightening/tests/stxr_s.c new file mode 100644 index 0000000..a93ccd9 --- /dev/null +++ b/deps/lightening/tests/stxr_s.c @@ -0,0 +1,33 @@ +#include "test.h" + +static uint16_t data[] = { 0x1212, 0x0000, 0x3434 }; + +static void +run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size) +{ + jit_begin(j, arena_base, arena_size); + size_t align = jit_enter_jit_abi(j, 0, 0, 0); + jit_load_args_3(j, jit_operand_gpr (JIT_OPERAND_ABI_POINTER, JIT_R0), + jit_operand_gpr (JIT_OPERAND_ABI_WORD, JIT_R2), + jit_operand_gpr (JIT_OPERAND_ABI_INT16, JIT_R1)); + + jit_stxr_s(j, JIT_R0, JIT_R2, JIT_R1); + jit_leave_jit_abi(j, 0, 0, align); + jit_ret(j); + + void (*f)(void*, jit_word_t, 
int16_t) = jit_end(j, NULL); + + ASSERT(data[0] == 0x1212); + ASSERT(data[1] == 0); + ASSERT(data[2] == 0x3434); + f(data, 2, -1); + ASSERT(data[0] == 0x1212); + ASSERT(data[1] == 0xffff); + ASSERT(data[2] == 0x3434); +} + +int +main (int argc, char *argv[]) +{ + return main_helper(argc, argv, run_test); +} diff --git a/deps/lightening/tests/subr.c b/deps/lightening/tests/subr.c new file mode 100644 index 0000000..57cf950 --- /dev/null +++ b/deps/lightening/tests/subr.c @@ -0,0 +1,26 @@ +#include "test.h" + +static void +run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size) +{ + jit_begin(j, arena_base, arena_size); + size_t align = jit_enter_jit_abi(j, 0, 0, 0); + jit_load_args_2(j, jit_operand_gpr (JIT_OPERAND_ABI_WORD, JIT_R0), + jit_operand_gpr (JIT_OPERAND_ABI_WORD, JIT_R1)); + + jit_subr(j, JIT_R0, JIT_R0, JIT_R1); + jit_leave_jit_abi(j, 0, 0, align); + jit_retr(j, JIT_R0); + + size_t size = 0; + void* ret = jit_end(j, &size); + + int (*f)(int, int) = ret; + ASSERT(f(42, 69) == -27); +} + +int +main (int argc, char *argv[]) +{ + return main_helper(argc, argv, run_test); +} diff --git a/deps/lightening/tests/subr_d.c b/deps/lightening/tests/subr_d.c new file mode 100644 index 0000000..bc611c5 --- /dev/null +++ b/deps/lightening/tests/subr_d.c @@ -0,0 +1,27 @@ +#include "test.h" + +static void +run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size) +{ + jit_begin(j, arena_base, arena_size); + size_t align = jit_enter_jit_abi(j, 0, 0, 0); + jit_load_args_2(j, jit_operand_fpr (JIT_OPERAND_ABI_DOUBLE, JIT_F0), + jit_operand_fpr (JIT_OPERAND_ABI_DOUBLE, JIT_F1)); + + jit_subr_d(j, JIT_F0, JIT_F0, JIT_F1); + jit_leave_jit_abi(j, 0, 0, align); + jit_retr_d(j, JIT_F0); + + size_t size = 0; + void* ret = jit_end(j, &size); + + double (*f)(double, double) = ret; + ASSERT(f(42., 69.) 
== -27.); + ASSERT(f(42., 69.5) == -27.5); +} + +int +main (int argc, char *argv[]) +{ + return main_helper(argc, argv, run_test); +} diff --git a/deps/lightening/tests/subr_f.c b/deps/lightening/tests/subr_f.c new file mode 100644 index 0000000..a7befec --- /dev/null +++ b/deps/lightening/tests/subr_f.c @@ -0,0 +1,27 @@ +#include "test.h" + +static void +run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size) +{ + jit_begin(j, arena_base, arena_size); + size_t align = jit_enter_jit_abi(j, 0, 0, 0); + jit_load_args_2(j, jit_operand_fpr (JIT_OPERAND_ABI_FLOAT, JIT_F0), + jit_operand_fpr (JIT_OPERAND_ABI_FLOAT, JIT_F1)); + + jit_subr_f(j, JIT_F0, JIT_F0, JIT_F1); + jit_leave_jit_abi(j, 0, 0, align); + jit_retr_f(j, JIT_F0); + + size_t size = 0; + void* ret = jit_end(j, &size); + + float (*f)(float, float) = ret; + ASSERT(f(42.f, 69.f) == -27.f); + ASSERT(f(42.0f, 69.5f) == -27.5f); +} + +int +main (int argc, char *argv[]) +{ + return main_helper(argc, argv, run_test); +} diff --git a/deps/lightening/tests/subx.c b/deps/lightening/tests/subx.c new file mode 100644 index 0000000..b88bcbd --- /dev/null +++ b/deps/lightening/tests/subx.c @@ -0,0 +1,63 @@ +#include "test.h" + +static void +run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size) +{ + jit_begin(j, arena_base, arena_size); + size_t align = jit_enter_jit_abi(j, 0, 0, 0); + jit_load_args_2(j, jit_operand_gpr (JIT_OPERAND_ABI_WORD, JIT_R0), + jit_operand_gpr (JIT_OPERAND_ABI_WORD, JIT_R1)); + + jit_movi(j, JIT_R2, 0); + jit_subcr(j, JIT_R0, JIT_R0, JIT_R1); + jit_subxi(j, JIT_R2, JIT_R2, 0); + jit_leave_jit_abi(j, 0, 0, align); + jit_retr(j, JIT_R2); + + size_t size = 0; + void* ret = jit_end(j, &size); + + jit_word_t (*f)(jit_word_t, jit_word_t) = ret; + + ASSERT(f(0, 0) == 0); + +#if __WORDSIZE == 32 + /* carry */ + ASSERT(f(0x7fffffff, 0xffffffff) == 0xffffffff); + /* overflow */ + ASSERT(f(0x80000000, 1) == 0); + /* carry */ + ASSERT(f(0x7fffffff, 0x80000000) == 0xffffffff); + /* overflow 
*/ + ASSERT(f(0x80000000, 0x7fffffff) == 0); + /* carry+overflow */ + ASSERT(f(1, 0x80000000) == 0xffffffff); +#else + /* carry */ + ASSERT(f(0x7fffffff, 0xffffffff) == -1); + /* nothing */ + ASSERT(f(0x80000000, 1) == 0); + /* carry */ + ASSERT(f(0x7fffffff, 0x80000000) == -1); + /* nothing */ + ASSERT(f(0x80000000, 0x7fffffff) == 0); + /* carry */ + ASSERT(f(1, 0x80000000) == -1); + /* carry */ + ASSERT(f(0x7fffffffffffffff, 0xffffffffffffffff) == -1); + /* overflow */ + ASSERT(f(0x8000000000000000, 1) == 0); + /* carry */ + ASSERT(f(0x7fffffffffffffff, 0x8000000000000000) == -1); + /* overflow */ + ASSERT(f(0x8000000000000000, 0x7fffffffffffffff) == 0); + /* carry+overflow */ + ASSERT(f(1, 0x8000000000000000) == -1); +#endif +} + +int +main (int argc, char *argv[]) +{ + return main_helper(argc, argv, run_test); +} diff --git a/deps/lightening/tests/swap_atomic.c b/deps/lightening/tests/swap_atomic.c new file mode 100644 index 0000000..fffa05e --- /dev/null +++ b/deps/lightening/tests/swap_atomic.c @@ -0,0 +1,32 @@ +#include "test.h" + +static long data[] = { 0x12121212, 0x00000000, 0x34343434 }; + +static void +run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size) +{ + jit_begin(j, arena_base, arena_size); + size_t align = jit_enter_jit_abi(j, 0, 0, 0); + jit_load_args_2(j, jit_operand_gpr (JIT_OPERAND_ABI_POINTER, JIT_R0), + jit_operand_gpr (JIT_OPERAND_ABI_INT32, JIT_R1)); + + jit_swap_atomic(j, JIT_R0, JIT_R0, JIT_R1); + jit_leave_jit_abi(j, 0, 0, align); + jit_ret(j); + + void (*f)(void*, int32_t) = jit_end(j, NULL); + + ASSERT(data[0] == 0x12121212); + ASSERT(data[1] == 0x00); + ASSERT(data[2] == 0x34343434); + f(&data[1], 0x0f0f0f0f); + ASSERT(data[0] == 0x12121212); + ASSERT(data[1] == 0x0f0f0f0f); + ASSERT(data[2] == 0x34343434); +} + +int +main (int argc, char *argv[]) +{ + return main_helper(argc, argv, run_test); +} diff --git a/deps/lightening/tests/test.h b/deps/lightening/tests/test.h new file mode 100644 index 0000000..c4eff2b --- 
/dev/null
+++ b/deps/lightening/tests/test.h
@@ -0,0 +1,90 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/mman.h>
+
+#include <lightening.h>
+
+/* Print the failed expression with its file and line to stderr, then abort.  */
+#define ASSERT(x) \
+  do { \
+    if (!(x)) { \
+      fprintf(stderr, "%s:%d: assertion failed: " #x "\n", \
+              __FILE__, __LINE__); \
+      abort(); \
+    } \
+  } while (0)
+
+/* Initialise the JIT, map one 4096-byte read/write/execute arena, pass it
+   to RUN_TEST once, then destroy the JIT state and unmap the arena.
+   Returns 0, or 1 if the arena could not be mapped.  */
+static inline int
+main_helper (int argc, char *argv[],
+             void (*run_test)(jit_state_t*, uint8_t*, size_t))
+{
+  ASSERT(init_jit());
+  jit_state_t *j = jit_new_state (NULL, NULL);
+  ASSERT(j);
+
+  const size_t arena_size = 4096;
+  char *arena_base = mmap (NULL, arena_size,
+                           PROT_EXEC | PROT_READ | PROT_WRITE,
+                           MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+
+  if (arena_base == MAP_FAILED)
+    {
+      perror ("allocating JIT code buffer failed");
+      return 1;
+    }
+
+  run_test(j, (uint8_t*)arena_base, arena_size);
+
+  jit_destroy_state(j);
+
+  munmap(arena_base, arena_size);
+
+  return 0;
+}
+
+/* Like main_helper, but RUN_TEST returns a size: a non-zero value is used
+   as the length of a freshly mapped arena and RUN_TEST is called again,
+   while zero ends the loop.  The first arena is 4096 bytes.  (Presumably
+   the non-zero value is the space the test found it needs -- confirm
+   against the individual tests.)  Returns 0, or 1 if an mmap or munmap fails.  */
+static inline int
+main_compiler (int argc, char *argv[],
+               size_t (*run_test)(jit_state_t*, uint8_t*, size_t))
+{
+  ASSERT(init_jit());
+  jit_state_t *j = jit_new_state (NULL, NULL);
+  ASSERT(j);
+
+  size_t arena_size = 4096, prev_arena_size = arena_size;
+  uint8_t *arena_base = NULL;
+  do {
+    if (arena_base) {
+      if (munmap(arena_base, prev_arena_size) == -1) {
+        perror("unmapping arena failed");
+        return 1;
+      }
+    }
+
+    prev_arena_size = arena_size;
+
+    arena_base = mmap (NULL, arena_size,
+                       PROT_EXEC | PROT_READ | PROT_WRITE,
+                       MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+
+    if (arena_base == MAP_FAILED) {
+      perror ("allocating JIT code buffer failed");
+      return 1;
+    }
+  } while ((arena_size = run_test(j, arena_base, arena_size)) != 0);
+
+  jit_destroy_state(j);
+
+  /* The loop exits only after arena_size has been overwritten with 0, so
+     the length of the mapping that is still live is prev_arena_size.  */
+  munmap(arena_base, prev_arena_size);
+
+  return 0;
+}
diff --git a/deps/lightening/tests/truncr_d_i.c b/deps/lightening/tests/truncr_d_i.c
new file mode 100644
index 0000000..b21280f
--- /dev/null
+++ b/deps/lightening/tests/truncr_d_i.c
@@ -0,0 +1,30 @@
+#include "test.h"
+
+static void
+run_test(jit_state_t *j, uint8_t
*arena_base, size_t arena_size) +{ + jit_begin(j, arena_base, arena_size); + size_t align = jit_enter_jit_abi(j, 0, 0, 0); + jit_load_args_1(j, jit_operand_fpr (JIT_OPERAND_ABI_DOUBLE, JIT_F0)); + + jit_truncr_d_i(j, JIT_R0, JIT_F0); + jit_leave_jit_abi(j, 0, 0, align); + jit_retr(j, JIT_R0); + + int (*f)(double) = jit_end(j, NULL); + + ASSERT(f(0.0) == 0); + ASSERT(f(-0.0) == 0); + ASSERT(f(0.5) == 0); + ASSERT(f(-0.5) == 0); + ASSERT(f(1.5) == 1); + ASSERT(f(-1.5) == -1); + ASSERT(f(2.5) == 2); + ASSERT(f(-2.5) == -2); +} + +int +main (int argc, char *argv[]) +{ + return main_helper(argc, argv, run_test); +} diff --git a/deps/lightening/tests/truncr_d_l.c b/deps/lightening/tests/truncr_d_l.c new file mode 100644 index 0000000..189617a --- /dev/null +++ b/deps/lightening/tests/truncr_d_l.c @@ -0,0 +1,32 @@ +#include "test.h" + +static void +run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size) +{ +#if __WORDSIZE > 32 + jit_begin(j, arena_base, arena_size); + size_t align = jit_enter_jit_abi(j, 0, 0, 0); + jit_load_args_1(j, jit_operand_fpr (JIT_OPERAND_ABI_DOUBLE, JIT_F0)); + + jit_truncr_d_l(j, JIT_R0, JIT_F0); + jit_leave_jit_abi(j, 0, 0, align); + jit_retr(j, JIT_R0); + + int64_t (*f)(double) = jit_end(j, NULL); + + ASSERT(f(0.0) == 0); + ASSERT(f(-0.0) == 0); + ASSERT(f(0.5) == 0); + ASSERT(f(-0.5) == 0); + ASSERT(f(1.5) == 1); + ASSERT(f(-1.5) == -1); + ASSERT(f(2.5) == 2); + ASSERT(f(-2.5) == -2); +#endif +} + +int +main (int argc, char *argv[]) +{ + return main_helper(argc, argv, run_test); +} diff --git a/deps/lightening/tests/truncr_f_i.c b/deps/lightening/tests/truncr_f_i.c new file mode 100644 index 0000000..3dbf630 --- /dev/null +++ b/deps/lightening/tests/truncr_f_i.c @@ -0,0 +1,30 @@ +#include "test.h" + +static void +run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size) +{ + jit_begin(j, arena_base, arena_size); + size_t align = jit_enter_jit_abi(j, 0, 0, 0); + jit_load_args_1(j, jit_operand_fpr (JIT_OPERAND_ABI_FLOAT, 
JIT_F0)); + + jit_truncr_f_i(j, JIT_R0, JIT_F0); + jit_leave_jit_abi(j, 0, 0, align); + jit_retr(j, JIT_R0); + + int (*f)(float) = jit_end(j, NULL); + + ASSERT(f(0.0) == 0); + ASSERT(f(-0.0) == 0); + ASSERT(f(0.5) == 0); + ASSERT(f(-0.5) == 0); + ASSERT(f(1.5) == 1); + ASSERT(f(-1.5) == -1); + ASSERT(f(2.5) == 2); + ASSERT(f(-2.5) == -2); +} + +int +main (int argc, char *argv[]) +{ + return main_helper(argc, argv, run_test); +} diff --git a/deps/lightening/tests/truncr_f_l.c b/deps/lightening/tests/truncr_f_l.c new file mode 100644 index 0000000..7369ae3 --- /dev/null +++ b/deps/lightening/tests/truncr_f_l.c @@ -0,0 +1,32 @@ +#include "test.h" + +static void +run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size) +{ +#if __WORDSIZE > 32 + jit_begin(j, arena_base, arena_size); + size_t align = jit_enter_jit_abi(j, 0, 0, 0); + jit_load_args_1(j, jit_operand_fpr (JIT_OPERAND_ABI_FLOAT, JIT_F0)); + + jit_truncr_f_l(j, JIT_R0, JIT_F0); + jit_leave_jit_abi(j, 0, 0, align); + jit_retr(j, JIT_R0); + + int64_t (*f)(float) = jit_end(j, NULL); + + ASSERT(f(0.0) == 0); + ASSERT(f(-0.0) == 0); + ASSERT(f(0.5) == 0); + ASSERT(f(-0.5) == 0); + ASSERT(f(1.5) == 1); + ASSERT(f(-1.5) == -1); + ASSERT(f(2.5) == 2); + ASSERT(f(-2.5) == -2); +#endif +} + +int +main (int argc, char *argv[]) +{ + return main_helper(argc, argv, run_test); +} diff --git a/deps/lightening/tests/xori.c b/deps/lightening/tests/xori.c new file mode 100644 index 0000000..4bb2ad1 --- /dev/null +++ b/deps/lightening/tests/xori.c @@ -0,0 +1,31 @@ +#include "test.h" + +static void +run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size) +{ + jit_begin(j, arena_base, arena_size); + size_t align = jit_enter_jit_abi(j, 0, 0, 0); + jit_load_args_1(j, jit_operand_gpr (JIT_OPERAND_ABI_WORD, JIT_R0)); + + jit_xori(j, JIT_R0, JIT_R0, 1); + jit_leave_jit_abi(j, 0, 0, align); + jit_retr(j, JIT_R0); + + size_t size = 0; + void* ret = jit_end(j, &size); + + jit_word_t (*f)(jit_word_t) = ret; + + 
ASSERT(f(0x7fffffff) == 0x7ffffffe); + ASSERT(f(0x80000000) == 0x80000001); +#if __WORDSIZE == 64 + ASSERT(f(0x7fffffffffffffff) == 0x7ffffffffffffffe); + ASSERT(f(0x8000000000000000) == 0x8000000000000001); +#endif +} + +int +main (int argc, char *argv[]) +{ + return main_helper(argc, argv, run_test); +} diff --git a/deps/lightening/tests/xorr.c b/deps/lightening/tests/xorr.c new file mode 100644 index 0000000..dd5a390 --- /dev/null +++ b/deps/lightening/tests/xorr.c @@ -0,0 +1,48 @@ +#include "test.h" + +static void +run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size) +{ + jit_begin(j, arena_base, arena_size); + size_t align = jit_enter_jit_abi(j, 0, 0, 0); + jit_load_args_2(j, jit_operand_gpr (JIT_OPERAND_ABI_WORD, JIT_R0), + jit_operand_gpr (JIT_OPERAND_ABI_WORD, JIT_R1)); + + jit_xorr(j, JIT_R0, JIT_R0, JIT_R1); + jit_leave_jit_abi(j, 0, 0, align); + jit_retr(j, JIT_R0); + + size_t size = 0; + void* ret = jit_end(j, &size); + + jit_word_t (*f)(jit_word_t, jit_word_t) = ret; + + ASSERT(f(0x7fffffff, 1) == 0x7ffffffe); + ASSERT(f(1, 0x7fffffff) == 0x7ffffffe); + ASSERT(f(0x80000000, 1) == 0x80000001); + ASSERT(f(1, 0x80000000) == 0x80000001); + ASSERT(f(0x7fffffff, 0x80000000) == 0xffffffff); + ASSERT(f(0x80000000, 0x7fffffff) == 0xffffffff); + ASSERT(f(0x7fffffff, 0xffffffff) == 0x80000000); + ASSERT(f(0xffffffff, 0x7fffffff) == 0x80000000); + ASSERT(f(0xffffffff, 0xffffffff) == 0); + ASSERT(f(0x7fffffff, 0) == 0x7fffffff); + ASSERT(f(0, 0x7fffffff) == 0x7fffffff); +#if __WORDSIZE == 64 + ASSERT(f(0x7fffffffffffffff, 1) == 0x7ffffffffffffffe); + ASSERT(f(1, 0x7fffffffffffffff) == 0x7ffffffffffffffe); + ASSERT(f(0x8000000000000000, 1) == 0x8000000000000001); + ASSERT(f(1, 0x8000000000000000) == 0x8000000000000001); + ASSERT(f(0x7fffffffffffffff, 0x8000000000000000) == 0xffffffffffffffff); + ASSERT(f(0x8000000000000000, 0x7fffffffffffffff) == 0xffffffffffffffff); + ASSERT(f(0x7fffffffffffffff, 0xffffffffffffffff) == 0x8000000000000000); + 
ASSERT(f(0xffffffffffffffff, 0x7fffffffffffffff) == 0x8000000000000000); + ASSERT(f(0xffffffffffffffff, 0xffffffffffffffff) == 0); +#endif +} + +int +main (int argc, char *argv[]) +{ + return main_helper(argc, argv, run_test); +} diff --git a/deps/lightening/tests/z_atomic.c b/deps/lightening/tests/z_atomic.c new file mode 100644 index 0000000..8612d15 --- /dev/null +++ b/deps/lightening/tests/z_atomic.c @@ -0,0 +1,88 @@ +#include "test.h" + +#include <threads.h> + +/* note non-atomic counter! */ +size_t num = 0; +long lock_var = 0; +void (*spin_lock)(void); +void (*spin_unlock)(void); + +/* arbitrary number, as long as its large enough to likely allow other threads + * to spawn. */ +#define THREAD_INCREMENTS 1000000 +static int +loop(void *arg) +{ + for (size_t i = 0; i < THREAD_INCREMENTS; ++i) { + (*spin_lock)(); + num++; + (*spin_unlock)(); + } + + return 0; +} + +#define NUM_THREADS 10 +static void +run_loops() +{ + thrd_t threads[NUM_THREADS]; + for (size_t i = 0; i < NUM_THREADS; ++i) + ASSERT(thrd_create(&threads[i], loop, NULL) == thrd_success); + + for (size_t i = 0; i < NUM_THREADS; ++i) + ASSERT(thrd_join(threads[i], NULL) == thrd_success); + + ASSERT(num == NUM_THREADS * THREAD_INCREMENTS); +} + +static size_t +run_test(jit_state_t *_jit, uint8_t *arena_base, size_t arena_size) +{ + jit_begin(_jit, arena_base, arena_size); + + /* based on https://rigtorp.se/spinlock/ */ + spin_lock = jit_address(_jit); + { + size_t frame = jit_enter_jit_abi(_jit, 0, 0, 0); + void *do_exchange = jit_address(_jit); + void *while_load = jit_address(_jit); + /* do { */ + /* while (atomic_load(lock_var)); */ + jit_movi(_jit, JIT_R1, (jit_imm_t)&lock_var); + jit_ldr_atomic(_jit, JIT_R0, JIT_R1); + jit_patch_there(_jit, jit_bnei(_jit, JIT_R0, 0), while_load); + /* } while (atomic_exchange(lock_var, 1)); */ + jit_movi(_jit, JIT_R0, 1); + jit_swap_atomic(_jit, JIT_R0, JIT_R1, JIT_R0); + jit_patch_there(_jit, jit_bnei(_jit, JIT_R0, 0), do_exchange); + jit_leave_jit_abi(_jit, 0, 
0, frame); + jit_ret(_jit); + } + + spin_unlock = jit_address(_jit); + { + size_t frame = jit_enter_jit_abi(_jit, 0, 0, 0); + jit_movi(_jit, JIT_R0, 0); + jit_movi(_jit, JIT_R1, (jit_imm_t)&lock_var); + jit_str_atomic(_jit, JIT_R1, JIT_R0); + jit_leave_jit_abi(_jit, 0, 0, frame); + jit_ret(_jit); + } + + size_t size; + void *p = jit_end(_jit, &size); + + if (p) + run_loops(); + else + return size; + + return 0; +} + +int main(int argc, char *argv[]) +{ + return main_compiler(argc, argv, run_test); +} diff --git a/deps/lightening/tests/z_bp.c b/deps/lightening/tests/z_bp.c new file mode 100644 index 0000000..57f7bfa --- /dev/null +++ b/deps/lightening/tests/z_bp.c @@ -0,0 +1,61 @@ +#include "test.h" + +static size_t +run_test(jit_state_t *_jit, uint8_t *arena_base, size_t arena_size) +{ + int32_t (*function)(int32_t); + + jit_begin(_jit, arena_base, arena_size); + + void *entry = jit_address(_jit); + size_t frame = jit_enter_jit_abi(_jit, 3, 0, 0); + + jit_load_args_1(_jit, jit_operand_gpr(JIT_OPERAND_ABI_INT32, JIT_R0)); + + jit_reloc_t out1 = jit_beqi(_jit, JIT_R0, 0); + jit_movr(_jit, JIT_V0, JIT_R0); + jit_movi(_jit, JIT_R0, 1); + + jit_reloc_t out2 = jit_blei_u(_jit, JIT_V0, 2); + jit_subi(_jit, JIT_V1, JIT_V0, 1); + jit_subi(_jit, JIT_V2, JIT_V0, 2); + + jit_calli_1(_jit, entry, jit_operand_gpr(JIT_OPERAND_ABI_INT32, JIT_V1)); + jit_retval(_jit, JIT_V1); + + jit_calli_1(_jit, entry, jit_operand_gpr(JIT_OPERAND_ABI_INT32, JIT_V2)); + jit_retval(_jit, JIT_R0); + + jit_addr(_jit, JIT_R0, JIT_R0, JIT_V1); + + jit_patch_here(_jit, out1); + jit_patch_here(_jit, out2); + jit_leave_jit_abi(_jit, 3, 0, frame); + jit_retr(_jit, JIT_R0); + + size_t size = 0; + function = jit_end(_jit, &size); + + if (function) { + ASSERT((*function)(1) == 1); + ASSERT((*function)(2) == 1); + ASSERT((*function)(3) == 2); + ASSERT((*function)(4) == 3); + ASSERT((*function)(5) == 5); + ASSERT((*function)(6) == 8); + ASSERT((*function)(7) == 13); + ASSERT((*function)(8) == 21); + 
ASSERT((*function)(9) == 34); + + ASSERT((*function)(32) == 2178309); + } else { + return size; + } + + return 0; +} + +int main(int argc, char *argv[]) +{ + return main_compiler(argc, argv, run_test); +} diff --git a/deps/lightening/tests/z_branch.c b/deps/lightening/tests/z_branch.c new file mode 100644 index 0000000..30f0ea5 --- /dev/null +++ b/deps/lightening/tests/z_branch.c @@ -0,0 +1,584 @@ +#include "test.h" + +#if __WORDSIZE == 64 +# define I7f 0x7fffffffffffffff +# define I80 0x8000000000000000 +# define I81 0x8000000000000001 +# define Iff 0xffffffffffffffff +#else +# define I7f 0x7fffffff +# define I80 0x80000000 +# define I81 0x80000001 +# define Iff 0xffffffff +#endif + +#define NaN (0.0 / 0.0) + +#if defined(DEBUG) +#define dump_args(comp, r0, r1)\ + jit_calli_1(_jit, puts,\ + jit_operand_imm(JIT_OPERAND_ABI_POINTER,\ + (jit_imm_t)#comp " " #r0 " " #r1)); +#else +#define dump_args(comp, r0, r1) +#endif + +#define BOP(N, Ls, Rs, Lu, Ru, R0, R1) \ +{ \ + dump_args(N##r, Ls, Rs); \ + jit_movi(_jit, R0, Ls); \ + jit_movi(_jit, R1, Rs); \ + jit_reloc_t r = jit_b##N##r(_jit, R0, R1); \ + jit_calli_0(_jit, abort); \ + jit_patch_here(_jit, r); \ +} \ +{ \ + dump_args(N##i, Ls, Rs); \ + jit_movi(_jit, R0, Ls); \ + jit_reloc_t r = jit_b##N##i(_jit, R0, Rs); \ + jit_calli_0(_jit, abort); \ + jit_patch_here(_jit, r); \ +} \ +{ \ + dump_args(N##r_u, Lu, Ru); \ + jit_movi(_jit, R0, Lu); \ + jit_movi(_jit, R1, Ru); \ + jit_reloc_t r = jit_b##N##r_u(_jit, R0, R1); \ + jit_calli_0(_jit, abort); \ + jit_patch_here(_jit, r); \ +} \ +{ \ + dump_args(N##i_u, Lu, Ru); \ + jit_movi(_jit, R0, Lu); \ + jit_reloc_t r = jit_b##N##i_u(_jit, R0, Ru); \ + jit_calli_0(_jit, abort); \ + jit_patch_here(_jit, r); \ +} + +#define EB(N, L, R, R0, R1) \ +{ \ + dump_args(N##r, L, R); \ + jit_movi(_jit, R0, L); \ + jit_movi(_jit, R1, R); \ + jit_reloc_t r = jit_b##N##r(_jit, R0, R1); \ + jit_calli_0(_jit, abort); \ + jit_patch_here(_jit, r); \ +} \ +{ \ + dump_args(N##i, L, R); \ + 
jit_movi(_jit, R0, L); \ + jit_reloc_t r = jit_b##N##i(_jit, R0, R); \ + jit_calli_0(_jit, abort); \ + jit_patch_here(_jit, r); \ +} \ + +#define XEB(N, L, R, R0, R1) \ +{ \ + dump_args(N##r, L, R); \ + jit_movi(_jit, R0, L); \ + jit_movi(_jit, R1, R); \ + jit_reloc_t r = jit_b##N##r(_jit, R0, R1); \ + jit_calli_0(_jit, abort); \ + jit_patch_here(_jit, r); \ +} \ +{ \ + dump_args(N##i, L, R); \ + jit_movi(_jit, R0, L); \ + jit_reloc_t r = jit_b##N##i(_jit, R0, R); \ + jit_calli_0(_jit, abort); \ + jit_patch_here(_jit, r); \ +} + +#define XBOP(N, Ls, Rs, Lu, Ru, R0, R1) \ +{ \ + dump_args(N##r, Ls, Rs); \ + jit_movi(_jit, R0, Ls); \ + jit_movi(_jit, R1, Rs); \ + jit_reloc_t r = jit_b##N##r(_jit, R0, R1); \ + jit_calli_0(_jit, abort); \ + jit_patch_here(_jit, r); \ +} \ +{ \ + dump_args(N##i, Ls, Rs); \ + jit_movi(_jit, R0, Ls); \ + jit_reloc_t r = jit_b##N##i(_jit, R0, Rs); \ + jit_calli_0(_jit, abort); \ + jit_patch_here(_jit, r); \ +} \ +{ \ + dump_args(N##r_u, Lu, Ru); \ + jit_movi(_jit, R0, Lu); \ + jit_movi(_jit, R1, Ru); \ + jit_reloc_t r = jit_b##N##r_u(_jit, R0, R1); \ + jit_calli_0(_jit, abort); \ + jit_patch_here(_jit, r); \ +} \ +{ \ + dump_args(N##i_u, Lu, Ru); \ + jit_movi(_jit, R0, Lu); \ + jit_reloc_t r = jit_b##N##i_u(_jit, R0, Ru); \ + jit_calli_0(_jit, abort); \ + jit_patch_here(_jit, r); \ +} + +#define BOPI(N, Ls, Rs, Lu, Ru) \ + BOP(N, Ls, Rs, Lu, Ru, JIT_V0, JIT_V1) \ + BOP(N, Ls, Rs, Lu, Ru, JIT_V0, JIT_V2) \ + BOP(N, Ls, Rs, Lu, Ru, JIT_V0, JIT_R0) \ + BOP(N, Ls, Rs, Lu, Ru, JIT_V0, JIT_R1) \ + BOP(N, Ls, Rs, Lu, Ru, JIT_V0, JIT_R2) \ + BOP(N, Ls, Rs, Lu, Ru, JIT_V1, JIT_V0) \ + BOP(N, Ls, Rs, Lu, Ru, JIT_V1, JIT_V2) \ + BOP(N, Ls, Rs, Lu, Ru, JIT_V1, JIT_R0) \ + BOP(N, Ls, Rs, Lu, Ru, JIT_V1, JIT_R1) \ + BOP(N, Ls, Rs, Lu, Ru, JIT_V1, JIT_R2) \ + BOP(N, Ls, Rs, Lu, Ru, JIT_V2, JIT_V0) \ + BOP(N, Ls, Rs, Lu, Ru, JIT_V2, JIT_V1) \ + BOP(N, Ls, Rs, Lu, Ru, JIT_V2, JIT_R0) \ + BOP(N, Ls, Rs, Lu, Ru, JIT_V2, JIT_R1) \ + BOP(N, Ls, Rs, Lu, Ru, 
JIT_V2, JIT_R2) \ + BOP(N, Ls, Rs, Lu, Ru, JIT_R0, JIT_V0) \ + BOP(N, Ls, Rs, Lu, Ru, JIT_R0, JIT_V1) \ + BOP(N, Ls, Rs, Lu, Ru, JIT_R0, JIT_V2) \ + BOP(N, Ls, Rs, Lu, Ru, JIT_R0, JIT_R1) \ + BOP(N, Ls, Rs, Lu, Ru, JIT_R0, JIT_R2) \ + BOP(N, Ls, Rs, Lu, Ru, JIT_R1, JIT_V0) \ + BOP(N, Ls, Rs, Lu, Ru, JIT_R1, JIT_V1) \ + BOP(N, Ls, Rs, Lu, Ru, JIT_R1, JIT_V2) \ + BOP(N, Ls, Rs, Lu, Ru, JIT_R1, JIT_R0) \ + BOP(N, Ls, Rs, Lu, Ru, JIT_R1, JIT_R2) \ + BOP(N, Ls, Rs, Lu, Ru, JIT_R2, JIT_V0) \ + BOP(N, Ls, Rs, Lu, Ru, JIT_R2, JIT_V1) \ + BOP(N, Ls, Rs, Lu, Ru, JIT_R2, JIT_V2) \ + BOP(N, Ls, Rs, Lu, Ru, JIT_R2, JIT_R0) \ + BOP(N, Ls, Rs, Lu, Ru, JIT_R2, JIT_R1) + +#define EBI(N, L, R) \ + EB(N, L, R, JIT_V0, JIT_V1) \ + EB(N, L, R, JIT_V0, JIT_V2) \ + EB(N, L, R, JIT_V0, JIT_R0) \ + EB(N, L, R, JIT_V0, JIT_R1) \ + EB(N, L, R, JIT_V0, JIT_R2) \ + EB(N, L, R, JIT_V1, JIT_V0) \ + EB(N, L, R, JIT_V1, JIT_V2) \ + EB(N, L, R, JIT_V1, JIT_R0) \ + EB(N, L, R, JIT_V1, JIT_R1) \ + EB(N, L, R, JIT_V1, JIT_R2) \ + EB(N, L, R, JIT_V2, JIT_V0) \ + EB(N, L, R, JIT_V2, JIT_V1) \ + EB(N, L, R, JIT_V2, JIT_R0) \ + EB(N, L, R, JIT_V2, JIT_R1) \ + EB(N, L, R, JIT_V2, JIT_R2) \ + EB(N, L, R, JIT_R0, JIT_V0) \ + EB(N, L, R, JIT_R0, JIT_V1) \ + EB(N, L, R, JIT_R0, JIT_V2) \ + EB(N, L, R, JIT_R0, JIT_R1) \ + EB(N, L, R, JIT_R0, JIT_R2) \ + EB(N, L, R, JIT_R1, JIT_V0) \ + EB(N, L, R, JIT_R1, JIT_V1) \ + EB(N, L, R, JIT_R1, JIT_V2) \ + EB(N, L, R, JIT_R1, JIT_R0) \ + EB(N, L, R, JIT_R1, JIT_R2) \ + EB(N, L, R, JIT_R2, JIT_V0) \ + EB(N, L, R, JIT_R2, JIT_V1) \ + EB(N, L, R, JIT_R2, JIT_V2) \ + EB(N, L, R, JIT_R2, JIT_R0) \ + EB(N, L, R, JIT_R2, JIT_R1) + + +#define XEBI(N, L, R) \ + XEB(N, L, R, JIT_V0, JIT_V1) \ + XEB(N, L, R, JIT_V0, JIT_V2) \ + XEB(N, L, R, JIT_V0, JIT_R0) \ + XEB(N, L, R, JIT_V0, JIT_R1) \ + XEB(N, L, R, JIT_V0, JIT_R2) \ + XEB(N, L, R, JIT_V1, JIT_V0) \ + XEB(N, L, R, JIT_V1, JIT_V2) \ + XEB(N, L, R, JIT_V1, JIT_R0) \ + XEB(N, L, R, JIT_V1, JIT_R1) \ + XEB(N, L, R, JIT_V1, 
JIT_R2) \ + XEB(N, L, R, JIT_V2, JIT_V0) \ + XEB(N, L, R, JIT_V2, JIT_V1) \ + XEB(N, L, R, JIT_V2, JIT_R0) \ + XEB(N, L, R, JIT_V2, JIT_R1) \ + XEB(N, L, R, JIT_V2, JIT_R2) \ + XEB(N, L, R, JIT_R0, JIT_V0) \ + XEB(N, L, R, JIT_R0, JIT_V1) \ + XEB(N, L, R, JIT_R0, JIT_V2) \ + XEB(N, L, R, JIT_R0, JIT_R1) \ + XEB(N, L, R, JIT_R0, JIT_R2) \ + XEB(N, L, R, JIT_R1, JIT_V0) \ + XEB(N, L, R, JIT_R1, JIT_V1) \ + XEB(N, L, R, JIT_R1, JIT_V2) \ + XEB(N, L, R, JIT_R1, JIT_R0) \ + XEB(N, L, R, JIT_R1, JIT_R2) \ + XEB(N, L, R, JIT_R2, JIT_V0) \ + XEB(N, L, R, JIT_R2, JIT_V1) \ + XEB(N, L, R, JIT_R2, JIT_V2) \ + XEB(N, L, R, JIT_R2, JIT_R0) \ + XEB(N, L, R, JIT_R2, JIT_R1) + +#define XBOPI(N, Ls, Rs, Lu, Ru) \ + XBOP(N, Ls, Rs, Lu, Ru, JIT_V0, JIT_V1) \ + XBOP(N, Ls, Rs, Lu, Ru, JIT_V0, JIT_V2) \ + XBOP(N, Ls, Rs, Lu, Ru, JIT_V0, JIT_R0) \ + XBOP(N, Ls, Rs, Lu, Ru, JIT_V0, JIT_R1) \ + XBOP(N, Ls, Rs, Lu, Ru, JIT_V0, JIT_R2) \ + XBOP(N, Ls, Rs, Lu, Ru, JIT_V1, JIT_V0) \ + XBOP(N, Ls, Rs, Lu, Ru, JIT_V1, JIT_V2) \ + XBOP(N, Ls, Rs, Lu, Ru, JIT_V1, JIT_R0) \ + XBOP(N, Ls, Rs, Lu, Ru, JIT_V1, JIT_R1) \ + XBOP(N, Ls, Rs, Lu, Ru, JIT_V1, JIT_R2) \ + XBOP(N, Ls, Rs, Lu, Ru, JIT_V2, JIT_V0) \ + XBOP(N, Ls, Rs, Lu, Ru, JIT_V2, JIT_V1) \ + XBOP(N, Ls, Rs, Lu, Ru, JIT_V2, JIT_R0) \ + XBOP(N, Ls, Rs, Lu, Ru, JIT_V2, JIT_R1) \ + XBOP(N, Ls, Rs, Lu, Ru, JIT_V2, JIT_R2) \ + XBOP(N, Ls, Rs, Lu, Ru, JIT_R0, JIT_V0) \ + XBOP(N, Ls, Rs, Lu, Ru, JIT_R0, JIT_V1) \ + XBOP(N, Ls, Rs, Lu, Ru, JIT_R0, JIT_V2) \ + XBOP(N, Ls, Rs, Lu, Ru, JIT_R0, JIT_R1) \ + XBOP(N, Ls, Rs, Lu, Ru, JIT_R0, JIT_R2) \ + XBOP(N, Ls, Rs, Lu, Ru, JIT_R1, JIT_V0) \ + XBOP(N, Ls, Rs, Lu, Ru, JIT_R1, JIT_V1) \ + XBOP(N, Ls, Rs, Lu, Ru, JIT_R1, JIT_V2) \ + XBOP(N, Ls, Rs, Lu, Ru, JIT_R1, JIT_R0) \ + XBOP(N, Ls, Rs, Lu, Ru, JIT_R1, JIT_R2) \ + XBOP(N, Ls, Rs, Lu, Ru, JIT_R2, JIT_V0) \ + XBOP(N, Ls, Rs, Lu, Ru, JIT_R2, JIT_V1) \ + XBOP(N, Ls, Rs, Lu, Ru, JIT_R2, JIT_V2) \ + XBOP(N, Ls, Rs, Lu, Ru, JIT_R2, JIT_R0) \ + XBOP(N, Ls, Rs, 
Lu, Ru, JIT_R2, JIT_R1) + +#define TBOPF(N, T, L, R) \ +{ \ + dump_args(N##r##_##T, L, R); \ + jit_movi_##T(_jit, JIT_F0, L); \ + jit_movi_##T(_jit, JIT_F1, R); \ + jit_reloc_t r = \ + jit_b##N##r##_##T(_jit, JIT_F0, JIT_F1); \ + jit_calli_0(_jit, abort); \ + jit_patch_here(_jit, r); \ +} \ +{ \ + dump_args(N##i##_##T, L, R); \ + jit_movi_##T(_jit, JIT_F0, L); \ + jit_movi_##T(_jit, JIT_F1, R); \ + jit_reloc_t r = \ + jit_b##N##r##_##T(_jit, JIT_F0, JIT_F1); \ + jit_calli_0(_jit, abort); \ + jit_patch_here(_jit, r); \ +} \ +{ \ + dump_args(N##r##_##T, L, NaN); \ + jit_movi_##T(_jit, JIT_F0, L); \ + jit_movi_##T(_jit, JIT_F1, NaN); \ + jit_reloc_t err = \ + jit_b##N##r##_##T(_jit, JIT_F0, JIT_F1); \ + jit_reloc_t ok = jit_jmp(_jit); \ + jit_patch_here(_jit, err); \ + jit_calli_0(_jit, abort); \ + jit_patch_here(_jit, ok); \ +} \ +{ \ + dump_args(N##i##_##T, L, NaN); \ + jit_movi_##T(_jit, JIT_F0, L); \ + jit_movi_##T(_jit, JIT_F1, NaN); \ + jit_reloc_t err = \ + jit_b##N##r##_##T(_jit, JIT_F0, JIT_F1); \ + jit_reloc_t ok = jit_jmp(_jit); \ + jit_patch_here(_jit, err); \ + jit_calli_0(_jit, abort); \ + jit_patch_here(_jit, ok); \ +} + +#define BOPF(N, L, R) \ + TBOPF(N, f, L, R) \ + TBOPF(N, d, L, R) + +#define TUBOPF(N, T, L, R) \ +{ \ + dump_args(N##r##_##T, L, R); \ + jit_movi_##T(_jit, JIT_F0, L); \ + jit_movi_##T(_jit, JIT_F1, R); \ + jit_reloc_t r = \ + jit_b##N##r##_##T(_jit, JIT_F0, JIT_F1); \ + jit_calli_0(_jit, abort); \ + jit_patch_here(_jit, r); \ +} \ +{ \ + dump_args(N##i##_##T, L, R); \ + jit_movi_##T(_jit, JIT_F0, L); \ + jit_movi_##T(_jit, JIT_F1, R); \ + jit_reloc_t r = \ + jit_b##N##r##_##T(_jit, JIT_F0, JIT_F1); \ + jit_calli_0(_jit, abort); \ + jit_patch_here(_jit, r); \ +} \ +{ \ + dump_args(N##r##_##T, L, NaN); \ + jit_movi_##T(_jit, JIT_F0, L); \ + jit_movi_##T(_jit, JIT_F1, NaN); \ + jit_reloc_t r = \ + jit_b##N##r##_##T(_jit, JIT_F0, JIT_F1); \ + jit_calli_0(_jit, abort); \ + jit_patch_here(_jit, r); \ +} \ +{ \ + dump_args(N##i##_##T, L, 
NaN); \ + jit_movi_##T(_jit, JIT_F0, L); \ + jit_movi_##T(_jit, JIT_F1, NaN); \ + jit_reloc_t r = \ + jit_b##N##r##_##T(_jit, JIT_F0, JIT_F1); \ + jit_calli_0(_jit, abort); \ + jit_patch_here(_jit, r); \ +} + +#define UBOPF(N, L, R) \ + TUBOPF(N, f, L, R) \ + TUBOPF(N, d, L, R) + +#define ARGB(N, L, R) \ +{ \ + dump_args(N##r, L, R); \ + jit_movi(_jit, JIT_R0, L); \ + jit_movi(_jit, JIT_R1, R); \ + jit_reloc_t r = \ + jit_b##N##r(_jit, JIT_R0, JIT_R1); \ + jit_calli_0(_jit, abort); \ + jit_patch_here(_jit, r); \ +} \ +{ \ + dump_args(N##i, L, R); \ + jit_movi(_jit, JIT_R0, L); \ + jit_reloc_t r = jit_b##N##i(_jit, JIT_R0, R); \ + jit_calli_0(_jit, abort); \ + jit_patch_here(_jit, r); \ +} \ +{ \ + dump_args(N##r_u, R, L); \ + jit_movi(_jit, JIT_R0, R); \ + jit_movi(_jit, JIT_R1, L); \ + jit_reloc_t r = \ + jit_b##N##r_u(_jit, JIT_R0, JIT_R1);\ + jit_calli_0(_jit, abort); \ + jit_patch_here(_jit, r); \ +} \ +{ \ + dump_args(N##i_u, R, L); \ + jit_movi(_jit, JIT_R0, R); \ + jit_reloc_t r = jit_b##N##i_u(_jit, JIT_R0, L); \ + jit_calli_0(_jit, abort); \ + jit_patch_here(_jit, r); \ +} + +#define ARGBS() \ + ARGB(lt, -1, 1) \ + ARGB(le, -1, -1) \ + ARGB(ge, -1, -1) \ + ARGB(gt, 1, -1) \ + +#define OVFGB(N, L, R, U, D) \ +{ \ + dump_args(N##r, L, ); \ + jit_movi(_jit, JIT_R0, L); \ + jit_movi(_jit, JIT_R1, 1); \ + jit_reloc_t r = \ + jit_b##N##r(_jit, JIT_R0, JIT_R1); \ + jit_calli_0(_jit, abort); \ + jit_patch_here(_jit, r); \ +} \ +{ \ + dump_args(N##r_u, R, ); \ + jit_movi(_jit, JIT_R0, R); \ + jit_movi(_jit, JIT_R1, 1); \ + jit_reloc_t r = \ + jit_b##N##r_u(_jit, JIT_R0, JIT_R1); \ + jit_calli_0(_jit, abort); \ + jit_patch_here(_jit, r); \ +} \ +{ \ + dump_args(N##i, U, ); \ + jit_movi(_jit, JIT_R0, U); \ + jit_reloc_t r = \ + jit_b##N##i(_jit, JIT_R0, 1); \ + jit_calli_0(_jit, abort); \ + jit_patch_here(_jit, r); \ +} \ +{ \ + dump_args(N##i_u, D, ); \ + jit_movi(_jit, JIT_R0, D); \ + jit_reloc_t r = \ + jit_b##N##i_u(_jit, JIT_R0, 1); \ + jit_calli_0(_jit, abort); 
\ + jit_patch_here(_jit, r); \ +} + +#define OVFGBS() \ + OVFGB(oadd, I7f, Iff, I7f, Iff) \ + OVFGB(xadd, I80, I7f, I80, I7f) \ + OVFGB(osub, I80, 0x0, I80, 0x0) \ + OVFGB(xsub, I81, I80, I81, I80) + +#define MGB(N, L, R) \ +{ \ + dump_args(N##r, L, R); \ + jit_movi(_jit, JIT_R0, L); \ + jit_movi(_jit, JIT_R1, R); \ + jit_reloc_t r = \ + jit_b##N##r(_jit, JIT_R0, JIT_R1); \ + jit_calli_0(_jit, abort); \ + jit_patch_here(_jit, r); \ +} \ +{ \ + dump_args(N##i, L, R); \ + jit_movi(_jit, JIT_R0, L); \ + jit_reloc_t r = jit_b##N##i(_jit, JIT_R0, R); \ + jit_calli_0(_jit, abort); \ + jit_patch_here(_jit, r); \ +} + +#define MBGS() \ + MGB(ms, 1, 3) \ + MGB(mc, 1, 2) \ + MGB(ne, -3, 3) \ + MGB(eq, 3, 3) + +#define ARFGB(N, L, R) \ +{ \ + dump_args(N##r_f, L, R); \ + jit_movi_f(_jit, JIT_F0, L); \ + jit_movi_f(_jit, JIT_F1, R); \ + jit_reloc_t r = \ + jit_b##N##r_f(_jit, JIT_F0, JIT_F1); \ + jit_calli_0(_jit, abort); \ + jit_patch_here(_jit, r); \ +} \ +{ \ + dump_args(N##i_f, L, R); \ + jit_movi_f(_jit, JIT_F0, L); \ + jit_movi_f(_jit, JIT_F1, R); \ + jit_reloc_t r = \ + jit_b##N##r_f(_jit, JIT_F0, JIT_F1); \ + jit_calli_0(_jit, abort); \ + jit_patch_here(_jit, r); \ +} + +#define ARFGBS() \ + ARFGB(lt, 1, 2) \ + ARFGB(le, -1, -1) \ + ARFGB(ge, -3, -3) \ + ARFGB(gt, 2, 1) \ + ARFGB(eq, -2, -2) \ + ARFGB(ne, 0, 2) + +static size_t +run_test(jit_state_t *_jit, uint8_t *arena_base, size_t arena_size) +{ + void (*function)(); + jit_begin(_jit, arena_base, arena_size); + size_t frame = jit_enter_jit_abi(_jit, 3, 0, 0); + + ARGBS(); + MBGS(); + OVFGBS(); + ARFGBS(); + + BOPI(lt, -1, 1, 1, -1) + BOPI(le, -1, -1, 1, 1) + EBI(eq, 32, 32) + BOPI(ge, -2, -2, 2, 2) + BOPI(gt, 2, -2, -2, 2) + EBI(ne, 3, -3) + XEBI(ms, 1, 3) + XEBI(mc, 1, 2) + XBOPI(oadd, I7f, 1, Iff, 1) + XBOPI(xadd, I80, 1, I7f, 1) + XBOPI(osub, I80, 1, 0, 1) + XBOPI(xsub, I81, 1, I80, 1) + BOPF(lt, 1, 2) + BOPF(le, 2, 2) + BOPF(eq, 3, 3) + BOPF(ge, 3, 3) + BOPF(gt, 4, 3) + UBOPF(ne, 4, 3) + UBOPF(unlt, 1, 2) + 
UBOPF(unle, 2, 2) + UBOPF(uneq, 3, 3) + UBOPF(unge, 3, 3) + UBOPF(ungt, 4, 3) + BOPF(ltgt, 4, 3) + + { + dump_args(ordr, 5, 5); + jit_movi_f(_jit, JIT_F0, 5); + jit_movi_f(_jit, JIT_F1, 5); + jit_reloc_t r = jit_bordr_f(_jit, JIT_F0, JIT_F1); + jit_calli_0(_jit, abort); + jit_patch_here(_jit, r); + } + { + dump_args(ordr, 5, 1); + jit_movi_f(_jit, JIT_F0, 5); + jit_movi_f(_jit, JIT_F1, 1); + jit_reloc_t r = jit_bordr_f(_jit, JIT_F0, JIT_F1); + jit_calli_0(_jit, abort); + jit_patch_here(_jit, r); + } + { + dump_args(ordr, 5, NaN); + jit_movi_f(_jit, JIT_F0, 5); + jit_movi_f(_jit, JIT_F1, NaN); + jit_reloc_t err = jit_bordr_f(_jit, JIT_F0, JIT_F1); + jit_reloc_t ok = jit_jmp(_jit); + jit_patch_here(_jit, err); + jit_calli_0(_jit, abort); + jit_patch_here(_jit, ok); + } + { + dump_args(unordr, 5, 5); + jit_movi_f(_jit, JIT_F0, 5); + jit_movi_f(_jit, JIT_F1, 5); + jit_reloc_t err = jit_bunordr_f(_jit, JIT_F0, JIT_F1); + jit_reloc_t ok = jit_jmp(_jit); + jit_patch_here(_jit, err); + jit_calli_0(_jit, abort); + jit_patch_here(_jit, ok); + } + { + dump_args(unordr, 5, 1); + jit_movi_f(_jit, JIT_F0, 5); + jit_movi_f(_jit, JIT_F1, 1); + jit_reloc_t err = jit_bunordr_f(_jit, JIT_F0, JIT_F1); + jit_reloc_t ok = jit_jmp(_jit); + jit_patch_here(_jit, err); + jit_calli_0(_jit, abort); + jit_patch_here(_jit, ok); + } + { + dump_args(unordr, 5, NaN); + jit_movi_f(_jit, JIT_F0, 5); + jit_movi_f(_jit, JIT_F1, NaN); + jit_reloc_t r = jit_bunordr_f(_jit, JIT_F0, JIT_F1); + jit_calli_0(_jit, abort); + jit_patch_here(_jit, r); + } + + jit_leave_jit_abi(_jit, 3, 0, frame); + jit_ret(_jit); + + size_t size; + function = jit_end(_jit, &size); + + if (function) + (*function)(); + else + return size; + + return 0; +} + +int main(int argc, char *argv[]) +{ + return main_compiler(argc, argv, run_test); +} diff --git a/deps/lightening/tests/z_call.c b/deps/lightening/tests/z_call.c new file mode 100644 index 0000000..be1c072 --- /dev/null +++ b/deps/lightening/tests/z_call.c @@ -0,0 +1,307 @@ 
+#include "test.h" + +#define operand_c JIT_OPERAND_ABI_INT8 +#define operand_s JIT_OPERAND_ABI_INT16 +#define operand_i JIT_OPERAND_ABI_INT32 +#define operand_uc JIT_OPERAND_ABI_UINT8 +#define operand_us JIT_OPERAND_ABI_UINT16 +#define operand_ui JIT_OPERAND_ABI_UINT32 +#define operand_l JIT_OPERAND_ABI_INT64 +#define operand_f JIT_OPERAND_ABI_FLOAT +#define operand_d JIT_OPERAND_ABI_DOUBLE + +#define def_wi(i) \ + void *_w##i = jit_address(_jit); \ +{ \ + size_t frame = jit_enter_jit_abi(_jit, 0, 0, 0); \ + jit_load_args_1(_jit, jit_operand_gpr(operand##i, JIT_R0)); \ + jit_leave_jit_abi(_jit, 0, 0, frame); \ + jit_retr(_jit, JIT_R0); \ +} + +#define def_wf(f) \ + void *_w##f = jit_address(_jit); \ +{ \ + size_t frame = jit_enter_jit_abi(_jit, 0, 0, 0); \ + jit_load_args_1(_jit, jit_operand_fpr(operand##f, JIT_F0)); \ + jit_truncr##f(_jit, JIT_R0, JIT_F0); \ + jit_leave_jit_abi(_jit, 0, 0, frame); \ + jit_retr(_jit, JIT_R0); \ +} + +#define def_fi(f, i) \ + void *f##i = jit_address(_jit); \ +{ \ + size_t frame = jit_enter_jit_abi(_jit, 0, 0, 0); \ + jit_load_args_1(_jit, jit_operand_gpr(operand##i, JIT_R0)); \ + jit_extr##f(_jit, JIT_F0, JIT_R0); \ + jit_leave_jit_abi(_jit, 0, 0, frame); \ + jit_retr##f(_jit, JIT_F0); \ +} + +#define def_f(f) \ + void *f##f = jit_address(_jit); \ +{ \ + size_t frame = jit_enter_jit_abi(_jit, 0, 0, 0); \ + jit_load_args_1(_jit, jit_operand_fpr(operand##f, JIT_F0)); \ + jit_leave_jit_abi(_jit, 0, 0, frame); \ + jit_retr##f(_jit, JIT_F0); \ +} + +#define def_ff(f, g) \ + void *f##g = jit_address(_jit); \ +{ \ + size_t frame = jit_enter_jit_abi(_jit, 0, 0, 0); \ + jit_load_args_1(_jit, jit_operand_fpr(operand##f, JIT_F0)); \ + jit_extr##f##g(_jit, JIT_F0, JIT_F0); \ + jit_leave_jit_abi(_jit, 0, 0, frame); \ + jit_retr##g(_jit, JIT_F0); \ +} + +#if defined(DEBUG) +#define dump_args(n, f, i, a, r) \ + jit_calli_1(_jit, puts, \ + jit_operand_imm(JIT_OPERAND_ABI_POINTER, \ + (jit_imm_t)#n " " #f " " #i " " #a " " #r)) +#else +#define 
dump_args(n, f, i, a, r) +#endif + +#define _call_w(n, i, a, r) \ +{ \ + dump_args(n, , i, a, r); \ + jit_calli_1(_jit, _w##i, jit_operand_imm(operand##i, a)); \ + jit_retval(_jit, JIT_R0); \ + jit_extr##i(_jit, JIT_R0, JIT_R0); \ + jit_reloc_t ok = jit_beqi(_jit, JIT_R0, r); \ + jit_calli_0(_jit, abort); \ + jit_patch_here(_jit, ok); \ +} +#define call_w(n, i, a, r) _call_w(n, i, a, r) + +#define _call_wf(n, f, a, r) \ +{ \ + dump_args(n, f, , a, r); \ + jit_movi##f(_jit, JIT_F0, (long long)a); \ + jit_calli_1(_jit, _w##f, jit_operand_fpr(operand##f, JIT_F0)); \ + jit_retval(_jit, JIT_R0); \ + jit_extr##f(_jit, JIT_F0, JIT_R0); \ + jit_movi##f(_jit, JIT_F1, r); \ + jit_reloc_t ok = jit_beqr##f(_jit, JIT_F0, JIT_F1); \ + jit_calli_0(_jit, abort); \ + jit_patch_here(_jit, ok); \ +} +#define call_wf(n, f, a, r) _call_wf(n, f, a, r) + +#define _call_fi(n, f, i, a, r) \ +{ \ + dump_args(n, f, i, a, r); \ + jit_calli_1(_jit, f##i, jit_operand_imm(operand##i, a)); \ + jit_retval##f(_jit, JIT_F0); \ + jit_movi##f(_jit, JIT_F1, r); \ + jit_reloc_t ok = jit_beqr##f(_jit, JIT_F0, JIT_F1); \ + jit_calli_0(_jit, abort); \ + jit_patch_here(_jit, ok); \ +} +#define call_fi(n, f, i, a, r) _call_fi(n, f, i, a, r) + +#define _call_f(n, f, a, r) \ +{ \ + dump_args(n, f, , a, r); \ + jit_movi##f(_jit, JIT_F0, a); \ + jit_calli_1(_jit, f##f, jit_operand_fpr(operand##f, JIT_F0)); \ + jit_retval##f(_jit, JIT_F0); \ + jit_movi##f(_jit, JIT_F1, r); \ + jit_reloc_t ok = jit_beqr##f(_jit, JIT_F0, JIT_F1); \ + jit_calli_0(_jit, abort); \ + jit_patch_here(_jit, ok); \ +} +#define call_f(n, f, a, r) _call_f(n, f, a, r) + +#define _call_ff(n, f, g, a, r) \ +{ \ + dump_args(n, f, g, a, r); \ + jit_movi##f(_jit, JIT_F0, a); \ + jit_calli_1(_jit, f##g, jit_operand_fpr(operand##f, JIT_F0)); \ + jit_retval##g(_jit, JIT_F0); \ + jit_movi##g(_jit, JIT_F1, r); \ + jit_reloc_t ok = jit_beqr##g(_jit, JIT_F0, JIT_F1); \ + jit_calli_0(_jit, abort); \ + jit_patch_here(_jit, ok); \ +} +#define call_ff(n, f, 
g, a, r) _call_ff(n, f, g, a, r) + +#define c7f (int8_t)0x7f +#define c80 (int8_t)0x80 +#define c81 (int8_t)0x81 +#define cff (int8_t)0xff +#define s7f (int16_t)0x7fff +#define s80 (int16_t)0x8000 +#define s81 (int16_t)0x8001 +#define i7f (int32_t)0x7fffffff +#define i80 (int32_t)0x80000000 +#define i81 (int32_t)0x80000001 +#define iff (int32_t)0xffffffff +#define l7f (int64_t)0x7fffffffffffffff +#define l80 (int64_t)0x8000000000000000 +#define l81 (int64_t)0x8000000000000001 + +#define uc7f (uint8_t)0x7f +#define uc80 (uint8_t)0x80 +#define uc81 (uint8_t)0x81 +#define ucff (uint8_t)0xff +#define us7f (uint16_t)0x7fff +#define us80 (uint16_t)0x8000 +#define us81 (uint16_t)0x8001 +#define ui7f (uint32_t)0x7fffffff +#define ui80 (uint32_t)0x80000000 +#define ui81 (uint32_t)0x80000001 +#define uiff (uint32_t)0xffffffff +#define ul7f (uint64_t)0x7fffffffffffffff +#define ul80 (uint64_t)0x8000000000000000 +#define ul81 (uint64_t)0x8000000000000001 + +#define f7f 127.0 +#define f80 -128.0 +#define f81 -127.0 +#define uf80 128.0 +#define uf81 127.0 + +#if __WORDSIZE == 32 +# define wc80 (long)0xffffff80 +# define wc81 (long)0xffffff81 +# define ws80 (long)0xffff8000 +# define ws81 (long)0xffff8001 +#else +# define wc80 (long)0xffffffffffffff80 +# define wc81 (long)0xffffffffffffff81 +# define ws80 (long)0xffffffffffff8000 +# define ws81 (long)0xffffffffffff8001 +# define wi80 (long)0xffffffff80000000 +# define wi81 (long)0xffffffff80000001 +#endif + +static size_t +run_test(jit_state_t *_jit, uint8_t *arena_base, size_t arena_size) +{ + void (*function)(); + jit_begin(_jit, arena_base, arena_size); + jit_reloc_t main = jit_jmp(_jit); + + def_wi(_c) + def_wi(_uc) + def_wi(_s) + def_wi(_us) +#if __WORDSIZE == 64 + def_wi(_i) + def_wi(_ui) +#endif + def_wf(_f) + def_wf(_d) + def_fi(_f, _c) + def_fi(_f, _uc) + def_fi(_f, _s) + def_fi(_f, _us) + def_fi(_f, _i) +#if __WORDSIZE == 64 + def_fi(_f, _ui) + def_fi(_f, _l) +#endif + def_fi(_d, _c) + def_fi(_d, _uc) + def_fi(_d, _s) + 
def_fi(_d, _us) + def_fi(_d, _i) +#if __WORDSIZE == 64 + def_fi(_d, _ui) + def_fi(_d, _l) +#endif + def_f(_f) + def_f(_d) + def_ff(_f, _d) + def_ff(_d, _f) + + jit_patch_here(_jit, main); + size_t frame = jit_enter_jit_abi(_jit, 0, 0, 0); + + call_w(__LINE__, _c, c7f, c7f) + call_w(__LINE__, _c, c80, wc80) + call_w(__LINE__, _c, c81, wc81) + call_w(__LINE__, _uc, uc7f, c7f) + call_w(__LINE__, _uc, uc80, uc80) + call_w(__LINE__, _uc, uc81, uc81) + call_w(__LINE__, _s, s7f, s7f) + call_w(__LINE__, _s, s80, ws80) + call_w(__LINE__, _s, s81, ws81) + call_w(__LINE__, _us, us7f, us7f) + call_w(__LINE__, _us, us80, us80) + call_w(__LINE__, _us, us81, us81) +#if __WORDSIZE == 64 + call_w(__LINE__, _i, i7f, i7f) + call_w(__LINE__, _i, i80, wi80) + call_w(__LINE__, _i, i81, wi81) + call_w(__LINE__, _ui, ui7f, ui7f) + call_w(__LINE__, _ui, ui80, ui80) + call_w(__LINE__, _ui, ui81, ui81) +#endif + call_wf(__LINE__, _f, c7f, f7f) + call_wf(__LINE__, _f, wc80, f80) + call_wf(__LINE__, _f, wc81, f81) + call_wf(__LINE__, _d, c7f, f7f) + call_wf(__LINE__, _d, wc80, f80) + call_wf(__LINE__, _d, wc81, f81) + call_fi(__LINE__, _f, _c, c7f, f7f) + call_fi(__LINE__, _f, _c, c80, f80) + call_fi(__LINE__, _f, _uc, uc7f, f7f) + call_fi(__LINE__, _f, _uc, uc80, uf80) + call_fi(__LINE__, _f, _s, c7f, f7f) + call_fi(__LINE__, _f, _s, uc80, uf80) + call_fi(__LINE__, _f, _us, uc7f, f7f) + call_fi(__LINE__, _f, _us, uc80, uf80) + call_fi(__LINE__, _f, _i, c7f, f7f) + call_fi(__LINE__, _f, _i, uc80, uf80) +#if __WORDSIZE == 64 + call_fi(__LINE__, _f, _ui, uc7f, f7f) + call_fi(__LINE__, _f, _ui, uc80, uf80) + call_fi(__LINE__, _f, _l, c7f, f7f) + call_fi(__LINE__, _f, _l, uc80, uf80) +#endif + call_fi(__LINE__, _d, _c, c7f, f7f) + call_fi(__LINE__, _d, _c, c80, f80) + call_fi(__LINE__, _d, _uc, uc7f, f7f) + call_fi(__LINE__, _d, _uc, uc80, uf80) + call_fi(__LINE__, _d, _s, c7f, f7f) + call_fi(__LINE__, _d, _s, uc80, uf80) + call_fi(__LINE__, _d, _us, uc7f, f7f) + call_fi(__LINE__, _d, _us, uc80, 
uf80) + call_fi(__LINE__, _d, _i, c7f, f7f) + call_fi(__LINE__, _d, _i, uc80, uf80) +#if __WORDSIZE == 64 + call_fi(__LINE__, _d, _ui, uc7f, f7f) + call_fi(__LINE__, _d, _ui, uc80, uf80) + call_fi(__LINE__, _d, _l, c7f, f7f) + call_fi(__LINE__, _d, _l, uc80, uf80) +#endif + call_f(__LINE__, _f, f7f, f7f) + call_f(__LINE__, _d, f7f, f7f) + call_ff(__LINE__, _f, _d, f80, f80) + call_ff(__LINE__, _d, _f, f81, f81) + + jit_leave_jit_abi(_jit, 0, 0, frame); + jit_ret(_jit); + + size_t size = 0; + function = jit_end(_jit, &size); + + if (function) + (*function)(); + else + return size; + + return 0; +} + +int main(int argc, char *argv[]) +{ + return main_compiler(argc, argv, run_test); +} diff --git a/deps/lightening/tests/z_ccall.c b/deps/lightening/tests/z_ccall.c new file mode 100644 index 0000000..ef2b17e --- /dev/null +++ b/deps/lightening/tests/z_ccall.c @@ -0,0 +1,1000 @@ +#include "test.h" + +#define _QUOTE(x) #x +#define QUOTE(x) _QUOTE(x) + +#if defined(DEBUG) +#define dump_args(x) puts(x) +#else +#define dump_args(x) +#endif + +#define _w0 0 +#define _w1 1 +#define _w2 (_w1-2) +#define _w3 (_w2-3) +#define _w4 (_w3-4) +#define _w5 (_w4-5) +#define _w6 (_w5-6) +#define _w7 (_w6-7) +#define _w8 (_w7-8) +#define _w9 (_w8-9) +#define _w10 (_w9-10) +#define _w11 (_w10-11) +#define _w12 (_w11-12) +#define _w13 (_w12-13) +#define _w14 (_w13-14) +#define _w15 (_w14-15) +#define _c0 _w0 +#define _c1 _w1 +#define _c2 _w2 +#define _c3 _w3 +#define _c4 _w4 +#define _c5 _w5 +#define _c6 _w6 +#define _c7 _w7 +#define _c8 _w8 +#define _c9 _w9 +#define _c10 _w10 +#define _c11 _w11 +#define _c12 _w12 +#define _c13 _w13 +#define _c14 _w14 +#define _c15 _w15 +#define _uc0 (_w0&0xff) +#define _uc1 (_w1&0xff) +#define _uc2 (_w2&0xff) +#define _uc3 (_w3&0xff) +#define _uc4 (_w4&0xff) +#define _uc5 (_w5&0xff) +#define _uc6 (_w6&0xff) +#define _uc7 (_w7&0xff) +#define _uc8 (_w8&0xff) +#define _uc9 (_w9&0xff) +#define _uc10 (_w10&0xff) +#define _uc11 (_w11&0xff) +#define _uc12 
(_w12&0xff) +#define _uc13 (_w13&0xff) +#define _uc14 (_w14&0xff) +#define _uc15 (_w15&0xff) +#define _s0 _w0 +#define _s1 _w1 +#define _s2 _w2 +#define _s3 _w3 +#define _s4 _w4 +#define _s5 _w5 +#define _s6 _w6 +#define _s7 _w7 +#define _s8 _w8 +#define _s9 _w9 +#define _s10 _w10 +#define _s11 _w11 +#define _s12 _w12 +#define _s13 _w13 +#define _s14 _w14 +#define _s15 _w15 +#define _us0 (_w0&0xffff) +#define _us1 (_w1&0xffff) +#define _us2 (_w2&0xffff) +#define _us3 (_w3&0xffff) +#define _us4 (_w4&0xffff) +#define _us5 (_w5&0xffff) +#define _us6 (_w6&0xffff) +#define _us7 (_w7&0xffff) +#define _us8 (_w8&0xffff) +#define _us9 (_w9&0xffff) +#define _us10 (_w10&0xffff) +#define _us11 (_w11&0xffff) +#define _us12 (_w12&0xffff) +#define _us13 (_w13&0xffff) +#define _us14 (_w14&0xffff) +#define _us15 (_w15&0xffff) +#define _i0 _w0 +#define _i1 _w1 +#define _i2 _w2 +#define _i3 _w3 +#define _i4 _w4 +#define _i5 _w5 +#define _i6 _w6 +#define _i7 _w7 +#define _i8 _w8 +#define _i9 _w9 +#define _i10 _w10 +#define _i11 _w11 +#define _i12 _w12 +#define _i13 _w13 +#define _i14 _w14 +#define _i15 _w15 +#if __WORDSIZE == 64 +# define _ui0 (_w0&0xffffffff) +# define _ui1 (_w1&0xffffffff) +# define _ui2 (_w2&0xffffffff) +# define _ui3 (_w3&0xffffffff) +# define _ui4 (_w4&0xffffffff) +# define _ui5 (_w5&0xffffffff) +# define _ui6 (_w6&0xffffffff) +# define _ui7 (_w7&0xffffffff) +# define _ui8 (_w8&0xffffffff) +# define _ui9 (_w9&0xffffffff) +# define _ui10 (_w10&0xffffffff) +# define _ui11 (_w11&0xffffffff) +# define _ui12 (_w12&0xffffffff) +# define _ui13 (_w13&0xffffffff) +# define _ui14 (_w14&0xffffffff) +# define _ui15 (_w15&0xffffffff) +# define _l0 _w0 +# define _l1 _w1 +# define _l2 _w2 +# define _l3 _w3 +# define _l4 _w4 +# define _l5 _w5 +# define _l6 _w6 +# define _l7 _w7 +# define _l8 _w8 +# define _l9 _w9 +# define _l10 _w10 +# define _l11 _w11 +# define _l12 _w12 +# define _l13 _w13 +# define _l14 _w14 +# define _l15 _w15 +#endif + +/* + * Types + */ +typedef signed 
char _c; +typedef unsigned char _uc; +typedef signed short _s; +typedef unsigned short _us; +typedef signed int _i; +#if __WORDSIZE == 64 +typedef unsigned int _ui; +typedef jit_word_t _l; +#endif +typedef float _f; +typedef double _d; + +#define prt0(T) T C##T##0(void); +#define prt1(T) prt0(T) \ + T C##T##1(T); +#define prt2(T) prt1(T) \ + T C##T##2(T,T); +#define prt3(T) prt2(T) \ + T C##T##3(T,T,T); +#define prt4(T) prt3(T) \ + T C##T##4(T,T,T,T); +#define prt5(T) prt4(T) \ + T C##T##5(T,T,T,T,T); +#define prt6(T) prt5(T) \ + T C##T##6(T,T,T,T,T,T); +#define prt7(T) prt6(T) \ + T C##T##7(T,T,T,T,T,T,T); +#define prt8(T) prt7(T) \ + T C##T##8(T,T,T,T,T,T,T,T); +#define prt9(T) prt8(T) \ + T C##T##9(T,T,T,T,T,T,T,T,T); +#define prt10(T) prt9(T) \ + T C##T##10(T,T,T,T,T,T,T,T,T,T); +#define prt11(T) prt10(T) \ + T C##T##11(T,T,T,T,T,T,T,T,T,T,T); +#define prt12(T) prt11(T) \ + T C##T##12(T,T,T,T,T,T,T,T,T,T,T,T); +#define prt13(T) prt12(T) \ + T C##T##13(T,T,T,T,T,T,T,T,T,T,T,T,T); +#define prt14(T) prt13(T) \ + T C##T##14(T,T,T,T,T,T,T,T,T,T,T,T,T,T); +#define prt15(T) prt14(T) \ + T C##T##15(T,T,T,T,T,T,T,T,T,T,T,T,T,T,T); +#define prt(T) prt15(T) +prt(_c) +prt(_uc) +prt(_s) +prt(_us) +prt(_i) +#if __WORDSIZE == 64 +prt(_ui) +prt(_l) +#endif +prt(_f) +prt(_d) +#undef prt +#undef prt15 +#undef prt14 +#undef prt13 +#undef prt12 +#undef prt11 +#undef prt10 +#undef prt9 +#undef prt8 +#undef prt7 +#undef prt6 +#undef prt5 +#undef prt4 +#undef prt3 +#undef prt2 +#undef prt1 +#undef prt0 + +#define prtn(N,T) T J##T##n(void); +#define prt0(T) prtn(0,T) +#define prt1(T) prt0(T) prtn(1,T) +#define prt2(T) prt1(T) prtn(2,T) +#define prt3(T) prt2(T) prtn(3,T) +#define prt4(T) prt3(T) prtn(4,T) +#define prt5(T) prt4(T) prtn(5,T) +#define prt6(T) prt5(T) prtn(6,T) +#define prt7(T) prt6(T) prtn(7,T) +#define prt8(T) prt7(T) prtn(8,T) +#define prt9(T) prt8(T) prtn(9,T) +#define prt10(T) prt9(T) prtn(10,T) +#define prt11(T) prt10(T) prtn(11,T) +#define prt12(T) prt11(T) 
prtn(12,T) +#define prt13(T) prt12(T) prtn(13,T) +#define prt14(T) prt13(T) prtn(14,T) +#define prt15(T) prt14(T) prtn(15,T) +#define prt(T) prt15(T) +prt(_c) +prt(_uc) +prt(_s) +prt(_us) +prt(_i) +#if __WORDSIZE == 64 +prt(_ui) +prt(_l) +#endif +prt(_f) +prt(_d) +#undef prt +#undef prt15 +#undef prt14 +#undef prt13 +#undef prt12 +#undef prt11 +#undef prt10 +#undef prt9 +#undef prt8 +#undef prt7 +#undef prt6 +#undef prt5 +#undef prt4 +#undef prt3 +#undef prt2 +#undef prt1 +#undef prt0 +#undef prtn + +/* + * Initialization + */ + +#define dat0(T) T (*j##T##0)(void); + +#define dat1(T) dat0(T) \ + T (*j##T##1)(T); + +#define dat2(T) dat1(T) \ + T (*j##T##2)(T,T); + +#define dat3(T) dat2(T) \ + T (*j##T##3)(T,T,T); + +#define dat4(T) dat3(T) \ + T (*j##T##4)(T,T,T,T); + +#define dat5(T) dat4(T) \ + T (*j##T##5)(T,T,T,T,T); + +#define dat6(T) dat5(T) \ + T (*j##T##6)(T,T,T,T,T,T); + +#define dat7(T) dat6(T) \ + T (*j##T##7)(T,T,T,T,T,T,T); + +#define dat8(T) dat7(T) \ + T (*j##T##8)(T,T,T,T,T,T,T,T); + +#define dat9(T) dat8(T) \ + T (*j##T##9)(T,T,T,T,T,T,T,T,T); + +#define dat10(T) dat9(T) \ + T (*j##T##10)(T,T,T,T,T,T,T,T,T,T); + +#define dat11(T) dat10(T) \ + T (*j##T##11)(T,T,T,T,T,T,T,T,T,T,T); + +#define dat12(T) dat11(T) \ + T (*j##T##12)(T,T,T,T,T,T,T,T,T,T,T,T); + +#define dat13(T) dat12(T) \ + T (*j##T##13)(T,T,T,T,T,T,T,T,T,T,T,T,T); + +#define dat14(T) dat13(T) \ + T (*j##T##14)(T,T,T,T,T,T,T,T,T,T,T,T,T,T); + +#define dat15(T) dat14(T) \ + T (*j##T##15)(T,T,T,T,T,T,T,T,T,T,T,T,T,T,T); + +#define dat(T) dat15(T) +dat(_c) +dat(_uc) +dat(_s) +dat(_us) +dat(_i) +#if __WORDSIZE == 64 +dat(_ui) +dat(_l) +#endif +dat(_f) +dat(_d) +#undef dat +#undef dat15 +#undef dat14 +#undef dat13 +#undef dat12 +#undef dat11 +#undef dat10 +#undef dat9 +#undef dat8 +#undef dat7 +#undef dat6 +#undef dat5 +#undef dat4 +#undef dat3 +#undef dat2 +#undef dat1 +#undef dat0 + +/* + * Implementation + */ +#define dcl0(T) \ +T C##T##0(void) \ +{ \ + dump_args(QUOTE(C##T##0));\ + return 
(0); \ +} +#define dcl1(T) \ +dcl0(T) \ +T C##T##1(T A) \ +{ \ + dump_args(QUOTE(C##T##1));\ + return (A); \ +} +#define dcl2(T) \ +dcl1(T) \ +T C##T##2(T A,T B) \ +{ \ + dump_args(QUOTE(C##T##2));\ + return (A-B); \ +} +#define dcl3(T) \ +dcl2(T) \ +T C##T##3(T A,T B,T C) \ +{ \ + dump_args(QUOTE(C##T##3));\ + return (A-B-C); \ +} +#define dcl4(T) \ +dcl3(T) \ +T C##T##4(T A,T B,T C,T D) \ +{ \ + dump_args(QUOTE(C##T##4));\ + return (A-B-C-D); \ +} +#define dcl5(T) \ +dcl4(T) \ +T C##T##5(T A,T B,T C,T D,T E) \ +{ \ + dump_args(QUOTE(C##T##5));\ + return (A-B-C-D-E); \ +} +#define dcl6(T) \ +dcl5(T) \ +T C##T##6(T A,T B,T C,T D,T E,T F) \ +{ \ + dump_args(QUOTE(C##T##6));\ + return (A-B-C-D-E-F); \ +} +#define dcl7(T) \ +dcl6(T) \ +T C##T##7(T A,T B,T C,T D,T E,T F,T G) \ +{ \ + dump_args(QUOTE(C##T##7));\ + return (A-B-C-D-E-F-G); \ +} +#define dcl8(T) \ +dcl7(T) \ +T C##T##8(T A,T B,T C,T D,T E,T F,T G,T H) \ +{ \ + dump_args(QUOTE(C##T##8));\ + return (A-B-C-D-E-F-G-H); \ +} +#define dcl9(T) \ +dcl8(T) \ +T C##T##9(T A,T B,T C,T D,T E,T F,T G,T H,T I) \ +{ \ + dump_args(QUOTE(C##T##9));\ + return (A-B-C-D-E-F-G-H-I); \ +} +#define dcl10(T) \ +dcl9(T) \ +T C##T##10(T A,T B,T C,T D,T E,T F,T G,T H,T I,T J) \ +{ \ + dump_args(QUOTE(C##T##10));\ + return (A-B-C-D-E-F-G-H-I-J); \ +} +#define dcl11(T) \ +dcl10(T) \ +T C##T##11(T A,T B,T C,T D,T E,T F,T G,T H,T I,T J,T K) \ +{ \ + dump_args(QUOTE(C##T##11));\ + return (A-B-C-D-E-F-G-H-I-J-K); \ +} +#define dcl12(T) \ +dcl11(T) \ +T C##T##12(T A,T B,T C,T D,T E,T F,T G,T H,T I,T J,T K,T L) \ +{ \ + dump_args(QUOTE(C##T##12));\ + return (A-B-C-D-E-F-G-H-I-J-K-L); \ +} +#define dcl13(T) \ +dcl12(T) \ +T C##T##13(T A,T B,T C,T D,T E,T F,T G,T H,T I,T J,T K,T L,T M) \ +{ \ + dump_args(QUOTE(C##T##13));\ + return (A-B-C-D-E-F-G-H-I-J-K-L-M); \ +} +#define dcl14(T) \ +dcl13(T) \ +T C##T##14(T A,T B,T C,T D,T E,T F,T G,T H,T I,T J,T K,T L,T M,T N) \ +{ \ + dump_args(QUOTE(C##T##14));\ + return (A-B-C-D-E-F-G-H-I-J-K-L-M-N); \ 
+} +#define dcl15(T) \ +dcl14(T) \ +T C##T##15(T A,T B,T C,T D,T E,T F,T G,T H,T I,T J,T K,T L,T M,T N,T O) \ +{ \ + dump_args(QUOTE(C##T##15));\ + return (A-B-C-D-E-F-G-H-I-J-K-L-M-N-O); \ +} +#define dcl(T) dcl15(T) +dcl(_c) +dcl(_uc) +dcl(_s) +dcl(_us) +dcl(_i) +#if __WORDSIZE == 64 +dcl(_ui) +dcl(_l) +#endif +dcl(_f) +dcl(_d) +#undef dcl +#undef dcl15 +#undef dcl14 +#undef dcl13 +#undef dcl12 +#undef dcl11 +#undef dcl10 +#undef dcl9 +#undef dcl8 +#undef dcl7 +#undef dcl6 +#undef dcl5 +#undef dcl4 +#undef dcl3 +#undef dcl2 +#undef dcl1 +#undef dcl0 + +#define dcl0(T) \ +T CJ##T##0(void) \ +{ \ + dump_args(QUOTE(CJ##T##0));\ + return ((*j##T##0)()); \ +} +#define dcl1(T) \ +dcl0(T) \ +T CJ##T##1(void) \ +{ \ + dump_args(QUOTE(CJ##T##1));\ + return ((*j##T##1)(1)); \ +} +#define dcl2(T) \ +dcl1(T) \ +T CJ##T##2(void) \ +{ \ + dump_args(QUOTE(CJ##T##2));\ + return ((*j##T##2)(1,2)); \ +} +#define dcl3(T) \ +dcl2(T) \ +T CJ##T##3(void) \ +{ \ + dump_args(QUOTE(CJ##T##3));\ + return ((*j##T##3)(1,2,3)); \ +} +#define dcl4(T) \ +dcl3(T) \ +T CJ##T##4(void) \ +{ \ + dump_args(QUOTE(CJ##T##4));\ + return ((*j##T##4)(1,2,3,4)); \ +} +#define dcl5(T) \ +dcl4(T) \ +T CJ##T##5(void) \ +{ \ + dump_args(QUOTE(CJ##T##5));\ + return ((*j##T##5)(1,2,3,4,5)); \ +} +#define dcl6(T) \ +dcl5(T) \ +T CJ##T##6(void) \ +{ \ + dump_args(QUOTE(CJ##T##6));\ + return ((*j##T##6)(1,2,3,4,5,6)); \ +} +#define dcl7(T) \ +dcl6(T) \ +T CJ##T##7(void) \ +{ \ + dump_args(QUOTE(CJ##T##7));\ + return ((*j##T##7)(1,2,3,4,5,6,7)); \ +} +#define dcl8(T) \ +dcl7(T) \ +T CJ##T##8(void) \ +{ \ + dump_args(QUOTE(CJ##T##8));\ + return ((*j##T##8)(1,2,3,4,5,6,7,8)); \ +} +#define dcl9(T) \ +dcl8(T) \ +T CJ##T##9(void) \ +{ \ + dump_args(QUOTE(CJ##T##9));\ + return ((*j##T##9)(1,2,3,4,5,6,7,8,9)); \ +} +#define dcl10(T) \ +dcl9(T) \ +T CJ##T##10(void) \ +{ \ + dump_args(QUOTE(CJ##T##10));\ + return ((*j##T##10)(1,2,3,4,5,6,7,8,9,10)); \ +} +#define dcl11(T) \ +dcl10(T) \ +T CJ##T##11(void) \ +{ \ + 
dump_args(QUOTE(CJ##T##11));\ + return ((*j##T##11)(1,2,3,4,5,6,7,8,9,10,11)); \ +} +#define dcl12(T) \ +dcl11(T) \ +T CJ##T##12(void) \ +{ \ + dump_args(QUOTE(CJ##T##12));\ + return ((*j##T##12)(1,2,3,4,5,6,7,8,9,10,11,12)); \ +} +#define dcl13(T) \ +dcl12(T) \ +T CJ##T##13(void) \ +{ \ + dump_args(QUOTE(CJ##T##13));\ + return ((*j##T##13)(1,2,3,4,5,6,7,8,9,10,11,12,13)); \ +} +#define dcl14(T) \ +dcl13(T) \ +T CJ##T##14(void) \ +{ \ + dump_args(QUOTE(CJ##T##14));\ + return ((*j##T##14)(1,2,3,4,5,6,7,8,9,10,11,12,13,14)); \ +} +#define dcl15(T) \ +dcl14(T) \ +T CJ##T##15(void) \ +{ \ + dump_args(QUOTE(CJ##T##15));\ + return ((*j##T##15)(1,2,3,4,5,6,7,8,9,10,11,12,13,14,15)); \ +} +#define dcl(t) dcl15(t) +dcl(_c) +dcl(_uc) +dcl(_s) +dcl(_us) +dcl(_i) +#if __WORDSIZE == 64 +dcl(_ui) +dcl(_l) +#endif +dcl(_f) +dcl(_d) +#undef dcl +#undef dcl15 +#undef dcl14 +#undef dcl13 +#undef dcl12 +#undef dcl11 +#undef dcl10 +#undef dcl9 +#undef dcl8 +#undef dcl7 +#undef dcl6 +#undef dcl5 +#undef dcl4 +#undef dcl3 +#undef dcl2 +#undef dcl1 +#undef dcl0 + +size_t +run_test(jit_state_t *_jit, uint8_t *code_base, size_t code_size) +{ + jit_reloc_t jmpi_main; + void (*function)(void); + jit_reloc_t jmp; + + jit_begin(_jit, code_base, code_size); + + jmpi_main = jit_jmp(_jit); + +#define calc(B,T,R,O)\ + jit_movr##B(_jit, R##1, R##0);\ + jit_ldxi##T(_jit, R##0, JIT_SP, 8 * O);\ + jit_subr##B(_jit, R##0, R##1, R##0); + +#define get0(B,T,R) jit_movi##B(_jit, R##0, 0); +#define get1(B,T,R) jit_ldxi##T(_jit, R##0, JIT_SP, 8 * 0); +#define get2(B,T,R) \ + get1(B,T,R); \ + calc(B,T,R,1); +#define get3(B,T,R) \ + get2(B,T,R); \ + calc(B,T,R,2); +#define get4(B,T,R) \ + get3(B,T,R); \ + calc(B,T,R,3); +#define get5(B,T,R) \ + get4(B,T,R); \ + calc(B,T,R,4); +#define get6(B,T,R) \ + get5(B,T,R); \ + calc(B,T,R,5); +#define get7(B,T,R) \ + get6(B,T,R); \ + calc(B,T,R,6); +#define get8(B,T,R) \ + get7(B,T,R); \ + calc(B,T,R,7); +#define get9(B,T,R) \ + get8(B,T,R); \ + calc(B,T,R,8); +#define 
get10(B,T,R) \ + get9(B,T,R); \ + calc(B,T,R,9); +#define get11(B,T,R) \ + get10(B,T,R); \ + calc(B,T,R,10); +#define get12(B,T,R) \ + get11(B,T,R); \ + calc(B,T,R,11); +#define get13(B,T,R) \ + get12(B,T,R); \ + calc(B,T,R,12); +#define get14(B,T,R) \ + get13(B,T,R); \ + calc(B,T,R,13); +#define get15(B,T,R) \ + get14(B,T,R); \ + calc(B,T,R,14); + +#if __WORDSIZE == 32 +# define jit_extr_i(_jit, u, v) /**/ +#else +# define jit_extr_l(_jit, u, v) /**/ +#endif + +#if __WORDSIZE == 64 +#define jit_stxi_ui(_jit, u, r0, r1) jit_stxi_i(_jit, u, r0, r1) +#endif +#define jit_stxi_us(_jit, u, r0, r1) jit_stxi_s(_jit, u, r0, r1) +#define jit_stxi_uc(_jit, u, r0, r1) jit_stxi_c(_jit, u, r0, r1) + +#define abi_uc JIT_OPERAND_ABI_UINT8 +#define abi_c JIT_OPERAND_ABI_INT8 +#define abi_us JIT_OPERAND_ABI_UINT16 +#define abi_s JIT_OPERAND_ABI_INT16 +#define abi_ui JIT_OPERAND_ABI_UINT32 +#define abi_i JIT_OPERAND_ABI_INT32 +#define abi_ul JIT_OPERAND_ABI_UINT64 +#define abi_l JIT_OPERAND_ABI_INT64 +#define abi_f JIT_OPERAND_ABI_FLOAT +#define abi_d JIT_OPERAND_ABI_DOUBLE + +#define store0(T) jit_operand_mem(JIT_OPERAND_ABI_UINT8, JIT_SP, 0) +#define store1(T) jit_operand_mem(abi##T, JIT_SP, 0 * 8) +#define store2(T) store1(T), jit_operand_mem(abi##T, JIT_SP, 1 * 8) +#define store3(T) store2(T), jit_operand_mem(abi##T, JIT_SP, 2 * 8) +#define store4(T) store3(T), jit_operand_mem(abi##T, JIT_SP, 3 * 8) +#define store5(T) store4(T), jit_operand_mem(abi##T, JIT_SP, 4 * 8) +#define store6(T) store5(T), jit_operand_mem(abi##T, JIT_SP, 5 * 8) +#define store7(T) store6(T), jit_operand_mem(abi##T, JIT_SP, 6 * 8) +#define store8(T) store7(T), jit_operand_mem(abi##T, JIT_SP, 7 * 8) +#define store9(T) store8(T), jit_operand_mem(abi##T, JIT_SP, 8 * 8) +#define store10(T) store9(T), jit_operand_mem(abi##T, JIT_SP, 9 * 8) +#define store11(T) store10(T), jit_operand_mem(abi##T, JIT_SP, 10 * 8) +#define store12(T) store11(T), jit_operand_mem(abi##T, JIT_SP, 11 * 8) +#define store13(T) store12(T), 
jit_operand_mem(abi##T, JIT_SP, 12 * 8) +#define store14(T) store13(T), jit_operand_mem(abi##T, JIT_SP, 13 * 8) +#define store15(T) store14(T), jit_operand_mem(abi##T, JIT_SP, 14 * 8) + +// Placeholder, won't actually be used. +#define load0(T) jit_operand_mem(JIT_OPERAND_ABI_INT8, JIT_SP, 0) +#define load1(T) jit_operand_mem(abi##T, JIT_SP, 0 * 8) +#define load2(T) load1(T), jit_operand_mem(abi##T, JIT_SP, 1 * 8) +#define load3(T) load2(T), jit_operand_mem(abi##T, JIT_SP, 2 * 8) +#define load4(T) load3(T), jit_operand_mem(abi##T, JIT_SP, 3 * 8) +#define load5(T) load4(T), jit_operand_mem(abi##T, JIT_SP, 4 * 8) +#define load6(T) load5(T), jit_operand_mem(abi##T, JIT_SP, 5 * 8) +#define load7(T) load6(T), jit_operand_mem(abi##T, JIT_SP, 6 * 8) +#define load8(T) load7(T), jit_operand_mem(abi##T, JIT_SP, 7 * 8) +#define load9(T) load8(T), jit_operand_mem(abi##T, JIT_SP, 8 * 8) +#define load10(T) load9(T), jit_operand_mem(abi##T, JIT_SP, 9 * 8) +#define load11(T) load10(T), jit_operand_mem(abi##T, JIT_SP, 10 * 8) +#define load12(T) load11(T), jit_operand_mem(abi##T, JIT_SP, 11 * 8) +#define load13(T) load12(T), jit_operand_mem(abi##T, JIT_SP, 12 * 8) +#define load14(T) load13(T), jit_operand_mem(abi##T, JIT_SP, 13 * 8) +#define load15(T) load14(T), jit_operand_mem(abi##T, JIT_SP, 14 * 8) + +#define defi(T, N) \ + { \ + j##T##N = jit_address(_jit); \ + size_t frame = jit_enter_jit_abi(_jit, 0, 0, 0); \ + size_t stack = jit_align_stack(_jit, N * 8); \ + jit_operand_t args[] = {store##N(T)}; \ + jit_load_args(_jit, N, args); \ + get##N(,T,JIT_R) \ + jit_extr##T(_jit, JIT_R0, JIT_R0); \ + jit_shrink_stack(_jit, stack); \ + jit_leave_jit_abi(_jit, 0, 0, frame); \ + jit_retr(_jit, JIT_R0); \ + } + +#define deff(T, N) \ + { \ + j##T##N = jit_address(_jit); \ + size_t frame = jit_enter_jit_abi(_jit, 0, 0, 0); \ + size_t stack = jit_align_stack(_jit, N * 8); \ + jit_operand_t args[] = {store##N(T)}; \ + jit_load_args(_jit, N, args); \ + get##N(T,T,JIT_F); \ + 
jit_shrink_stack(_jit, stack); \ + jit_leave_jit_abi(_jit, 0, 0, frame); \ + jit_retr##T(_jit, JIT_F0); \ + } + +#define def0(X, T) def##X(T, 0) +#define def1(X, T) def0(X, T) def##X(T, 1) +#define def2(X, T) def1(X, T) def##X(T, 2) +#define def3(X, T) def2(X, T) def##X(T, 3) +#define def4(X, T) def3(X, T) def##X(T, 4) +#define def5(X, T) def4(X, T) def##X(T, 5) +#define def6(X, T) def5(X, T) def##X(T, 6) +#define def7(X, T) def6(X, T) def##X(T, 7) +#define def8(X, T) def7(X, T) def##X(T, 8) +#define def9(X, T) def8(X, T) def##X(T, 9) +#define def10(X, T) def9(X, T) def##X(T, 10) +#define def11(X, T) def10(X, T) def##X(T, 11) +#define def12(X, T) def11(X, T) def##X(T, 12) +#define def13(X, T) def12(X, T) def##X(T, 13) +#define def14(X, T) def13(X, T) def##X(T, 14) +#define def15(X, T) def14(X, T) def##X(T, 15) +#define def(T) def15(i, T) + def(_c) + def(_uc) + def(_s) + def(_us) + def(_i) +#if __WORDSIZE == 64 + def(_ui) + def(_l) +#endif +#undef def +#define def(T) def15(f, T) + def(_f) + def(_d) +#undef def + + jit_patch_here(_jit, jmpi_main); + size_t frame = jit_enter_jit_abi(_jit, 0, 0, 0); + size_t stack = jit_align_stack(_jit, 15 * 8); + +#define push0(B,T,R) /**/ +#define push1(B,T,R)\ + jit_movi##B(_jit, R##0, 1);\ + jit_stxi##T(_jit, 0 * 8, JIT_SP, R##0); + +#define push2(B,T,R)\ + push1(B,T,R)\ + jit_movi##B(_jit, R##0, 2);\ + jit_stxi##T(_jit, 1 * 8, JIT_SP, R##0); + +#define push3(B,T,R)\ + push2(B,T,R)\ + jit_movi##B(_jit, R##0, 3);\ + jit_stxi##T(_jit, 2 * 8, JIT_SP, R##0); + +#define push4(B,T,R)\ + push3(B,T,R)\ + jit_movi##B(_jit, R##0, 4);\ + jit_stxi##T(_jit, 3 * 8, JIT_SP, R##0); + +#define push5(B,T,R)\ + push4(B,T,R)\ + jit_movi##B(_jit, R##0, 5);\ + jit_stxi##T(_jit, 4 * 8, JIT_SP, R##0); + +#define push6(B,T,R)\ + push5(B,T,R)\ + jit_movi##B(_jit, R##0, 6);\ + jit_stxi##T(_jit, 5 * 8, JIT_SP, R##0); + +#define push7(B,T,R)\ + push6(B,T,R)\ + jit_movi##B(_jit, R##0, 7);\ + jit_stxi##T(_jit, 6 * 8, JIT_SP, R##0); + +#define push8(B,T,R)\ + 
push7(B,T,R)\ + jit_movi##B(_jit, R##0, 8);\ + jit_stxi##T(_jit, 7 * 8, JIT_SP, R##0); + +#define push9(B,T,R)\ + push8(B,T,R)\ + jit_movi##B(_jit, R##0, 9);\ + jit_stxi##T(_jit, 8 * 8, JIT_SP, R##0); + +#define push10(B,T,R)\ + push9(B,T,R)\ + jit_movi##B(_jit, R##0, 10);\ + jit_stxi##T(_jit, 9 * 8, JIT_SP, R##0); + +#define push11(B,T,R)\ + push10(B,T,R)\ + jit_movi##B(_jit, R##0, 11);\ + jit_stxi##T(_jit, 10 * 8, JIT_SP, R##0); + +#define push12(B,T,R)\ + push11(B,T,R)\ + jit_movi##B(_jit, R##0, 12);\ + jit_stxi##T(_jit, 11 * 8, JIT_SP, R##0); + +#define push13(B,T,R)\ + push12(B,T,R)\ + jit_movi##B(_jit, R##0, 13);\ + jit_stxi##T(_jit, 12 * 8, JIT_SP, R##0); + +#define push14(B,T,R)\ + push13(B,T,R)\ + jit_movi##B(_jit, R##0, 14);\ + jit_stxi##T(_jit, 13 * 8, JIT_SP, R##0); + +#define push15(B,T,R)\ + push14(B,T,R)\ + jit_movi##B(_jit, R##0, 15);\ + jit_stxi##T(_jit, 14 * 8, JIT_SP, R##0); + +#define calin(T,N) \ + { \ + push##N(, T, JIT_R) \ + jit_operand_t args[] = {load##N(T)}; \ + jit_calli(_jit, C##T##N, N, args); \ + jit_retval##T(_jit, JIT_R0); \ + jit_movi(_jit, JIT_R1, T##N); \ + jmp = jit_beqr(_jit, JIT_R0, JIT_R1); \ + jit_calli_0(_jit, abort); \ + jit_patch_here(_jit, jmp); \ + } + +#define calfn(T,N) \ + { \ + push##N(T, T, JIT_F) \ + jit_operand_t args[] = {load##N(T)}; \ + jit_calli(_jit, C##T##N, N, args); \ + jit_retval##T(_jit, JIT_F0); \ + jit_movi##T(_jit, JIT_F1, _w##N); \ + jmp = jit_beqr##T(_jit, JIT_F0, JIT_F1); \ + jit_calli_0(_jit, abort); \ + jit_patch_here(_jit, jmp); \ + } +#define calx0(X,T) cal##X##n(T,0) +#define calx1(X,T) calx0(X,T) cal##X##n(T,1) +#define calx2(X,T) calx1(X,T) cal##X##n(T,2) +#define calx3(X,T) calx2(X,T) cal##X##n(T,3) +#define calx4(X,T) calx3(X,T) cal##X##n(T,4) +#define calx5(X,T) calx4(X,T) cal##X##n(T,5) +#define calx6(X,T) calx5(X,T) cal##X##n(T,6) +#define calx7(X,T) calx6(X,T) cal##X##n(T,7) +#define calx8(X,T) calx7(X,T) cal##X##n(T,8) +#define calx9(X,T) calx8(X,T) cal##X##n(T,9) +#define 
calx10(X,T) calx9(X,T) cal##X##n(T,10) +#define calx11(X,T) calx10(X,T) cal##X##n(T,11) +#define calx12(X,T) calx11(X,T) cal##X##n(T,12) +#define calx13(X,T) calx12(X,T) cal##X##n(T,13) +#define calx14(X,T) calx13(X,T) cal##X##n(T,14) +#define calx15(X,T) calx14(X,T) cal##X##n(T,15) +#define cali(T) calx15(i,T) +#define calf(T) calx15(f,T) + + cali(_c) + cali(_uc) + cali(_s) + cali(_us) + cali(_i) +#if __WORDSIZE == 64 + cali(_ui) + cali(_l) +#endif + calf(_f) + calf(_d) + +#undef calin +#undef calfn +#define calin(T,N) \ + { \ + push##N(, T, JIT_R) \ + jit_operand_t args[] = {load##N(T)}; \ + jit_calli(_jit, CJ##T##N, N, args); \ + jit_retval##T(_jit, JIT_R0); \ + jit_movi(_jit, JIT_R1, T##N); \ + jmp = jit_beqr(_jit, JIT_R0, JIT_R1); \ + jit_calli_0(_jit, abort); \ + jit_patch_here(_jit, jmp); \ + } + +#define calfn(T,N) \ + { \ + push##N(T, T, JIT_F) \ + jit_operand_t args[] = {load##N(T)}; \ + jit_calli(_jit, CJ##T##N, N, args); \ + jit_retval##T(_jit, JIT_F0); \ + jit_movi##T(_jit, JIT_F1, _w##N); \ + jmp = jit_beqr##T(_jit, JIT_F0, JIT_F1); \ + jit_calli_0(_jit, abort); \ + jit_patch_here(_jit, jmp); \ + } + + cali(_c) + cali(_uc) + cali(_s) + cali(_us) + cali(_i) +#if __WORDSIZE == 64 + cali(_ui) + cali(_l) +#endif + calf(_f) + calf(_d) + + jit_shrink_stack(_jit, stack); + jit_leave_jit_abi(_jit, 0, 0, frame); + jit_ret(_jit); + + size_t size = 0; + function = jit_end(_jit, &size); + + if (function) + (*function)(); + else + return size; + + return 0; +} + +int main(int argc, char *argv[]) +{ + return main_compiler(argc, argv, run_test); +} diff --git a/deps/lightening/tests/z_clobber.c b/deps/lightening/tests/z_clobber.c new file mode 100644 index 0000000..7503de7 --- /dev/null +++ b/deps/lightening/tests/z_clobber.c @@ -0,0 +1,1145 @@ +#include "test.h" + +/* do not bother about result of operations, only ensure valid arguments + * and that registers not modified by the operation are not clobbered */ + +#define IV0 0x10000 +#define IV1 0x10001 +#define IV2 
0x10002 +#define IV3 0x10003 +#define IV4 0x10004 +#define IV5 0x10005 +#define FV0 100.0 +#define FV1 101.0 +#define FV2 102.0 +#define FV3 103.0 +#define FV4 104.0 +#define FV5 105.0 +#define IR0 JIT_R0 +#define IR1 JIT_R1 +#define IR2 JIT_R2 +#define IR3 JIT_V0 +#define IR4 JIT_V1 +#define IR5 JIT_V2 +#define FR0 JIT_F0 +#define FR1 JIT_F1 +#define FR2 JIT_F2 +#define FR3 JIT_F3 +#define FR4 JIT_F4 +#define FR5 JIT_F5 + +#define setup() \ + jit_movi(_jit, JIT_R0, IV0); \ + jit_movi(_jit, JIT_R1, IV1); \ + jit_movi(_jit, JIT_R2, IV2); \ + jit_movi(_jit, JIT_V0, IV3); \ + jit_movi(_jit, JIT_V1, IV4); \ + jit_movi(_jit, JIT_V2, IV5); + +#define setup_f() \ + jit_movi_f(_jit, JIT_F0, FV0); \ + jit_movi_f(_jit, JIT_F1, FV1); \ + jit_movi_f(_jit, JIT_F2, FV2); \ + jit_movi_f(_jit, JIT_F3, FV3); \ + jit_movi_f(_jit, JIT_F4, FV4); \ + jit_movi_f(_jit, JIT_F5, FV5); + +#define setup_d() \ + jit_movi_d(_jit, JIT_F0, FV0); \ + jit_movi_d(_jit, JIT_F1, FV1); \ + jit_movi_d(_jit, JIT_F2, FV2); \ + jit_movi_d(_jit, JIT_F3, FV3); \ + jit_movi_d(_jit, JIT_F4, FV4); \ + jit_movi_d(_jit, JIT_F5, FV5); + +#define check(label, rn) \ +{ \ + jit_reloc_t r = jit_beqi(_jit, IR##rn, IV##rn); \ + jit_calli_0(_jit, abort); \ + jit_patch_here(_jit, r); \ +} + +#define check1(k, l, i0) \ + check(k##l##i0##_0, i0) + +#define check2(k, l, i0, i1) \ + check(k##l##i0##i1##_0, i0) \ + check(k##l##i0##i1##_1, i1) + +#define check3(k, l, i0, i1, i2) \ + check(k##l##i0##i1##i2##_0, i0) \ + check(k##l##i0##i1##i2##_1, i1) \ + check(k##l##i0##i1##i2##_2, i2) + +#define check4(k, l, i0, i1, i2, i3) \ + check(k##l##i0##i1##i2##i3##_0, i0) \ + check(k##l##i0##i1##i2##i3##_1, i1) \ + check(k##l##i0##i1##i2##i3##_2, i2) \ + check(k##l##i0##i1##i2##i3##_3, i3) + +#define check5(k, l, i0, i1, i2, i3, i4) \ + check(k##l##i0##i1##i2##i3##i4##_0, i0) \ + check(k##l##i0##i1##i2##i3##i4##_1, i1) \ + check(k##l##i0##i1##i2##i3##i3##_2, i2) \ + check(k##l##i0##i1##i2##i3##i4##_3, i3) \ + 
check(k##l##i0##i1##i2##i3##i4##_4, i4) + +#define check6(k, l, i0, i1, i2, i3, i4, i5) \ + check(k##l##i0##i1##i2##i3##i4##i5##_0, i0) \ + check(k##l##i0##i1##i2##i3##i4##i5##_1, i1) \ + check(k##l##i0##i1##i2##i3##i3##i5##_2, i2) \ + check(k##l##i0##i1##i2##i3##i4##i5##_3, i3) \ + check(k##l##i0##i1##i2##i3##i4##i5##_4, i4) \ + check(k##l##i0##i1##i2##i3##i4##i5##_5, i5) + +/* slightly hacky, lightning only uses JIT_F0-F5, and since all lightening + * platforms (at least at the moment) support JIT_F6, we can use it as a + * temporary register to get the value to compare agains in to the beqrf. + */ +#define checkf(f, label, rn) \ +{ \ + jit_movi##f(_jit, JIT_F6, FV##rn); \ + jit_reloc_t r = jit_beqr##f(_jit, FR##rn, JIT_F6); \ + jit_calli_0(_jit, abort); \ + jit_patch_here(_jit, r); \ +} + +#define checkf1(f, k, l, i0) \ + checkf(f, f##k##l##i0##_0, i0) + +#define checkf2(f, k, l, i0, i1) \ + checkf(f, f##k##l##i0##i1##_0, i0) \ + checkf(f, f##k##l##i0##i1##_1, i1) + +#define checkf3(f, k, l, i0, i1, i2) \ + checkf(f, f##k##l##i0##i1##i2##_0, i0) \ + checkf(f, f##k##l##i0##i1##i2##_1, i1) \ + checkf(f, f##k##l##i0##i1##i2##_2, i2) + +#define checkf4(f, k, l, i0, i1, i2, i3) \ + checkf(f, f##k##l##i0##i1##i2##i3##_0, i0) \ + checkf(f, f##k##l##i0##i1##i2##i3##_1, i1) \ + checkf(f, f##k##l##i0##i1##i2##i3##_2, i2) \ + checkf(f, f##k##l##i0##i1##i2##i3##_3, i3) + +#define checkf5(f, k, l, i0, i1, i2, i3, i4) \ + checkf(f, f##k##l##i0##i1##i2##i3##i4##_0, i0) \ + checkf(f, f##k##l##i0##i1##i2##i3##i4##_1, i1) \ + checkf(f, f##k##l##i0##i1##i2##i3##i3##_2, i2) \ + checkf(f, f##k##l##i0##i1##i2##i3##i4##_3, i3) \ + checkf(f, f##k##l##i0##i1##i2##i3##i4##_4, i4) + +#define checkf6(f, k, l, i0, i1, i2, i3, i4, i5) \ + checkf(f, f##k##l##i0##i1##i2##i3##i4##i5##_0, i0) \ + checkf(f, f##k##l##i0##i1##i2##i3##i4##i5##_1, i1) \ + checkf(f, f##k##l##i0##i1##i2##i3##i3##i5##_2, i2) \ + checkf(f, f##k##l##i0##i1##i2##i3##i4##i5##_3, i3) \ + checkf(f, 
f##k##l##i0##i1##i2##i3##i4##i5##_4, i4) \ + checkf(f, f##k##l##i0##i1##i2##i3##i4##i5##_5, i5) + +#define alui(l, op, i0, i1, i2, i3, i4, i5) \ + setup() \ + jit_movi(_jit, IR##i0, 1); \ + jit_##op##i(_jit, IR##i1, IR##i0, 1); \ + check4(i, l, i2, i3, i4, i5) + +#define aluic(l, op, i0, i1, i2, i3, i4, i5) \ + setup() \ + jit_movi(_jit, IR##i0, 1); \ + jit_##op##i(_jit, IR##i0, IR##i0, 1); \ + check5(ic, l, i1, i2, i3, i4, i5) + +#define alur(l, op, i0, i1, i2, i3, i4, i5) \ + setup() \ + jit_movi(_jit, IR##i0, 1); \ + jit_movi(_jit, IR##i1, 1); \ + jit_##op##r(_jit, IR##i2, IR##i0, IR##i1); \ + check3(r, l, i3, i4, i5) + +#define alurc0(l, op, i0, i1, i2, i3, i4, i5) \ + setup() \ + jit_movi(_jit, IR##i0, 1); \ + jit_movi(_jit, IR##i1, 1); \ + jit_##op##r(_jit, IR##i0, IR##i0, IR##i1); \ + check4(r0, l, i2, i3, i4, i5) + +#define alurc1(l, op, i0, i1, i2, i3, i4, i5) \ + setup() \ + jit_movi(_jit, IR##i0, 1); \ + jit_movi(_jit, IR##i1, 1); \ + jit_##op##r(_jit, IR##i1, IR##i0, IR##i1); \ + check4(r1, l, i2, i3, i4, i5) + +#define alurc2(l, op, i0, i1, i2, i3, i4, i5) \ + setup() \ + jit_movi(_jit, IR##i0, 1); \ + jit_##op##r(_jit, IR##i0, IR##i0, IR##i0); \ + check5(r2, l, i1, i2, i3, i4, i5) + +#define xalu(l, op, i0, i1, i2, i3, i4, i5) \ + alui(l, op, i0, i1, i2, i3, i4, i5) \ + aluic(l, op, i0, i1, i2, i3, i4, i5) \ + alur(l, op, i0, i1, i2, i3, i4, i5) \ + alurc0(l, op, i0, i1, i2, i3, i4, i5) \ + alurc1(l, op, i0, i1, i2, i3, i4, i5) \ + alurc2(l, op, i0, i1, i2, i3, i4, i5) + +#if __ia64__ +# define alu(l, op) \ + xalu(l, op, 0, 1, 2, 3, 4, 5) +#else +# define alu(l, op) \ + xalu(l, op, 0, 1, 2, 3, 4, 5) \ + xalu(l, op, 1, 2, 3, 4, 5, 0) \ + xalu(l, op, 2, 3, 4, 5, 0, 1) \ + xalu(l, op, 3, 4, 5, 0, 1, 2) \ + xalu(l, op, 4, 5, 0, 1, 2, 3) \ + xalu(l, op, 5, 0, 1, 2, 3, 4) +#endif + +#define fopi(f, l, op, f0, f1, f2, f3, f4, f5) \ + setup##f() \ + jit_movi##f(_jit, FR##f0, 1.0); \ + jit_movi##f(_jit, JIT_F6, 1.0); \ + jit_##op##r##f(_jit, FR##f1, FR##f0, 
JIT_F6); \ + checkf4(f, i, l, f2, f3, f4, f5) + +#define fopic(f, l, op, f0, f1, f2, f3, f4, f5) \ + setup##f() \ + jit_movi##f(_jit, FR##f0, 1.0); \ + jit_movi##f(_jit, JIT_F6, 1.0); \ + jit_##op##r##f(_jit, FR##f0, FR##f0, JIT_F6); \ + checkf5(f, ic, l, f1, f2, f3, f4, f5) + +#define fopr(f, l, op, f0, f1, f2, f3, f4, f5) \ + setup##f() \ + jit_movi##f(_jit, FR##f0, 1.0); \ + jit_movi##f(_jit, FR##f1, 1.0); \ + jit_##op##r##f(_jit, FR##f2, FR##f0, FR##f1); \ + checkf3(f, r, l, f3, f4, f5) + +#define foprc0(f, l, op, f0, f1, f2, f3, f4, f5) \ + setup##f() \ + jit_movi##f(_jit, FR##f0, 1.0); \ + jit_movi##f(_jit, FR##f1, 1.0); \ + jit_##op##r##f(_jit, FR##f0, FR##f0, FR##f1); \ + checkf4(f, r0, l, f2, f3, f4, f5) + +#define foprc1(f, l, op, f0, f1, f2, f3, f4, f5) \ + setup##f() \ + jit_movi##f(_jit, FR##f0, 1.0); \ + jit_movi##f(_jit, FR##f1, 1.0); \ + jit_##op##r##f(_jit, FR##f1, FR##f0, FR##f1); \ + checkf4(f, r1, l, f2, f3, f4, f5) + +#define foprc2(f, l, op, f0, f1, f2, f3, f4, f5) \ + setup##f() \ + jit_movi##f(_jit, FR##f0, 1.0); \ + jit_##op##r##f(_jit, FR##f0, FR##f0, FR##f0); \ + checkf5(f, r2, l, f1, f2, f3, f4, f5) + +#define xfop(f, l, op, f0, f1, f2, f3, f4, f5) \ + fopi(f, l, op, f0, f1, f2, f3, f4, f5) \ + fopic(f, l, op, f0, f1, f2, f3, f4, f5) \ + fopr(f, l, op, f0, f1, f2, f3, f4, f5) \ + foprc0(f, l, op, f0, f1, f2, f3, f4, f5) \ + foprc1(f, l, op, f0, f1, f2, f3, f4, f5) \ + foprc2(f, l, op, f0, f1, f2, f3, f4, f5) +#if __ia64__ +# define xxfop(l, op, f, f0, f1, f2, f3, f4, f5) \ + xfop(_f, l, op, f0, f1, f2, f3, f4, f5) +#else +# define xxfop(l, op, f, f0, f1, f2, f3, f4, f5) \ + xfop(_f, l, op, f0, f1, f2, f3, f4, f5) \ + xfop(_d, l, op, f0, f1, f2, f3, f4, f5) +#endif +#if __ia64__ +# define fop(l, op) \ + xxfop(l, op, f, 0, 1, 2, 3, 4, 5) +#else +# define fop(l, op) \ + xxfop(l, op, f, 0, 1, 2, 3, 4, 5) \ + xxfop(l, op, f, 1, 2, 3, 4, 5, 0) \ + xxfop(l, op, f, 2, 3, 4, 5, 0, 1) \ + xxfop(l, op, f, 3, 4, 5, 0, 1, 2) \ + xxfop(l, op, f, 4, 5, 
0, 1, 2, 3) \ + xxfop(l, op, f, 5, 0, 1, 2, 3, 4) +#endif + +#define aluxii(l, op, i0, i1, i2, i3, i4, i5) \ + setup() \ + jit_movi(_jit, IR##i0, 1); \ + jit_##op##ci(_jit, IR##i1, IR##i0, 1); \ + jit_##op##xi(_jit, IR##i2, IR##i0, 1); \ + check3(ii, l, i3, i4, i5) + +#define aluxir(l, op, i0, i1, i2, i3, i4, i5) \ + setup() \ + jit_movi(_jit, IR##i0, 1); \ + jit_##op##ci(_jit, IR##i1, IR##i0, 1); \ + jit_##op##xr(_jit, IR##i2, IR##i0, IR##i1); \ + check3(ir, l, i3, i4, i5) + +#define aluxri(l, op, i0, i1, i2, i3, i4, i5) \ + setup() \ + jit_movi(_jit, IR##i0, 1); \ + jit_movi(_jit, IR##i1, 1); \ + jit_##op##cr(_jit, IR##i2, IR##i0, IR##i1); \ + jit_##op##xi(_jit, IR##i0, IR##i1, 1); \ + check3(ri, l, i3, i4, i5) + +#define aluxrr(l, op, i0, i1, i2, i3, i4, i5) \ + setup() \ + jit_movi(_jit, IR##i0, 1); \ + jit_movi(_jit, IR##i1, 1); \ + jit_##op##cr(_jit, IR##i2, IR##i0, IR##i1); \ + jit_##op##xr(_jit, IR##i2, IR##i0, IR##i1); \ + check3(rr, l, i3, i4, i5) + +#define xalux(l, op, i0, i1, i2, i3, i4, i5) \ + aluxii(l, op, i0, i1, i2, i3, i4, i5) \ + aluxir(l, op, i0, i1, i2, i3, i4, i5) \ + aluxri(l, op, i0, i1, i2, i3, i4, i5) \ + aluxrr(l, op, i0, i1, i2, i3, i4, i5) +#if __ia64__ +# define alux(l, op) \ + xalux(l, op, 0, 1, 2, 3, 4, 5) +#else +# define alux(l, op) \ + xalux(l, op, 0, 1, 2, 3, 4, 5) \ + xalux(l, op, 1, 2, 3, 4, 5, 0) \ + xalux(l, op, 2, 3, 4, 5, 0, 1) \ + xalux(l, op, 3, 4, 5, 0, 1, 2) \ + xalux(l, op, 4, 5, 0, 1, 2, 3) \ + xalux(l, op, 5, 0, 1, 2, 3, 4) +#endif + +#define alui_u(l, op, i0, i1, i2, i3, i4, i5) \ + setup() \ + jit_movi(_jit, IR##i0, 1); \ + jit_##op##i_u(_jit, IR##i1, IR##i0, 1); \ + check4(i_u, l, i2, i3, i4, i5) + +#define aluic_u(l, op, i0, i1, i2, i3, i4, i5) \ + setup() \ + jit_movi(_jit, IR##i0, 1); \ + jit_##op##i_u(_jit, IR##i0, IR##i0, 1); \ + check5(ic_u, l, i1, i2, i3, i4, i5) + +#define alur_u(l, op, i0, i1, i2, i3, i4, i5) \ + setup() \ + jit_movi(_jit, IR##i0, 1); \ + jit_movi(_jit, IR##i1, 1); \ + 
jit_##op##r_u(_jit, IR##i2, IR##i0, IR##i1); \ + check3(r_u, l, i3, i4, i5) + +#define alurc0_u(l, op, i0, i1, i2, i3, i4, i5) \ + setup() \ + jit_movi(_jit, IR##i0, 1); \ + jit_movi(_jit, IR##i1, 1); \ + jit_##op##r_u(_jit, IR##i0, IR##i0, IR##i1); \ + check4(r0_u, l, i2, i3, i4, i5) + +#define alurc1_u(l, op, i0, i1, i2, i3, i4, i5) \ + setup() \ + jit_movi(_jit, IR##i0, 1); \ + jit_movi(_jit, IR##i1, 1); \ + jit_##op##r_u(_jit, IR##i1, IR##i0, IR##i1); \ + check4(r1_u, l, i2, i3, i4, i5) + +#define alurc2_u(l, op, i0, i1, i2, i3, i4, i5) \ + setup() \ + jit_movi(_jit, IR##i0, 1); \ + jit_##op##r_u(_jit, IR##i0, IR##i0, IR##i0); \ + check5(r2_u, l, i1, i2, i3, i4, i5) + +#define xalu_u(l, op, i0, i1, i2, i3, i4, i5) \ + alui_u(l, op, i0, i1, i2, i3, i4, i5) \ + aluic_u(l, op, i0, i1, i2, i3, i4, i5) \ + alur_u(l, op, i0, i1, i2, i3, i4, i5) \ + alurc0_u(l, op, i0, i1, i2, i3, i4, i5) \ + alurc1_u(l, op, i0, i1, i2, i3, i4, i5) \ + alurc2_u(l, op, i0, i1, i2, i3, i4, i5) +#if __ia64__ +# define alu_u(l, op) \ + xalu_u(l, op, 0, 1, 2, 3, 4, 5) +#else +# define alu_u(l, op) \ + xalu_u(l, op, 0, 1, 2, 3, 4, 5) \ + xalu_u(l, op, 1, 2, 3, 4, 5, 0) \ + xalu_u(l, op, 2, 3, 4, 5, 0, 1) \ + xalu_u(l, op, 3, 4, 5, 0, 1, 2) \ + xalu_u(l, op, 4, 5, 0, 1, 2, 3) \ + xalu_u(l, op, 5, 0, 1, 2, 3, 4) +#endif + +#define unir(l, op, i0, i1, i2, i3, i4, i5) \ + setup() \ + jit_movi(_jit, IR##i0, 1); \ + jit_##op(_jit, IR##i1, IR##i0); \ + check4(rr, l, i2, i3, i4, i5) + +#define unirc(l, op, i0, i1, i2, i3, i4, i5) \ + setup() \ + jit_movi(_jit, IR##i0, 1); \ + jit_##op(_jit, IR##i0, IR##i0); \ + check5(rc, l, i1, i2, i3, i4, i5) + +#define xuni(l, op, i0, i1, i2, i3, i4, i5) \ + unir(l, op, i0, i1, i2, i3, i4, i5) \ + unirc(l, op, i0, i1, i2, i3, i4, i5) +#if __ia64__ +# define uni(l, op) \ + xuni(l, op, 0, 1, 2, 3, 4, 5) +#else +# define uni(l, op) \ + xuni(l, op, 0, 1, 2, 3, 4, 5) \ + xuni(l, op, 1, 2, 3, 4, 5, 0) \ + xuni(l, op, 2, 3, 4, 5, 0, 1) \ + xuni(l, op, 3, 4, 5, 0, 1, 2) 
\ + xuni(l, op, 4, 5, 0, 1, 2, 3) \ + xuni(l, op, 5, 0, 1, 2, 3, 4) +#endif + +#define unfr(f, l, op, f0, f1, f2, f3, f4, f5) \ + setup##f() \ + jit_movi##f(_jit, FR##f0, 1); \ + jit_##op##f(_jit, FR##f1, FR##f0); \ + checkf4(f, rr, l, f2, f3, f4, f5) + +#define unfrc(f, l, op, f0, f1, f2, f3, f4, f5) \ + setup##f() \ + jit_movi##f(_jit, FR##f0, 1); \ + jit_##op##f(_jit, FR##f0, FR##f0); \ + checkf5(f, rc, l, f1, f2, f3, f4, f5) + +#define xunf(f, l, op, f0, f1, f2, f3, f4, f5) \ + unfr(f, l, op, f0, f1, f2, f3, f4, f5) \ + unfrc(f, l, op, f0, f1, f2, f3, f4, f5) +#define xxunf(l, op, f0, f1, f2, f3, f4, f5) \ + xunf(_f, l, op, f0, f1, f2, f3, f4, f5) \ + xunf(_d, l, op, f0, f1, f2, f3, f4, f5) +#if __ia64__ +# define unf(l, op) \ + xxunf(l, op, 0, 1, 2, 3, 4, 5) +#else +# define unf(l, op) \ + xxunf(l, op, 0, 1, 2, 3, 4, 5) \ + xxunf(l, op, 1, 2, 3, 4, 5, 0) \ + xxunf(l, op, 2, 3, 4, 5, 0, 1) \ + xxunf(l, op, 3, 4, 5, 0, 1, 2) \ + xxunf(l, op, 4, 5, 0, 1, 2, 3) \ + xxunf(l, op, 5, 0, 1, 2, 3, 4) +#endif + +#define fcpi(f, l, op, r0,r1,r2,r3,r4,r5, f0,f1,f2,f3,f4,f5) \ + setup() \ + setup##f() \ + jit_movi##f(_jit, FR##f0, 1.0); \ + jit_movi##f(_jit, JIT_F6, 1.0); \ + jit_##op##r##f(_jit, IR##r0, FR##f0, JIT_F6); \ + check5(i##f##f0, l, r1, r2, r3, r4, r5) \ + checkf5(f, i##r0, l, f1, f2, f3, f4, f5) + +#define fcpr(f, l, op, r0,r1,r2,r3,r4,r5, f0,f1,f2,f3,f4,f5) \ + setup() \ + setup##f() \ + jit_movi##f(_jit, FR##f0, 1.0); \ + jit_movi##f(_jit, FR##f1, 1.0); \ + jit_##op##r##f(_jit, IR##r0, FR##f0, FR##f1); \ + check5(r##f##f0, l, r1, r2, r3, r4, r5) \ + checkf4(f, r##r0, l, f2, f3, f4, f5) + +#define fcprc(f, l, op, r0,r1,r2,r3,r4,r5, f0,f1,f2,f3,f4,f5) \ + setup() \ + setup##f() \ + jit_movi##f(_jit, FR##f0, 1.0); \ + jit_##op##r##f(_jit, IR##r0, FR##f0, FR##f0); \ + check5(rc##f##f0, l, r1, r2, r3, r4, r5) \ + checkf5(f, rc##r0, l, f1, f2, f3, f4, f5) + +#if __ia64__ +# define ifcp(f, l, op, r0,r1,r2,r3,r4,r5, f0,f1,f2,f3,f4,f5) \ + fcpi(f, l, op, 
r0,r1,r2,r3,r4,r5, f0,f1,f2,f3,f4,f5) +#else +# define ifcp(f, l, op, r0,r1,r2,r3,r4,r5, f0,f1,f2,f3,f4,f5) \ + fcpi(f, l, op, r0,r1,r2,r3,r4,r5, f0,f1,f2,f3,f4,f5) \ + fcpr(f, l, op, r0,r1,r2,r3,r4,r5, f0,f1,f2,f3,f4,f5) \ + fcprc(f, l, op, r0,r1,r2,r3,r4,r5, f0,f1,f2,f3,f4,f5) \ + fcpi(f, l, op, r1,r2,r3,r4,r5,r0, f0,f1,f2,f3,f4,f5) \ + fcpr(f, l, op, r1,r2,r3,r4,r5,r0, f0,f1,f2,f3,f4,f5) \ + fcprc(f, l, op, r1,r2,r3,r4,r5,r0, f0,f1,f2,f3,f4,f5) \ + fcpi(f, l, op, r2,r3,r4,r5,r0,r1, f0,f1,f2,f3,f4,f5) \ + fcpr(f, l, op, r2,r3,r4,r5,r0,r1, f0,f1,f2,f3,f4,f5) \ + fcprc(f, l, op, r2,r3,r4,r5,r0,r1, f0,f1,f2,f3,f4,f5) \ + fcpi(f, l, op, r3,r4,r5,r0,r1,r2, f0,f1,f2,f3,f4,f5) \ + fcpr(f, l, op, r3,r4,r5,r0,r1,r2, f0,f1,f2,f3,f4,f5) \ + fcprc(f, l, op, r3,r4,r5,r0,r1,r2, f0,f1,f2,f3,f4,f5) \ + fcpi(f, l, op, r4,r5,r0,r1,r2,r3, f0,f1,f2,f3,f4,f5) \ + fcpr(f, l, op, r4,r5,r0,r1,r2,r3, f0,f1,f2,f3,f4,f5) \ + fcprc(f, l, op, r4,r5,r0,r1,r2,r3, f0,f1,f2,f3,f4,f5) \ + fcpi(f, l, op, r5,r0,r1,r2,r3,r4, f0,f1,f2,f3,f4,f5) \ + fcpr(f, l, op, r5,r0,r1,r2,r3,r4, f0,f1,f2,f3,f4,f5) \ + fcprc(f, l, op, r5,r0,r1,r2,r3,r4, f0,f1,f2,f3,f4,f5) +#endif +#if __ia64__ +# define xfcp(f, l, op, r0,r1,r2,r3,r4,r5, f0,f1,f2,f3,f4,f5) \ + ifcp(f, l, op, r0,r1,r2,r3,r4,r5, f0,f1,f2,f3,f4,f5) +#else +# define xfcp(f, l, op, r0,r1,r2,r3,r4,r5, f0,f1,f2,f3,f4,f5) \ + ifcp(f, l, op, r0,r1,r2,r3,r4,r5, f0,f1,f2,f3,f4,f5) \ + ifcp(f, l, op, r0,r1,r2,r3,r4,r5, f1,f2,f3,f4,f5,f0) \ + ifcp(f, l, op, r0,r1,r2,r3,r4,r5, f2,f3,f4,f5,f0,f1) \ + ifcp(f, l, op, r0,r1,r2,r3,r4,r5, f3,f4,f5,f0,f1,f2) \ + ifcp(f, l, op, r0,r1,r2,r3,r4,r5, f4,f5,f0,f1,f2,f3) \ + ifcp(f, l, op, r0,r1,r2,r3,r4,r5, f5,f0,f1,f2,f3,f4) +#endif +#if __ia64__ +# define fcmp(l, op) \ + xfcp(_f, l, op, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5) +#else +# define fcmp(l, op) \ + xfcp(_f, l, op, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5) \ + xfcp(_d, l, op, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5) +#endif + +#define imvi(l, i0, i1, i2, i3, i4, i5) \ + setup() \ 
+ jit_movi(_jit, IR##i0, 1); \ + check5(i, l, i1, i2, i3, i4, i5) + +#define imvr(l, i0, i1, i2, i3, i4, i5) \ + setup() \ + jit_movi(_jit, IR##i1, 1); \ + jit_movr(_jit, IR##i0, IR##i1); \ + check4(r, l, i2, i3, i4, i5) + +#define xmvi(l, i0, i1, i2, i3, i4, i5) \ + imvi(l, i0, i1, i2, i3, i4, i5) \ + imvr(l, i0, i1, i2, i3, i4, i5) +#if __ia64__ +# define mvi(l) \ + xmvi(l, 0, 1, 2, 3, 4, 5) +#else +# define mvi(l) \ + xmvi(l, 0, 1, 2, 3, 4, 5) \ + xmvi(l, 1, 2, 3, 4, 5, 0) \ + xmvi(l, 2, 3, 4, 5, 0, 1) \ + xmvi(l, 3, 4, 5, 0, 1, 2) \ + xmvi(l, 4, 5, 0, 1, 2, 3) \ + xmvi(l, 5, 0, 1, 2, 3, 4) +#endif + +#define fmvi(f, l, f0, f1, f2, f3, f4, f5) \ + setup##f() \ + jit_movi##f(_jit, FR##f0, 1); \ + checkf5(f, i, l, f1, f2, f3, f4, f5) + +#define fmvr(f, l, f0, f1, f2, f3, f4, f5) \ + setup##f() \ + jit_movi##f(_jit, FR##f1, 1); \ + jit_movr##f(_jit, FR##f0, FR##f1); \ + checkf4(f, r, l, f2, f3, f4, f5) + +#define xmvf(f, l, f0, f1, f2, f3, f4, f5) \ + fmvi(f, l, f0, f1, f2, f3, f4, f5) \ + fmvr(f, l, f0, f1, f2, f3, f4, f5) +#if __ia64__ +# define xxmvf(f, l) \ + xmvf(f, l, 0, 1, 2, 3, 4, 5) +#else +# define xxmvf(f, l) \ + xmvf(f, l, 0, 1, 2, 3, 4, 5) \ + xmvf(f, l, 1, 2, 3, 4, 5, 0) \ + xmvf(f, l, 2, 3, 4, 5, 0, 1) \ + xmvf(f, l, 3, 4, 5, 0, 1, 2) \ + xmvf(f, l, 4, 5, 0, 1, 2, 3) \ + xmvf(f, l, 5, 0, 1, 2, 3, 4) +#endif +#define mvf(l) \ + xxmvf(_f, l) \ + xxmvf(_d, l) + +#define f2fr(f, l, op, f0, f1, f2, f3, f4, f5) \ + setup##f() \ + jit_movi##f(_jit, FR##f0, 1); \ + jit_##op(_jit, FR##f1, FR##f0); \ + checkf4(f, rr, l, f2, f3, f4, f5) + +#define f2frc(f, l, op, f0, f1, f2, f3, f4, f5) \ + setup##f() \ + jit_movi##f(_jit, FR##f0, 1); \ + jit_##op(_jit, FR##f0, FR##f0); \ + checkf5(f, rc, l, f1, f2, f3, f4, f5) + +#define xf2f(f, l, op, f0, f1, f2, f3, f4, f5) \ + f2fr(f, l, op, f0, f1, f2, f3, f4, f5) \ + f2frc(f, l, op, f0, f1, f2, f3, f4, f5) +#if __ia64__ +# define f2f(l, f, op) \ + xf2f(f, l, op, 0, 1, 2, 3, 4, 5) +#else +# define f2f(l, f, op) \ + xf2f(f, 
l, op, 0, 1, 2, 3, 4, 5) \ + xf2f(f, l, op, 1, 2, 3, 4, 5, 0) \ + xf2f(f, l, op, 2, 3, 4, 5, 0, 1) \ + xf2f(f, l, op, 3, 4, 5, 0, 1, 2) \ + xf2f(f, l, op, 4, 5, 0, 1, 2, 3) \ + xf2f(f, l, op, 5, 0, 1, 2, 3, 4) +#endif + +#define f2ir(f, l, op, r0,r1,r2,r3,r4,r5, f0,f1,f2,f3,f4,f5) \ + setup() \ + setup##f() \ + jit_movi##f(_jit, FR##f0, 1); \ + jit_##op##f(_jit, IR##r0, FR##f0); \ + check5(r##f##f0, l, r1, r2, r3, r4, r5) \ + checkf5(f, i##r0, l, f1, f2, f3, f4, f5) + +#if __ia64__ +# define if2i(f, l, op, r0,r1,r2,r3,r4,r5, f0,f1,f2,f3,f4,f5) \ + f2ir(f, l, op, r0,r1,r2,r3,r4,r5, f0,f1,f2,f3,f4,f5) +# define xf2i(f, l, op, r0,r1,r2,r3,r4,r5, f0,f1,f2,f3,f4,f5) \ + if2i(f, l, op, r0,r1,r2,r3,r4,r5, f0,f1,f2,f3,f4,f5) +#else +# define if2i(f, l, op, r0,r1,r2,r3,r4,r5, f0,f1,f2,f3,f4,f5) \ + f2ir(f, l, op, r0,r1,r2,r3,r4,r5, f0,f1,f2,f3,f4,f5) \ + f2ir(f, l, op, r1,r2,r3,r4,r5,r0, f0,f1,f2,f3,f4,f5) \ + f2ir(f, l, op, r2,r3,r4,r5,r0,r1, f0,f1,f2,f3,f4,f5) \ + f2ir(f, l, op, r3,r4,r5,r0,r1,r2, f0,f1,f2,f3,f4,f5) \ + f2ir(f, l, op, r4,r5,r0,r1,r2,r3, f0,f1,f2,f3,f4,f5) \ + f2ir(f, l, op, r5,r0,r1,r2,r3,r4, f0,f1,f2,f3,f4,f5) +# define xf2i(f, l, op, r0,r1,r2,r3,r4,r5, f0,f1,f2,f3,f4,f5) \ + if2i(f, l, op, r0,r1,r2,r3,r4,r5, f0,f1,f2,f3,f4,f5) \ + if2i(f, l, op, r0,r1,r2,r3,r4,r5, f1,f2,f3,f4,f5,f0) \ + if2i(f, l, op, r0,r1,r2,r3,r4,r5, f2,f3,f4,f5,f0,f1) \ + if2i(f, l, op, r0,r1,r2,r3,r4,r5, f3,f4,f5,f0,f1,f2) \ + if2i(f, l, op, r0,r1,r2,r3,r4,r5, f4,f5,f0,f1,f2,f3) \ + if2i(f, l, op, r0,r1,r2,r3,r4,r5, f5,f0,f1,f2,f3,f4) +#endif +#define f2i(l, op) \ + xf2i(_f, l, op, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5) \ + xf2i(_d, l, op, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5) + +#define i2fr(f, l, op, r0,r1,r2,r3,r4,r5, f0,f1,f2,f3,f4,f5) \ + setup() \ + setup##f() \ + jit_movi(_jit, IR##r0, 1); \ + jit_##op##f(_jit, FR##f0, IR##r0); \ + check5(r##f##f0, l, r1, r2, r3, r4, r5) \ + checkf5(f, i##r0, l, f1, f2, f3, f4, f5) +#if __ia64__ +# define ii2f(f, l, op, r0,r1,r2,r3,r4,r5, 
f0,f1,f2,f3,f4,f5) \ + i2fr(f, l, op, r0,r1,r2,r3,r4,r5, f0,f1,f2,f3,f4,f5) +# define xi2f(f, l, op, r0,r1,r2,r3,r4,r5, f0,f1,f2,f3,f4,f5) \ + ii2f(f, l, op, r0,r1,r2,r3,r4,r5, f0,f1,f2,f3,f4,f5) +#else +# define ii2f(f, l, op, r0,r1,r2,r3,r4,r5, f0,f1,f2,f3,f4,f5) \ + i2fr(f, l, op, r0,r1,r2,r3,r4,r5, f0,f1,f2,f3,f4,f5) \ + i2fr(f, l, op, r1,r2,r3,r4,r5,r0, f0,f1,f2,f3,f4,f5) \ + i2fr(f, l, op, r2,r3,r4,r5,r0,r1, f0,f1,f2,f3,f4,f5) \ + i2fr(f, l, op, r3,r4,r5,r0,r1,r2, f0,f1,f2,f3,f4,f5) \ + i2fr(f, l, op, r4,r5,r0,r1,r2,r3, f0,f1,f2,f3,f4,f5) \ + i2fr(f, l, op, r5,r0,r1,r2,r3,r4, f0,f1,f2,f3,f4,f5) +# define xi2f(f, l, op, r0,r1,r2,r3,r4,r5, f0,f1,f2,f3,f4,f5) \ + ii2f(f, l, op, r0,r1,r2,r3,r4,r5, f0,f1,f2,f3,f4,f5) \ + ii2f(f, l, op, r0,r1,r2,r3,r4,r5, f1,f2,f3,f4,f5,f0) \ + ii2f(f, l, op, r0,r1,r2,r3,r4,r5, f2,f3,f4,f5,f0,f1) \ + ii2f(f, l, op, r0,r1,r2,r3,r4,r5, f3,f4,f5,f0,f1,f2) \ + ii2f(f, l, op, r0,r1,r2,r3,r4,r5, f4,f5,f0,f1,f2,f3) \ + ii2f(f, l, op, r0,r1,r2,r3,r4,r5, f5,f0,f1,f2,f3,f4) +#endif +#define i2f(l, op) \ + xi2f(_f, l, op, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5) \ + xi2f(_d, l, op, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5) + +#define off_c 1 +#define off_uc off_c +#define off_s 2 +#define off_us off_s +#define off_i 4 +#define off_ui off_i +#define off_l 8 +#define off_f 4 +#define off_d 8 + +#define ildi(i, l, r0, r1, r2, r3, r4, r5) \ + setup() \ + jit_##ldi##i(_jit, IR##r0, buff); \ + check5(ldi##i, l, r1, r2, r3, r4, r5) + +#define ildr(i, l, r0, r1, r2, r3, r4, r5) \ + setup() \ + jit_movi(_jit, IR##r1, (jit_imm_t)buff); \ + jit_##ldr##i(_jit, IR##r0, IR##r1); \ + check4(ldr##i, l, r2, r3, r4, r5) + +#define ildr0(i, l, r0, r1, r2, r3, r4, r5) \ + setup() \ + jit_movi(_jit, IR##r0, (jit_imm_t)buff); \ + jit_##ldr##i(_jit, IR##r0, IR##r0); \ + check5(ldr##i, l, r1, r2, r3, r4, r5) + +#define ildxi(i, l, r0, r1, r2, r3, r4, r5) \ + setup() \ + jit_movi(_jit, IR##r1, (jit_imm_t)buff); \ + jit_ldxi##i(_jit, IR##r0, IR##r1, off##i); \ + check4(ldxi##i, 
l, r2, r3, r4, r5) + +#define ildxr(i, l, r0, r1, r2, r3, r4, r5) \ + setup() \ + jit_movi(_jit, IR##r1, (jit_imm_t)buff); \ + jit_movi(_jit, IR##r2, off##i); \ + jit_ldxr##i(_jit, IR##r0, IR##r1, IR##r2); \ + check3(ldxr##i, l, r3, r4, r5) + +#define ildxr0(i, l, r0, r1, r2, r3, r4, r5) \ + setup() \ + jit_movi(_jit, IR##r1, (jit_imm_t)buff); \ + jit_movi(_jit, IR##r0, off##i); \ + jit_ldxr##i(_jit, IR##r0, IR##r1, IR##r0); \ + check4(ldxr0##i, l, r2, r3, r4, r5) + +#define ildxr1(i, l, r0, r1, r2, r3, r4, r5) \ + setup() \ + jit_movi(_jit, IR##r0, (jit_imm_t)buff); \ + jit_movi(_jit, IR##r1, off##i); \ + jit_ldxr##i(_jit, IR##r0, IR##r0, IR##r1); \ + check4(ldxr1##i, l, r2, r3, r4, r5) + +#define xxldi(i, l, r0, r1, r2, r3, r4, r5) \ + ildi(i, l, r0, r1, r2, r3, r4, r5) \ + ildr(i, l, r0, r1, r2, r3, r4, r5) \ + ildr0(i, l, r0, r1, r2, r3, r4, r5) \ + ildxi(i, l, r0, r1, r2, r3, r4, r5) \ + ildxr(i, l, r0, r1, r2, r3, r4, r5) \ + ildxr0(i, l, r0, r1, r2, r3, r4, r5) \ + ildxr1(i, l, r0, r1, r2, r3, r4, r5) +#if __WORDSIZE == 32 +#define xxxldi(l, r0, r1, r2, r3, r4, r5) +#else +#define xxxldi(l, r0, r1, r2, r3, r4, r5) \ + xxldi(_ui, l, r0, r1, r2, r3, r4, r5) \ + xxldi( _l, l, r0, r1, r2, r3, r4, r5) +#endif +#define xldi(l, r0, r1, r2, r3, r4, r5) \ + xxldi( _c, l, r0, r1, r2, r3, r4, r5) \ + xxldi(_uc, l, r0, r1, r2, r3, r4, r5) \ + xxldi( _s, l, r0, r1, r2, r3, r4, r5) \ + xxldi(_us, l, r0, r1, r2, r3, r4, r5) \ + xxldi( _i, l, r0, r1, r2, r3, r4, r5) \ + xxxldi(l, r0, r1, r2, r3, r4, r5) +#if __ia64__ +# define ldi(l) \ + xldi(l, 0, 1, 2, 3, 4, 5) +#else +# define ldi(l) \ + xldi(l, 0, 1, 2, 3, 4, 5) \ + xldi(l, 1, 2, 3, 4, 5, 0) \ + xldi(l, 2, 3, 4, 5, 0, 1) \ + xldi(l, 3, 4, 5, 0, 1, 2) \ + xldi(l, 4, 5, 0, 1, 2, 3) \ + xldi(l, 5, 0, 1, 2, 3, 4) +#endif + +#define fldi(f, l, r0,r1,r2,r3,r4,r5, f0,f1,f2,f3,f4,f5) \ + setup() \ + setup##f() \ + jit_ldi##f(_jit, FR##f0, buff); \ + check6(ldi##f##r0##f0, l, r0, r1, r2, r3, r4, r5) \ + checkf5(f, ldi##r0##f0, 
l, f1, f2, f3, f4, f5) + +#define fldr(f, l, r0,r1,r2,r3,r4,r5, f0,f1,f2,f3,f4,f5) \ + setup() \ + setup##f() \ + jit_movi(_jit, IR##r0, (jit_imm_t)buff); \ + jit_ldr##f(_jit, FR##f0, IR##r0); \ + check5(ldr##f##r0##f0, l, r1, r2, r3, r4, r5) \ + checkf5(f, ldr##r0##f0, l, f1, f2, f3, f4, f5) + +#define fldxi(f, l, r0,r1,r2,r3,r4,r5, f0,f1,f2,f3,f4,f5) \ + setup() \ + setup##f() \ + jit_movi(_jit, IR##r0, (jit_imm_t)buff); \ + jit_ldxi##f(_jit, FR##f0, IR##r0, off##f); \ + check5(ldxi##f##r0##f0, l, r1, r2, r3, r4, r5) \ + checkf5(f, ldxi##r0##f0, l, f1, f2, f3, f4, f5) + +#define fldxr(f, l, r0,r1,r2,r3,r4,r5, f0,f1,f2,f3,f4,f5) \ + setup() \ + setup##f() \ + jit_movi(_jit, IR##r0, (jit_imm_t)buff); \ + jit_movi(_jit, IR##r1, off##f); \ + jit_ldxr##f(_jit, FR##f0, IR##r0, IR##r1); \ + check4(ldxr##f##r0##f0, l, r2, r3, r4, r5) \ + checkf5(f, ldxr##r0##f0, l, f1, f2, f3, f4, f5) + +#define xldf(f, l, r0,r1,r2,r3,r4,r5, f0,f1,f2,f3,f4,f5) \ + fldi(f, l, r0,r1,r2,r3,r4,r5, f0,f1,f2,f3,f4,f5) \ + fldr(f, l, r0,r1,r2,r3,r4,r5, f0,f1,f2,f3,f4,f5) \ + fldxi(f, l, r0,r1,r2,r3,r4,r5, f0,f1,f2,f3,f4,f5) \ + fldxr(f, l, r0,r1,r2,r3,r4,r5, f0,f1,f2,f3,f4,f5) + +#define xxldf(l, r0,r1,r2,r3,r4,r5, f0,f1,f2,f3,f4,f5) \ + xldf(_f, l, r0,r1,r2,r3,r4,r5, f0,f1,f2,f3,f4,f5) \ + xldf(_d, l, r0,r1,r2,r3,r4,r5, f0,f1,f2,f3,f4,f5) +#if __ia64__ +# define ixldf(l, r0,r1,r2,r3,r4,r5, f0,f1,f2,f3,f4,f5) \ + xxldf(l, r0,r1,r2,r3,r4,r5, f0,f1,f2,f3,f4,f5) +#else +# define fxldf(l, r0,r1,r2,r3,r4,r5, f0,f1,f2,f3,f4,f5) \ + xxldf(l, r0,r1,r2,r3,r4,r5, f0,f1,f2,f3,f4,f5) \ + xxldf(l, r0,r1,r2,r3,r4,r5, f1,f2,f3,f4,f5,f0) \ + xxldf(l, r0,r1,r2,r3,r4,r5, f2,f3,f4,f5,f0,f1) \ + xxldf(l, r0,r1,r2,r3,r4,r5, f3,f4,f5,f0,f1,f2) \ + xxldf(l, r0,r1,r2,r3,r4,r5, f4,f5,f0,f1,f2,f3) \ + xxldf(l, r0,r1,r2,r3,r4,r5, f5,f0,f1,f2,f3,f4) +# define ixldf(l, r0,r1,r2,r3,r4,r5, f0,f1,f2,f3,f4,f5) \ + fxldf(l, r0,r1,r2,r3,r4,r5, f0,f1,f2,f3,f4,f5) \ + fxldf(l, r1,r2,r3,r4,r5,r0, f0,f1,f2,f3,f4,f5) \ + fxldf(l, 
r2,r3,r4,r5,r0,r1, f0,f1,f2,f3,f4,f5) \ + fxldf(l, r3,r4,r5,r0,r1,r2, f0,f1,f2,f3,f4,f5) \ + fxldf(l, r4,r5,r0,r1,r2,r3, f0,f1,f2,f3,f4,f5) \ + fxldf(l, r5,r0,r1,r2,r3,r4, f0,f1,f2,f3,f4,f5) +#endif +#define ldf(l) \ + ixldf(l, 0,1,2,3,4,5, 0,1,2,3,4,5) + +#define isti(i, l, r0, r1, r2, r3, r4, r5) \ + setup() \ + jit_sti##i(_jit, buff, IR##r0); \ + check5(sti##i, l, r1, r2, r3, r4, r5) + +#define istr(i, l, r0, r1, r2, r3, r4, r5) \ + setup() \ + jit_movi(_jit, IR##r1, (jit_imm_t)buff); \ + jit_str##i(_jit, IR##r1, IR##r0); \ + check4(str##i, l, r2, r3, r4, r5) + +#define istr0(i, l, r0, r1, r2, r3, r4, r5) \ + setup() \ + jit_movi(_jit, IR##r1, (jit_imm_t)buff); \ + jit_str##i(_jit, IR##r1, IR##r0); \ + check4(str0##i, l, r2, r3, r4, r5) + +#define istxi(i, l, r0, r1, r2, r3, r4, r5) \ + setup() \ + jit_movi(_jit, IR##r1, (jit_imm_t)buff); \ + jit_stxi##i(_jit, off##i, IR##r1, IR##r0); \ + check4(stxi##i, l, r2, r3, r4, r5) + +#define istxr(i, l, r0, r1, r2, r3, r4, r5) \ + setup() \ + jit_movi(_jit, IR##r1, (jit_imm_t)buff); \ + jit_movi(_jit, IR##r2, off##i); \ + jit_stxr##i(_jit, IR##r2, IR##r1, IR##r0); \ + check3(stxr##i, l, r3, r4, r5) + +#define istxr0(i, l, r0, r1, r2, r3, r4, r5) \ + setup() \ + jit_movi(_jit, IR##r1, (jit_imm_t)buff); \ + jit_movi(_jit, IR##r0, off##i); \ + jit_stxr##i(_jit, IR##r0, IR##r1, IR##r0); \ + check4(stxr0##i, l, r2, r3, r4, r5) + +#define istxr1(i, l, r0, r1, r2, r3, r4, r5) \ + setup() \ + jit_movi(_jit, IR##r0, (jit_imm_t)buff); \ + jit_movi(_jit, IR##r1, off##i); \ + jit_stxr##i(_jit, IR##r1, IR##r0, IR##r0); \ + check4(stxr1##i, l, r2, r3, r4, r5) + +#define xxsti(i, l, r0, r1, r2, r3, r4, r5) \ + isti(i, l, r0, r1, r2, r3, r4, r5) \ + istr(i, l, r0, r1, r2, r3, r4, r5) \ + istr0(i, l, r0, r1, r2, r3, r4, r5) \ + istxi(i, l, r0, r1, r2, r3, r4, r5) \ + istxr(i, l, r0, r1, r2, r3, r4, r5) \ + istxr0(i, l, r0, r1, r2, r3, r4, r5) \ + istxr1(i, l, r0, r1, r2, r3, r4, r5) +#if __WORDSIZE == 32 +#define xxxsti(l, r0, r1, r2, 
r3, r4, r5) +#else +#define xxxsti(l, r0, r1, r2, r3, r4, r5) \ + xxsti( _l, l, r0, r1, r2, r3, r4, r5) +#endif +#define xsti(l, r0, r1, r2, r3, r4, r5) \ + xxsti( _c, l, r0, r1, r2, r3, r4, r5) \ + xxsti( _s, l, r0, r1, r2, r3, r4, r5) \ + xxsti( _i, l, r0, r1, r2, r3, r4, r5) \ + xxxsti(l, r0, r1, r2, r3, r4, r5) +#if __ia64__ +# define sti(l) \ + xsti(l, 0, 1, 2, 3, 4, 5) +#else +# define sti(l) \ + xsti(l, 0, 1, 2, 3, 4, 5) \ + xsti(l, 1, 2, 3, 4, 5, 0) \ + xsti(l, 2, 3, 4, 5, 0, 1) \ + xsti(l, 3, 4, 5, 0, 1, 2) \ + xsti(l, 4, 5, 0, 1, 2, 3) \ + xsti(l, 5, 0, 1, 2, 3, 4) +#endif + +#define fsti(f, l, r0,r1,r2,r3,r4,r5, f0,f1,f2,f3,f4,f5) \ + setup() \ + setup##f() \ + jit_sti##f(_jit, buff, FR##f0); \ + check6(sti##f##r0##f0, l, r0, r1, r2, r3, r4, r5) \ + checkf5(f, sti##r0##f0, l, f1, f2, f3, f4, f5) + +#define fstr(f, l, r0,r1,r2,r3,r4,r5, f0,f1,f2,f3,f4,f5) \ + setup() \ + setup##f() \ + jit_movi(_jit, IR##r0, (jit_imm_t)buff); \ + jit_str##f(_jit, IR##r0, FR##f0); \ + check5(str##f##r0##f0, l, r1, r2, r3, r4, r5) \ + checkf5(f, str##r0##f0, l, f1, f2, f3, f4, f5) + +#define fstxi(f, l, r0,r1,r2,r3,r4,r5, f0,f1,f2,f3,f4,f5) \ + setup() \ + setup##f() \ + jit_movi(_jit, IR##r0, (jit_imm_t)buff); \ + jit_stxi##f(_jit, off##f, IR##r0, FR##f0); \ + check5(stxi##f##r0##f0, l, r1, r2, r3, r4, r5) \ + checkf5(f, stxi##r0##f0, l, f1, f2, f3, f4, f5) + +#define fstxr(f, l, r0,r1,r2,r3,r4,r5, f0,f1,f2,f3,f4,f5) \ + setup() \ + setup##f() \ + jit_movi(_jit, IR##r0, (jit_imm_t)buff); \ + jit_movi(_jit, IR##r1, off##f); \ + jit_stxr##f(_jit, IR##r1, IR##r0, FR##f0); \ + check4(stxr##f##r0##f0, l, r2, r3, r4, r5) \ + checkf5(f, stxr##r0##f0, l, f1, f2, f3, f4, f5) + +#define xstf(f, l, r0,r1,r2,r3,r4,r5, f0,f1,f2,f3,f4,f5) \ + fsti(f, l, r0,r1,r2,r3,r4,r5, f0,f1,f2,f3,f4,f5) \ + fstr(f, l, r0,r1,r2,r3,r4,r5, f0,f1,f2,f3,f4,f5) \ + fstxi(f, l, r0,r1,r2,r3,r4,r5, f0,f1,f2,f3,f4,f5) \ + fstxr(f, l, r0,r1,r2,r3,r4,r5, f0,f1,f2,f3,f4,f5) +#define xxstf(l, r0,r1,r2,r3,r4,r5, 
f0,f1,f2,f3,f4,f5) \ + xstf(_f, l, r0,r1,r2,r3,r4,r5, f0,f1,f2,f3,f4,f5) \ + xstf(_d, l, r0,r1,r2,r3,r4,r5, f0,f1,f2,f3,f4,f5) +#if __ia64__ +# define ixstf(l, r0,r1,r2,r3,r4,r5, f0,f1,f2,f3,f4,f5) \ + xxstf(l, r0,r1,r2,r3,r4,r5, f0,f1,f2,f3,f4,f5) +#else +# define fxstf(l, r0,r1,r2,r3,r4,r5, f0,f1,f2,f3,f4,f5) \ + xxstf(l, r0,r1,r2,r3,r4,r5, f0,f1,f2,f3,f4,f5) \ + xxstf(l, r0,r1,r2,r3,r4,r5, f1,f2,f3,f4,f5,f0) \ + xxstf(l, r0,r1,r2,r3,r4,r5, f2,f3,f4,f5,f0,f1) \ + xxstf(l, r0,r1,r2,r3,r4,r5, f3,f4,f5,f0,f1,f2) \ + xxstf(l, r0,r1,r2,r3,r4,r5, f4,f5,f0,f1,f2,f3) \ + xxstf(l, r0,r1,r2,r3,r4,r5, f5,f0,f1,f2,f3,f4) +# define ixstf(l, r0,r1,r2,r3,r4,r5, f0,f1,f2,f3,f4,f5) \ + fxstf(l, r0,r1,r2,r3,r4,r5, f0,f1,f2,f3,f4,f5) \ + fxstf(l, r1,r2,r3,r4,r5,r0, f0,f1,f2,f3,f4,f5) \ + fxstf(l, r2,r3,r4,r5,r0,r1, f0,f1,f2,f3,f4,f5) \ + fxstf(l, r3,r4,r5,r0,r1,r2, f0,f1,f2,f3,f4,f5) \ + fxstf(l, r4,r5,r0,r1,r2,r3, f0,f1,f2,f3,f4,f5) \ + fxstf(l, r5,r0,r1,r2,r3,r4, f0,f1,f2,f3,f4,f5) +#endif +#define stf(l) \ + ixstf(l, 0,1,2,3,4,5, 0,1,2,3,4,5) + +#define bri(l, op, u, il, ir, r0, r1, r2, r3, r4, r5) \ +{ \ + setup() \ + jit_movi(_jit, IR##r0, il); \ + jit_reloc_t r = jit_b##op##i##u(_jit, IR##r0, ir); \ + jit_calli_0(_jit, abort); \ + jit_patch_here(_jit, r); \ + check5(i, l, r1, r2, r3, r4, r5) \ +} + +#define brr(l, op, u, il, ir, r0, r1, r2, r3, r4, r5) \ +{ \ + setup() \ + jit_movi(_jit, IR##r0, il); \ + jit_movi(_jit, IR##r1, ir); \ + jit_reloc_t r = jit_b##op##r##u(_jit, IR##r0, IR##r1); \ + jit_calli_0(_jit, abort); \ + jit_patch_here(_jit, r); \ + check4(r, l, r2, r3, r4, r5) \ +} + +#define xjmpi(l, op, u, il, ir, r0, r1, r2, r3, r4, r5) \ + bri(l, op, u, il, ir, r0, r1, r2, r3, r4, r5) \ + brr(l, op, u, il, ir, r0, r1, r2, r3, r4, r5) +#if __ia64__ +# define jmpi(l, op, u, il, ir) \ + xjmpi(l, op, u, il, ir, 0, 1, 2, 3, 4, 5) +#else +# define jmpi(l, op, u, il, ir) \ + xjmpi(l, op, u, il, ir, 0, 1, 2, 3, 4, 5) \ + xjmpi(l, op, u, il, ir, 1, 2, 3, 4, 5, 0) \ + xjmpi(l, 
op, u, il, ir, 2, 3, 4, 5, 0, 1) \ + xjmpi(l, op, u, il, ir, 3, 4, 5, 0, 1, 2) \ + xjmpi(l, op, u, il, ir, 4, 5, 0, 1, 2, 3) \ + xjmpi(l, op, u, il, ir, 5, 0, 1, 2, 3, 4) +#endif + +#define bfi(f, l, op, il, ir, f0, f1, f2, f3, f4, f5) \ +{ \ + setup##f() \ + jit_movi##f(_jit, FR##f0, il); \ + jit_movi##f(_jit, JIT_F6, ir); \ + jit_reloc_t r = jit_b##op##r##f(_jit, FR##f0, JIT_F6); \ + jit_calli_0(_jit, abort); \ + jit_patch_here(_jit, r); \ + checkf5(f, i, l, f1, f2, f3, f4, f5) \ +} + +#define bff(f, l, op, il, ir, f0, f1, f2, f3, f4, f5) \ +{ \ + setup##f() \ + jit_movi##f(_jit, FR##f0, il); \ + jit_movi##f(_jit, FR##f1, ir); \ + jit_reloc_t r = jit_b##op##r##f(_jit, FR##f0, FR##f1); \ + jit_calli_0(_jit, abort); \ + jit_patch_here(_jit, r); \ + checkf4(f, r, l, f2, f3, f4, f5) \ +} + +#define xjmpf(f, l, op, il, ir, f0, f1, f2, f3, f4, f5) \ + bfi(f, l, op, il, ir, f0, f1, f2, f3, f4, f5) \ + bff(f, l, op, il, ir, f0, f1, f2, f3, f4, f5) +#define xxjmpf(l, op, il, ir, f0, f1, f2, f3, f4, f5) \ + xjmpf(_f, l, op, il, ir, f0, f1, f2, f3, f4, f5) \ + xjmpf(_d, l, op, il, ir, f0, f1, f2, f3, f4, f5) +#if __ia64__ +# define jmpf(l, op, il, ir) \ + xxjmpf(l, op, il, ir, 0, 1, 2, 3, 4, 5) +#else +# define jmpf(l, op, il, ir) \ + xxjmpf(l, op, il, ir, 0, 1, 2, 3, 4, 5) \ + xxjmpf(l, op, il, ir, 1, 2, 3, 4, 5, 0) \ + xxjmpf(l, op, il, ir, 2, 3, 4, 5, 0, 1) \ + xxjmpf(l, op, il, ir, 3, 4, 5, 0, 1, 2) \ + xxjmpf(l, op, il, ir, 4, 5, 0, 1, 2, 3) \ + xxjmpf(l, op, il, ir, 5, 0, 1, 2, 3, 4) +#endif + +static size_t +run_test(jit_state_t *_jit, uint8_t *arena_base, size_t arena_size) +{ + jit_begin(_jit, arena_base, arena_size); + size_t frame = jit_enter_jit_abi(_jit, 3, 0, 0); + + void (*function)(void); + + char *buff = malloc(16); + ASSERT(buff); + + alu(__LINE__, add) + alux(__LINE__, add) + fop(__LINE__, add) + alu(__LINE__, sub) + alux(__LINE__, sub) + fop(__LINE__, sub) + alu(__LINE__, mul) + fop(__LINE__, mul) + alu(__LINE__, div) + alu_u(__LINE__, div) + 
fop(__LINE__, div) + alu(__LINE__, rem) + alu_u(__LINE__, rem) + alu(__LINE__, and) + alu(__LINE__, or) + alu(__LINE__, xor) + alu(__LINE__, lsh) + alu(__LINE__, rsh) + alu_u(__LINE__, rsh) + uni(__LINE__, negr) + unf(__LINE__, negr) + uni(__LINE__, comr) + unf(__LINE__, absr) + unf(__LINE__, sqrtr) + mvi(__LINE__) + mvf(__LINE__) + uni(__LINE__, extr_c) + uni(__LINE__, extr_uc) + uni(__LINE__, extr_s) + uni(__LINE__, extr_us) +#if __WORDSIZE == 64 + uni(__LINE__, extr_ui) +#endif + uni(__LINE__, bswapr_us) + uni(__LINE__, bswapr_ui) +#if __WORDSIZE == 64 + uni(__LINE__, bswapr_ul) +#endif + f2f(__LINE__, _f, extr_d_f) + f2f(__LINE__, _d, extr_f_d) + f2i(__LINE__, truncr) + i2f(__LINE__, extr) + ldi(__LINE__) + ldf(__LINE__) + sti(__LINE__) + stf(__LINE__) + jmpi(__LINE__, lt, , 0, 1) + jmpi(__LINE__, lt, _u, 0, 1) + jmpf(__LINE__, lt, 0, 1) + jmpi(__LINE__, le, , 1, 1) + jmpi(__LINE__, le, _u, 1, 1) + jmpf(__LINE__, le, 1, 1) + jmpi(__LINE__, eq, , -1, -1) + jmpf(__LINE__, eq, -1, -1) + jmpi(__LINE__, ge, , 2, 2) + jmpi(__LINE__, ge, _u, 2, 2) + jmpf(__LINE__, ge, 2, 2) + jmpi(__LINE__, gt, , 2, 1) + jmpi(__LINE__, gt, _u, 2, 1) + jmpf(__LINE__, gt, 2, 1) + jmpi(__LINE__, ne, , 3, 2) + jmpf(__LINE__, ne, 3, 2) + jmpi(__LINE__, ms, , 1, 1) + jmpi(__LINE__, mc, , 1, 2) +#if __WORDSIZE == 32 +# define ix7f 0x7fffffff +# define ix80 0x80000000 +# define ixff 0xffffffff +#else +# define ix7f 0x7fffffffffffffff +# define ix80 0x8000000000000000 +# define ixff 0xffffffffffffffff +#endif + jmpi(__LINE__, oadd, , ix7f, 1) + jmpi(__LINE__, oadd, _u, ixff, 1) + jmpi(__LINE__, xadd, , ix80, 1) + jmpi(__LINE__, xadd, _u, ix7f, 1) + jmpi(__LINE__, osub, , ix80, 1) + jmpi(__LINE__, osub, _u, 0, 1) + jmpi(__LINE__, xsub, , ix7f, 1) + jmpi(__LINE__, xsub, _u, ix80, 1) + jmpf(__LINE__, unlt, 0, 1) + jmpf(__LINE__, unle, 1, 1) + jmpf(__LINE__, uneq, 2, 2) + jmpf(__LINE__, unge, 3, 3) + jmpf(__LINE__, ungt, 4, 3) + jmpf(__LINE__, ltgt, 5, 4) + jmpf(__LINE__, ord, 0, 0) + 
jmpf(__LINE__, unord, 0, (0.0 / 0.0)) + + jit_leave_jit_abi(_jit, 3, 0, frame); + jit_ret(_jit); + + size_t size = 0; + function = jit_end(_jit, &size); + + if (function) + (*function)(); + else { + free(buff); + return size; + } + + free(buff); + return 0; +} + +int main(int argc, char *argv[]) +{ + return main_compiler(argc, argv, run_test); +} diff --git a/deps/lightening/tests/z_range.c b/deps/lightening/tests/z_range.c new file mode 100644 index 0000000..a8b82f4 --- /dev/null +++ b/deps/lightening/tests/z_range.c @@ -0,0 +1,577 @@ +#include "test.h" + +#define M64 67108864 + +#define aB1 (1<<1) +#define aB2 (1<<2) +#define aB3 (1<<3) +#define aB4 (1<<4) +#define aB5 (1<<5) +#define aB6 (1<<6) +#define aB7 (1<<7) +#define aB8 (1<<8) +#define aB9 (1<<9) +#define aB10 (1<<10) +#define aB11 (1<<11) +#define aB12 (1<<12) +#define aB13 (1<<13) +#define aB14 (1<<14) +#define aB15 (1<<15) +#define aB16 (1<<16) +#define aB17 (1<<17) +#define aB18 (1<<18) +#define aB19 (1<<19) +#define aB20 (1<<20) +#define aB21 (1<<21) +#define aB22 (1<<22) +#define aB23 (1<<23) +#define aB24 (1<<24) +#define aB25 (1<<25) +#define aB26 (1<<26) +#define bB1 (-aB1) +#define bB2 (-aB2) +#define bB3 (-aB3) +#define bB4 (-aB4) +#define bB5 (-aB5) +#define bB6 (-aB6) +#define bB7 (-aB7) +#define bB8 (-aB8) +#define bB9 (-aB9) +#define bB10 (-aB10) +#define bB11 (-aB11) +#define bB12 (-aB12) +#define bB13 (-aB13) +#define bB14 (-aB14) +#define bB15 (-aB15) +#define bB16 (-aB16) +#define bB17 (-aB17) +#define bB18 (-aB18) +#define bB19 (-aB19) +#define bB20 (-aB20) +#define bB21 (-aB21) +#define bB22 (-aB22) +#define bB23 (-aB23) +#define bB24 (-aB24) +#define bB25 (-aB25) +#define bB26 (-aB26) +#define cB1 (aB1-1) +#define cB2 (aB2-1) +#define cB3 (aB3-1) +#define cB4 (aB4-1) +#define cB5 (aB5-1) +#define cB6 (aB6-1) +#define cB7 (aB7-1) +#define cB8 (aB8-1) +#define cB9 (aB9-1) +#define cB10 (aB10-1) +#define cB11 (aB11-1) +#define cB12 (aB12-1) +#define cB13 (aB13-1) +#define cB14 (aB14-1) 
+#define cB15 (aB15-1) +#define cB16 (aB16-1) +#define cB17 (aB17-1) +#define cB18 (aB18-1) +#define cB19 (aB19-1) +#define cB20 (aB20-1) +#define cB21 (aB21-1) +#define cB22 (aB22-1) +#define cB23 (aB23-1) +#define cB24 (aB24-1) +#define cB25 (aB25-1) +#define cB26 (aB26-1) +#define dB1 (-aB1+1) +#define dB2 (-aB2+1) +#define dB3 (-aB3+1) +#define dB4 (-aB4+1) +#define dB5 (-aB5+1) +#define dB6 (-aB6+1) +#define dB7 (-aB7+1) +#define dB8 (-aB8+1) +#define dB9 (-aB9+1) +#define dB10 (-aB10+1) +#define dB11 (-aB11+1) +#define dB12 (-aB12+1) +#define dB13 (-aB13+1) +#define dB14 (-aB14+1) +#define dB15 (-aB15+1) +#define dB16 (-aB16+1) +#define dB17 (-aB17+1) +#define dB18 (-aB18+1) +#define dB19 (-aB19+1) +#define dB20 (-aB20+1) +#define dB21 (-aB21+1) +#define dB22 (-aB22+1) +#define dB23 (-aB23+1) +#define dB24 (-aB24+1) +#define dB25 (-aB25+1) +#define dB26 (-aB26+1) + +#define add(a, b) (a + b) +#define sub(a, b) (a - b) +#define mul(a, b) (a * b) +#define div(a, b) (a / b) +#define rem(a, b) (a % b) +#define and(a, b) (a & b) +#define or(a, b) (a | b) +#define xor(a, b) (a ^ b) + +#if defined(DEBUG) +#define dump_args(N, X, L, R, V)\ + jit_calli_1(_jit, puts,\ + jit_operand_imm(JIT_OPERAND_ABI_POINTER,\ + (jit_imm_t)#N " " #X " " #L " " #R " " #V)) +#else +#define dump_args(N, X, L, R, V) +#endif + +/* alu2 doesn't really work for jit_rshi_u, so define a shim */ +#define jit_rsh_ui jit_rshi_u + +#define alu2(N, X, L, R, V) \ +{ \ + dump_args(N, X, L, R, V); \ + jit_movi(_jit, JIT_R1, L); \ + jit_##N##i(_jit, JIT_R0, JIT_R1, R); \ + jit_reloc_t r = jit_beqi(_jit, JIT_R0, V); \ + jit_calli_0(_jit, abort); \ + jit_patch_here(_jit, r); \ +} + +#define alu1(N, M) \ + alu2(N, N##M##1, 3, (M##1), N(3, M##1)) \ + alu2(N, N##M##2, 3, (M##2), N(3, M##2)) \ + alu2(N, N##M##3, 3, (M##3), N(3, M##3)) \ + alu2(N, N##M##4, 3, (M##4), N(3, M##4)) \ + alu2(N, N##M##5, 3, (M##5), N(3, M##5)) \ + alu2(N, N##M##6, 3, (M##6), N(3, M##6)) \ + alu2(N, N##M##7, 3, (M##7), N(3, M##7)) 
\ + alu2(N, N##M##8, 3, (M##8), N(3, M##8)) \ + alu2(N, N##M##9, 3, (M##9), N(3, M##9)) \ + alu2(N, N##M##10, 3, (M##10), N(3, M##10)) \ + alu2(N, N##M##11, 3, (M##11), N(3, M##11)) \ + alu2(N, N##M##12, 3, (M##12), N(3, M##12)) \ + alu2(N, N##M##13, 3, (M##13), N(3, M##13)) \ + alu2(N, N##M##14, 3, (M##14), N(3, M##14)) \ + alu2(N, N##M##15, 3, (M##15), N(3, M##15)) \ + alu2(N, N##M##16, 3, (M##16), N(3, M##16)) \ + alu2(N, N##M##17, 3, (M##17), N(3, M##17)) \ + alu2(N, N##M##18, 3, (M##18), N(3, M##18)) \ + alu2(N, N##M##19, 3, (M##19), N(3, M##19)) \ + alu2(N, N##M##20, 3, (M##20), N(3, M##20)) \ + alu2(N, N##M##21, 3, (M##21), N(3, M##21)) \ + alu2(N, N##M##22, 3, (M##22), N(3, M##22)) \ + alu2(N, N##M##23, 3, (M##23), N(3, M##23)) \ + alu2(N, N##M##24, 3, (M##24), N(3, M##24)) \ + alu2(N, N##M##25, 3, (M##25), N(3, M##25)) \ + alu2(N, N##M##26, 3, (M##26), N(3, M##26)) + +#define alu(N) \ + alu1(N, aB) \ + alu1(N, bB) \ + alu1(N, cB) \ + alu1(N, dB) + +#define _lsh(N) \ + alu2(lsh, L##N, 1, N, (1L<<N)) + +#if __WORDSIZE == 64 +#define _rsh(N) \ + alu2(rsh, R##N, (1L<<63), N, ((1L<<63)>>N)) + +#define _rush(N) \ + alu2(rsh_u, R##N, (1UL<<63), N, ((1UL<<63)>>N)) +#else +#define _rsh(N) \ + alu2(rsh, R##N, (1L<<31), N, ((1L<<31)>>N)) + +#define _rush(N) \ + alu2(rsh_u, R##N, (1UL<<31), N, ((1UL<<31)>>N)) +#endif + +#if __WORDSIZE == 32 +# define xsh64(X) /**/ +#else +# define xsh64(X) \ + _##X##sh(32) \ + _##X##sh(33) \ + _##X##sh(34) \ + _##X##sh(35) \ + _##X##sh(36) \ + _##X##sh(37) \ + _##X##sh(38) \ + _##X##sh(39) \ + _##X##sh(40) \ + _##X##sh(41) \ + _##X##sh(42) \ + _##X##sh(43) \ + _##X##sh(44) \ + _##X##sh(45) \ + _##X##sh(46) \ + _##X##sh(47) \ + _##X##sh(48) \ + _##X##sh(49) \ + _##X##sh(50) \ + _##X##sh(51) \ + _##X##sh(52) \ + _##X##sh(53) \ + _##X##sh(54) \ + _##X##sh(55) \ + _##X##sh(56) \ + _##X##sh(57) \ + _##X##sh(58) \ + _##X##sh(59) \ + _##X##sh(60) \ + _##X##sh(61) \ + _##X##sh(62) \ + _##X##sh(63) +#endif + +#define xsh(X) \ + _##X##sh(0) \ + 
_##X##sh(1) \ + _##X##sh(2) \ + _##X##sh(3) \ + _##X##sh(4) \ + _##X##sh(5) \ + _##X##sh(6) \ + _##X##sh(7) \ + _##X##sh(8) \ + _##X##sh(9) \ + _##X##sh(10) \ + _##X##sh(11) \ + _##X##sh(12) \ + _##X##sh(13) \ + _##X##sh(14) \ + _##X##sh(15) \ + _##X##sh(16) \ + _##X##sh(17) \ + _##X##sh(18) \ + _##X##sh(19) \ + _##X##sh(20) \ + _##X##sh(21) \ + _##X##sh(22) \ + _##X##sh(23) \ + _##X##sh(24) \ + _##X##sh(25) \ + _##X##sh(26) \ + _##X##sh(27) \ + _##X##sh(28) \ + _##X##sh(29) \ + _##X##sh(30) \ + _##X##sh(31) \ + xsh64(X) + +#define lsh() \ + xsh(l) + +#define rsh() \ + xsh(r) + +#define rsh_u() \ + xsh(ru) + +#define reset(V) \ + jit_calli_3(_jit, memset, \ + jit_operand_imm(JIT_OPERAND_ABI_POINTER, (jit_imm_t)buf),\ + jit_operand_imm(JIT_OPERAND_ABI_INT32, V), \ + jit_operand_imm(JIT_OPERAND_ABI_UINT32, M64 + 8)); + +#define stx(T, N, O, V) \ + jit_movi(_jit, JIT_R0, V); \ + jit_stxi##T(_jit, O, JIT_V0, JIT_R0); + +#define stx8(T, M, V) \ + stx(T, 3, (M##B3), V) \ + stx(T, 4, (M##B4), V) \ + stx(T, 5, (M##B5), V) \ + stx(T, 6, (M##B6), V) \ + stx(T, 7, (M##B7), V) \ + stx(T, 8, (M##B8), V) \ + stx(T, 9, (M##B9), V) \ + stx(T, 10, (M##B10), V) \ + stx(T, 11, (M##B11), V) \ + stx(T, 12, (M##B12), V) \ + stx(T, 13, (M##B13), V) \ + stx(T, 14, (M##B14), V) \ + stx(T, 15, (M##B15), V) \ + stx(T, 16, (M##B16), V) \ + stx(T, 17, (M##B17), V) \ + stx(T, 18, (M##B18), V) \ + stx(T, 19, (M##B19), V) \ + stx(T, 20, (M##B20), V) \ + stx(T, 21, (M##B21), V) \ + stx(T, 22, (M##B22), V) \ + stx(T, 23, (M##B23), V) \ + stx(T, 24, (M##B24), V) \ + stx(T, 25, (M##B25), V) \ + stx(T, 26, (M##B26), V) + +#define stx4(T, M, V) \ + stx(T, 2, (M##B2), V) \ + stx8(T, M, V) + +#define stx2(T, M, V) \ + stx(T, 1, (M##B1), V) \ + stx4(T, M, V) + +#define ldx(T, N, M, O, V) \ +{ \ + dump_args(T, N, M, O, V); \ + jit_movi(_jit, JIT_R0, 0); \ + jit_ldxi##T(_jit, JIT_R0, JIT_V0, O); \ + jit_reloc_t r = jit_beqi(_jit, JIT_R0, V); \ + jit_calli_0(_jit, abort); \ + jit_patch_here(_jit, r); \ +} + 
+/* Load-and-check at offsets MB3 .. MB26; ldx4 and ldx2 additionally cover
+   MB2 and MB1 respectively. */
+#define ldx8(T, M, V) \
+  ldx(T, 3, M, (M##B3), V) \
+  ldx(T, 4, M, (M##B4), V) \
+  ldx(T, 5, M, (M##B5), V) \
+  ldx(T, 6, M, (M##B6), V) \
+  ldx(T, 7, M, (M##B7), V) \
+  ldx(T, 8, M, (M##B8), V) \
+  ldx(T, 9, M, (M##B9), V) \
+  ldx(T, 10, M, (M##B10), V) \
+  ldx(T, 11, M, (M##B11), V) \
+  ldx(T, 12, M, (M##B12), V) \
+  ldx(T, 13, M, (M##B13), V) \
+  ldx(T, 14, M, (M##B14), V) \
+  ldx(T, 15, M, (M##B15), V) \
+  ldx(T, 16, M, (M##B16), V) \
+  ldx(T, 17, M, (M##B17), V) \
+  ldx(T, 18, M, (M##B18), V) \
+  ldx(T, 19, M, (M##B19), V) \
+  ldx(T, 20, M, (M##B20), V) \
+  ldx(T, 21, M, (M##B21), V) \
+  ldx(T, 22, M, (M##B22), V) \
+  ldx(T, 23, M, (M##B23), V) \
+  ldx(T, 24, M, (M##B24), V) \
+  ldx(T, 25, M, (M##B25), V) \
+  ldx(T, 26, M, (M##B26), V)
+#define ldx4(T, M, V) \
+  ldx(T, 2, M, (M##B2), V) \
+  ldx8(T, M, V)
+#define ldx2(T, M, V) \
+  ldx(T, 1, M, (M##B1), V) \
+  ldx4(T, M, V)
+
+/* Emit: F0 = V, then store F0 (suffix T) at immediate offset O from the
+   address held in JIT_V0.  N is not used by the expansion. */
+#define stf(T, N, O, V) \
+  jit_movi##T(_jit, JIT_F0, V); \
+  jit_stxi##T(_jit, O, JIT_V0, JIT_F0);
+
+#define stf8(T, M, V) \
+  stf(T, 3, (M##B3), V) \
+  stf(T, 4, (M##B4), V) \
+  stf(T, 5, (M##B5), V) \
+  stf(T, 6, (M##B6), V) \
+  stf(T, 7, (M##B7), V) \
+  stf(T, 8, (M##B8), V) \
+  stf(T, 9, (M##B9), V) \
+  stf(T, 10, (M##B10), V) \
+  stf(T, 11, (M##B11), V) \
+  stf(T, 12, (M##B12), V) \
+  stf(T, 13, (M##B13), V) \
+  stf(T, 14, (M##B14), V) \
+  stf(T, 15, (M##B15), V) \
+  stf(T, 16, (M##B16), V) \
+  stf(T, 17, (M##B17), V) \
+  stf(T, 18, (M##B18), V) \
+  stf(T, 19, (M##B19), V) \
+  stf(T, 20, (M##B20), V) \
+  stf(T, 21, (M##B21), V) \
+  stf(T, 22, (M##B22), V) \
+  stf(T, 23, (M##B23), V) \
+  stf(T, 24, (M##B24), V) \
+  stf(T, 25, (M##B25), V) \
+  stf(T, 26, (M##B26), V)
+
+#define stf4(T, M, V) \
+  stf(T, 2, (M##B2), V) \
+  stf8(T, M, V)
+
+/* Emit: clear F0, load (suffix T) from immediate offset O off JIT_V0 into
+   F0, put V in F1, then skip an abort() call when F0 compares equal to F1
+   (jit_beqr, presumably branch-if-equal).  N and M only feed the DEBUG
+   trace. */
+#define ldf(T, N, M, O, V) \
+{ \
+  dump_args(T, N, M, O, V); \
+  jit_movi##T(_jit, JIT_F0, 0); \
+  jit_ldxi##T(_jit, JIT_F0, JIT_V0, O); \
+  jit_movi##T(_jit, JIT_F1, V); \
+  jit_reloc_t r = jit_beqr##T(_jit, JIT_F0, JIT_F1); \
+  jit_calli_0(_jit, abort); \
+  jit_patch_here(_jit, r); \
+}
+
+#define ldf8(T, M, V) \
+  ldf(T, 3, M, (M##B3), V) \
+  ldf(T, 4, M, (M##B4), V) \
+  ldf(T, 5, M, (M##B5), V) \
+  ldf(T, 6, M, (M##B6), V) \
+  ldf(T, 7, M, (M##B7), V) \
+  ldf(T, 8, M, (M##B8), V) \
+  ldf(T, 9, M, (M##B9), V) \
+  ldf(T, 10, M, (M##B10), V) \
+  ldf(T, 11, M, (M##B11), V) \
+  ldf(T, 12, M, (M##B12), V) \
+  ldf(T, 13, M, (M##B13), V) \
+  ldf(T, 14, M, (M##B14), V) \
+  ldf(T, 15, M, (M##B15), V) \
+  ldf(T, 16, M, (M##B16), V) \
+  ldf(T, 17, M, (M##B17), V) \
+  ldf(T, 18, M, (M##B18), V) \
+  ldf(T, 19, M, (M##B19), V) \
+  ldf(T, 20, M, (M##B20), V) \
+  ldf(T, 21, M, (M##B21), V) \
+  ldf(T, 22, M, (M##B22), V) \
+  ldf(T, 23, M, (M##B23), V) \
+  ldf(T, 24, M, (M##B24), V) \
+  ldf(T, 25, M, (M##B25), V) \
+  ldf(T, 26, M, (M##B26), V)
+#define ldf4(T, M, V) \
+  ldf(T, 2, M, (M##B2), V) \
+  ldf8(T, M, V)
+
+/* Each ldst_<type>() runs two phases.  Phase "a" points JIT_V0 at buf and
+   uses the positive aB offsets; phase "b" points it at buf + M64 and uses
+   the negative bB offsets.  The buffer is refilled with 0xa5 before each
+   phase so that a load can only see the expected pattern if the preceding
+   store really happened: offset 2^25 from buf is hit by both phases
+   (aB25 and M64 + bB25). */
+#define ldst_c() \
+  reset(0xa5) \
+  jit_movi(_jit, JIT_V0, (jit_imm_t)buf); \
+  stx2(_c, a, 0x5a) \
+  ldx2(_c, a, 0x5a) \
+  reset(0xa5) \
+  jit_movi(_jit, JIT_V0, (jit_imm_t)(buf + M64)); \
+  stx2(_c, b, 0x5a) \
+  ldx2(_c, b, 0x5a)
+
+#define ldst_uc() \
+  reset(0xa5) \
+  jit_movi(_jit, JIT_V0, (jit_imm_t)buf); \
+  stx2(_c, a, 0x5a) \
+  ldx2(_uc, a, 0x5a) \
+  reset(0xa5) \
+  jit_movi(_jit, JIT_V0, (jit_imm_t)(buf + M64)); \
+  stx2(_c, b, 0x5a) \
+  ldx2(_uc, b, 0x5a)
+
+#define ldst_s() \
+  reset(0xa5) \
+  jit_movi(_jit, JIT_V0, (jit_imm_t)buf); \
+  stx2(_s, a, 0x5a5a) \
+  ldx2(_s, a, 0x5a5a) \
+  reset(0xa5) \
+  jit_movi(_jit, JIT_V0, (jit_imm_t)(buf + M64)); \
+  stx2(_s, b, 0x5a5a) \
+  ldx2(_s, b, 0x5a5a)
+
+#define ldst_us() \
+  reset(0xa5) \
+  jit_movi(_jit, JIT_V0, (jit_imm_t)buf); \
+  stx2(_s, a, 0x5a5a) \
+  ldx2(_us, a, 0x5a5a) \
+  reset(0xa5) \
+  jit_movi(_jit, JIT_V0, (jit_imm_t)(buf + M64)); \
+  stx2(_s, b, 0x5a5a) \
+  ldx2(_us, b, 0x5a5a)
+
+#define ldst_i() \
+  reset(0xa5) \
+  jit_movi(_jit, JIT_V0, (jit_imm_t)buf); \
+  stx4(_i, a, 0x5a5a5a5a) \
+  ldx4(_i, a, 0x5a5a5a5a) \
+  reset(0xa5) \
+  jit_movi(_jit, JIT_V0, (jit_imm_t)(buf + M64)); \
+  stx4(_i, b, 0x5a5a5a5a) \
+  ldx4(_i, b, 0x5a5a5a5a)
+
+#define ldst_ui() \
+  reset(0xa5) \
+  jit_movi(_jit, JIT_V0, (jit_imm_t)buf); \
+  stx4(_i, a, 0x5a5a5a5a) \
+  ldx4(_ui, a, 0x5a5a5a5a) \
+  reset(0xa5) \
+  jit_movi(_jit, JIT_V0, (jit_imm_t)(buf + M64)); \
+  stx4(_i, b, 0x5a5a5a5a) \
+  ldx4(_ui, b, 0x5a5a5a5a)
+
+#define ldst_l() \
+  reset(0xa5) \
+  jit_movi(_jit, JIT_V0, (jit_imm_t)buf); \
+  stx8(_l, a, 0x5a5a5a5a5a5a5a5a) \
+  ldx8(_l, a, 0x5a5a5a5a5a5a5a5a) \
+  reset(0xa5) \
+  jit_movi(_jit, JIT_V0, (jit_imm_t)(buf + M64)); \
+  stx8(_l, b, 0x5a5a5a5a5a5a5a5a) \
+  ldx8(_l, b, 0x5a5a5a5a5a5a5a5a)
+
+#define ldst_f() \
+  reset(0xa5) \
+  jit_movi(_jit, JIT_V0, (jit_imm_t)buf); \
+  stf4(_f, a, 0.5) \
+  ldf4(_f, a, 0.5) \
+  reset(0xa5) \
+  jit_movi(_jit, JIT_V0, (jit_imm_t)(buf + M64)); \
+  stf4(_f, b, 0.5) \
+  ldf4(_f, b, 0.5)
+
+#define ldst_d() \
+  reset(0xa5) \
+  jit_movi(_jit, JIT_V0, (jit_imm_t)buf); \
+  stf8(_d, a, 0.5) \
+  ldf8(_d, a, 0.5) \
+  reset(0xa5) \
+  jit_movi(_jit, JIT_V0, (jit_imm_t)(buf + M64)); \
+  stf8(_d, b, 0.5) \
+  ldf8(_d, b, 0.5)
+
+/* Generate the whole range test into the given arena and, if code
+   generation succeeded, run it once.
+
+   Returns 0 after the generated code has run.  When jit_end() yields no
+   function, returns the size it reported instead (presumably the arena
+   size required, so the caller can retry; main_compiler is not visible
+   here).  The scratch buffer is released on both paths. */
+static size_t
+run_test(jit_state_t *_jit, uint8_t *arena_base, size_t arena_size)
+{
+  jit_begin(_jit, arena_base, arena_size);
+  size_t frame = jit_enter_jit_abi(_jit, 3, 0, 0);
+
+  /* The address of buf is baked into the generated code as an immediate;
+     the extra 8 bytes cover the widest store at offset M64 (aB26). */
+  char *buf = malloc(M64 + 8);
+  ASSERT(buf);
+
+  alu(add)
+  alu(sub)
+  alu(mul)
+  alu(div)
+  alu(rem)
+  lsh()
+  rsh()
+  rsh_u()
+  alu(and)
+  alu(or)
+  alu(xor)
+  ldst_c()
+  ldst_uc()
+  ldst_s()
+  ldst_us()
+  ldst_i()
+#if __WORDSIZE == 64
+  ldst_ui()
+  ldst_l()
+#endif
+  ldst_f()
+  ldst_d()
+
+  jit_leave_jit_abi(_jit, 3, 0, frame);
+  jit_ret(_jit);
+
+  size_t size = 0;
+  void (*function)(void) = jit_end(_jit, &size);
+
+  if (function) {
+    (*function)();
+    size = 0;	/* success is always reported as 0 */
+  }
+
+  free(buf);
+  return size;
+}
+
+int main(int argc, char *argv[])
+{
+  return main_compiler(argc, argv, run_test);
+}
diff --git a/deps/lightening/tests/z_ranger.c b/deps/lightening/tests/z_ranger.c
new file mode 100644
index 0000000..aa9eadd
--- /dev/null
+++ b/deps/lightening/tests/z_ranger.c
@@ -0,0 +1,580 @@
+#include "test.h"
+/* 67108864 = 2^26 bytes (64 MiB). */
+#define M64 67108864
+/* aBn = 2^n, bBn = -(2^n), cBn = 2^n - 1, dBn = -(2^n) + 1 */
+#define aB1 (1<<1)
+#define aB2 (1<<2)
+#define aB3 (1<<3)
+#define aB4 (1<<4)
+#define aB5 (1<<5)
+#define aB6 (1<<6)
+#define aB7 (1<<7)
+#define aB8 (1<<8)
+#define aB9 (1<<9)
+#define aB10 (1<<10)
+#define aB11 (1<<11)
+#define aB12 (1<<12)
+#define aB13 (1<<13)
+#define aB14 (1<<14)
+#define aB15 (1<<15)
+#define aB16 (1<<16)
+#define aB17 (1<<17)
+#define aB18 (1<<18)
+#define aB19 (1<<19)
+#define aB20 (1<<20)
+#define aB21 (1<<21)
+#define aB22 (1<<22)
+#define aB23 (1<<23)
+#define aB24 (1<<24)
+#define aB25 (1<<25)
+#define aB26 (1<<26)
+#define bB1 (-aB1)
+#define bB2 (-aB2)
+#define bB3 (-aB3)
+#define bB4 (-aB4)
+#define bB5 (-aB5)
+#define bB6 (-aB6)
+#define bB7 (-aB7)
+#define bB8 (-aB8)
+#define bB9 (-aB9)
+#define bB10 (-aB10)
+#define bB11 (-aB11)
+#define bB12 (-aB12)
+#define bB13 (-aB13)
+#define bB14 (-aB14)
+#define bB15 (-aB15)
+#define bB16 (-aB16)
+#define bB17 (-aB17)
+#define bB18 (-aB18)
+#define bB19 (-aB19)
+#define bB20 (-aB20)
+#define bB21 (-aB21)
+#define bB22 (-aB22)
+#define bB23 (-aB23)
+#define bB24 (-aB24)
+#define bB25 (-aB25)
+#define bB26 (-aB26)
+#define cB1 (aB1-1)
+#define cB2 (aB2-1)
+#define cB3 (aB3-1)
+#define cB4 (aB4-1)
+#define cB5 (aB5-1)
+#define cB6 (aB6-1)
+#define cB7 (aB7-1)
+#define cB8 (aB8-1)
+#define cB9 (aB9-1)
+#define cB10 (aB10-1)
+#define cB11 (aB11-1)
+#define cB12 (aB12-1)
+#define cB13 (aB13-1)
+#define cB14 (aB14-1)
+#define cB15 (aB15-1)
+#define cB16 (aB16-1)
+#define cB17 (aB17-1)
+#define cB18 (aB18-1)
+#define cB19 (aB19-1)
+#define cB20 (aB20-1)
+#define cB21 (aB21-1)
+#define cB22 (aB22-1)
+#define cB23 (aB23-1)
+#define cB24 (aB24-1)
+#define cB25 (aB25-1)
+#define cB26 (aB26-1)
+#define dB1 (-aB1+1)
+#define dB2 (-aB2+1)
+#define dB3 (-aB3+1)
+#define dB4 (-aB4+1)
+#define dB5 (-aB5+1)
+#define dB6 (-aB6+1)
+#define dB7 (-aB7+1)
+#define dB8 (-aB8+1) +#define dB9 (-aB9+1) +#define dB10 (-aB10+1) +#define dB11 (-aB11+1) +#define dB12 (-aB12+1) +#define dB13 (-aB13+1) +#define dB14 (-aB14+1) +#define dB15 (-aB15+1) +#define dB16 (-aB16+1) +#define dB17 (-aB17+1) +#define dB18 (-aB18+1) +#define dB19 (-aB19+1) +#define dB20 (-aB20+1) +#define dB21 (-aB21+1) +#define dB22 (-aB22+1) +#define dB23 (-aB23+1) +#define dB24 (-aB24+1) +#define dB25 (-aB25+1) +#define dB26 (-aB26+1) + +#define add(a, b) (a + b) +#define sub(a, b) (a - b) +#define mul(a, b) (a * b) +#define div(a, b) (a / b) +#define rem(a, b) (a % b) +#define and(a, b) (a & b) +#define or(a, b) (a | b) +#define xor(a, b) (a ^ b) + +#if defined(DEBUG) +#define dump_args(N, X, L, R, V)\ + jit_calli_1(_jit, puts,\ + jit_operand_imm(JIT_OPERAND_ABI_POINTER,\ + (jit_imm_t)#N " " #X " " #L " " #R " " #V)) +#else +#define dump_args(N, X, L, R, V) +#endif + +#define jit_rshi_ui jit_rshi_u + +#define alu2(N, X, L, R, V) \ +{ \ + dump_args(N, X, L, R, V); \ + jit_movi(_jit, JIT_R1, L); \ + jit_movi(_jit, JIT_R2, R); \ + jit_##N##r(_jit, JIT_R0, JIT_R1, JIT_R2); \ + jit_reloc_t r = jit_beqi(_jit, JIT_R0, V); \ + jit_calli_0(_jit, abort); \ + jit_patch_here(_jit, r); \ +} + +#define alu1(N, M) \ + alu2(N, N##M##1, 3, (M##1), N(3, M##1)) \ + alu2(N, N##M##2, 3, (M##2), N(3, M##2)) \ + alu2(N, N##M##3, 3, (M##3), N(3, M##3)) \ + alu2(N, N##M##4, 3, (M##4), N(3, M##4)) \ + alu2(N, N##M##5, 3, (M##5), N(3, M##5)) \ + alu2(N, N##M##6, 3, (M##6), N(3, M##6)) \ + alu2(N, N##M##7, 3, (M##7), N(3, M##7)) \ + alu2(N, N##M##8, 3, (M##8), N(3, M##8)) \ + alu2(N, N##M##9, 3, (M##9), N(3, M##9)) \ + alu2(N, N##M##10, 3, (M##10), N(3, M##10)) \ + alu2(N, N##M##11, 3, (M##11), N(3, M##11)) \ + alu2(N, N##M##12, 3, (M##12), N(3, M##12)) \ + alu2(N, N##M##13, 3, (M##13), N(3, M##13)) \ + alu2(N, N##M##14, 3, (M##14), N(3, M##14)) \ + alu2(N, N##M##15, 3, (M##15), N(3, M##15)) \ + alu2(N, N##M##16, 3, (M##16), N(3, M##16)) \ + alu2(N, N##M##17, 3, (M##17), N(3, 
M##17)) \ + alu2(N, N##M##18, 3, (M##18), N(3, M##18)) \ + alu2(N, N##M##19, 3, (M##19), N(3, M##19)) \ + alu2(N, N##M##20, 3, (M##20), N(3, M##20)) \ + alu2(N, N##M##21, 3, (M##21), N(3, M##21)) \ + alu2(N, N##M##22, 3, (M##22), N(3, M##22)) \ + alu2(N, N##M##23, 3, (M##23), N(3, M##23)) \ + alu2(N, N##M##24, 3, (M##24), N(3, M##24)) \ + alu2(N, N##M##25, 3, (M##25), N(3, M##25)) \ + alu2(N, N##M##26, 3, (M##26), N(3, M##26)) + +#define alu(N) \ + alu1(N, aB) \ + alu1(N, bB) \ + alu1(N, cB) \ + alu1(N, dB) + +#define _lsh(N) \ + alu2(lsh, L##N, 1, N, (1L<<N)) + +#if __WORDSIZE == 64 +#define _rsh(N) \ + alu2(rsh, R##N, (1L<<63), N, ((1L<<63)>>N)) + +#define _rush(N) \ + alu2(rsh_u, R##N, (1UL<<63), N, ((1UL<<63)>>N)) +#else +#define _rsh(N) \ + alu2(rsh, R##N, (1L<<31), N, ((1L<<31)>>N)) + +#define _rush(N) \ + alu2(rsh_u, R##N, (1UL<<31), N, ((1UL<<31)>>N)) +#endif + +#if __WORDSIZE == 32 +# define xsh64(X) /**/ +#else +# define xsh64(X) \ + _##X##sh(32) \ + _##X##sh(33) \ + _##X##sh(34) \ + _##X##sh(35) \ + _##X##sh(36) \ + _##X##sh(37) \ + _##X##sh(38) \ + _##X##sh(39) \ + _##X##sh(40) \ + _##X##sh(41) \ + _##X##sh(42) \ + _##X##sh(43) \ + _##X##sh(44) \ + _##X##sh(45) \ + _##X##sh(46) \ + _##X##sh(47) \ + _##X##sh(48) \ + _##X##sh(49) \ + _##X##sh(50) \ + _##X##sh(51) \ + _##X##sh(52) \ + _##X##sh(53) \ + _##X##sh(54) \ + _##X##sh(55) \ + _##X##sh(56) \ + _##X##sh(57) \ + _##X##sh(58) \ + _##X##sh(59) \ + _##X##sh(60) \ + _##X##sh(61) \ + _##X##sh(62) \ + _##X##sh(63) +#endif + +#define xsh(X) \ + _##X##sh(0) \ + _##X##sh(1) \ + _##X##sh(2) \ + _##X##sh(3) \ + _##X##sh(4) \ + _##X##sh(5) \ + _##X##sh(6) \ + _##X##sh(7) \ + _##X##sh(8) \ + _##X##sh(9) \ + _##X##sh(10) \ + _##X##sh(11) \ + _##X##sh(12) \ + _##X##sh(13) \ + _##X##sh(14) \ + _##X##sh(15) \ + _##X##sh(16) \ + _##X##sh(17) \ + _##X##sh(18) \ + _##X##sh(19) \ + _##X##sh(20) \ + _##X##sh(21) \ + _##X##sh(22) \ + _##X##sh(23) \ + _##X##sh(24) \ + _##X##sh(25) \ + _##X##sh(26) \ + _##X##sh(27) \ + 
_##X##sh(28) \
+    _##X##sh(29) \
+    _##X##sh(30) \
+    _##X##sh(31) \
+    xsh64(X)
+/* Entry points selecting the shift family by token pasting: l -> _lsh, r -> _rsh, ru -> _rush. */
+#define lsh() \
+    xsh(l)
+
+#define rsh() \
+    xsh(r)
+
+#define rsh_u() \
+    xsh(ru)
+/* Emit a call to memset(buf, V, M64 + 8), i.e. fill the whole test buffer with the byte V. */
+#define reset(V) \
+    jit_calli_3(_jit, memset, \
+        jit_operand_imm(JIT_OPERAND_ABI_POINTER, (jit_imm_t)buf),\
+        jit_operand_imm(JIT_OPERAND_ABI_INT32, V), \
+        jit_operand_imm(JIT_OPERAND_ABI_UINT32, M64 + 8));
+/* Load V into R0 and O into R1, then emit jit_stxr<T>(R1, V0, R0) - presumably a store of R0 at V0 + R1. N is not used. */
+#define stx(T, N, O, V) \
+    jit_movi(_jit, JIT_R0, V); \
+    jit_movi(_jit, JIT_R1, O); \
+    jit_stxr##T(_jit, JIT_R1, JIT_V0, JIT_R0);
+/* Store V at each of the offsets M##B3 .. M##B26. */
+#define stx8(T, M, V) \
+    stx(T, 3, (M##B3), V) \
+    stx(T, 4, (M##B4), V) \
+    stx(T, 5, (M##B5), V) \
+    stx(T, 6, (M##B6), V) \
+    stx(T, 7, (M##B7), V) \
+    stx(T, 8, (M##B8), V) \
+    stx(T, 9, (M##B9), V) \
+    stx(T, 10, (M##B10), V) \
+    stx(T, 11, (M##B11), V) \
+    stx(T, 12, (M##B12), V) \
+    stx(T, 13, (M##B13), V) \
+    stx(T, 14, (M##B14), V) \
+    stx(T, 15, (M##B15), V) \
+    stx(T, 16, (M##B16), V) \
+    stx(T, 17, (M##B17), V) \
+    stx(T, 18, (M##B18), V) \
+    stx(T, 19, (M##B19), V) \
+    stx(T, 20, (M##B20), V) \
+    stx(T, 21, (M##B21), V) \
+    stx(T, 22, (M##B22), V) \
+    stx(T, 23, (M##B23), V) \
+    stx(T, 24, (M##B24), V) \
+    stx(T, 25, (M##B25), V) \
+    stx(T, 26, (M##B26), V)
+/* stx8 preceded by a store at offset M##B2. */
+#define stx4(T, M, V) \
+    stx(T, 2, (M##B2), V) \
+    stx8(T, M, V)
+/* stx4 preceded by a store at offset M##B1. */
+#define stx2(T, M, V) \
+    stx(T, 1, (M##B1), V) \
+    stx4(T, M, V)
+/* Zero R0, load into it from offset O off V0 with jit_ldxi<T>, and call abort() unless the result equals V. */
+#define ldx(T, N, M, O, V) \
+{ \
+    dump_args(T, N, M, O, V); \
+    jit_movi(_jit, JIT_R0, 0); \
+    jit_ldxi##T(_jit, JIT_R0, JIT_V0, O); \
+    jit_reloc_t r = jit_beqi(_jit, JIT_R0, V); \
+    jit_calli_0(_jit, abort); \
+    jit_patch_here(_jit, r); \
+}
+/* Check the loads at each of the offsets M##B3 .. M##B26. */
+#define ldx8(T, M, V) \
+    ldx(T, 3, M, (M##B3), V) \
+    ldx(T, 4, M, (M##B4), V) \
+    ldx(T, 5, M, (M##B5), V) \
+    ldx(T, 6, M, (M##B6), V) \
+    ldx(T, 7, M, (M##B7), V) \
+    ldx(T, 8, M, (M##B8), V) \
+    ldx(T, 9, M, (M##B9), V) \
+    ldx(T, 10, M, (M##B10), V) \
+    ldx(T, 11, M, (M##B11), V) \
+    ldx(T, 12, M, (M##B12), V) \
+    ldx(T, 13, M, (M##B13), V) \
+    ldx(T, 14, M, (M##B14), V) \
+    ldx(T, 15, M, (M##B15), V) \
+    ldx(T, 16, M, (M##B16), V) \
+    ldx(T, 17, M, (M##B17), V) \
+    ldx(T, 18, M, (M##B18), V) \
+    ldx(T, 19, M, (M##B19), V) \
+    ldx(T, 20, M, (M##B20), V) \
+    ldx(T, 21, M, (M##B21), V) \
+    ldx(T, 22, M, (M##B22), V) \
+    ldx(T, 23, M, (M##B23), V) \
+    ldx(T, 24, M, (M##B24), V) \
+    ldx(T, 25, M, (M##B25), V) \
+    ldx(T, 26, M, (M##B26), V)
+/* ldx8 preceded by a load check at offset M##B2. */
+#define ldx4(T, M, V) \
+    ldx(T, 2, M, (M##B2), V) \
+    ldx8(T, M, V)
+/* ldx4 preceded by a load check at offset M##B1. */
+#define ldx2(T, M, V) \
+    ldx(T, 1, M, (M##B1), V) \
+    ldx4(T, M, V)
+/* Floating-point store: load V into F0 and O into R0, then emit jit_stxr<T>(R0, V0, F0). N is not used. */
+#define stf(T, N, O, V) \
+    jit_movi##T(_jit, JIT_F0, V); \
+    jit_movi(_jit, JIT_R0, O); \
+    jit_stxr##T(_jit, JIT_R0, JIT_V0, JIT_F0);
+/* Store the floating-point value V at each of the offsets M##B3 .. M##B26. */
+#define stf8(T, M, V) \
+    stf(T, 3, (M##B3), V) \
+    stf(T, 4, (M##B4), V) \
+    stf(T, 5, (M##B5), V) \
+    stf(T, 6, (M##B6), V) \
+    stf(T, 7, (M##B7), V) \
+    stf(T, 8, (M##B8), V) \
+    stf(T, 9, (M##B9), V) \
+    stf(T, 10, (M##B10), V) \
+    stf(T, 11, (M##B11), V) \
+    stf(T, 12, (M##B12), V) \
+    stf(T, 13, (M##B13), V) \
+    stf(T, 14, (M##B14), V) \
+    stf(T, 15, (M##B15), V) \
+    stf(T, 16, (M##B16), V) \
+    stf(T, 17, (M##B17), V) \
+    stf(T, 18, (M##B18), V) \
+    stf(T, 19, (M##B19), V) \
+    stf(T, 20, (M##B20), V) \
+    stf(T, 21, (M##B21), V) \
+    stf(T, 22, (M##B22), V) \
+    stf(T, 23, (M##B23), V) \
+    stf(T, 24, (M##B24), V) \
+    stf(T, 25, (M##B25), V) \
+    stf(T, 26, (M##B26), V)
+/* stf8 preceded by a store at offset M##B2. */
+#define stf4(T, M, V) \
+    stf(T, 2, (M##B2), V) \
+    stf8(T, M, V)
+/* Zero F0, load into it from offset O off V0, load V into F1, and call abort() unless jit_beqr<T> takes the branch. */
+#define ldf(T, N, M, O, V) \
+{ \
+    dump_args(T, N, M, O, V); \
+    jit_movi##T(_jit, JIT_F0, 0); \
+    jit_ldxi##T(_jit, JIT_F0, JIT_V0, O); \
+    jit_movi##T(_jit, JIT_F1, V); \
+    jit_reloc_t r = jit_beqr##T(_jit, JIT_F0, JIT_F1); \
+    jit_calli_0(_jit, abort); \
+    jit_patch_here(_jit, r); \
+}
+/* Check the floating-point loads at each of the offsets M##B3 .. M##B26. */
+#define ldf8(T, M, V) \
+    ldf(T, 3, M, (M##B3), V) \
+    ldf(T, 4, M, (M##B4), V) \
+    ldf(T, 5, M, (M##B5), V) \
+    ldf(T, 6, M, (M##B6), V) \
+    ldf(T, 7, M, (M##B7), V) \
+    ldf(T, 8, M, (M##B8), V) \
+    ldf(T, 9, M, (M##B9), V) \
+    ldf(T, 10, M, (M##B10), V) \
+    ldf(T, 11, M, (M##B11), V) \
+    ldf(T, 12, M, (M##B12), V) \
+    ldf(T,
13, M, (M##B13), V) \ + ldf(T, 14, M, (M##B14), V) \ + ldf(T, 15, M, (M##B15), V) \ + ldf(T, 16, M, (M##B16), V) \ + ldf(T, 17, M, (M##B17), V) \ + ldf(T, 18, M, (M##B18), V) \ + ldf(T, 19, M, (M##B19), V) \ + ldf(T, 20, M, (M##B20), V) \ + ldf(T, 21, M, (M##B21), V) \ + ldf(T, 22, M, (M##B22), V) \ + ldf(T, 23, M, (M##B23), V) \ + ldf(T, 24, M, (M##B24), V) \ + ldf(T, 25, M, (M##B25), V) \ + ldf(T, 26, M, (M##B26), V) +#define ldf4(T, M, V) \ + ldf(T, 2, M, (M##B2), V) \ + ldf8(T, M, V) + +#define ldst_c() \ + reset(0xa5) \ + jit_movi(_jit, JIT_V0, (jit_imm_t)buf); \ + stx2(_c, a, 0x5a) \ + ldx2(_c, a, 0x5a) \ + reset(0xa5) \ + jit_movi(_jit, JIT_V0, (jit_imm_t)(buf + M64)); \ + stx2(_c, b, 0x5a) \ + ldx2(_c, b, 0x5a) + +#define ldst_uc() \ + reset(0xa5) \ + jit_movi(_jit, JIT_V0, (jit_imm_t)buf); \ + stx2(_c, a, 0x5a) \ + ldx2(_uc, a, 0x5a) \ + jit_movi(_jit, JIT_V0, (jit_imm_t)(buf + M64)); \ + stx2(_c, b, 0x5a) \ + ldx2(_uc, b, 0x5a) + +#define ldst_s() \ + reset(0xa5) \ + jit_movi(_jit, JIT_V0, (jit_imm_t)buf); \ + stx2(_s, a, 0x5a5a) \ + ldx2(_s, a, 0x5a5a) \ + reset(0xa5) \ + jit_movi(_jit, JIT_V0, (jit_imm_t)(buf + M64)); \ + stx2(_s, b, 0x5a5a) \ + ldx2(_s, b, 0x5a5a) + +#define ldst_us() \ + reset(0xa5) \ + jit_movi(_jit, JIT_V0, (jit_imm_t)buf); \ + stx2(_s, a, 0x5a5a) \ + ldx2(_us, a, 0x5a5a) \ + reset(0xa5) \ + jit_movi(_jit, JIT_V0, (jit_imm_t)(buf + M64)); \ + stx2(_s, b, 0x5a5a) \ + ldx2(_us, b, 0x5a5a) + +#define ldst_i() \ + reset(0xa5) \ + jit_movi(_jit, JIT_V0, (jit_imm_t)buf); \ + stx4(_i, a, 0x5a5a5a5a) \ + ldx4(_i, a, 0x5a5a5a5a) \ + reset(0xa5) \ + jit_movi(_jit, JIT_V0, (jit_imm_t)(buf + M64)); \ + stx4(_i, b, 0x5a5a5a5a) \ + ldx4(_i, b, 0x5a5a5a5a) + +#define ldst_ui() \ + reset(0xa5) \ + jit_movi(_jit, JIT_V0, (jit_imm_t)buf); \ + stx4(_i, a, 0x5a5a5a5a) \ + ldx4(_ui, a, 0x5a5a5a5a) \ + reset(0xa5) \ + jit_movi(_jit, JIT_V0, (jit_imm_t)(buf + M64)); \ + stx4(_i, b, 0x5a5a5a5a) \ + ldx4(_ui, b, 0x5a5a5a5a) + +#define ldst_l() \ + 
reset(0xa5) \
+    jit_movi(_jit, JIT_V0, (jit_imm_t)buf); \
+    stx8(_l, a, 0x5a5a5a5a5a5a5a5a) \
+    ldx8(_l, a, 0x5a5a5a5a5a5a5a5a) \
+    reset(0xa5) \
+    jit_movi(_jit, JIT_V0, (jit_imm_t)(buf + M64)); \
+    stx8(_l, b, 0x5a5a5a5a5a5a5a5a) \
+    ldx8(_l, b, 0x5a5a5a5a5a5a5a5a)
+/* Float and double variants: store 0.5 and compare the loaded value against 0.5 in both passes. */
+#define ldst_f() \
+    reset(0xa5) \
+    jit_movi(_jit, JIT_V0, (jit_imm_t)buf); \
+    stf4(_f, a, 0.5) \
+    ldf4(_f, a, 0.5) \
+    reset(0xa5) \
+    jit_movi(_jit, JIT_V0, (jit_imm_t)(buf + M64)); \
+    stf4(_f, b, 0.5) \
+    ldf4(_f, b, 0.5)
+
+#define ldst_d() \
+    reset(0xa5) \
+    jit_movi(_jit, JIT_V0, (jit_imm_t)buf); \
+    stf8(_d, a, 0.5) \
+    ldf8(_d, a, 0.5) \
+    reset(0xa5) \
+    jit_movi(_jit, JIT_V0, (jit_imm_t)(buf + M64)); \
+    stf8(_d, b, 0.5) \
+    ldf8(_d, b, 0.5)
+/* Emit every ALU and load/store check into the arena and run the generated code.
+ * Returns 0 after running it, or the size reported through jit_end() when jit_end() yields no function. */
+static size_t
+run_test(jit_state_t *_jit, uint8_t *arena_base, size_t arena_size)
+{
+    jit_begin(_jit, arena_base, arena_size);
+    size_t frame = jit_enter_jit_abi(_jit, 3, 0, 0);
+
+    void (*function)(void);
+    /* buffer of M64 + 8 bytes that reset() fills and the ldst_* macros address through JIT_V0 */
+    char *buf = malloc(M64 + 8);
+    ASSERT(buf);
+    /* NOTE(review): rsh_u() is defined above but is not invoked below - confirm this is intentional */
+    alu(add)
+    alu(sub)
+    alu(mul)
+    alu(div)
+    alu(rem)
+    lsh()
+    rsh()
+    alu(and)
+    alu(or)
+    alu(xor)
+    ldst_c()
+    ldst_uc()
+    ldst_s()
+    ldst_us()
+    ldst_i()
+#if __WORDSIZE == 64
+    ldst_ui()
+    ldst_l()
+#endif
+    ldst_f()
+    ldst_d()
+
+    jit_leave_jit_abi(_jit, 3, 0, frame);
+    jit_ret(_jit);
+
+    size_t size = 0;
+    function = jit_end(_jit, &size);
+    /* buf is released on both paths: after the run, or before reporting the size */
+    if (function)
+        (*function)();
+    else {
+        free(buf);
+        return size;
+    }
+
+    free(buf);
+    return 0;
+}
+
+int main(int argc, char *argv[])
+{
+    return main_compiler(argc, argv, run_test);
+}
diff --git a/deps/lightening/tests/z_stack.c b/deps/lightening/tests/z_stack.c
new file mode 100644
index 0000000..0ee9590
--- /dev/null
+++ b/deps/lightening/tests/z_stack.c
@@ -0,0 +1,374 @@
+#include "test.h"
+/* With DEBUG defined, emit a puts() call printing the stringified arguments; otherwise expand to nothing. */
+#if defined(DEBUG)
+#define dump_args(N, M, T) \
+    jit_calli_1(_jit, puts, \
+        jit_operand_imm(JIT_OPERAND_ABI_POINTER, \
+            (jit_imm_t)#N " " #M " " #T));
+#else
+#define dump_args(N, M, T)
+#endif
+/* szof_<T>: element size in bytes used for each type suffix. */
+#define szof_c 1
+#define szof_uc szof_c
+#define szof_s 2
+#define szof_us szof_s
+#define szof_i 4
+#if __WORDSIZE == 64
+# define szof_ui szof_i
+# define szof_l 8
+#endif
+#define szof_max 8
+/* operand_<T>: operand ABI tag paired with each type suffix. */
+#define operand_c JIT_OPERAND_ABI_INT8
+#define operand_uc JIT_OPERAND_ABI_UINT8
+#define operand_s JIT_OPERAND_ABI_INT16
+#define operand_us JIT_OPERAND_ABI_UINT16
+#define operand_i JIT_OPERAND_ABI_INT32
+#define operand_ui JIT_OPERAND_ABI_UINT32
+#define operand_l JIT_OPERAND_ABI_INT64
+#define operand_ul JIT_OPERAND_ABI_UINT64
+#define operand_f JIT_OPERAND_ABI_FLOAT
+#define operand_d JIT_OPERAND_ABI_DOUBLE
+/* Emit the JIT routine fill<T>(pointer, count) and record its address in the local fill<T>.
+ * The routine stores 0, 1, 2, ... into successive szof<T>-sized slots from pointer up to
+ * pointer + count * szof<T>. */
+#define FILL(T) \
+    void *fill##T = jit_address(_jit); \
+{ \
+    size_t frame = jit_enter_jit_abi(_jit, 2, 0, 0); \
+    jit_load_args_2(_jit, \
+        jit_operand_gpr(JIT_OPERAND_ABI_POINTER, JIT_V0), \
+        jit_operand_gpr(JIT_OPERAND_ABI_UINT32, JIT_R0)); \
+    /* V1 = end pointer (V0 + count * szof<T>); R0 then restarts at 0 as the value to store */ \
+    jit_muli(_jit, JIT_R0, JIT_R0, szof##T); \
+    jit_addr(_jit, JIT_V1, JIT_V0, JIT_R0); \
+    jit_movi(_jit, JIT_R0, 0); \
+    /* loop: leave once V0 reaches V1, otherwise store R0 at V0 and advance both */ \
+    void *loop = jit_address(_jit); \
+    jit_reloc_t done = jit_bger(_jit, JIT_V0, JIT_V1); \
+    jit_str##T(_jit, JIT_V0, JIT_R0); \
+    jit_addi(_jit, JIT_R0, JIT_R0, 1); \
+    jit_addi(_jit, JIT_V0, JIT_V0, szof##T); \
+    jit_jmpi(_jit, loop); \
+    \
+    jit_patch_here(_jit, done); \
+    jit_leave_jit_abi(_jit, 2, 0, frame); \
+    jit_ret(_jit); \
+}
+/* The unsigned suffixes reuse the fill routine emitted for the signed suffix of the same size. */
+#define fill_uc fill_c
+#define fill_us fill_s
+#define fill_ui fill_i
+/* Memory operand of type T at JIT_SP - (N + 1) * szof<T>: the slot argument N is loaded into. */
+#define ARG( T, N) jit_operand_mem(operand##T, JIT_SP, - ((N + 1) * szof##T))
+/* ARGn(K, T): comma-separated ARG<K> operands for arguments 0 .. n-1. */
+#define ARG1( K, T) ARG##K(T, 0)
+#define ARG2( K, T) ARG1( K, T), ARG##K(T, 1)
+#define ARG3( K, T) ARG2( K, T), ARG##K(T, 2)
+#define ARG4( K, T) ARG3( K, T), ARG##K(T, 3)
+#define ARG5( K, T) ARG4( K, T), ARG##K(T, 4)
+#define ARG6( K, T) ARG5( K, T), ARG##K(T, 5)
+#define ARG7( K, T) ARG6( K, T), ARG##K(T, 6)
+#define ARG8( K, T) ARG7( K, T), ARG##K(T, 7)
+#define ARG9( K, T) ARG8( K, T), ARG##K(T, 8)
+#define ARG10(K, T) ARG9( K, T), ARG##K(T, 9)
+#define ARG11(K, T) ARG10(K, T), ARG##K(T, 10)
+#define ARG12(K, T) ARG11(K, T), ARG##K(T,
11)
+#define ARG13(K, T) ARG12(K, T), ARG##K(T, 12)
+#define ARG14(K, T) ARG13(K, T), ARG##K(T, 13)
+#define ARG15(K, T) ARG14(K, T), ARG##K(T, 14)
+#define ARG16(K, T) ARG15(K, T), ARG##K(T, 15)
+#define ARG_c(N) ARG##N( , _c)
+#define ARG_uc(N) ARG##N( , _uc)
+#define ARG_s(N) ARG##N( , _s)
+#define ARG_us(N) ARG##N( , _us)
+#define ARG_i(N) ARG##N( , _i)
+#define ARG_ui(N) ARG##N( , _ui)
+#define ARG_l(N) ARG##N( , _l)
+#define ARG_f(N) ARG##N(F, _f) /* NOTE(review): K = F pastes to ARGF, which this file does not define - confirm it is unused or comes from test.h */
+#define ARG_d(N) ARG##N(F, _d)
+/* Compare argument V as saved on the stack (JIT_SP + arg_space - (V + 1) * szof<T>) with element V
+ * of the buffer addressed by JIT_V0; call abort() unless they are equal.  Expects a local named
+ * arg_space at the expansion site; N is not used. */
+#define CHK(N, T, V) \
+{ \
+    jit_ldxi##T(_jit, JIT_R0, JIT_SP, arg_space - ((V + 1) * szof##T)); \
+    jit_ldxi##T(_jit, JIT_R1, JIT_V0, (V * szof##T)); \
+    jit_reloc_t r = jit_beqr(_jit, JIT_R0, JIT_R1); \
+    jit_calli_0(_jit, abort); \
+    jit_patch_here(_jit, r); \
+}
+/* GETn(K, N, T, V): emit CHK<K> for arguments 0 .. n-1; V is only passed along, never used. */
+#define GET1( K, N, T, V) CHK##K(N, T, 0)
+#define GET2( K, N, T, V) GET1( K, N, T, V) CHK##K(N, T, 1)
+#define GET3( K, N, T, V) GET2( K, N, T, V) CHK##K(N, T, 2)
+#define GET4( K, N, T, V) GET3( K, N, T, V) CHK##K(N, T, 3)
+#define GET5( K, N, T, V) GET4( K, N, T, V) CHK##K(N, T, 4)
+#define GET6( K, N, T, V) GET5( K, N, T, V) CHK##K(N, T, 5)
+#define GET7( K, N, T, V) GET6( K, N, T, V) CHK##K(N, T, 6)
+#define GET8( K, N, T, V) GET7( K, N, T, V) CHK##K(N, T, 7)
+#define GET9( K, N, T, V) GET8( K, N, T, V) CHK##K(N, T, 8)
+#define GET10(K, N, T, V) GET9( K, N, T, V) CHK##K(N, T, 9)
+#define GET11(K, N, T, V) GET10(K, N, T, V) CHK##K(N, T, 10)
+#define GET12(K, N, T, V) GET11(K, N, T, V) CHK##K(N, T, 11)
+#define GET13(K, N, T, V) GET12(K, N, T, V) CHK##K(N, T, 12)
+#define GET14(K, N, T, V) GET13(K, N, T, V) CHK##K(N, T, 13)
+#define GET15(K, N, T, V) GET14(K, N, T, V) CHK##K(N, T, 14)
+#define GET16(K, N, T, V) GET15(K, N, T, V) CHK##K(N, T, 15)
+/* Per-type entry points: GET_<T>(N, M) selects GETN with the matching type suffix. */
+#define GET_c(N, M) GET##N( , c##N, _c, M)
+#define GET_uc(N, M) GET##N( , uc##N, _uc, M)
+#define GET_s(N, M) GET##N( , s##N, _s, M)
+#define GET_us(N, M) GET##N( , us##N, _us, M)
+#define GET_i(N, M) GET##N( , i##N, _i, M)
+#define GET_ui(N, M) GET##N( , ui##N, _ui,
M)
+#define GET_l(N, M) GET##N( , l##N, _l, M)
+#define GET_f(N, M) GET##N(F, f##N, _f, M)
+#define GET_d(N, M) GET##N(F, d##N, _d, M)
+/* PUSH builds an immediate operand of type T holding V; PUSHn(K, T) lists the immediates 0 .. n-1 (PUSH0 is empty). */
+#define PUSH( T, V) jit_operand_imm(operand##T, V)
+#define PUSH0( K, T) /**/
+#define PUSH1( K, T) PUSH##K(T, 0)
+#define PUSH2( K, T) PUSH1( K, T), PUSH##K(T, 1)
+#define PUSH3( K, T) PUSH2( K, T), PUSH##K(T, 2)
+#define PUSH4( K, T) PUSH3( K, T), PUSH##K(T, 3)
+#define PUSH5( K, T) PUSH4( K, T), PUSH##K(T, 4)
+#define PUSH6( K, T) PUSH5( K, T), PUSH##K(T, 5)
+#define PUSH7( K, T) PUSH6( K, T), PUSH##K(T, 6)
+#define PUSH8( K, T) PUSH7( K, T), PUSH##K(T, 7)
+#define PUSH9( K, T) PUSH8( K, T), PUSH##K(T, 8)
+#define PUSH10(K, T) PUSH9( K, T), PUSH##K(T, 9)
+#define PUSH11(K, T) PUSH10(K, T), PUSH##K(T, 10)
+#define PUSH12(K, T) PUSH11(K, T), PUSH##K(T, 11)
+#define PUSH13(K, T) PUSH12(K, T), PUSH##K(T, 12)
+#define PUSH14(K, T) PUSH13(K, T), PUSH##K(T, 13)
+#define PUSH15(K, T) PUSH14(K, T), PUSH##K(T, 14)
+#define PUSH16(K, T) PUSH15(K, T), PUSH##K(T, 15)
+/* Per-type entry points: PUSH_<T>(N) selects PUSHN with the matching type suffix. */
+#define PUSH_c( N) PUSH##N( , _c)
+#define PUSH_uc(N) PUSH##N( , _uc)
+#define PUSH_s( N) PUSH##N( , _s)
+#define PUSH_us(N) PUSH##N( , _us)
+#define PUSH_i( N) PUSH##N( , _i)
+#define PUSH_ui(N) PUSH##N( , _ui)
+#define PUSH_l( N) PUSH##N( , _l)
+#define PUSH_f( N) PUSH##N(F, _f)
+#define PUSH_d( N) PUSH##N(F, _d)
+
+/* bottom function: test<T>_0 loads no arguments; it only enters and leaves the JIT ABI (plus the optional dump_args output) */
+#define DEF0(T) \
+    void *test##T##_0 = jit_address(_jit); \
+{ \
+    size_t frame = jit_enter_jit_abi(_jit, 0, 0, 0); \
+    dump_args(0, 0, T); \
+    jit_leave_jit_abi(_jit, 0, 0, frame); \
+    jit_ret(_jit); \
+}
+
+/*
+ * stack frame:
+ * | lightening reserved stuff - frame |
+ * |-----------------------------------| <- JIT_SP at entry
+ * | argument save area - arg_space |
+ * |-----------------------------------| <- JIT_SP during argument validation
+ * | stack buffer - stack |
+ * |-----------------------------------| <- JIT_SP during next call
+ *
+ * at entry, first push arguments in ascending order (0, 1, 2, ...)
+ * to stack, and afterwards move JIT_SP forward to not mess with the internal
+ * load_args stuff.
+ */
+#define DEFN(N, M, T) \
+    void *test##T##_##N = jit_address(_jit); \
+{ \
+    size_t frame = jit_enter_jit_abi(_jit, 3, 0, 0); \
+    jit_operand_t args[] = \
+        {jit_operand_gpr(JIT_OPERAND_ABI_POINTER, JIT_V0), \
+         ARG##T(N)}; \
+    jit_load_args(_jit, N + 1, args); \
+    /* arg_space: size of the argument save area; CHK uses it to address the saved arguments */ \
+    size_t arg_space = jit_align_stack(_jit, N * szof##T); \
+    \
+    dump_args(N, M, T); \
+    \
+    /* validate arguments: saved argument i must equal element i of the caller's buffer (V0) */ \
+    GET##T(N, M) \
+    \
+    /* heap buffer in %v1 */ \
+    jit_calli_1(_jit, malloc, \
+        jit_operand_imm(JIT_OPERAND_ABI_UINT32, N * szof##T)); \
+    jit_retval(_jit, JIT_V1); \
+    \
+    /* copy the caller's buffer (V0) to the heap buffer (V1) */ \
+    jit_calli_3(_jit, memcpy, \
+        jit_operand_gpr(JIT_OPERAND_ABI_POINTER, JIT_V1), \
+        jit_operand_gpr(JIT_OPERAND_ABI_POINTER, JIT_V0), \
+        jit_operand_imm(JIT_OPERAND_ABI_UINT32, N * szof##T)); \
+    \
+    /* stack buffer for next function in %v2 */ \
+    size_t stack = jit_align_stack(_jit, M * szof##T); \
+    jit_movr(_jit, JIT_V2, JIT_SP); \
+    \
+    /* fill stack buffer for next function */ \
+    jit_calli_2(_jit, fill##T, \
+        jit_operand_gpr(JIT_OPERAND_ABI_POINTER, JIT_V2), \
+        jit_operand_imm(JIT_OPERAND_ABI_UINT32, M)); \
+    \
+    /* call next function: pointer to the new buffer followed by the M immediates 0 .. M-1 */ \
+    jit_operand_t call_args[] = \
+        {jit_operand_gpr(JIT_OPERAND_ABI_POINTER, JIT_V2), \
+         PUSH##T(M)}; \
+    jit_calli(_jit, test##T##_##M, M + 1, call_args); \
+    \
+    /* validate stack buffer: the caller's buffer (V0) must still match the heap copy (V1) */ \
+    jit_calli_3(_jit, memcmp, \
+        jit_operand_gpr(JIT_OPERAND_ABI_POINTER, JIT_V1), \
+        jit_operand_gpr(JIT_OPERAND_ABI_POINTER, JIT_V0), \
+        jit_operand_imm(JIT_OPERAND_ABI_UINT32, N * szof##T)); \
+    jit_retval(_jit, JIT_R0); \
+    jit_reloc_t r = jit_beqi(_jit, JIT_R0, 0); \
+    jit_calli_0(_jit, abort); \
+    jit_patch_here(_jit, r); \
+    \
+    /* release heap buffer */ \
+    jit_calli_1(_jit, free, \
+        jit_operand_gpr(JIT_OPERAND_ABI_POINTER, JIT_V1)); \
+    jit_shrink_stack(_jit, arg_space); \
+    jit_shrink_stack(_jit, stack); \
+    jit_leave_jit_abi(_jit, 3, 0, frame); \
+    jit_ret(_jit); \
+}
+
+/* top function: test<T>_17 loads no arguments; it fills a 16-element buffer and calls test<T>_16 with 16 immediates */
+#define DEFX(T) \
+    void *test##T##_17 = jit_address(_jit); \
+{ \
+    size_t frame = jit_enter_jit_abi(_jit, 3, 0, 0); \
+    size_t arg_space = jit_align_stack(_jit, 16 * szof##T); \
+    \
+    dump_args(17, top, T) \
+    \
+    /* heap buffer in %v1 */ \
+    jit_calli_1(_jit, malloc, \
+        jit_operand_imm(JIT_OPERAND_ABI_UINT32, 16 * szof##T)); \
+    jit_retval(_jit, JIT_V1); \
+    \
+    /* stack buffer for next function in %v2 */ \
+    size_t stack = jit_align_stack(_jit, 16 * szof##T); \
+    jit_movr(_jit, JIT_V2, JIT_SP); \
+    \
+    /* fill stack buffer for next function */ \
+    jit_calli_2(_jit, fill##T, \
+        jit_operand_gpr(JIT_OPERAND_ABI_POINTER, JIT_V2), \
+        jit_operand_imm(JIT_OPERAND_ABI_UINT32, 16)); \
+    \
+    /* copy stack buffer to heap buffer */ \
+    jit_calli_3(_jit, memcpy, \
+        jit_operand_gpr(JIT_OPERAND_ABI_POINTER, JIT_V1), \
+        jit_operand_gpr(JIT_OPERAND_ABI_POINTER, JIT_V2), \
+        jit_operand_imm(JIT_OPERAND_ABI_UINT32, 16 * szof##T)); \
+    \
+    /* call next function: 17 operands, the buffer pointer plus the immediates 0 .. 15 */ \
+    jit_operand_t args[] = \
+        {jit_operand_gpr(JIT_OPERAND_ABI_POINTER, JIT_V2), \
+         PUSH##T(16)}; \
+    jit_calli(_jit, test##T##_16, 17, args); \
+    \
+    /* validate stack buffer: the buffer at V2 must still match the heap copy (V1) after the call */ \
+    jit_calli_3(_jit, memcmp, \
+        jit_operand_gpr(JIT_OPERAND_ABI_POINTER, JIT_V1), \
+        jit_operand_gpr(JIT_OPERAND_ABI_POINTER, JIT_V2), \
+        jit_operand_imm(JIT_OPERAND_ABI_UINT32, 16 * szof##T)); \
+    jit_retval(_jit, JIT_R0); \
+    jit_reloc_t r = jit_beqi(_jit, JIT_R0, 0); \
+    jit_calli_0(_jit, abort); \
+    jit_patch_here(_jit, r); \
+    \
+    /* release heap buffer */ \
+    jit_calli_1(_jit, free, \
+        jit_operand_gpr(JIT_OPERAND_ABI_POINTER, JIT_V1)); \
+    /* technically speaking not necessary */ \
+    /* jit_leave_jit_abi will shrink stack for us */ \
+    jit_shrink_stack(_jit, arg_space); \
+    jit_shrink_stack(_jit, stack); \
+    jit_leave_jit_abi(_jit, 3, 0, frame); \
+    jit_ret(_jit); \
+}
+/* Emit the whole call chain for type T: test<T>_0, then each test<T>_N (calling test<T>_(N-1)), then test<T>_17. */
+#define DEF( T) \
+    DEF0( T) \
+    DEFN( 1, 0, T) \
+    DEFN( 2, 1, T) \
DEFN( 3, 2, T) \ + DEFN( 4, 3, T) \ + DEFN( 5, 4, T) \ + DEFN( 6, 5, T) \ + DEFN( 7, 6, T) \ + DEFN( 8, 7, T) \ + DEFN( 9, 8, T) \ + DEFN(10, 9, T) \ + DEFN(11, 10, T) \ + DEFN(12, 11, T) \ + DEFN(13, 12, T) \ + DEFN(14, 13, T) \ + DEFN(15, 14, T) \ + DEFN(16, 15, T) \ + DEFX(T) + +#define CALL(T) jit_calli_0(_jit, test##T##_17); + +static size_t +run_test(jit_state_t *_jit, uint8_t *arena_base, size_t arena_size) +{ + jit_begin(_jit, arena_base, arena_size); + int32_t (*function)(); + + jit_reloc_t main = jit_jmp(_jit); + + FILL(_c) + FILL(_s) + FILL(_i) +#if __WORDSIZE == 64 + FILL(_l) +#endif + + DEF(_c) + DEF(_uc) + DEF(_s) + DEF(_us) + DEF(_i) +#if __WORDSIZE == 64 + DEF(_ui) + DEF(_l) +#endif + + jit_patch_here(_jit, main); + /* not sure about the actual number of registers, but too many can't + * hurt. */ + size_t frame = jit_enter_jit_abi(_jit, 3, 0, 0); + + CALL(_c) + CALL(_uc) + CALL(_s) + CALL(_us) + CALL(_i) +#if __WORDSIZE == 64 + CALL(_ui) + CALL(_l) +#endif + + jit_leave_jit_abi(_jit, 3, 0, frame); + jit_ret(_jit); + + size_t size; + function = jit_end(_jit, &size); + + if (function) + (*function)(); + else + return size; + + return 0; +} + +int main(int argc, char *argv[]) +{ + return main_compiler(argc, argv, run_test); +} |