diff options
Diffstat (limited to 'deps/lightening/lightening/x86.c')
-rw-r--r-- | deps/lightening/lightening/x86.c | 407 |
1 files changed, 407 insertions, 0 deletions
diff --git a/deps/lightening/lightening/x86.c b/deps/lightening/lightening/x86.c new file mode 100644 index 0000000..f8ac4b0 --- /dev/null +++ b/deps/lightening/lightening/x86.c @@ -0,0 +1,407 @@ +/* + * Copyright (C) 2012-2020 Free Software Foundation, Inc. + * + * This file is part of GNU lightning. + * + * GNU lightning is free software; you can redistribute it and/or modify it + * under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation; either version 3, or (at your option) + * any later version. + * + * GNU lightning is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public + * License for more details. + * + * Authors: + * Paulo Cesar Pereira de Andrade + */ + +#define _NOREG 0xffff + +typedef struct { + /* x87 present */ + uint32_t fpu : 1; + /* cmpxchg8b instruction */ + uint32_t cmpxchg8b : 1; + /* cmov and fcmov branchless conditional mov */ + uint32_t cmov : 1; + /* mmx registers/instructions available */ + uint32_t mmx : 1; + /* sse registers/instructions available */ + uint32_t sse : 1; + /* sse2 registers/instructions available */ + uint32_t sse2 : 1; + /* sse3 instructions available */ + uint32_t sse3 : 1; + /* pcmulqdq instruction */ + uint32_t pclmulqdq : 1; + /* ssse3 suplemental sse3 instructions available */ + uint32_t ssse3 : 1; + /* fused multiply/add using ymm state */ + uint32_t fma : 1; + /* cmpxchg16b instruction */ + uint32_t cmpxchg16b : 1; + /* sse4.1 instructions available */ + uint32_t sse4_1 : 1; + /* sse4.2 instructions available */ + uint32_t sse4_2 : 1; + /* movbe instruction available */ + uint32_t movbe : 1; + /* popcnt instruction available */ + uint32_t popcnt : 1; + /* aes instructions available */ + uint32_t aes : 1; + /* avx instructions available */ + uint32_t avx : 1; + /* lahf/sahf available in 64 bits mode */ + uint32_t lahf : 1; +} jit_cpu_t; + +static jit_cpu_t jit_cpu; + +static inline jit_reloc_t +emit_rel8_reloc (jit_state_t *_jit, uint8_t inst_start) +{ + uint8_t *loc = _jit->pc.uc; + emit_u8 (_jit, 0); + return jit_reloc(_jit, JIT_RELOC_REL8, inst_start, loc, _jit->pc.uc, 0); +} + +static inline jit_reloc_t +emit_rel32_reloc (jit_state_t *_jit, uint8_t inst_start) +{ + uint8_t *loc = _jit->pc.uc; + emit_u32 (_jit, 0); + return jit_reloc(_jit, JIT_RELOC_REL32, inst_start, loc, _jit->pc.uc, 0); +} + +#include "x86-cpu.c" +#include "x86-sse.c" + +jit_bool_t +jit_get_cpu(void) +{ + union { + struct { + uint32_t sse3 : 1; + uint32_t pclmulqdq : 1; + uint32_t dtes64 : 1; /* amd reserved */ + uint32_t monitor : 1; + uint32_t ds_cpl : 1; /* amd reserved */ + uint32_t vmx : 1; /* amd reserved */ + uint32_t smx : 1; /* amd reserved */ + uint32_t est : 1; /* amd reserved */ + uint32_t tm2 : 1; /* amd reserved */ + uint32_t ssse3 : 1; + uint32_t cntx_id : 1; /* amd reserved */ + uint32_t __reserved0 : 1; + uint32_t fma : 1; + uint32_t cmpxchg16b : 1; + uint32_t xtpr : 1; /* amd reserved */ + uint32_t pdcm : 1; /* amd reserved */ + uint32_t __reserved1 : 1; + uint32_t pcid : 1; /* amd reserved */ + uint32_t dca : 1; /* amd reserved */ + uint32_t sse4_1 : 1; + uint32_t sse4_2 : 1; + uint32_t x2apic : 1; /* amd reserved */ + uint32_t movbe : 1; /* amd reserved */ + uint32_t popcnt : 1; + uint32_t tsc : 1; /* amd reserved */ + uint32_t aes : 1; + uint32_t xsave : 1; + uint32_t osxsave : 1; + uint32_t avx : 1; + uint32_t __reserved2 : 1; /* amd F16C */ + uint32_t __reserved3 : 1; + uint32_t __alwayszero : 1; /* amd RAZ */ + } bits; + jit_uword_t cpuid; + } ecx; + union { + struct { + uint32_t fpu : 1; + uint32_t vme : 1; + uint32_t de : 1; + uint32_t pse : 1; + uint32_t tsc : 1; + uint32_t msr : 1; + uint32_t pae : 1; + uint32_t mce : 1; + uint32_t cmpxchg8b : 1; + uint32_t apic : 1; + uint32_t __reserved0 : 1; + uint32_t sep : 1; + uint32_t mtrr : 1; + uint32_t pge : 1; + uint32_t mca : 1; + uint32_t cmov : 1; + uint32_t pat : 1; + uint32_t pse36 : 1; + uint32_t psn : 1; /* amd reserved */ + uint32_t clfsh : 1; + uint32_t __reserved1 : 1; + uint32_t ds : 1; /* amd reserved */ + uint32_t acpi : 1; /* amd reserved */ + uint32_t mmx : 1; + uint32_t fxsr : 1; + uint32_t sse : 1; + uint32_t sse2 : 1; + uint32_t ss : 1; /* amd reserved */ + uint32_t htt : 1; + uint32_t tm : 1; /* amd reserved */ + uint32_t __reserved2 : 1; + uint32_t pbe : 1; /* amd reserved */ + } bits; + jit_uword_t cpuid; + } edx; +#if __X32 + int ac, flags; +#endif + jit_uword_t eax, ebx; + +#if __X32 + /* adapted from glibc __sysconf */ + __asm__ volatile ("pushfl;\n\t" + "popl %0;\n\t" + "movl $0x240000, %1;\n\t" + "xorl %0, %1;\n\t" + "pushl %1;\n\t" + "popfl;\n\t" + "pushfl;\n\t" + "popl %1;\n\t" + "xorl %0, %1;\n\t" + "pushl %0;\n\t" + "popfl" + : "=r" (flags), "=r" (ac)); + + /* i386 or i486 without cpuid */ + if ((ac & (1 << 21)) == 0) + /* probably without x87 as well */ + return 0; +#endif + + /* query %eax = 1 function */ + __asm__ volatile ( +#if __X32 || __X64_32 + "xchgl %%ebx, %1; cpuid; xchgl %%ebx, %1" +#else + "xchgq %%rbx, %1; cpuid; xchgq %%rbx, %1" +#endif + : "=a" (eax), "=r" (ebx), + "=c" (ecx.cpuid), "=d" (edx.cpuid) + : "0" (1)); + + jit_cpu.fpu = edx.bits.fpu; + jit_cpu.cmpxchg8b = edx.bits.cmpxchg8b; + jit_cpu.cmov = edx.bits.cmov; + jit_cpu.mmx = edx.bits.mmx; + jit_cpu.sse = edx.bits.sse; + jit_cpu.sse2 = edx.bits.sse2; + jit_cpu.sse3 = ecx.bits.sse3; + jit_cpu.pclmulqdq = ecx.bits.pclmulqdq; + jit_cpu.ssse3 = ecx.bits.ssse3; + jit_cpu.fma = ecx.bits.fma; + jit_cpu.cmpxchg16b = ecx.bits.cmpxchg16b; + jit_cpu.sse4_1 = ecx.bits.sse4_1; + jit_cpu.sse4_2 = ecx.bits.sse4_2; + jit_cpu.movbe = ecx.bits.movbe; + jit_cpu.popcnt = ecx.bits.popcnt; + jit_cpu.aes = ecx.bits.aes; + jit_cpu.avx = ecx.bits.avx; + + /* query %eax = 0x80000001 function */ +#if __X64 + __asm__ volatile ( +# if __X64_32 + "xchgl %%ebx, %1; cpuid; xchgl %%ebx, %1" +# else + "xchgq %%rbx, %1; cpuid; xchgq %%rbx, %1" +# endif + : "=a" (eax), "=r" (ebx), + "=c" (ecx.cpuid), "=d" (edx.cpuid) + : "0" (0x80000001)); + jit_cpu.lahf = ecx.cpuid & 1; +#endif + + return jit_cpu.sse2; +} + +jit_bool_t +jit_init(jit_state_t *_jit) +{ + return jit_cpu.sse2; +} + +static const jit_gpr_t abi_gpr_args[] = { +#if __X32 + /* No GPRs in args. */ +#elif __CYGWIN__ + _RCX, _RDX, _R8, _R9 +#else + _RDI, _RSI, _RDX, _RCX, _R8, _R9 +#endif +}; + +static const jit_fpr_t abi_fpr_args[] = { +#if __X32 + /* No FPRs in args. */ +#elif __CYGWIN__ + _XMM0, _XMM1, _XMM2, _XMM3 +#else + _XMM0, _XMM1, _XMM2, _XMM3, _XMM4, _XMM5, _XMM6, _XMM7 +#endif +}; + +static const int abi_gpr_arg_count = sizeof(abi_gpr_args) / sizeof(abi_gpr_args[0]); +static const int abi_fpr_arg_count = sizeof(abi_fpr_args) / sizeof(abi_fpr_args[0]); + +struct abi_arg_iterator +{ + const jit_operand_t *args; + size_t argc; + + size_t arg_idx; + size_t gpr_idx; + size_t fpr_idx; + size_t stack_size; + size_t stack_padding; +}; + +static size_t +jit_operand_abi_sizeof(enum jit_operand_abi abi) +{ + switch (abi) { + case JIT_OPERAND_ABI_UINT8: + case JIT_OPERAND_ABI_INT8: + return 1; + case JIT_OPERAND_ABI_UINT16: + case JIT_OPERAND_ABI_INT16: + return 2; + case JIT_OPERAND_ABI_UINT32: + case JIT_OPERAND_ABI_INT32: + return 4; + case JIT_OPERAND_ABI_UINT64: + case JIT_OPERAND_ABI_INT64: + return 8; + case JIT_OPERAND_ABI_POINTER: + return CHOOSE_32_64(4, 8); + case JIT_OPERAND_ABI_FLOAT: + return 4; + case JIT_OPERAND_ABI_DOUBLE: + return 8; + default: + abort(); + } +} + +static size_t +round_size_up_to_words(size_t bytes) +{ + size_t word_size = CHOOSE_32_64(4, 8); + size_t words = (bytes + word_size - 1) / word_size; + return words * word_size; +} + +static size_t +jit_initial_frame_size (void) +{ + return __WORDSIZE / 8; // Saved return address is on stack. +} + +static void +reset_abi_arg_iterator(struct abi_arg_iterator *iter, size_t argc, + const jit_operand_t *args) +{ + memset(iter, 0, sizeof *iter); + iter->argc = argc; + iter->args = args; +#if __CYGWIN__ && __X64 + // Reserve slots on the stack for 4 register parameters (8 bytes each). + iter->stack_size = 32; +#endif +} + +static void +next_abi_arg(struct abi_arg_iterator *iter, jit_operand_t *arg) +{ + ASSERT(iter->arg_idx < iter->argc); + enum jit_operand_abi abi = iter->args[iter->arg_idx].abi; + if (is_gpr_arg(abi) && iter->gpr_idx < abi_gpr_arg_count) { + *arg = jit_operand_gpr (abi, abi_gpr_args[iter->gpr_idx++]); +#ifdef __CYGWIN__ + iter->fpr_idx++; +#endif + } else if (is_fpr_arg(abi) && iter->fpr_idx < abi_fpr_arg_count) { + *arg = jit_operand_fpr (abi, abi_fpr_args[iter->fpr_idx++]); +#ifdef __CYGWIN__ + iter->gpr_idx++; +#endif + } else { + *arg = jit_operand_mem (abi, JIT_SP, iter->stack_size); + size_t bytes = jit_operand_abi_sizeof (abi); + iter->stack_size += round_size_up_to_words (bytes); + } + iter->arg_idx++; +} + +static void +jit_flush(void *fptr, void *tptr) +{ +} + +static inline size_t +jit_stack_alignment(void) +{ + return 16; +} + +static void +jit_try_shorten(jit_state_t *_jit, jit_reloc_t reloc, jit_pointer_t addr) +{ + uint8_t *loc = _jit->start + reloc.offset; + uint8_t *start = loc - reloc.inst_start_offset; + uint8_t *end = _jit->pc.uc; + jit_imm_t i0 = (jit_imm_t)addr; + + if (loc == start) + return; + + if (start < (uint8_t*)addr && (uint8_t*)addr <= end) + return; + + switch (reloc.kind) + { + case JIT_RELOC_ABSOLUTE: { + _jit->pc.uc = start; + ASSERT((loc[-1] & ~7) == 0xb8); // MOVI + int32_t r0 = loc[-1] & 7; + if (start != loc - 1) { + ASSERT(start == loc - 2); + r0 |= (loc[-2] & 1) << 3; + } + return movi(_jit, r0, i0); + } + case JIT_RELOC_REL8: + ASSERT((loc[-1] & ~0xf) == 0x70 || loc[-1] == 0xeb); // JCCSI or JMPSI + /* Nothing useful to do. */ + return; + case JIT_RELOC_REL32: + _jit->pc.uc = start; + if (start[0] == 0xe9) { // JMP + return jmpi(_jit, i0); + } + ASSERT(start[0] == 0x0f); // JCC + return jcci(_jit, start[1] & ~0x80, i0); + default: + /* We don't emit other kinds of reloc. */ + abort (); + } +} + +static void* +bless_function_pointer(void *ptr) +{ + return ptr; +} |