#include #include #include #include #include #include #include #include /* we have a couple assumptions running through this code: * 1) For each branch/jump, there's always a corresponding label * 2) The global value array is in the virtual register JIT_V0 * 3) At the start of a function, we immediately allocate f->max_sp slots, and * slot indexes are relative to JIT_SP * * Lightening is a very cool library that provides a kind of virtual assembler, * as in there's a number of virtual registers that map to physical registers * and each virtual instruction maps to some number of physical instructions. * This means that the user is required to handle register allocations, (virtual) * code generation and possible optimizations, but with some clever tricks we can * pretty much ignore these limitations completely. * * Each bytecode instruction gets compiled down to some number of virtual * Lightening instructions that perform the same task 'as if' the instruction was * being interpreted. This means each operation first loads its arguments from * memory to virtual registers, does whatever with them, and stores the result into * some memory location. This is pretty inefficient but very easy to implement, and * even a poor JIT is almost always faster than the best bytecode. * * As for register allocations, as long as we ensure that each bytecode instruction * effectively 'frees' the registers it used once it's been executed, we don't * need to worry about it. The values are safely stored in memory where the * input/output locs can fetch them later, and there are no register-register * dependencies between bytecode instructions. * * Each operation uses at most two caller-save registers to perform its operation. * The only callee-save register we use is JIT_V0 for the global array, so * we can ask Lightening to not save the other callee-save registers, saving a bit of * overhead when entering/exiting functions. * * There's no particularly good way to view the generated machine code, as it would * require adding something like binutils' BFD library as a dependency. You can * always just dump each code arena to a file and look at it later, but I * haven't implemented it. */ static void compile_fn(struct fn *f); static void *alloc_arena(size_t size) { return mmap(NULL, size, PROT_EXEC | PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); } static void free_arena(void *arena, size_t size) { munmap(arena, size); } static jit_operand_t formal_at(size_t i) { return jit_operand_mem(JIT_OPERAND_ABI_INT64, JIT_SP, i * sizeof(int64_t)); } /* load value from global array/stack location l and place it into virtual register r */ static void get(jit_state_t *j, jit_gpr_t r, struct loc l) { if (l.l) { jit_ldxi_l(j, r, JIT_SP, l.o * sizeof(int64_t)); return; } jit_ldxi_l(j, r, JIT_V0, l.o * sizeof(int64_t)); } /* store value from virtual register r to global array/stack location l*/ static void put(jit_state_t *j, jit_gpr_t r, struct loc l) { if (l.l) { jit_stxi_l(j, l.o * sizeof(int64_t), JIT_SP, r); return; } jit_stxi_l(j, l.o * sizeof(int64_t), JIT_V0, r); } static void compile_add(jit_state_t *j, struct insn i) { get(j, JIT_R0, i.i0); get(j, JIT_R1, i.i1); jit_addr(j, JIT_R0, JIT_R0, JIT_R1); put(j, JIT_R0, i.o); } static void compile_sub(jit_state_t *j, struct insn i) { get(j, JIT_R0, i.i0); get(j, JIT_R1, i.i1); jit_subr(j, JIT_R0, JIT_R0, JIT_R1); put(j, JIT_R0, i.o); } static void compile_mul(jit_state_t *j, struct insn i) { get(j, JIT_R0, i.i0); get(j, JIT_R1, i.i1); jit_mulr(j, JIT_R0, JIT_R0, JIT_R1); put(j, JIT_R0, i.o); } static void compile_div(jit_state_t *j, struct insn i) { get(j, JIT_R0, i.i0); get(j, JIT_R1, i.i1); jit_divr(j, JIT_R0, JIT_R0, JIT_R1); put(j, JIT_R0, i.o); } static void compile_move(jit_state_t *j, struct insn i) { get(j, JIT_R0, i.i0); put(j, JIT_R0, i.o); } static void compile_const(jit_state_t *j, struct insn i) { jit_movi(j, JIT_R0, i.v); put(j, JIT_R0, i.o); } static void compile_eq(jit_state_t *j, struct insn i) { /* Lightening doesn't really have any register-register comparison * instructions, so implement them as local branches */ get(j, JIT_R0, i.i0); get(j, JIT_R1, i.i1); jit_reloc_t branch = jit_beqr(j, JIT_R0, JIT_R1); jit_movi(j, JIT_R0, 0); jit_reloc_t jump = jit_jmp(j); jit_patch_there(j, branch, jit_address(j)); jit_movi(j, JIT_R0, 1); jit_patch_there(j, jump, jit_address(j)); put(j, JIT_R0, i.o); } static void compile_lt(jit_state_t *j, struct insn i) { get(j, JIT_R0, i.i0); get(j, JIT_R1, i.i1); jit_reloc_t branch = jit_bltr(j, JIT_R0, JIT_R1); jit_movi(j, JIT_R0, 0); jit_reloc_t jump = jit_jmp(j); jit_patch_there(j, branch, jit_address(j)); jit_movi(j, JIT_R0, 1); jit_patch_there(j, jump, jit_address(j)); put(j, JIT_R0, i.o); } /* helper function for compiling PRINT_DATE, Lightening doesn't handle variadics * so do the heavy lifting with GCC (or Clang or whatever) */ static void print_date(int64_t date) { char str[11]; date_to_string(str, (ph_date_t)date); printf("%s", str); } static void compile_print_date(jit_state_t *j, struct insn i) { get(j, JIT_R0, i.i0); jit_calli_1(j, print_date, jit_operand_gpr(JIT_OPERAND_ABI_INT64, JIT_R0)); } static void print_int(int64_t i) { printf("%lli", (long long)i); } static void compile_print_int(jit_state_t *j, struct insn i) { get(j, JIT_R0, i.i0); jit_calli_1(j, print_int, jit_operand_gpr(JIT_OPERAND_ABI_INT64, JIT_R0)); } static void print_string(const char *s) { printf("%s", s); } static void compile_print_string(jit_state_t *j, struct insn i) { get(j, JIT_R0, i.i0); jit_calli_1(j, print_string, jit_operand_gpr(JIT_OPERAND_ABI_POINTER, JIT_R0)); } static void print_bool(int64_t b) { printf("%s", b ? "true" : "false"); } static void compile_print_bool(jit_state_t *j, struct insn i) { get(j, JIT_R0, i.i0); jit_calli_1(j, print_bool, jit_operand_gpr(JIT_OPERAND_ABI_INT64, JIT_R0)); } static void compile_print_newline(jit_state_t *j, struct insn i) { UNUSED(i); jit_calli_1(j, putchar, jit_operand_imm(JIT_OPERAND_ABI_INT8, '\n')); } static void compile_print_space(jit_state_t *j, struct insn i) { UNUSED(i); jit_calli_1(j, putchar, jit_operand_imm(JIT_OPERAND_ABI_INT8, ' ')); } static void compile_label(jit_state_t *j, size_t ii, struct vec *labels) { /* add a mapping between instruction index (ii) and current address */ vect_at(jit_addr_t, *labels, ii) = jit_address(j); } struct reloc_helper { jit_reloc_t r; size_t to; }; static void compile_j(jit_state_t *j, struct insn i, struct vec *relocs) { jit_reloc_t r = jit_jmp(j); struct reloc_helper h = {.r = r, .to = i.v}; vect_append(struct reloc_helper, *relocs, &h); } static void compile_b(jit_state_t *j, struct insn i, struct vec *relocs) { get(j, JIT_R0, i.i0); jit_reloc_t r = jit_bnei(j, JIT_R0, 0); struct reloc_helper h = {.r = r, .to = i.v}; vect_append(struct reloc_helper, *relocs, &h); } static void compile_bz(jit_state_t *j, struct insn i, struct vec *relocs) { get(j, JIT_R0, i.i0); jit_reloc_t r = jit_beqi(j, JIT_R0, 0); struct reloc_helper h = {.r = r, .to = i.v}; vect_append(struct reloc_helper, *relocs, &h); } static void compile_arg(struct insn i, struct vec *params) { jit_operand_t operand; struct loc l = i.i0; /* Lightening allows us to specify memory locations as arguments to * function calls, and will automatically move the value from memory to * the correct register according to the ABI */ if (l.l) { operand = jit_operand_mem(JIT_OPERAND_ABI_INT64, JIT_SP, l.o * sizeof(int64_t)); } else { operand = jit_operand_mem(JIT_OPERAND_ABI_INT64, JIT_V0, l.o * sizeof(int64_t)); } vect_append(jit_operand_t, *params, &operand); } static void compile_call(jit_state_t *j, struct insn i, struct vec *params) { jit_operand_t gp = jit_operand_gpr(JIT_OPERAND_ABI_INT64, JIT_V0); vect_append(jit_operand_t, *params, &gp); struct fn *f = find_fn(i.v); assert(f); /* if we haven't already compiled this function, do it now */ if (!f->arena) compile_fn(f); jit_calli(j, f->arena, vec_len(params), params->buf); /* argument instructions are only ever directly before a call * instruction, so there's no risk of messing up some other call's * args and we can avoid allocating a new vector */ vec_reset(params); } static void compile_retval(jit_state_t *j, struct insn i) { jit_retval_l(j, JIT_R0); put(j, JIT_R0, i.o); } static void compile_neg(jit_state_t *j, struct insn i) { get(j, JIT_R0, i.i0); jit_negr(j, JIT_R0, JIT_R0); put(j, JIT_R0, i.o); } static void compile_today(jit_state_t *j, struct insn i) { jit_calli_0(j, current_date); jit_retval_l(j, JIT_R0); put(j, JIT_R0, i.o); } /* lots of these date handling instructions I've implemented as helper functions, mostly * just for my own sanity */ static ph_date_t date_add(int64_t i0, int64_t i1) { struct tm time = tm_from_date((ph_date_t)i0); time.tm_mday += i1; mktime(&time); return date_from_tm(time); } static void compile_date_add(jit_state_t *j, struct insn i) { get(j, JIT_R0, i.i0); get(j, JIT_R1, i.i1); jit_calli_2(j, date_add, jit_operand_gpr(JIT_OPERAND_ABI_INT64, JIT_R0), jit_operand_gpr(JIT_OPERAND_ABI_INT64, JIT_R1)); jit_retval_l(j, JIT_R0); put(j, JIT_R0, i.o); } static ph_date_t date_sub(int64_t i0, int64_t i1) { struct tm time = tm_from_date((ph_date_t)i0); time.tm_mday -= i1; mktime(&time); return date_from_tm(time); } static void compile_date_sub(jit_state_t *j, struct insn i) { get(j, JIT_R0, i.i0); get(j, JIT_R1, i.i1); jit_calli_2(j, date_sub, jit_operand_gpr(JIT_OPERAND_ABI_INT64, JIT_R0), jit_operand_gpr(JIT_OPERAND_ABI_INT64, JIT_R1)); jit_retval_l(j, JIT_R0); put(j, JIT_R0, i.o); } static int64_t date_diff(int64_t i0, int64_t i1) { struct tm tm0 = tm_from_date((ph_date_t)i0); struct tm tm1 = tm_from_date((ph_date_t)i1); time_t t0 = mktime(&tm0); time_t t1 = mktime(&tm1); double seconds = difftime(t0, t1); return round(seconds / 86400); } static void compile_date_diff(jit_state_t *j, struct insn i) { get(j, JIT_R0, i.i0); get(j, JIT_R1, i.i1); jit_calli_2(j, date_diff, jit_operand_gpr(JIT_OPERAND_ABI_INT64, JIT_R0), jit_operand_gpr(JIT_OPERAND_ABI_INT64, JIT_R1)); jit_retval_l(j, JIT_R0); put(j, JIT_R0, i.o); } static int64_t load_year(int64_t i0) { unsigned year = 0; date_split((ph_date_t)i0, &year, NULL, NULL); return year; } static void compile_load_year(jit_state_t *j, struct insn i) { get(j, JIT_R0, i.i0); jit_calli_1(j, load_year, jit_operand_gpr(JIT_OPERAND_ABI_INT64, JIT_R0)); jit_retval_l(j, JIT_R0); put(j, JIT_R0, i.o); } static int64_t load_month(int64_t i0) { unsigned month = 0; date_split((ph_date_t)i0, NULL, &month, NULL); return month; } static void compile_load_month(jit_state_t *j, struct insn i) { get(j, JIT_R0, i.i0); jit_calli_1(j, load_month, jit_operand_gpr(JIT_OPERAND_ABI_INT64, JIT_R0)); jit_retval_l(j, JIT_R0); put(j, JIT_R0, i.o); } static int64_t load_day(int64_t i0) { unsigned day = 0; date_split((ph_date_t)i0, NULL, NULL, &day); return day; } static void compile_load_day(jit_state_t *j, struct insn i) { get(j, JIT_R0, i.i0); jit_calli_1(j, load_day, jit_operand_gpr(JIT_OPERAND_ABI_INT64, JIT_R0)); jit_retval_l(j, JIT_R0); put(j, JIT_R0, i.o); } static int64_t load_weekday(int64_t i0) { struct tm time = tm_from_date((ph_date_t)i0); return (int64_t)time.tm_wday; } static void compile_load_weekday(jit_state_t *j, struct insn i) { get(j, JIT_R0, i.i0); jit_calli_1(j, load_weekday, jit_operand_gpr(JIT_OPERAND_ABI_INT64, JIT_R0)); jit_retval_l(j, JIT_R0); put(j, JIT_R0, i.o); } static int64_t load_weeknum(int64_t i0) { struct tm time = tm_from_date((ph_date_t)i0); return time.tm_yday / 7; } static void compile_load_weeknum(jit_state_t *j, struct insn i) { get(j, JIT_R0, i.i0); jit_calli_1(j, load_weeknum, jit_operand_gpr(JIT_OPERAND_ABI_INT64, JIT_R0)); jit_retval_l(j, JIT_R0); put(j, JIT_R0, i.o); } static int64_t store_year(int64_t i0, int64_t i1) { unsigned month = 0; unsigned day = 0; date_split((ph_date_t)i0, NULL, &month, &day); return date_from_numbers(i1, month, day); } static void compile_store_year(jit_state_t *j, struct insn i) { get(j, JIT_R0, i.i0); get(j, JIT_R1, i.i1); jit_calli_2(j, store_year, jit_operand_gpr(JIT_OPERAND_ABI_INT64, JIT_R0), jit_operand_gpr(JIT_OPERAND_ABI_INT64, JIT_R1)); jit_retval_l(j, JIT_R0); put(j, JIT_R0, i.o); } static int64_t store_month(int64_t i0, int64_t i1) { unsigned year = 0; unsigned day = 0; date_split((ph_date_t)i0, &year, NULL, &day); return date_from_numbers(year, i1, day); } static void compile_store_month(jit_state_t *j, struct insn i) { get(j, JIT_R0, i.i0); get(j, JIT_R1, i.i1); jit_calli_2(j, store_month, jit_operand_gpr(JIT_OPERAND_ABI_INT64, JIT_R0), jit_operand_gpr(JIT_OPERAND_ABI_INT64, JIT_R1)); jit_retval_l(j, JIT_R0); put(j, JIT_R0, i.o); } static int64_t store_day(int64_t i0, int64_t i1) { unsigned year = 0; unsigned month = 0; date_split((ph_date_t)i0, &year, &month, NULL); return date_from_numbers(year, month, i1); } static void compile_store_day(jit_state_t *j, struct insn i) { get(j, JIT_R0, i.i0); get(j, JIT_R1, i.i1); jit_calli_2(j, store_day, jit_operand_gpr(JIT_OPERAND_ABI_INT64, JIT_R0), jit_operand_gpr(JIT_OPERAND_ABI_INT64, JIT_R1)); jit_retval_l(j, JIT_R0); put(j, JIT_R0, i.o); } static size_t compile_fn_body(struct fn *f, jit_state_t *j) { jit_begin(j, f->arena, f->size); size_t frame = jit_enter_jit_abi(j, 1, 0, 0); /* easy off-by-one to miss, damn */ size_t stack = jit_align_stack(j, (f->max_sp + 1) * sizeof(int64_t)); /* move parameters to where they belong */ struct vec params = vec_create(sizeof(jit_operand_t)); for (size_t i = 0; i < f->params; ++i) { jit_operand_t operand = formal_at(i); vec_append(¶ms, &operand); } jit_operand_t gp = jit_operand_gpr(JIT_OPERAND_ABI_INT64, JIT_V0); vec_append(¶ms, &gp); jit_load_args(j, f->params + 1, params.buf); /* reuse vector as argument vector */ vec_reset(¶ms); struct vec relocs = vec_create(sizeof(struct reloc_helper)); struct vec labels = vec_create(sizeof(jit_addr_t)); vec_reserve(&labels, vec_len(&f->insns)); foreach_vec(ii, f->insns) { struct insn i = vect_at(struct insn, f->insns, ii); switch (i.k) { case LABEL: compile_label(j, ii, &labels); break; case J: compile_j(j, i, &relocs); break; case B: compile_b(j, i, &relocs); break; case BZ: compile_bz(j, i, &relocs); break; case ADD: compile_add(j, i); break; case SUB: compile_sub(j, i); break; case MUL: compile_mul(j, i); break; case DIV: compile_div(j, i); break; case MOVE: compile_move(j, i); break; case CONST: compile_const(j, i); break; case EQ: compile_eq(j, i); break; case LT: compile_lt(j, i); break; case PRINT_DATE: compile_print_date(j, i); break; case PRINT_INT: compile_print_int(j, i); break; case PRINT_STRING: compile_print_string(j, i); break; case PRINT_BOOL: compile_print_bool(j, i); break; case PRINT_NEWLINE: compile_print_newline(j, i); break; case PRINT_SPACE: compile_print_space(j, i); break; case ARG: compile_arg(i, ¶ms); break; case CALL: compile_call(j, i, ¶ms); break; case RETVAL: compile_retval(j, i); break; case NEG: compile_neg(j, i); break; case TODAY: compile_today(j, i); break; case DATE_ADD: compile_date_add(j, i); break; case DATE_SUB: compile_date_sub(j, i); break; case DATE_DIFF: compile_date_diff(j, i); break; case LOAD_YEAR: compile_load_year(j, i); break; case LOAD_MONTH: compile_load_month(j, i); break; case LOAD_DAY: compile_load_day(j, i); break; case LOAD_WEEKDAY: compile_load_weekday(j, i); break; case LOAD_WEEKNUM: compile_load_weeknum(j, i); break; case STORE_YEAR: compile_store_year(j, i); break; case STORE_MONTH: compile_store_month(j, i); break; case STORE_DAY: compile_store_day(j, i); break; case RET: { get(j, JIT_R0, i.i0); jit_shrink_stack(j, stack); jit_leave_jit_abi(j, 1, 0, frame); jit_retr(j, JIT_R0); break; } case STOP: { jit_shrink_stack(j, stack); jit_leave_jit_abi(j, 1, 0, frame); jit_ret(j); break; } } } /* fix relocs */ foreach_vec(ri, relocs) { struct reloc_helper h = vect_at(struct reloc_helper, relocs, ri); jit_addr_t a = vect_at(jit_addr_t, labels, h.to); jit_reloc_t r = h.r; assert(a); jit_patch_there(j, r, a); } vec_destroy(&relocs); vec_destroy(&labels); vec_destroy(¶ms); size_t size = 0; void *p = jit_end(j, &size); /* if we had enough room to finish compilation, return 0 to signify that * we don't have to try to compile anything again */ if (p) return 0; /* otherwise, return how many bytes lightening estimates that we would * need to succesfully compile this function */ return size; } static void compile_fn(struct fn *f) { init_jit(); jit_state_t *j = jit_new_state(NULL, NULL); assert(j); void *arena_base = NULL; size_t arena_size = 4096; /* technically there should probably be a limit to how many times we * attempt compilation, but in practice we're unlikely to ever need it. */ while (1) { arena_base = alloc_arena(arena_size); assert(arena_base && "failed allocating executable arena, aborting"); f->arena = arena_base; f->size = arena_size; size_t required_size = compile_fn_body(f, j); if (required_size == 0) break; free_arena(arena_base, arena_size); /* give at least one page more than necessary to be on the safe * side */ arena_size = required_size + 4096; } jit_destroy_state(j); } void compile() { struct fn *f = find_fn(0); compile_fn(f); }