aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorKimplul <kimi.h.kuparinen@gmail.com>2024-04-26 20:09:13 +0300
committerKimplul <kimi.h.kuparinen@gmail.com>2024-04-26 20:09:13 +0300
commit930e0f29ff5637f38a878d2e61352b151698a0ea (patch)
tree8d3110cf68ce216062987556b6e3189bf4be32f3
parent6bc52441237fe182c05703d3a7e1d93d208ce5e7 (diff)
downloadposthaste-930e0f29ff5637f38a878d2e61352b151698a0ea.tar.gz
posthaste-930e0f29ff5637f38a878d2e61352b151698a0ea.zip
add some comments here and there
-rw-r--r--src/compile.c60
-rw-r--r--src/lower.c40
2 files changed, 100 insertions, 0 deletions
diff --git a/src/compile.c b/src/compile.c
index dd4c73d..e5f232b 100644
--- a/src/compile.c
+++ b/src/compile.c
@@ -9,6 +9,43 @@
#include <lightening/lightening.h>
+/* we have a couple assumptions running through this code:
+ * 1) For each branch/jump, there's always a corresponding label
+ * 2) The global value array is in the virtual register JIT_V0
+ * 3) At the start of a function, we immediately allocate f->max_sp slots, and
+ * slot indexes are relative to JIT_SP
+ *
+ * Lightening is a very cool library that provides a kind of virtual assembler,
+ * as in there's a number of virtual registers that map to physical registers
+ * and each virtual instruction maps to some number of physical instructions.
+ * This means that the user is required to handle register allocations, (virtual)
+ * code generation and possible optimizations, but with some clever tricks we can
+ * pretty much ignore these limitations completely.
+ *
+ * Each bytecode instruction gets compiled down to some number of virtual
+ * Lightening instructions that perform the same task 'as if' the instruction was
+ * being interpreted. This means each operation first loads its arguments from
+ * memory to virtual registers, does whatever with them, and stores the result into
+ * some memory location. This is pretty inefficient but very easy to implement, and
+ * even a poor JIT is almost always faster than the best bytecode interpreter.
+ *
+ * As for register allocations, as long as we ensure that each bytecode instruction
+ * effectively 'frees' the registers it used once it's been executed, we don't
+ * need to worry about it. The values are safely stored in memory where the
+ * input/output locs can fetch them later, and there are no register-register
+ * dependencies between bytecode instructions.
+ *
+ * Each operation uses at most two caller-save registers to perform its operation.
+ * The only callee-save register we use is JIT_V0 for the global array, so
+ * we can ask Lightening to not save the other callee-save registers, saving a bit of
+ * overhead when entering/exiting functions.
+ *
+ * There's no particularly good way to view the generated machine code, as it would
+ * require adding something like binutils' BFD library as a dependency. You can
+ * always just dump each code arena to a file and look at it later, but I
+ * haven't implemented it.
+ */
+
static void compile_fn(struct fn *f);
static void *alloc_arena(size_t size)
@@ -29,6 +66,7 @@ static jit_operand_t formal_at(size_t i)
i * sizeof(int64_t));
}
+/* load value from global array/stack location l and place it into virtual register r */
static void get(jit_state_t *j, jit_gpr_t r, struct loc l)
{
if (l.l) {
@@ -39,6 +77,7 @@ static void get(jit_state_t *j, jit_gpr_t r, struct loc l)
jit_ldxi_l(j, r, JIT_V0, l.o * sizeof(int64_t));
}
+/* store value from virtual register r to global array/stack location l */
static void put(jit_state_t *j, jit_gpr_t r, struct loc l)
{
if (l.l) {
@@ -95,6 +134,8 @@ static void compile_const(jit_state_t *j, struct insn i)
static void compile_eq(jit_state_t *j, struct insn i)
{
+ /* Lightening doesn't really have any register-register comparison
+ * instructions, so implement them as local branches */
get(j, JIT_R0, i.i0);
get(j, JIT_R1, i.i1);
jit_reloc_t branch = jit_beqr(j, JIT_R0, JIT_R1);
@@ -125,6 +166,8 @@ static void compile_lt(jit_state_t *j, struct insn i)
put(j, JIT_R0, i.o);
}
+/* helper function for compiling PRINT_DATE, Lightening doesn't handle variadics
+ * so do the heavy lifting with GCC (or Clang or whatever) */
static void print_date(int64_t date)
{
char str[11];
@@ -189,6 +232,7 @@ static void compile_print_space(jit_state_t *j, struct insn i)
static void compile_label(jit_state_t *j, size_t ii, struct vec *labels)
{
+ /* add a mapping between instruction index (ii) and current address */
vect_at(jit_addr_t, *labels, ii) = jit_address(j);
}
@@ -224,6 +268,9 @@ static void compile_arg(struct insn i, struct vec *params)
{
jit_operand_t operand;
struct loc l = i.i0;
+ /* Lightening allows us to specify memory locations as arguments to
+ * function calls, and will automatically move the value from memory to
+ * the correct register according to the ABI */
if (l.l) {
operand = jit_operand_mem(JIT_OPERAND_ABI_INT64, JIT_SP,
l.o * sizeof(int64_t));
@@ -244,10 +291,14 @@ static void compile_call(jit_state_t *j, struct insn i, struct vec *params)
struct fn *f = find_fn(i.v);
assert(f);
+ /* if we haven't already compiled this function, do it now */
if (!f->arena)
compile_fn(f);
jit_calli(j, f->arena, vec_len(params), params->buf);
+ /* argument instructions are only ever directly before a call
+ * instruction, so there's no risk of messing up some other call's
+ * args and we can avoid allocating a new vector */
vec_reset(params);
}
@@ -271,6 +322,8 @@ static void compile_today(jit_state_t *j, struct insn i)
put(j, JIT_R0, i.o);
}
+/* lots of these date handling instructions I've implemented as helper functions, mostly
+ * just for my own sanity */
static ph_date_t date_add(int64_t i0, int64_t i1)
{
struct tm time = tm_from_date((ph_date_t)i0);
@@ -585,9 +638,13 @@ static size_t compile_fn_body(struct fn *f, jit_state_t *j)
size_t size = 0;
void *p = jit_end(j, &size);
+ /* if we had enough room to finish compilation, return 0 to signify that
+ * we don't have to try to compile anything again */
if (p)
return 0;
+ /* otherwise, return how many bytes lightening estimates that we would
+ * need to successfully compile this function */
return size;
}
@@ -599,6 +656,9 @@ static void compile_fn(struct fn *f)
void *arena_base = NULL;
size_t arena_size = 4096;
+ /* technically there should probably be a limit to how many times we
+ * attempt compilation, but in practice we're unlikely to ever need it.
+ */
while (1) {
arena_base = alloc_arena(arena_size);
assert(arena_base &&
diff --git a/src/lower.c b/src/lower.c
index 0a6c07d..7c5fdbd 100644
--- a/src/lower.c
+++ b/src/lower.c
@@ -6,6 +6,27 @@
#include <posthaste/scope.h>
#include <posthaste/utils.h>
+/* The bytecode is similar in construction to Lua's, as in there's two arrays, one
+ * for global values and one for local values. The local value array is referred
+ * to as the stack. In theory each function/procedure could get its own stack,
+ * but in this case there's just a single global stack, and each call moves a
+ * stack pointer up to signify entering a new function. Procedure/function
+ * formal parameters are the first things on the stack.
+ *
+ * All instructions are of the three-value kinds, i.e. an instruction has an
+ * opcode, an output location and two input locations, not all of which have to
+ * be used. There's a special null_loc() to signify an unused location.
+ * Locations are effectively just indexes into either the global or local
+ * array. I've also added a separate CONST instruction that pushes a constant
+ * value to a specified output location.
+ *
+ * Individual instructions are very wide, which decreases cache performance and
+ * makes this particular implementation fairly slow to interpret, but the extra
+ * width means it's a little bit easier to work with due to not having to
+ * compress/decompress anything. In particular, I tried to make JIT compilation
+ * as easy as possible, at the expense of interpretation speed.
+ */
+
/* globals are kind of ugly and could/should be made into parameters (or
* potentially even members of some all-encompassing state) but this works for
* this simple implementation, implementing the change would mostly just require
@@ -102,8 +123,16 @@ static void lower_add(struct fn *f, struct ast *n)
struct ast *l = add_l(n);
struct ast *r = add_r(n);
+ /* since variable definitions can't appear in expressions, there's no
+ * danger of some variable claiming a stack location above this point and
+ * it being overwritten by accident. If variable definitions were
+ * expressions, we would probably have to do an extra pass before
+ * lowering that pre-assigns locations to all variables */
f->sp += 1; lower(f, l);
f->sp += 1; lower(f, r);
+ /* technically we could reuse the stack location for l, but I found that
+ * reading the generated instruction sequences was a bit easier with
+ * separate output and input stack locations */
f->sp -= 2;
output_insn(f, ADD, n->l, l->l, r->l, 0);
@@ -194,6 +223,8 @@ static void lower_id(struct fn *f, struct ast *n)
struct ast *exists = file_scope_find(n->scope, n->id);
assert(exists);
+ /* using ast nodes/scope lookup as convenient way to store variable ->
+ * location mappings */
n->l = exists->l;
}
@@ -253,6 +284,8 @@ static void lower_proc_call(struct fn *f, struct ast *c)
{
size_t count = 0;
foreach_node(n, proc_call_args(c)) {
+ /* place each argument in its own stack location to await being
+ * used */
f->sp += 1; lower(f, n); count += 1;
}
@@ -429,6 +462,7 @@ static void lower_until(struct fn *f, struct ast *n)
static void patch_branch(struct fn *f, size_t branch, size_t off)
{
+ /* the patch destination must always be a label, required by the JIT */
assert((vect_at(struct insn, f->insns, off)).k == LABEL);
struct insn i = vect_at(struct insn, f->insns, branch);
@@ -510,6 +544,10 @@ static void lower_builtin_call(struct fn *f, struct ast *n)
static void lower(struct fn *f, struct ast *n)
{
+ /* technically it would be possible for some lowering to use stack space
+ * without calling lower(), probably causing difficult to debug runtime weirdness.
+ * Not currently an issue, but something that would need to be taken
+ * into account if this instruction set was extended at some point. */
if (f->max_sp < f->sp)
f->max_sp = f->sp;
@@ -552,6 +590,8 @@ static void lower(struct fn *f, struct ast *n)
case AST_DOT: break;
}
+ /* each ast node is assigned some location, regardless of whether it
+ * actually needs one, as a sanity check */
assert(loc_as_int(n->l) > 0);
}