| author | Kimplul <kimi.h.kuparinen@gmail.com> | 2024-04-26 20:09:13 +0300 | 
|---|---|---|
| committer | Kimplul <kimi.h.kuparinen@gmail.com> | 2024-04-26 20:09:13 +0300 | 
| commit | 930e0f29ff5637f38a878d2e61352b151698a0ea (patch) | |
| tree | 8d3110cf68ce216062987556b6e3189bf4be32f3 /src | |
| parent | 6bc52441237fe182c05703d3a7e1d93d208ce5e7 (diff) | |
| download | posthaste-930e0f29ff5637f38a878d2e61352b151698a0ea.tar.gz posthaste-930e0f29ff5637f38a878d2e61352b151698a0ea.zip | |
add some comments here and there
Diffstat (limited to 'src')
| -rw-r--r-- | src/compile.c | 60 |
|---|---|---|
| -rw-r--r-- | src/lower.c | 40 |
2 files changed, 100 insertions, 0 deletions
```diff
diff --git a/src/compile.c b/src/compile.c
index dd4c73d..e5f232b 100644
--- a/src/compile.c
+++ b/src/compile.c
@@ -9,6 +9,43 @@
 
 #include <lightening/lightening.h>
 
+/* we have a couple assumptions running through this code:
+ * 1) For each branch/jump, there's always a corresponding label
+ * 2) The global value array is in the virtual register JIT_V0
+ * 3) At the start of a function, we immediately allocate f->max_sp slots, and
+ *    slot indexes are relative to JIT_SP
+ *
+ * Lightening is a very cool library that provides a kind of virtual assembler,
+ * as in there's a number of virtual registers that map to physical registers
+ * and each virtual instruction maps to some number of physical instructions.
+ * This means that the user is required to handle register allocation, (virtual)
+ * code generation and possible optimizations, but with some clever tricks we can
+ * pretty much ignore these limitations completely.
+ *
+ * Each bytecode instruction gets compiled down to some number of virtual
+ * Lightening instructions that perform the same task 'as if' the instruction was
+ * being interpreted. This means each operation first loads its arguments from
+ * memory to virtual registers, does whatever with them, and stores the result into
+ * some memory location. This is pretty inefficient but very easy to implement, and
+ * even a poor JIT is almost always faster than the best bytecode interpreter.
+ *
+ * As for register allocation, as long as we ensure that each bytecode instruction
+ * effectively 'frees' the registers it used once it's been executed, we don't
+ * need to worry about it. The values are safely stored in memory where the
+ * input/output locs can fetch them later, and there are no register-register
+ * dependencies between bytecode instructions.
+ *
+ * Each operation uses at most two caller-save registers to perform its operation.
+ * The only callee-save register we use is JIT_V0 for the global array, so
+ * we can ask Lightening to not save the other callee-save registers, saving a bit of
+ * overhead when entering/exiting functions.
+ *
+ * There's no particularly good way to view the generated machine code, as it would
+ * require adding something like binutils' BFD library as a dependency. You can
+ * always just dump each code arena to a file and look at it later, but I
+ * haven't implemented it.
+ */
+
 static void compile_fn(struct fn *f);
 
 static void *alloc_arena(size_t size)
```
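The header comment describes the whole code-generation strategy, so a concrete example helps. Below is a minimal sketch (not part of this commit) of what compiling a bytecode ADD would look like under that scheme, using the `get`/`put` helpers introduced further down in this diff; the name `compile_add` and the exact opcode handling are assumptions:

```c
/* hypothetical illustration of the load-operate-store scheme: fetch both
 * inputs from their slots, do the arithmetic, store the result back.
 * JIT_R0/JIT_R1 are caller-save scratch registers, per the comment above. */
static void compile_add(jit_state_t *j, struct insn i)
{
	get(j, JIT_R0, i.i0);                /* first input slot  -> R0 */
	get(j, JIT_R1, i.i1);                /* second input slot -> R1 */
	jit_addr(j, JIT_R0, JIT_R0, JIT_R1); /* R0 = R0 + R1 */
	put(j, JIT_R0, i.o);                 /* R0 -> output slot */
}
```

After `put`, no later bytecode instruction depends on R0/R1 still holding anything, which is exactly why no real register allocator is needed. The diff continues: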
```diff
@@ -29,6 +66,7 @@ static jit_operand_t formal_at(size_t i)
 	                       i * sizeof(int64_t));
 }
 
+/* load value from global array/stack location l and place it into virtual register r */
 static void get(jit_state_t *j, jit_gpr_t r, struct loc l)
 {
 	if (l.l) {
@@ -39,6 +77,7 @@ static void get(jit_state_t *j, jit_gpr_t r, struct loc l)
 	jit_ldxi_l(j, r, JIT_V0, l.o * sizeof(int64_t));
 }
 
+/* store value from virtual register r to global array/stack location l */
 static void put(jit_state_t *j, jit_gpr_t r, struct loc l)
 {
 	if (l.l) {
@@ -95,6 +134,8 @@ static void compile_const(jit_state_t *j, struct insn i)
 
 static void compile_eq(jit_state_t *j, struct insn i)
 {
+	/* Lightening doesn't really have any register-register comparison
+	 * instructions, so implement them as local branches */
 	get(j, JIT_R0, i.i0);
 	get(j, JIT_R1, i.i1);
 	jit_reloc_t branch = jit_beqr(j, JIT_R0, JIT_R1);
@@ -125,6 +166,8 @@ static void compile_lt(jit_state_t *j, struct insn i)
 	put(j, JIT_R0, i.o);
 }
 
+/* helper function for compiling PRINT_DATE; Lightening doesn't handle variadics,
+ * so do the heavy lifting with GCC (or Clang or whatever) */
 static void print_date(int64_t date)
 {
 	char str[11];
@@ -189,6 +232,7 @@ static void compile_print_space(jit_state_t *j, struct insn i)
 
 static void compile_label(jit_state_t *j, size_t ii, struct vec *labels)
 {
+	/* add a mapping between instruction index (ii) and current address */
 	vect_at(jit_addr_t, *labels, ii) = jit_address(j);
 }
 
@@ -224,6 +268,9 @@ static void compile_arg(struct insn i, struct vec *params)
 {
 	jit_operand_t operand;
 	struct loc l = i.i0;
+	/* Lightening allows us to specify memory locations as arguments to
+	 * function calls, and will automatically move the value from memory to
+	 * the correct register according to the ABI */
 	if (l.l) {
 		operand = jit_operand_mem(JIT_OPERAND_ABI_INT64, JIT_SP,
 		                          l.o * sizeof(int64_t));
@@ -244,10 +291,14 @@ static void compile_call(jit_state_t *j, struct insn i, struct vec *params)
 	struct fn *f = find_fn(i.v);
 	assert(f);
 
+	/* if we haven't already compiled this function, do it now */
 	if (!f->arena)
 		compile_fn(f);
 
 	jit_calli(j, f->arena, vec_len(params), params->buf);
+	/* argument instructions are only ever directly before a call
+	 * instruction, so there's no risk of messing up some other call's
+	 * args, and we can avoid allocating a new vector */
 	vec_reset(params);
 }
 
@@ -271,6 +322,8 @@ static void compile_today(jit_state_t *j, struct insn i)
 	put(j, JIT_R0, i.o);
 }
 
+/* lots of these date handling instructions I've implemented as helper functions,
+ * mostly just for my own sanity */
 static ph_date_t date_add(int64_t i0, int64_t i1)
 {
 	struct tm time = tm_from_date((ph_date_t)i0);
@@ -585,9 +638,13 @@ static size_t compile_fn_body(struct fn *f, jit_state_t *j)
 	size_t size = 0;
 	void *p = jit_end(j, &size);
 
+	/* if we had enough room to finish compilation, return 0 to signify that
+	 * we don't have to try to compile anything again */
 	if (p)
 		return 0;
 
+	/* otherwise, return how many bytes Lightening estimates we would
+	 * need to successfully compile this function */
 	return size;
 }
```
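The `compile_eq` body is cut off by the hunk context above, but the "comparisons as local branches" trick is worth spelling out. A plausible completion, assuming Lightening's `jit_movi`, `jit_jmp` and `jit_patch_here`, and assuming the bytecode represents booleans as 0/1 (the commit's actual code may differ):

```c
/* illustrative sketch, not the commit's code */
static void compile_eq(jit_state_t *j, struct insn i)
{
	get(j, JIT_R0, i.i0);
	get(j, JIT_R1, i.i1);
	/* taken when the operands are equal */
	jit_reloc_t branch = jit_beqr(j, JIT_R0, JIT_R1);
	jit_movi(j, JIT_R0, 0);          /* fell through: not equal */
	jit_reloc_t done = jit_jmp(j);   /* skip over the 'equal' case */
	jit_patch_here(j, branch);
	jit_movi(j, JIT_R0, 1);          /* branch target: equal */
	jit_patch_here(j, done);
	put(j, JIT_R0, i.o);
}
```

Both arms write the result into the same scratch register before the shared `put`, so the pattern stays within the "at most two caller-save registers" budget. The diff then moves on to `compile_fn` and lower.c: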
```diff
@@ -599,6 +656,9 @@ static void compile_fn(struct fn *f)
 	void *arena_base = NULL;
 	size_t arena_size = 4096;
 
+	/* technically there should probably be a limit to how many times we
+	 * attempt compilation, but in practice we're unlikely to ever need it.
+	 */
 	while (1) {
 		arena_base = alloc_arena(arena_size);
 		assert(arena_base &&
diff --git a/src/lower.c b/src/lower.c
index 0a6c07d..7c5fdbd 100644
--- a/src/lower.c
+++ b/src/lower.c
@@ -6,6 +6,27 @@
 #include <posthaste/scope.h>
 #include <posthaste/utils.h>
 
+/* The bytecode is similar in construction to Lua's, as in there's two arrays, one
+ * for global values and one for local values. The local value array is referred
+ * to as the stack. In theory each function/procedure could get its own stack,
+ * but in this case there's just a single global stack, and each call moves a
+ * stack pointer up to signify entering a new function. Procedure/function
+ * formal parameters are the first things on the stack.
+ *
+ * All instructions are of the three-value kind, i.e. an instruction has an
+ * opcode, an output location and two input locations, not all of which have to
+ * be used. There's a special null_loc() to signify an unused location.
+ * Locations are effectively just indexes into either the global or local
+ * array. I've also added a separate CONST instruction that pushes a constant
+ * value to a specified output location.
+ *
+ * Individual instructions are very wide, which decreases cache performance and
+ * makes this particular implementation fairly slow to interpret, but the extra
+ * width means it's a little bit easier to work with due to not having to
+ * compress/decompress anything. In particular, I tried to make JIT compilation
+ * as easy as possible, at the expense of interpretation speed.
+ */
+
 /* globals are kind of ugly and could/should be made into parameters (or
  * potentially even members of some all-encompassing state) but this works for
  * this simple implementation, implementing the change would mostly just require
```
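Based on how locations and instructions are used throughout the diff (`l.l`, `l.o`, `i.i0`, `i.i1`, `i.o`, `i.v`, `.k == LABEL`), the structures this comment describes plausibly look like the sketch below. The real definitions live elsewhere in the repo; `enum insn_kind`, the field types, and any opcode names beyond CONST/ADD/LABEL are guesses:

```c
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>

enum insn_kind { CONST, ADD, EQ, LABEL, CALL /* ... */ };

/* a location is just an index into one of the two value arrays */
struct loc {
	bool l;    /* true: local (stack) slot, false: global array slot */
	size_t o;  /* index within the chosen array */
};

/* three-value instruction: opcode, one output, two inputs; unused slots
 * are filled with null_loc(). v carries immediate payloads, e.g. the
 * constant for CONST or the callee for CALL. */
struct insn {
	enum insn_kind k;
	struct loc o, i0, i1;
	int64_t v;
};
```

The lowering hunks that follow show how these wide instructions get emitted: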
```diff
@@ -102,8 +123,16 @@ static void lower_add(struct fn *f, struct ast *n)
 	struct ast *l = add_l(n);
 	struct ast *r = add_r(n);
 
+	/* since variable definitions can't appear in expressions, there's no
+	 * danger of some variable claiming a stack location above this point and
+	 * it being overwritten by accident. If variable definitions were
+	 * expressions, we would probably have to do an extra pass before
+	 * lowering that pre-assigns locations to all variables */
 	f->sp += 1; lower(f, l);
 	f->sp += 1; lower(f, r);
+	/* technically we could reuse the stack location for l, but I found that
+	 * reading the generated instruction sequences was a bit easier with
+	 * separate output and input stack locations */
 	f->sp -= 2;
 
 	output_insn(f, ADD, n->l, l->l, r->l, 0);
@@ -194,6 +223,8 @@ static void lower_id(struct fn *f, struct ast *n)
 	struct ast *exists = file_scope_find(n->scope, n->id);
 	assert(exists);
 
+	/* using ast nodes/scope lookup as a convenient way to store variable ->
+	 * location mappings */
 	n->l = exists->l;
 }
 
@@ -253,6 +284,8 @@ static void lower_proc_call(struct fn *f, struct ast *c)
 {
 	size_t count = 0;
 	foreach_node(n, proc_call_args(c)) {
+		/* place each argument in its own stack location to await being
+		 * used */
 		f->sp += 1; lower(f, n); count += 1;
 	}
 
@@ -429,6 +462,7 @@ static void lower_until(struct fn *f, struct ast *n)
 static void patch_branch(struct fn *f, size_t branch, size_t off)
 {
+	/* the patch destination must always be a label, as required by the JIT */
 	assert((vect_at(struct insn, f->insns, off)).k == LABEL);
 
 	struct insn i = vect_at(struct insn, f->insns, branch);
@@ -510,6 +544,10 @@ static void lower_builtin_call(struct fn *f, struct ast *n)
 static void lower(struct fn *f, struct ast *n)
 {
+	/* technically it would be possible for some lowering to use stack space
+	 * without calling lower(), probably causing difficult-to-debug runtime
+	 * weirdness. Not currently an issue, but something that would need to be
+	 * taken into account if this instruction set were extended at some point. */
 	if (f->max_sp < f->sp)
 		f->max_sp = f->sp;
 
@@ -552,6 +590,8 @@ static void lower(struct fn *f, struct ast *n)
 	case AST_DOT: break;
 	}
 
+	/* as a sanity check, each ast node is assigned some location, regardless
+	 * of whether it actually needs one */
 	assert(loc_as_int(n->l) > 0);
 }
```
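Finally, circling back to the compile.c side: the grow-and-retry loop that the `compile_fn` hunk opens pairs with the `compile_fn_body` return contract (0 on success, otherwise an estimated size). A rough sketch of how the loop plausibly continues; state setup, error messages, and arena cleanup are omitted, and passing the jit state as a parameter is an assumption for brevity (the real function takes only `struct fn *f`):

```c
/* illustrative sketch of the retry loop, not the commit's code */
static void compile_fn_sketch(struct fn *f, jit_state_t *j)
{
	void *arena_base = NULL;
	size_t arena_size = 4096;

	while (1) {
		arena_base = alloc_arena(arena_size);
		assert(arena_base && "arena allocation failed");
		jit_begin(j, arena_base, arena_size);

		size_t needed = compile_fn_body(f, j);
		if (needed == 0)
			break;          /* everything fit, we're done */

		/* didn't fit: retry with Lightening's size estimate */
		arena_size = needed;
	}

	/* compile_call() uses f->arena as the jit_calli target */
	f->arena = arena_base;
}
```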
