| author | Kimplul <kimi.h.kuparinen@gmail.com> | 2024-04-26 20:09:13 +0300 | 
|---|---|---|
| committer | Kimplul <kimi.h.kuparinen@gmail.com> | 2024-04-26 20:09:13 +0300 | 
| commit | 930e0f29ff5637f38a878d2e61352b151698a0ea (patch) | |
| tree | 8d3110cf68ce216062987556b6e3189bf4be32f3 /src | |
| parent | 6bc52441237fe182c05703d3a7e1d93d208ce5e7 (diff) | |
| download | posthaste-930e0f29ff5637f38a878d2e61352b151698a0ea.tar.gz posthaste-930e0f29ff5637f38a878d2e61352b151698a0ea.zip | |
add some comments here and there
Diffstat (limited to 'src')
| -rw-r--r-- | src/compile.c | 60 |
|---|---|---|
| -rw-r--r-- | src/lower.c | 40 |
2 files changed, 100 insertions, 0 deletions
```diff
diff --git a/src/compile.c b/src/compile.c
index dd4c73d..e5f232b 100644
--- a/src/compile.c
+++ b/src/compile.c
@@ -9,6 +9,43 @@
 
 #include <lightening/lightening.h>
 
+/* we have a couple assumptions running through this code:
+ * 1) For each branch/jump, there's always a corresponding label
+ * 2) The global value array is in the virtual register JIT_V0
+ * 3) At the start of a function, we immediately allocate f->max_sp slots, and
+ *    slot indexes are relative to JIT_SP
+ *
+ * Lightening is a very cool library that provides a kind of virtual assembler,
+ * as in there's a number of virtual registers that map to physical registers
+ * and each virtual instruction maps to some number of physical instructions.
+ * This means that the user is required to handle register allocation, (virtual)
+ * code generation and possible optimizations, but with some clever tricks we can
+ * pretty much ignore these limitations completely.
+ *
+ * Each bytecode instruction gets compiled down to some number of virtual
+ * Lightening instructions that perform the same task 'as if' the instruction was
+ * being interpreted. This means each operation first loads its arguments from
+ * memory to virtual registers, does whatever with them, and stores the result into
+ * some memory location. This is pretty inefficient but very easy to implement, and
+ * even a poor JIT is almost always faster than the best bytecode interpreter.
+ *
+ * As for register allocation, as long as we ensure that each bytecode instruction
+ * effectively 'frees' the registers it used once it's been executed, we don't
+ * need to worry about it. The values are safely stored in memory where the
+ * input/output locs can fetch them later, and there are no register-register
+ * dependencies between bytecode instructions.
+ *
+ * Each operation uses at most two caller-save registers to perform its operation.
+ * The only callee-save register we use is JIT_V0 for the global array, so
+ * we can ask Lightening to not save the other callee-save registers, saving a bit of
+ * overhead when entering/exiting functions.
+ *
+ * There's no particularly good way to view the generated machine code, as it would
+ * require adding something like binutils' BFD library as a dependency. You can
+ * always just dump each code arena to a file and look at it later, but I
+ * haven't implemented it.
+ */
+
 static void compile_fn(struct fn *f);
 
 static void *alloc_arena(size_t size)
```
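The header comment describes the whole code-generation strategy, so a concrete example helps. Below is a minimal sketch (not part of this commit) of what compiling a bytecode ADD would look like under that scheme, using the `get`/`put` helpers introduced further down in this diff; the name `compile_add` and the exact opcode handling are assumptions:

```c
/* hypothetical illustration of the load-operate-store scheme: fetch both
 * inputs from their slots, do the arithmetic, store the result back.
 * JIT_R0/JIT_R1 are caller-save scratch registers, per the comment above. */
static void compile_add(jit_state_t *j, struct insn i)
{
	get(j, JIT_R0, i.i0);                /* first input slot  -> R0 */
	get(j, JIT_R1, i.i1);                /* second input slot -> R1 */
	jit_addr(j, JIT_R0, JIT_R0, JIT_R1); /* R0 = R0 + R1 */
	put(j, JIT_R0, i.o);                 /* R0 -> output slot */
}
```

After `put`, no later bytecode instruction depends on R0/R1 still holding anything, which is exactly why no real register allocator is needed. The diff continues: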
```diff
@@ -29,6 +66,7 @@ static jit_operand_t formal_at(size_t i)
 	                       i * sizeof(int64_t));
 }
 
+/* load value from global array/stack location l and place it into virtual register r */
 static void get(jit_state_t *j, jit_gpr_t r, struct loc l)
 {
 	if (l.l) {
@@ -39,6 +77,7 @@ static void get(jit_state_t *j, jit_gpr_t r, struct loc l)
 	jit_ldxi_l(j, r, JIT_V0, l.o * sizeof(int64_t));
 }
 
+/* store value from virtual register r to global array/stack location l */
 static void put(jit_state_t *j, jit_gpr_t r, struct loc l)
 {
 	if (l.l) {
@@ -95,6 +134,8 @@ static void compile_const(jit_state_t *j, struct insn i)
 
 static void compile_eq(jit_state_t *j, struct insn i)
 {
+	/* Lightening doesn't really have any register-register comparison
+	 * instructions, so implement them as local branches */
 	get(j, JIT_R0, i.i0);
 	get(j, JIT_R1, i.i1);
 	jit_reloc_t branch = jit_beqr(j, JIT_R0, JIT_R1);
@@ -125,6 +166,8 @@ static void compile_lt(jit_state_t *j, struct insn i)
 	put(j, JIT_R0, i.o);
 }
 
+/* helper function for compiling PRINT_DATE; Lightening doesn't handle variadics,
+ * so do the heavy lifting with GCC (or Clang or whatever) */
 static void print_date(int64_t date)
 {
 	char str[11];
@@ -189,6 +232,7 @@ static void compile_print_space(jit_state_t *j, struct insn i)
 
 static void compile_label(jit_state_t *j, size_t ii, struct vec *labels)
 {
+	/* add a mapping between instruction index (ii) and current address */
 	vect_at(jit_addr_t, *labels, ii) = jit_address(j);
 }
 
@@ -224,6 +268,9 @@ static void compile_arg(struct insn i, struct vec *params)
 {
 	jit_operand_t operand;
 	struct loc l = i.i0;
+	/* Lightening allows us to specify memory locations as arguments to
+	 * function calls, and will automatically move the value from memory to
+	 * the correct register according to the ABI */
 	if (l.l) {
 		operand = jit_operand_mem(JIT_OPERAND_ABI_INT64, JIT_SP,
 		                          l.o * sizeof(int64_t));
@@ -244,10 +291,14 @@ static void compile_call(jit_state_t *j, struct insn i, struct vec *params)
 	struct fn *f = find_fn(i.v);
 	assert(f);
 
+	/* if we haven't already compiled this function, do it now */
 	if (!f->arena)
 		compile_fn(f);
 
 	jit_calli(j, f->arena, vec_len(params), params->buf);
+	/* argument instructions are only ever directly before a call
+	 * instruction, so there's no risk of messing up some other call's
+	 * args, and we can avoid allocating a new vector */
 	vec_reset(params);
 }
 
@@ -271,6 +322,8 @@ static void compile_today(jit_state_t *j, struct insn i)
 	put(j, JIT_R0, i.o);
 }
 
+/* lots of these date handling instructions I've implemented as helper functions,
+ * mostly just for my own sanity */
 static ph_date_t date_add(int64_t i0, int64_t i1)
 {
 	struct tm time = tm_from_date((ph_date_t)i0);
@@ -585,9 +638,13 @@ static size_t compile_fn_body(struct fn *f, jit_state_t *j)
 	size_t size = 0;
 	void *p = jit_end(j, &size);
 
+	/* if we had enough room to finish compilation, return 0 to signify that
+	 * we don't have to try to compile anything again */
 	if (p)
 		return 0;
 
+	/* otherwise, return how many bytes Lightening estimates we would
+	 * need to successfully compile this function */
 	return size;
 }
```
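The `compile_eq` body is cut off by the hunk context above, but the "comparisons as local branches" trick is worth spelling out. A plausible completion, assuming Lightening's `jit_movi`, `jit_jmp` and `jit_patch_here`, and assuming the bytecode represents booleans as 0/1 (the commit's actual code may differ):

```c
/* illustrative sketch, not the commit's code */
static void compile_eq(jit_state_t *j, struct insn i)
{
	get(j, JIT_R0, i.i0);
	get(j, JIT_R1, i.i1);
	/* taken when the operands are equal */
	jit_reloc_t branch = jit_beqr(j, JIT_R0, JIT_R1);
	jit_movi(j, JIT_R0, 0);          /* fell through: not equal */
	jit_reloc_t done = jit_jmp(j);   /* skip over the 'equal' case */
	jit_patch_here(j, branch);
	jit_movi(j, JIT_R0, 1);          /* branch target: equal */
	jit_patch_here(j, done);
	put(j, JIT_R0, i.o);
}
```

Both arms write the result into the same scratch register before the shared `put`, so the pattern stays within the "at most two caller-save registers" budget. The diff then moves on to `compile_fn` and lower.c: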
```diff
@@ -599,6 +656,9 @@ static void compile_fn(struct fn *f)
 	void *arena_base = NULL;
 	size_t arena_size = 4096;
 
+	/* technically there should probably be a limit to how many times we
+	 * attempt compilation, but in practice we're unlikely to ever need it.
+	 */
 	while (1) {
 		arena_base = alloc_arena(arena_size);
 		assert(arena_base &&
diff --git a/src/lower.c b/src/lower.c
index 0a6c07d..7c5fdbd 100644
--- a/src/lower.c
+++ b/src/lower.c
@@ -6,6 +6,27 @@
 #include <posthaste/scope.h>
 #include <posthaste/utils.h>
 
+/* The bytecode is similar in construction to Lua's, as in there's two arrays, one
+ * for global values and one for local values. The local value array is referred
+ * to as the stack. In theory each function/procedure could get its own stack,
+ * but in this case there's just a single global stack, and each call moves a
+ * stack pointer up to signify entering a new function. Procedure/function
+ * formal parameters are the first things on the stack.
+ *
+ * All instructions are of the three-value kind, i.e. an instruction has an
+ * opcode, an output location and two input locations, not all of which have to
+ * be used. There's a special null_loc() to signify an unused location.
+ * Locations are effectively just indexes into either the global or local
+ * array. I've also added a separate CONST instruction that pushes a constant
+ * value to a specified output location.
+ *
+ * Individual instructions are very wide, which decreases cache performance and
+ * makes this particular implementation fairly slow to interpret, but the extra
+ * width means it's a little bit easier to work with due to not having to
+ * compress/decompress anything. In particular, I tried to make JIT compilation
+ * as easy as possible, at the expense of interpretation speed.
+ */
+
 /* globals are kind of ugly and could/should be made into parameters (or
  * potentially even members of some all-encompassing state) but this works for
  * this simple implementation, implementing the change would mostly just require
```
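Based on how locations and instructions are used throughout the diff (`l.l`, `l.o`, `i.i0`, `i.i1`, `i.o`, `i.v`, `.k == LABEL`), the structures this comment describes plausibly look like the sketch below. The real definitions live elsewhere in the repo; `enum insn_kind`, the field types, and any opcode names beyond CONST/ADD/LABEL are guesses:

```c
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>

enum insn_kind { CONST, ADD, EQ, LABEL, CALL /* ... */ };

/* a location is just an index into one of the two value arrays */
struct loc {
	bool l;    /* true: local (stack) slot, false: global array slot */
	size_t o;  /* index within the chosen array */
};

/* three-value instruction: opcode, one output, two inputs; unused slots
 * are filled with null_loc(). v carries immediate payloads, e.g. the
 * constant for CONST or the callee for CALL. */
struct insn {
	enum insn_kind k;
	struct loc o, i0, i1;
	int64_t v;
};
```

The lowering hunks that follow show how these wide instructions get emitted: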
```diff
@@ -102,8 +123,16 @@ static void lower_add(struct fn *f, struct ast *n)
 	struct ast *l = add_l(n);
 	struct ast *r = add_r(n);
 
+	/* since variable definitions can't appear in expressions, there's no
+	 * danger of some variable claiming a stack location above this point and
+	 * it being overwritten by accident. If variable definitions were
+	 * expressions, we would probably have to do an extra pass before
+	 * lowering that pre-assigns locations to all variables */
 	f->sp += 1; lower(f, l);
 	f->sp += 1; lower(f, r);
+	/* technically we could reuse the stack location for l, but I found that
+	 * reading the generated instruction sequences was a bit easier with
+	 * separate output and input stack locations */
 	f->sp -= 2;
 
 	output_insn(f, ADD, n->l, l->l, r->l, 0);
@@ -194,6 +223,8 @@ static void lower_id(struct fn *f, struct ast *n)
 	struct ast *exists = file_scope_find(n->scope, n->id);
 	assert(exists);
 
+	/* using ast nodes/scope lookup as a convenient way to store variable ->
+	 * location mappings */
 	n->l = exists->l;
 }
 
@@ -253,6 +284,8 @@ static void lower_proc_call(struct fn *f, struct ast *c)
 {
 	size_t count = 0;
 	foreach_node(n, proc_call_args(c)) {
+		/* place each argument in its own stack location to await being
+		 * used */
 		f->sp += 1; lower(f, n); count += 1;
 	}
 
@@ -429,6 +462,7 @@ static void lower_until(struct fn *f, struct ast *n)
 static void patch_branch(struct fn *f, size_t branch, size_t off)
 {
+	/* the patch destination must always be a label, as required by the JIT */
 	assert((vect_at(struct insn, f->insns, off)).k == LABEL);
 
 	struct insn i = vect_at(struct insn, f->insns, branch);
@@ -510,6 +544,10 @@ static void lower_builtin_call(struct fn *f, struct ast *n)
 static void lower(struct fn *f, struct ast *n)
 {
+	/* technically it would be possible for some lowering to use stack space
+	 * without calling lower(), probably causing difficult-to-debug runtime
+	 * weirdness. Not currently an issue, but something that would need to be
+	 * taken into account if this instruction set were extended at some point. */
 	if (f->max_sp < f->sp)
 		f->max_sp = f->sp;
 
@@ -552,6 +590,8 @@ static void lower(struct fn *f, struct ast *n)
 	case AST_DOT: break;
 	}
 
+	/* as a sanity check, each ast node is assigned some location, regardless
+	 * of whether it actually needs one */
 	assert(loc_as_int(n->l) > 0);
 }
```
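Finally, circling back to the compile.c side: the grow-and-retry loop that the `compile_fn` hunk opens pairs with the `compile_fn_body` return contract (0 on success, otherwise an estimated size). A rough sketch of how the loop plausibly continues; state setup, error messages, and arena cleanup are omitted, and passing the jit state as a parameter is an assumption for brevity (the real function takes only `struct fn *f`):

```c
/* illustrative sketch of the retry loop, not the commit's code */
static void compile_fn_sketch(struct fn *f, jit_state_t *j)
{
	void *arena_base = NULL;
	size_t arena_size = 4096;

	while (1) {
		arena_base = alloc_arena(arena_size);
		assert(arena_base && "arena allocation failed");
		jit_begin(j, arena_base, arena_size);

		size_t needed = compile_fn_body(f, j);
		if (needed == 0)
			break;          /* everything fit, we're done */

		/* didn't fit: retry with Lightening's size estimate */
		arena_size = needed;
	}

	/* compile_call() uses f->arena as the jit_calli target */
	f->arena = arena_base;
}
```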
