From 1cc7990ef7d5483d0434dda412f2d88e0b17df27 Mon Sep 17 00:00:00 2001 From: Kimplul Date: Sat, 20 Apr 2024 03:39:40 +0300 Subject: initial working bytecode --- src/ast.c | 225 +++++++++++++++++ src/check.c | 767 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ src/core.c | 60 ++++- src/date.c | 30 +++ src/execute.c | 367 ++++++++++++++++++++++++++++ src/lexer.l | 10 +- src/lower.c | 730 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ src/parser.y | 239 +++++++++++------- src/scope.c | 153 ++++++++++++ src/vec.c | 75 ++++++ 10 files changed, 2555 insertions(+), 101 deletions(-) create mode 100644 src/ast.c create mode 100644 src/check.c create mode 100644 src/execute.c create mode 100644 src/lower.c create mode 100644 src/scope.c create mode 100644 src/vec.c (limited to 'src') diff --git a/src/ast.c b/src/ast.c new file mode 100644 index 0000000..3860c97 --- /dev/null +++ b/src/ast.c @@ -0,0 +1,225 @@ +#include +#include +#include +#include +#include +#include + +#include +#include + +static struct vec nodes = {0}; + +static void destroy_ast_node(struct ast *n) +{ + if (!n) + return; + + if (n->id) + free(n->id); + + if (n->type) + free(n->type); + + free(n); +} + +void destroy_ast_nodes() +{ + foreach_vec(ni, nodes) { + struct ast *n = vect_at(struct ast *, nodes, ni); + destroy_ast_node(n); + } + + vec_destroy(&nodes); +} + +static struct ast *create_empty_ast() +{ + if (vec_uninit(nodes)) { + nodes = vec_create(sizeof(struct ast *)); + } + + struct ast *n = calloc(1, sizeof(struct ast)); + vect_append(struct ast *, nodes, &n); + return n; +} + +struct ast *gen_ast(enum ast_kind kind, + struct ast *a0, + struct ast *a1, + struct ast *a2, + char *id, + char *type, + int64_t v, + struct src_loc loc) +{ + struct ast *n = create_empty_ast(); + n->k = kind; + n->a0 = a0; + n->a1 = a1; + n->a2 = a2; + n->id = id; + n->type = type; + n->v = v; + n->loc = loc; + return n; +} + +static void dump(int depth, const char *fmt, ...) +{ + va_list args; + va_start(args, fmt); + printf("//"); + for (int i = 0; i < depth; ++i) + printf(" "); + + vprintf(fmt, args); + + va_end(args); +} + +void type_dump(enum type_kind t) +{ + switch (t) { + case TYPE_DATE: printf("TYPE_DATE"); break; + case TYPE_INT: printf("TYPE_INT"); break; + default: printf("NO TYPE"); break; + } +} + +void ast_dump(int depth, struct ast *n) +{ + if (!n) { + dump(depth, "{NULL}\n"); + return; + } + +#define DUMP(x) case x: dump(depth, #x); break; + switch (n->k) { + DUMP(AST_BUILTIN_CALL); + DUMP(AST_PRINT_STRING); + DUMP(AST_PRINT_BOOL); + DUMP(AST_PRINT_DATE); + DUMP(AST_PRINT_INT); + DUMP(AST_CONST_INT); + DUMP(AST_CONST_DATE); + DUMP(AST_CONST_STRING); + DUMP(AST_DATE_ADD); + DUMP(AST_DATE_SUB); + DUMP(AST_DATE_DIFF); + DUMP(AST_EQ); + DUMP(AST_LT); + DUMP(AST_ADD); + DUMP(AST_SUB); + DUMP(AST_MUL); + DUMP(AST_DIV); + DUMP(AST_POS); + DUMP(AST_NEG); + DUMP(AST_UNLESS); + DUMP(AST_UNLESS_EXPR); + DUMP(AST_UNTIL); + DUMP(AST_FUNC_CALL); + DUMP(AST_PROC_CALL); + DUMP(AST_ID); + DUMP(AST_RETURN); + DUMP(AST_ASSIGN); + DUMP(AST_DOT); + DUMP(AST_ATTR); + DUMP(AST_PRINT); + DUMP(AST_FUNC_DEF); + DUMP(AST_PROC_DEF); + DUMP(AST_FORMAL_DEF); + DUMP(AST_VAR_DEF); + } +#undef DUMP + + depth++; + + printf(" ("); type_dump(n->t); printf(")\n"); + + if (n->id) + dump(depth, "%s\n", n->id); + + if (n->type) + dump(depth, "%s\n", n->type); + + if (n->a0) + ast_dump_list(depth, n->a0); + + if (n->a1) + ast_dump_list(depth, n->a1); + + if (n->a2) + ast_dump_list(depth, n->a2); +} + +void ast_dump_list(int depth, struct ast *root) +{ + if (!root) { + dump(depth, "{NULL}\n"); + return; + } + + foreach_node(n, root) { + ast_dump(depth, n); + } +} + +int ast_visit(ast_callback_t before, ast_callback_t after, struct ast *n, + void *d) +{ + int ret = 0; + if (!n) + return ret; + + if (before && (ret = before(n, d))) + return ret; + + if (n->a0 && (ret = ast_visit_list(before, after, n->a0, d))) + return ret; + + if (n->a1 && (ret = ast_visit_list(before, after, n->a1, d))) + return ret; + + if (n->a2 && (ret = ast_visit_list(before, after, n->a2, d))) + return ret; + + if (after && (ret = after(n, d))) + return ret; + + return ret; +} + +int ast_visit_list(ast_callback_t before, ast_callback_t after, struct ast *l, + void *d) +{ + int ret = 0; + foreach_node(n, l) { + if ((ret = ast_visit(before, after, n, d))) + return ret; + } + + return ret; +} + +struct ast *ast_last(struct ast *list) +{ + if (!list) + return NULL; + + while (list->n) + list = list->n; + + return list; +} + +size_t ast_list_len(struct ast *l) +{ + size_t count = 0; + foreach_node(n, l) { + count++; + } + + return count; +} diff --git a/src/check.c b/src/check.c new file mode 100644 index 0000000..54929a1 --- /dev/null +++ b/src/check.c @@ -0,0 +1,767 @@ +#include + +#define UNUSED(x) (void)x + +struct state { + struct ast *parent; +}; + +int analyze_visibility(struct scope *scope, struct ast *n) +{ + switch (n->k) { + case AST_FUNC_DEF: + if (scope_add_func(scope, n)) + return -1; + + break; + + case AST_PROC_DEF: + if (scope_add_proc(scope, n)) + return -1; + + break; + default: + } + + return 0; +} + +static bool has_type(struct ast *n) +{ + return n->t != 0; +} + +static int analyze(struct state *state, struct scope *scope, struct ast *n); +static int analyze_list(struct state *state, struct scope *scope, struct ast *l) +{ + foreach_node(n, l) { + if (analyze(state, scope, n)) + return -1; + } + + return 0; +} + +static struct ast *file_scope_find_analyzed(struct state *state, struct scope *scope, char *id) +{ + struct ast *exists = file_scope_find(scope, id); + if (!exists) + return NULL; + + if (analyze(state, scope, exists)) + return NULL; + + return exists; +} + +static int analyze_type(struct scope *scope, struct ast *n) +{ + if (!n->type) { + n->t = TYPE_VOID; + return 0; + } + + if (same_id(n->type, "int")) { + n->t = TYPE_INT; + return 0; + } + else if (same_id(n->type, "date")) { + n->t = TYPE_DATE; + return 0; + } + + semantic_error(scope, n, "no such type: %s", n->type); + return -1; +} + +static int analyze_func_def(struct state *state, struct scope *scope, struct ast *f) +{ + UNUSED(state); + if (analyze_type(scope, f)) + return -1; + + /* slightly hacky, but allows recursion */ + ast_set_flags(f, AST_FLAG_CHECKED); + + struct scope *func_scope = create_scope(); + scope_add_scope(scope, func_scope); + f->scope = func_scope; + + struct state func_state = {.parent = f}; + if (analyze_list(&func_state, func_scope, func_formals(f))) + return -1; + + if (analyze_list(&func_state, func_scope, func_vars(f))) + return -1; + + return analyze(&func_state, func_scope, func_body(f)); +} + +static int analyze_proc_def(struct state *state, struct scope *scope, struct ast *p) +{ + UNUSED(state); + if (analyze_type(scope, p)) + return -1; + + /* slightly hacky, but allows recursion */ + ast_set_flags(p, AST_FLAG_CHECKED); + + struct scope *proc_scope = create_scope(); + scope_add_scope(scope, proc_scope); + p->scope = proc_scope; + + struct state proc_state = {.parent = p}; + if (analyze_list(&proc_state, proc_scope, proc_formals(p))) + return -1; + + if (analyze_list(&proc_state, proc_scope, proc_vars(p))) + return -1; + + if (analyze_list(&proc_state, proc_scope, proc_body(p))) + return -1; + + struct ast *last = ast_last(proc_body(p)); + if (p->t != TYPE_VOID && last->k != AST_RETURN) { + semantic_error(scope, p, "can't prove that proc returns"); + return -1; + } + + return 0; +} + +static int analyze_var_def(struct state *state, struct scope *scope, struct ast *n) +{ + if (scope_add_var(scope, n)) + return -1; + + struct ast *expr = var_expr(n); + if (analyze(state, scope, expr)) + return -1; + + n->t = expr->t; + return 0; +} + +static int analyze_formal_def(struct state *state, struct scope *scope, struct ast *n) +{ + UNUSED(state); + if (analyze_type(scope, n)) + return -1; + + if (scope_add_formal(scope, n)) + return -1; + + return 0; +} + +static int analyze_neg(struct state *state, struct scope *scope, struct ast *n) +{ + struct ast *expr = neg_expr(n); + if (analyze(state, scope, expr)) + return -1; + + if (expr->t != TYPE_INT) { + semantic_error(scope, n, + "negation requires %s, got %s\n", + type_str(TYPE_INT), + type_str(expr->t)); + return -1; + } + + n->t = expr->t; + return 0; +} + +static int analyze_pos(struct state *state, struct scope *scope, struct ast *p) +{ + struct ast *expr = pos_expr(p); + if (analyze(state, scope, expr)) + return -1; + + if (expr->t != TYPE_INT) { + semantic_error(scope, p, + "unary pos requires %s, got %s\n", + type_str(TYPE_INT), + type_str(expr->t)); + return -1; + } + + p->t = expr->t; + return 0; +} + +static int analyze_const_date(struct state *state, struct scope *scope, struct ast *d) +{ + UNUSED(state); + UNUSED(scope); + /* should be in correct format as the lexer took care of it */ + d->t = TYPE_DATE; + return 0; +} + +static int analyze_const_int(struct state *state, struct scope *scope, struct ast *i) +{ + UNUSED(state); + UNUSED(scope); + i->t = TYPE_INT; + return 0; +} + +static int analyze_const_string(struct state *state, struct scope *scope, struct ast *s) +{ + UNUSED(state); + UNUSED(scope); + s->t = TYPE_STRING; + return 0; +} + +static int analyze_eq(struct state *state, struct scope *scope, struct ast *e) +{ + struct ast *l = eq_l(e); + if (analyze(state, scope, l)) + return -1; + + struct ast *r = eq_r(e); + if (analyze(state, scope, r)) + return -1; + + if (r->t != l->t) { + semantic_error(scope, e, "type mismatch: %s vs %s\n", + type_str(l->t), + type_str(r->t)); + return -1; + } + + e->t = TYPE_BOOL; + return 0; +} + +static int analyze_lt(struct state *state, struct scope *scope, struct ast *e) +{ + struct ast *l = lt_l(e); + if (analyze(state, scope, l)) + return -1; + + struct ast *r = lt_r(e); + if (analyze(state, scope, r)) + return -1; + + if (r->t != l->t) { + semantic_error(scope, e, "type mismatch: %s vs %s\n", + type_str(l->t), + type_str(r->t)); + return -1; + } + + e->t = TYPE_BOOL; + return 0; +} + +static int analyze_add(struct state *state, struct scope *scope, struct ast *a) +{ + struct ast *l = add_l(a); + if (analyze(state, scope, l)) + return -1; + + struct ast *r = add_r(a); + if (analyze(state, scope, r)) + return -1; + + if (r->t == TYPE_DATE) { + semantic_error(scope, r, "date not allowed as rhs to addition"); + return -1; + } + + if (l->t == TYPE_DATE) { + a->k = AST_DATE_ADD; + } + + a->t = l->t; + return 0; +} + +static int analyze_sub(struct state *state, struct scope *scope, struct ast *s) +{ + struct ast *l = sub_l(s); + if (analyze(state, scope, l)) + return -1; + + struct ast *r = sub_r(s); + if (analyze(state, scope, r)) + return -1; + + if (l->t == TYPE_DATE && r->t == TYPE_DATE) { + s->k = AST_DATE_DIFF; + s->t = TYPE_INT; + return 0; + } + + if (l->t == TYPE_DATE && r->t == TYPE_INT) { + s->k = AST_DATE_SUB; + s->t = TYPE_DATE; + return 0; + } + + if (l->t == TYPE_INT && r->t == TYPE_INT) { + s->t = TYPE_INT; + return 0; + } + + semantic_error(scope, s, "illegal subtraction types: %s, %s", + type_str(l->t), type_str(r->t)); + return -1; +} + +static int analyze_mul(struct state *state, struct scope *scope, struct ast *m) +{ + struct ast *l = mul_l(m); + if (analyze(state, scope, l)) + return -1; + + if (l->t != TYPE_INT) { + semantic_error(scope, l, "expected %s, got %s", + type_str(TYPE_INT), type_str(l->t)); + return -1; + } + + struct ast *r = mul_r(m); + if (analyze(state, scope, r)) + return -1; + + if (r->t != TYPE_INT) { + semantic_error(scope, r, "expected %s, got %s", + type_str(TYPE_INT), type_str(r->t)); + return -1; + } + + m->t = TYPE_INT; + return 0; +} + +static int analyze_div(struct state *state, struct scope *scope, struct ast *d) +{ + struct ast *l = div_l(d); + if (analyze(state, scope, l)) + return -1; + + if (l->t != TYPE_INT) { + semantic_error(scope, l, "expected %s, got %s", + type_str(TYPE_INT), type_str(l->t)); + return -1; + } + + struct ast *r = div_r(d); + if (analyze(state, scope, r)) + return -1; + + if (r->t != TYPE_INT) { + semantic_error(scope, r, "expected %s, got %s", + type_str(TYPE_INT), type_str(r->t)); + return -1; + } + + d->t = TYPE_INT; + return 0; +} + +static int analyze_print(struct state *state, struct scope *scope, struct ast *p) +{ + struct ast *items = print_items(p); + struct ast *fixups = NULL, *last_fixup = NULL; + + foreach_node(n, items) { + if (analyze(state, scope, n)) + return -1; + + if (n->t == TYPE_VOID) { + semantic_error(scope, n, "void not printable"); + return -1; + } + + /* build new list of nodes, helps a bit when lowering */ + struct ast *fixup = fixup_print_item(n); + if (!fixups) + fixups = fixup; + + if (last_fixup) { + last_fixup->n = fixup; + } + + last_fixup = fixup; + } + + print_items(p) = fixups; + p->t = TYPE_VOID; + return 0; +} + +static int analyze_return(struct state *state, struct scope *scope, struct ast *r) +{ + struct ast *parent = state->parent; + if (!parent) { + semantic_error(scope, r, "stray return"); + return -1; + } + + + if (!has_type(parent)) { + semantic_error(scope, r, "stray return in proc without return type"); + return -1; + } + + struct ast *expr = return_expr(r); + if (analyze(state, scope, expr)) + return -1; + + if (expr->t != parent->t) { + semantic_error(scope, r, "return type mismatch: %s vs %s", + type_str(parent->t), type_str(expr->t)); + return -1; + } + + r->t = expr->t; + return 0; +} + +static int analyze_id(struct state *state, struct scope *scope, struct ast *i) +{ + UNUSED(state); + struct ast *exists = file_scope_find_analyzed(state, scope, i->id); + if (!exists) { + semantic_error(scope, i, "no such symbol"); + return -1; + } + + if (exists->k != AST_FORMAL_DEF && exists->k != AST_VAR_DEF) { + semantic_error(scope, i, "no such variable"); + return -1; + } + + i->t = exists->t; + return 0; +} + +static int analyze_dot(struct state *state, struct scope *scope, struct ast *d) +{ + struct ast *base = dot_base(d); + if (analyze(state, scope, base)) + return -1; + + if (base->t != TYPE_DATE) { + semantic_error(scope, d, "expected %d, got %d", + type_str(TYPE_DATE), type_str(base->t)); + return -1; + } + + d->t = TYPE_INT; + if (same_id(d->id, "day")) + return 0; + + if (same_id(d->id, "month")) + return 0; + + if (same_id(d->id, "year")) + return 0; + + semantic_error(scope, d, "illegal write attribute %s", d->id); + return -1; +} + +static int analyze_attr(struct state *state, struct scope *scope, struct ast *d) +{ + struct ast *base = attr_base(d); + if (analyze(state, scope, base)) + return -1; + + if (base->t != TYPE_DATE) { + semantic_error(scope, d, "expected %d, got %d", + type_str(TYPE_DATE), type_str(base->t)); + return -1; + } + + d->t = TYPE_INT; + if (same_id(d->id, "day")) + return 0; + + if (same_id(d->id, "month")) + return 0; + + if (same_id(d->id, "year")) + return 0; + + if (same_id(d->id, "weekday")) { + d->t = TYPE_STRING; + return 0; + } + + if (same_id(d->id, "weeknum")) + return 0; + + semantic_error(scope, d, "illegal write attribute %s", d->id); + return -1; +} + +static int analyze_assign(struct state *state, struct scope *scope, struct ast *n) +{ + struct ast *l = assign_l(n); + if (analyze(state, scope, l)) + return -1; + + struct ast *r = assign_r(n); + if (analyze(state, scope, r)) + return -1; + + if (l->t != r->t) { + semantic_error(scope, n, "type mismatch: %s vs %s", + type_str(l->t), type_str(r->t)); + return -1; + } + + n->t = l->t; + return 0; +} + +static int analyze_today(struct state *state, struct scope *scope, struct ast *c) +{ + UNUSED(state); + if (ast_list_len(func_call_args(c)) != 0) { + semantic_error(scope, c, "expected 0 arguments, got %zd", + ast_list_len(func_call_args(c))); + return -1; + } + + c->k = AST_BUILTIN_CALL; + c->t = TYPE_DATE; + return 0; +} + +static int analyze_proc_call(struct state *state, struct scope *scope, struct ast *c) +{ + struct ast *exists = file_scope_find_analyzed(state, scope, proc_call_id(c)); + if (!exists || exists->k != AST_PROC_DEF) { + semantic_error(scope, c, "no such proc"); + return -1; + } + + struct ast *args = proc_call_args(c); + if (analyze_list(state, scope, args)) + return -1; + + struct ast *formal = proc_formals(exists); + if (ast_list_len(formal) != ast_list_len(args)) { + semantic_error(scope, c, "expected %s args, got %s", + ast_list_len(formal), ast_list_len(args)); + return -1; + } + + foreach_node(a, args) { + if (a->t != formal->t) { + semantic_error(scope, c, "expected %s, got %s", + type_str(formal->t), type_str(a->t)); + } + + formal = formal->n; + } + + c->t = exists->t; + return 0; +} + +static int analyze_func_call(struct state *state, struct scope *scope, struct ast *c) +{ + /* handle special Today() built-in */ + if (same_id(func_call_id(c), "Today")) + return analyze_today(state, scope, c); + + struct ast *exists = file_scope_find_analyzed(state, scope, func_call_id(c)); + if (!exists || exists->k != AST_FUNC_DEF) { + semantic_error(scope, c, "no such func"); + return -1; + } + + struct ast *args = func_call_args(c); + if (analyze_list(state, scope, args)) + return -1; + + struct ast *formal = func_formals(exists); + if (ast_list_len(formal) != ast_list_len(args)) { + semantic_error(scope, c, "expected %s args, got %s", + ast_list_len(formal), ast_list_len(args)); + return -1; + } + + foreach_node(a, args) { + if (a->t != formal->t) { + semantic_error(scope, c, "expected %s, got %s", + type_str(formal->t), type_str(a->t)); + } + + formal = formal->n; + } + + c->t = exists->t; + return 0; +} + +static int analyze_until(struct state *state, struct scope *scope, struct ast *n) +{ + struct scope *until_scope = create_scope(); + scope_add_scope(scope, until_scope); + + struct ast *body = until_body(n); + if (analyze_list(state, until_scope, body)) + return -1; + + struct ast *cond = until_cond(n); + if (analyze(state, until_scope, cond)) + return -1; + + if (cond->t != TYPE_BOOL) { + semantic_error(scope, cond, "expected %s, got %s", + type_str(TYPE_BOOL), type_str(cond->t)); + return -1; + } + + n->t = TYPE_VOID; + return 0; +} + +static int analyze_unless(struct state *state, struct scope *scope, struct ast *n) +{ + struct scope *unless_scope = create_scope(); + struct scope *otherwise_scope = create_scope(); + scope_add_scope(scope, unless_scope); + scope_add_scope(scope, otherwise_scope); + + struct ast *body = unless_body(n); + if (analyze_list(state, unless_scope, body)) + return -1; + + struct ast *cond = unless_cond(n); + if (analyze(state, scope, cond)) + return -1; + + if (cond->t != TYPE_BOOL) { + semantic_error(scope, cond, "expected %s, got %s", + type_str(TYPE_BOOL), type_str(cond->t)); + return -1; + } + + struct ast *otherwise = unless_otherwise(n); + if (otherwise && analyze_list(state, otherwise_scope, otherwise)) + return -1; + + n->t = TYPE_VOID; + return 0; +} + +static int analyze_unless_expr(struct state *state, struct scope *scope, struct ast *n) +{ + struct ast *body = unless_expr_body(n); + if (analyze(state, scope, body)) + return -1; + + struct ast *cond = unless_expr_cond(n); + if (analyze(state, scope, cond)) + return -1; + + if (cond->t != TYPE_BOOL) { + semantic_error(scope, cond, "expected %s, got %s", + type_str(TYPE_BOOL), type_str(cond->t)); + return -1; + } + + struct ast *otherwise = unless_expr_otherwise(n); + if (analyze(state, scope, otherwise)) + return -1; + + if (body->t != otherwise->t) { + semantic_error(scope, n, "type mismatch: %s vs %s", + type_str(body->t), type_str(otherwise->t)); + return -1; + } + + n->t = body->t; + return 0; +} + +static int analyze(struct state *state, struct scope *scope, struct ast *n) +{ + int ret = 0; + + if (ast_flags(n, AST_FLAG_CHECKED)) { + assert(has_type(n)); + return 0; + } + + if (ast_flags(n, AST_FLAG_INIT)) { + semantic_error(scope, n, "semantic loop detected"); + return -1; + } + + ast_set_flags(n, AST_FLAG_INIT); + + /* generally what we want to happen, special cases can handle themselves */ + if (!n->scope) + n->scope = scope; + + switch (n->k) { + case AST_FUNC_DEF: ret = analyze_func_def(state, scope, n); break; + case AST_PROC_DEF: ret = analyze_proc_def(state, scope, n); break; + case AST_VAR_DEF: ret = analyze_var_def(state, scope, n); break; + case AST_FORMAL_DEF: ret = analyze_formal_def(state, scope, n); break; + case AST_NEG: ret = analyze_neg(state, scope, n); break; + case AST_POS: ret = analyze_pos(state, scope, n); break; + case AST_CONST_DATE: ret = analyze_const_date(state, scope, n); break; + case AST_CONST_INT: ret = analyze_const_int(state, scope, n); break; + case AST_CONST_STRING: ret = analyze_const_string(state, scope, n); break; + case AST_EQ: ret = analyze_eq(state, scope, n); break; + case AST_LT: ret = analyze_lt(state, scope, n); break; + case AST_ADD: ret = analyze_add(state, scope, n); break; + case AST_SUB: ret = analyze_sub(state, scope, n); break; + case AST_MUL: ret = analyze_mul(state, scope, n); break; + case AST_DIV: ret = analyze_div(state, scope, n); break; + case AST_PRINT: ret = analyze_print(state, scope, n); break; + case AST_RETURN: ret = analyze_return(state, scope, n); break; + case AST_ID: ret = analyze_id(state, scope, n); break; + case AST_DOT: ret = analyze_dot(state, scope, n); break; + case AST_ATTR: ret = analyze_attr(state, scope, n); break; + case AST_ASSIGN: ret = analyze_assign(state, scope, n); break; + case AST_PROC_CALL: ret = analyze_proc_call(state, scope, n); break; + case AST_FUNC_CALL: ret = analyze_func_call(state, scope, n); break; + case AST_UNTIL: ret = analyze_until(state, scope, n); break; + case AST_UNLESS: ret = analyze_unless(state, scope, n); break; + case AST_UNLESS_EXPR: ret = analyze_unless_expr(state, scope, n); break; + default: break; + } + + if (ret == 0) { + assert(has_type(n)); + } + + ast_set_flags(n, AST_FLAG_CHECKED); + return ret; +} + +int check(struct scope *scope, struct ast *root) +{ + foreach_node(n, root) { + if (analyze_visibility(scope, n)) + return -1; + } + + foreach_node(n, root) { + struct state state = {0}; + if (analyze(&state, scope, n)) + return -1; + } + +#ifdef DEBUG + foreach_node(n, root) { + printf("// after checking:\n"); + ast_dump(0, n); + } +#endif + + return 0; +} diff --git a/src/core.c b/src/core.c index 7eca8bd..c50860e 100644 --- a/src/core.c +++ b/src/core.c @@ -3,9 +3,14 @@ #include #include -#include +#include #include +#include +#include +#include +#include #include +#include /** * Read whole file into a buffer and return pointer to buffer. @@ -41,28 +46,63 @@ static char *read_file(const char *fname, FILE *f) int run(const char *fname) { + int ret = 0; + const char *buf = NULL; + struct scope *scope = NULL; + struct parser *p = NULL; + FILE *f = fopen(fname, "rb"); if (!f) { error("failed opening %s: %s\n", fname, strerror(errno)); return -1; } - const char *buf = read_file(fname, f); + buf = read_file(fname, f); fclose(f); - if (!buf) - return -1; + if (!buf) { + ret = -1; + goto out; + } - struct parser *p = create_parser(); - if (!p) - return -1; + p = create_parser(); + if (!p) { + ret = -1; + goto out; + } parse(p, fname, buf); - int ret = p->failed ? -1 : 0; + if (p->failed) { + ret = -1; + goto out; + } - /* eventually do other stuff as well */ - free((void *)buf); + scope = create_scope(); + if (!scope) { + ret = -1; + goto out; + } + + scope_set_file(scope, fname, buf); + + struct ast *ast = p->tree; + if (check(scope, ast)) { + ret = -1; + goto out; + } + if (lower_ast(ast)) { + ret = -1; + goto out; + } + + execute(); + +out: + free((void *)buf); + destroy_lowering(); + destroy_scopes(); + destroy_ast_nodes(); destroy_parser(p); return ret; } diff --git a/src/date.c b/src/date.c index 7e6bc77..11f35c4 100644 --- a/src/date.c +++ b/src/date.c @@ -23,6 +23,36 @@ ph_date_t date_from_numbers(unsigned year, unsigned month, unsigned day) return year << 9 | month << 5 | day; } +struct tm tm_from_date(ph_date_t date) +{ + unsigned year = 0; + unsigned month = 0; + unsigned day = 0; + date_split(date, &year, &month, &day); + + struct tm time = {.tm_year = year - 1900, .tm_mon = month - 1, .tm_mday = day}; + mktime(&time); + return time; +} + +ph_date_t date_from_tm(struct tm time) +{ + unsigned year = time.tm_year + 1900; + unsigned month = time.tm_mon + 1; + unsigned day = time.tm_mday; + return date_from_numbers(year, month, day); +} + +ph_date_t current_date() +{ + time_t t = time(NULL); + struct tm *time = localtime(&t); + unsigned year = time->tm_year + 1900; + unsigned month = time->tm_mon + 1; + unsigned day = time->tm_mday; + return date_from_numbers(year, month, day); +} + void date_split(ph_date_t date, unsigned *year, unsigned *month, unsigned *day) { if (year) *year = date >> 9; diff --git a/src/execute.c b/src/execute.c new file mode 100644 index 0000000..0d321d7 --- /dev/null +++ b/src/execute.c @@ -0,0 +1,367 @@ +#include + +#include +#include +#include +#include + +#define UNUSED(x) (void)x + +#define DEF(x) \ +static void exec_##x(struct insn i, size_t sp, struct vec *stack, struct vec *globals) + +static int64_t load(struct loc l, size_t sp, struct vec *stack, struct vec *globals) +{ + if (l.l) + return vect_at(int64_t, *stack, l.o + sp); + + return vect_at(int64_t, *globals, l.o); +} + +static void store(struct loc l, int64_t v, size_t sp, struct vec *stack, struct vec *globals) +{ + if (l.l) { + vect_at(int64_t, *stack, l.o + sp) = v; + return; + } + + vect_at(int64_t, *globals, l.o) = v; +} + +#define get(l) \ + load(l, sp, stack, globals) + +#define put(l, v) \ + store(l, v, sp, stack, globals) + +DEF(MOVE) { + int64_t i0 = get(i.i0); + put(i.o, i0); +} + +DEF(ADD) { + int64_t i0 = get(i.i0); + int64_t i1 = get(i.i1); + int64_t o = i0 + i1; + put(i.o, o); +} + +DEF(SUB) { + int64_t i0 = get(i.i0); + int64_t i1 = get(i.i1); + int64_t o = i0 - i1; + put(i.o, o); +} + +DEF(MUL) { + int64_t i0 = get(i.i0); + int64_t i1 = get(i.i1); + int64_t o = i0 * i1; + put(i.o, o); +} + +DEF(DIV) { + int64_t i0 = get(i.i0); + int64_t i1 = get(i.i1); + int64_t o = i0 / i1; + put(i.o, o); +} + +DEF(PRINT_DATE) { + int64_t i0 = get(i.i0); + char str[11] = {0}; + date_to_string(str, i0); + printf("%s", str); +} + +DEF(PRINT_INT) { + int64_t i0 = get(i.i0); + printf("%lli", (long long)i0); +} + +DEF(PRINT_STRING) { + int64_t i0 = get(i.i0); + printf("%s", (const char *)i0); +} + +DEF(PRINT_BOOL) { + int64_t i0 = get(i.i0); + printf("%s", i0 ? "true" : "false"); +} + +DEF(PRINT_NEWLINE) { + UNUSED(i); + UNUSED(sp); + UNUSED(stack); + UNUSED(globals); + printf("\n"); +} + +DEF(PRINT_SPACE) { + UNUSED(i); + UNUSED(sp); + UNUSED(stack); + UNUSED(globals); + printf(" "); +} + +DEF(CONST) { + put(i.o, i.v); +} + +DEF(EQ) { + int64_t i0 = get(i.i0); + int64_t i1 = get(i.i1); + int64_t b = i0 == i1; + put(i.o, b); +} + +DEF(LT) { + int64_t i0 = get(i.i0); + int64_t i1 = get(i.i1); + int64_t b = i0 < i1; + put(i.o, b); +} + +DEF(NEG) { + int64_t i0 = get(i.i0); + put(i.o, -i0); +} + +DEF(LOAD_DAY) { + int64_t i0 = get(i.i0); + unsigned day = 0; + date_split((ph_date_t)i0, NULL, NULL, &day); + put(i.o, (int64_t)day); +} + +DEF(LOAD_MONTH) { + int64_t i0 = get(i.i0); + unsigned month = 0; + date_split((ph_date_t)i0, NULL, &month, NULL); + put(i.o, (int64_t)month); +} + +DEF(LOAD_YEAR) { + int64_t i0 = get(i.i0); + unsigned year = 0; + date_split((ph_date_t)i0, &year, NULL, NULL); + put(i.o, (int64_t)year); +} + +DEF(LOAD_WEEKDAY) { + int64_t i0 = get(i.i0); + struct tm time = tm_from_date((ph_date_t)i0); + + const char *day = "Sunday"; + switch (time.tm_wday) { + case 0: day = "Sunday"; break; + case 1: day = "Monday"; break; + case 2: day = "Tuesday"; break; + case 3: day = "Wednesday"; break; + case 4: day = "Thursday"; break; + case 5: day = "Friday"; break; + case 6: day = "Saturday"; break; + } + + put(i.o, (int64_t)day); +} + +DEF(LOAD_WEEKNUM) { + int64_t i0 = get(i.i0); + struct tm time = tm_from_date((ph_date_t)i0); + put(i.o, time.tm_yday / 7); +} + +DEF(STORE_DAY) { + int64_t i0 = get(i.i0); + int64_t i1 = get(i.i1); + + unsigned year = 0; + unsigned month = 0; + date_split((ph_date_t)i0, &year, &month, NULL); + ph_date_t date = date_from_numbers(year, month, i1); + put(i.o, (int64_t)date); +} + +DEF(STORE_MONTH) { + int64_t i0 = get(i.i0); + int64_t i1 = get(i.i1); + + unsigned year = 0; + unsigned day = 0; + date_split((ph_date_t)i0, &year, NULL, &day); + ph_date_t date = date_from_numbers(year, i1, day); + put(i.o, (int64_t)date); +} + +DEF(STORE_YEAR) { + int64_t i0 = get(i.i0); + int64_t i1 = get(i.i1); + + unsigned month = 0; + unsigned day = 0; + date_split((ph_date_t)i0, NULL, &month, &day); + ph_date_t date = date_from_numbers(i1, month, day); + put(i.o, (int64_t)date); +} + +DEF(TODAY) { + ph_date_t date = current_date(); + put(i.o, (int64_t)date); +} + +DEF(DATE_ADD) { + int64_t i0 = get(i.i0); + int64_t i1 = get(i.i1); + + struct tm time = tm_from_date((ph_date_t)i0); + time.tm_mday += i1; + mktime(&time); + + ph_date_t date = date_from_tm(time); + put(i.o, (int64_t)date); +} + +DEF(DATE_SUB) { + int64_t i0 = get(i.i0); + int64_t i1 = get(i.i1); + + struct tm time = tm_from_date((ph_date_t)i0); + time.tm_mday -= i1; + mktime(&time); + + ph_date_t date = date_from_tm(time); + put(i.o, (int64_t)date); +} + +DEF(DATE_DIFF) { + int64_t i0 = get(i.i0); + int64_t i1 = get(i.i1); + + struct tm time0 = tm_from_date((ph_date_t)i0); + struct tm time1 = tm_from_date((ph_date_t)i1); + + /* close enough at least */ + time_t t0 = mktime(&time0); + time_t t1 = mktime(&time1); + double seconds = difftime(t0, t1); + int64_t days = round(seconds / 86400); + put(i.o, days); +} + +static int64_t exec_func(struct fn *f, struct vec *args, size_t sp, struct vec *stack, struct vec *globals) +{ + /* move args to formal locations */ + size_t i = 0; + foreach_vec(ai, *args) { + int64_t a = vect_at(int64_t, *args, ai); + vect_at(int64_t, *stack, sp + i) = a; + i += 1; + } + + vec_reset(args); + + size_t pc = 0; + int64_t retval = 0; + struct vec insns = f->insns; + while (1) { + struct insn i = vect_at(struct insn, insns, pc); + +#define DO(x) case x: exec_##x(i, sp, stack, globals); break; + switch (i.k) { + /* special cases first */ + case LABEL: break; + case STOP: return 0; + case RET: return get(i.i0); + case RETVAL: put(i.o, retval); break; + case J: pc = i.v; continue; + case B: { + int64_t i0 = get(i.i0); + if (i0) { + pc = i.v; + continue; + } + + break; + } + + case BZ: { + int64_t i0 = get(i.i0); + if (!i0) { + pc = i.v; + continue; + } + + break; + } + + case ARG: { + int64_t a = get(i.i0); + vect_append(int64_t, *args, &a); + break; + } + + case CALL: { + struct fn *cf = find_fn(i.v); + assert(cf); + + retval = exec_func(cf, args, sp + f->max_sp, stack, globals); + break; + } + + DO(MOVE); + DO(ADD); + DO(SUB); + DO(MUL); + DO(DIV); + DO(CONST); + DO(PRINT_DATE); + DO(PRINT_INT); + DO(PRINT_BOOL); + DO(PRINT_STRING); + DO(PRINT_NEWLINE); + DO(PRINT_SPACE); + DO(LOAD_DAY); + DO(LOAD_MONTH); + DO(LOAD_YEAR); + DO(LOAD_WEEKDAY); + DO(LOAD_WEEKNUM); + DO(STORE_DAY); + DO(STORE_MONTH); + DO(STORE_YEAR); + DO(DATE_ADD); + DO(DATE_SUB); + DO(DATE_DIFF); + DO(TODAY); + DO(EQ); + DO(LT); + DO(NEG); + } +#undef DO + + pc += 1; + } +} + +void execute() +{ + struct fn *main = find_fn(0); + assert(main); + + struct vec stack = vec_create(sizeof(int64_t)); + /* arbitrary amount */ + vec_reserve(&stack, 65535); + + struct vec globals = vec_create(sizeof(int64_t)); + /* should really take the value calculated during lowering */ + vec_reserve(&globals, 65535); + + struct vec args = vec_create(sizeof(int64_t)); + + exec_func(main, &args, 0, &stack, &globals); + + vec_destroy(&args); + vec_destroy(&stack); + vec_destroy(&globals); +} diff --git a/src/lexer.l b/src/lexer.l index 6f58bd5..4de6ead 100644 --- a/src/lexer.l +++ b/src/lexer.l @@ -154,7 +154,9 @@ STRING \"(\\.|[^"\\])*\" "end" {return END;} {STRING} { - yylval->str = yytext; + /* skip quotation marks */ + yylval->str = strdup(yytext + 1); + yylval->str[strlen(yylval->str) - 1] = '\0'; return STRING; } @@ -169,17 +171,17 @@ STRING \"(\\.|[^"\\])*\" } {IDENT} { - yylval->str = yytext; + yylval->str = strdup(yytext); return IDENT; } {FUNC_IDENT} { - yylval->str = yytext; + yylval->str = strdup(yytext); return FUNC_IDENT; } {PROC_IDENT} { - yylval->str = yytext; + yylval->str = strdup(yytext); return PROC_IDENT; } diff --git a/src/lower.c b/src/lower.c new file mode 100644 index 0000000..f4ca28e --- /dev/null +++ b/src/lower.c @@ -0,0 +1,730 @@ +#include + +#include +#include + +#define UNUSED(x) (void)x + +static struct vec fns = {0}; +/* zero is unintialized, global 1 reserved as null, so skip two first globals */ +static size_t globals = 2; + +static void lower(struct fn *f, struct ast *n); +static void lower_list(struct fn *f, struct ast *l) +{ + foreach_node(n, l) { + lower(f, n); + } +} + +static size_t loc_as_int(struct loc l) +{ + size_t a = l.o; + a |= (size_t)(l.l) << (sizeof(size_t) * 8 - 1); + return a; +} + +static struct loc build_local_loc(size_t idx) +{ + return (struct loc){.l = 1, .o = idx}; +} + +static struct loc build_global_loc(size_t idx) +{ + return (struct loc){.l = 0, .o = idx}; +} + +static struct loc null_loc() +{ + /* don't exactly love this but I guess it works */ + return build_global_loc(1); +} + +static void output_insn(struct fn *f, enum insn_kind k, struct loc o, struct loc i0, struct loc i1, int64_t v) +{ + struct insn i = {.k = k, .o = o, .i0 = i0, .i1 = i1, .v = v}; + vect_append(struct insn, f->insns, &i); +} + +static void lower_var(struct fn *f, struct ast *n) +{ + n->l = build_local_loc(f->sp++); + struct ast *expr = var_expr(n); + lower(f, expr); + output_insn(f, MOVE, n->l, expr->l, null_loc(), 0); +} + +static void lower_formal(struct fn *f, struct ast *n) +{ + n->l = build_local_loc(f->sp++); +} + +static void lower_date(struct fn *f, struct ast *n) +{ + n->l = build_local_loc(f->sp); + output_insn(f, CONST, n->l, null_loc(), null_loc(), n->v); +} + +static void lower_string(struct fn *f, struct ast *n) +{ + n->l = build_local_loc(f->sp); + output_insn(f, CONST, n->l, null_loc(), null_loc(), (int64_t)n->id); +} + +static void lower_int(struct fn *f, struct ast *n) +{ + n->l = build_local_loc(f->sp); + output_insn(f, CONST, n->l, null_loc(), null_loc(), n->v); +} + +static void lower_add(struct fn *f, struct ast *n) +{ + n->l = build_local_loc(f->sp); + struct ast *l = add_l(n); + struct ast *r = add_r(n); + + f->sp += 1; lower(f, l); + f->sp += 1; lower(f, r); + f->sp -= 2; + + output_insn(f, ADD, n->l, l->l, r->l, 0); +} + +static void lower_sub(struct fn *f, struct ast *n) +{ + n->l = build_local_loc(f->sp); + struct ast *l = sub_l(n); + struct ast *r = sub_r(n); + + f->sp += 1; lower(f, l); + f->sp += 1; lower(f, r); + f->sp -= 2; + + output_insn(f, SUB, n->l, l->l, r->l, 0); +} + +static void lower_mul(struct fn *f, struct ast *n) +{ + n->l = build_local_loc(f->sp); + struct ast *l = mul_l(n); + struct ast *r = mul_r(n); + + f->sp += 1; lower(f, l); + f->sp += 1; lower(f, r); + f->sp -= 2; + + output_insn(f, MUL, n->l, l->l, r->l, 0); +} + +static void lower_div(struct fn *f, struct ast *n) +{ + n->l = build_local_loc(f->sp); + struct ast *l = div_l(n); + struct ast *r = div_r(n); + + f->sp += 1; lower(f, l); + f->sp += 1; lower(f, r); + f->sp -= 2; + + output_insn(f, DIV, n->l, l->l, r->l, 0); +} + +static void lower_dot_assign(struct fn *f, struct ast *n) +{ + struct ast *r = assign_r(n); + struct ast *l = assign_l(n); + + lower(f, r); + + struct ast *base = dot_base(l); + lower(f, base); + + enum insn_kind store = STORE_DAY; + if (same_id(l->id, "day")) + store = STORE_DAY; + + else if (same_id(l->id, "month")) + store = STORE_MONTH; + + else if (same_id(l->id, "year")) + store = STORE_YEAR; + else + abort(); + + output_insn(f, store, base->l, base->l, r->l, 0); + n->l = base->l; +} + +static void lower_assign(struct fn *f, struct ast *n) +{ + struct ast *l = assign_l(n); + if (l->k == AST_DOT) + return lower_dot_assign(f, n); + + struct ast *r = assign_r(n); + + lower(f, r); + lower(f, l); + output_insn(f, MOVE, l->l, r->l, null_loc(), 0); + n->l = null_loc(); +} + +static void lower_id(struct fn *f, struct ast *n) +{ + UNUSED(f); + struct ast *exists = file_scope_find(n->scope, n->id); + assert(exists); + + n->l = exists->l; +} + +static void lower_return(struct fn *f, struct ast *n) +{ + struct ast *expr = return_expr(n); + lower(f, expr); + output_insn(f, RET, null_loc(), expr->l, null_loc(), 0); + n->l = null_loc(); +} + +static void lower_attr(struct fn *f, struct ast *n) +{ + n->l = build_local_loc(f->sp); + + struct ast *base = attr_base(n); + lower(f, base); + + enum insn_kind load = LOAD_DAY; + if (same_id(n->id, "day")) + load = LOAD_DAY; + + else if (same_id(n->id, "month")) + load = LOAD_MONTH; + + else if (same_id(n->id, "year")) + load = LOAD_YEAR; + + else if (same_id(n->id, "weekday")) + load = LOAD_WEEKDAY; + + else if (same_id(n->id, "weeknum")) + load = LOAD_WEEKNUM; + + else + abort(); + + output_insn(f, load, n->l, base->l, null_loc(), 0); +} + +static void lower_print(struct fn *f, struct ast *p) +{ + foreach_node(n, print_items(p)) { + lower(f, n); + + /* don't print space on last element */ + if (n->n) + output_insn(f, PRINT_SPACE, null_loc(), null_loc(), null_loc(), 0); + } + + output_insn(f, PRINT_NEWLINE, null_loc(), null_loc(), null_loc(), 0); + p->l = null_loc(); +} + +static void lower_proc_call(struct fn *f, struct ast *c) +{ + size_t count = 0; + foreach_node(n, proc_call_args(c)) { + f->sp += 1; lower(f, n); count += 1; + } + + size_t i = 0; + foreach_node(n, proc_call_args(c)) { + output_insn(f, ARG, null_loc(), n->l, null_loc(), i); + i++; + } + + struct ast *def = file_scope_find(c->scope, proc_call_id(c)); + output_insn(f, CALL, null_loc(), null_loc(), null_loc(), loc_as_int(def->l)); + + c->l = build_local_loc(f->sp); + if (c->t != TYPE_VOID) + output_insn(f, RETVAL, c->l, null_loc(), null_loc(), 0); + + f->sp -= count; +} + +static void lower_func_call(struct fn *f, struct ast *c) +{ + size_t count = 0; + foreach_node(n, func_call_args(c)) { + f->sp += 1; lower(f, n); count += 1; + } + + size_t i = 0; + foreach_node(n, func_call_args(c)) { + output_insn(f, ARG, null_loc(), n->l, null_loc(), i); + i++; + } + + struct ast *def = file_scope_find(c->scope, func_call_id(c)); + output_insn(f, CALL, null_loc(), null_loc(), null_loc(), loc_as_int(def->l)); + c->l = build_local_loc(f->sp); + if (c->t != TYPE_VOID) + output_insn(f, RETVAL, c->l, null_loc(), null_loc(), 0); + + f->sp -= count; +} + +static void lower_eq(struct fn *f, struct ast *n) +{ + n->l = build_local_loc(f->sp); + struct ast *l = eq_l(n); + struct ast *r = eq_r(n); + + f->sp += 1; lower(f, l); + f->sp += 1; lower(f, r); + f->sp -= 2; + + output_insn(f, EQ, n->l, l->l, r->l, 0); +} + +static void lower_lt(struct fn *f, struct ast *n) +{ + n->l = build_local_loc(f->sp); + struct ast *l = lt_l(n); + struct ast *r = lt_r(n); + + f->sp += 1; lower(f, l); + f->sp += 1; lower(f, r); + f->sp -= 2; + + output_insn(f, LT, n->l, l->l, r->l, 0); +} + +static void lower_pos(struct fn *f, struct ast *n) +{ + struct ast *expr = pos_expr(n); + lower(f, expr); + n->l = expr->l; +} + +static void lower_neg(struct fn *f, struct ast *n) +{ + n->l = build_local_loc(f->sp); + struct ast *expr = neg_expr(n); + f->sp += 1; lower(f, expr); + f->sp -= 1; + + output_insn(f, NEG, n->l, expr->l, null_loc(), 0); +} + +static void lower_print_date(struct fn *f, struct ast *n) +{ + struct ast *expr = print_date_expr(n); + lower(f, expr); + output_insn(f, PRINT_DATE, null_loc(), expr->l, null_loc(), 0); + n->l = null_loc(); +} + +static void lower_print_int(struct fn *f, struct ast *n) +{ + struct ast *expr = print_int_expr(n); + lower(f, expr); + output_insn(f, PRINT_INT, null_loc(), expr->l, null_loc(), 0); + n->l = null_loc(); +} + +static void lower_print_string(struct fn *f, struct ast *n) +{ + struct ast *expr = print_string_expr(n); + lower(f, expr); + output_insn(f, PRINT_STRING, null_loc(), expr->l, null_loc(), 0); + n->l = null_loc(); +} + +static void lower_print_bool(struct fn *f, struct ast *n) +{ + struct ast *expr = print_bool_expr(n); + lower(f, expr); + output_insn(f, PRINT_BOOL, null_loc(), expr->l, null_loc(), 0); + n->l = null_loc(); +} + +static void lower_date_add(struct fn *f, struct ast *n) +{ + n->l = build_local_loc(f->sp); + + struct ast *l = date_add_l(n); + struct ast *r = date_add_r(n); + + f->sp += 1; lower(f, l); + f->sp += 1; lower(f, r); + f->sp -= 2; + + output_insn(f, DATE_ADD, n->l, l->l, r->l, 0); +} + +static void lower_date_sub(struct fn *f, struct ast *n) +{ + n->l = build_local_loc(f->sp); + + struct ast *l = date_sub_l(n); + struct ast *r = date_sub_r(n); + + f->sp += 1; lower(f, l); + f->sp += 1; lower(f, r); + f->sp -= 2; + + output_insn(f, DATE_SUB, n->l, l->l, r->l, 0); +} + +static void lower_date_diff(struct fn *f, struct ast *n) +{ + n->l = build_local_loc(f->sp); + + struct ast *l = date_diff_l(n); + struct ast *r = date_diff_r(n); + + f->sp += 1; lower(f, l); + f->sp += 1; lower(f, r); + f->sp -= 2; + + output_insn(f, DATE_DIFF, n->l, l->l, r->l, 0); +} + +static void lower_until(struct fn *f, struct ast *n) +{ + size_t off = vec_len(&f->insns); + output_insn(f, LABEL, null_loc(), null_loc(), null_loc(), 0); + + lower_list(f, until_body(n)); + + struct ast *cond = until_cond(n); + lower(f, cond); + + output_insn(f, BZ, null_loc(), cond->l, null_loc(), off); + n->l = null_loc(); +} + +static void patch_branch(struct fn *f, size_t branch, size_t off) +{ + struct insn i = vect_at(struct insn, f->insns, branch); + i.v = off; + vect_at(struct insn, f->insns, branch) = i; +} + +static void lower_unless(struct fn *f, struct ast *n) +{ + struct ast *cond = unless_cond(n); + lower(f, cond); + + size_t branch = vec_len(&f->insns); + /* placeholder */ + output_insn(f, B, null_loc(), cond->l, null_loc(), 0); + + struct ast *body = unless_body(n); + lower_list(f, body); + + size_t jump = vec_len(&f->insns); + /* placeholder */ + output_insn(f, J, null_loc(), null_loc(), null_loc(), 0); + + size_t off = vec_len(&f->insns); + patch_branch(f, branch, off); + + struct ast *otherwise = unless_otherwise(n); + lower_list(f, otherwise); + + off = vec_len(&f->insns); + patch_branch(f, jump, off); + + n->l = null_loc(); +} + +static void lower_unless_expr(struct fn *f, struct ast *n) +{ + n->l = build_local_loc(f->sp); + + struct ast *cond = unless_expr_cond(n); + lower(f, cond); + + size_t branch = vec_len(&f->insns); + /* placeholder */ + output_insn(f, B, null_loc(), cond->l, null_loc(), 0); + + struct ast *body = unless_expr_body(n); + lower(f, body); + + output_insn(f, MOVE, n->l, body->l, null_loc(), 0); + + size_t jump = vec_len(&f->insns); + /* placeholder */ + output_insn(f, J, null_loc(), null_loc(), null_loc(), 0); + + size_t off = vec_len(&f->insns); + patch_branch(f, branch, off); + + struct ast *otherwise = unless_expr_otherwise(n); + lower(f, otherwise); + output_insn(f, MOVE, n->l, otherwise->l, null_loc(), 0); + + off = vec_len(&f->insns); + patch_branch(f, jump, off); +} + +static void lower_builtin_call(struct fn *f, struct ast *n) +{ + /* for now we only support Today(), which doesn't have any args */ + assert(same_id(builtin_call_id(n), "Today")); + + n->l = build_local_loc(f->sp); + output_insn(f, TODAY, n->l, null_loc(), null_loc(), 0); +} + +static void lower(struct fn *f, struct ast *n) +{ + if (f->max_sp < f->sp) + f->max_sp = f->sp; + + switch (n->k) { + case AST_PROC_DEF: break; + case AST_FUNC_DEF: break; + case AST_VAR_DEF: lower_var(f, n); break; + case AST_FORMAL_DEF: lower_formal(f, n); break; + case AST_CONST_STRING: lower_string(f, n); break; + case AST_CONST_DATE: lower_date(f, n); break; + case AST_CONST_INT: lower_int(f, n); break; + case AST_ASSIGN: lower_assign(f, n); break; + case AST_ADD: lower_add(f, n); break; + case AST_SUB: lower_sub(f, n); break; + case AST_MUL: lower_mul(f, n); break; + case AST_DIV: lower_div(f, n); break; + case AST_ID: lower_id(f, n); break; + case AST_RETURN: lower_return(f, n); break; + case AST_ATTR: lower_attr(f, n); break; + case AST_PRINT: lower_print(f, n); break; + case AST_PROC_CALL: lower_proc_call(f, n); break; + case AST_FUNC_CALL: lower_func_call(f, n); break; + case AST_BUILTIN_CALL: lower_builtin_call(f, n); break; + case AST_EQ: lower_eq(f, n); break; + case AST_LT: lower_lt(f, n); break; + case AST_POS: lower_pos(f, n); break; + case AST_NEG: lower_neg(f, n); break; + case AST_PRINT_DATE: lower_print_date(f, n); break; + case AST_PRINT_STRING: lower_print_string(f, n); break; + case AST_PRINT_BOOL: lower_print_bool(f, n); break; + case AST_PRINT_INT: lower_print_int(f, n); break; + case AST_DATE_ADD: lower_date_add(f, n); break; + case AST_DATE_SUB: lower_date_sub(f, n); break; + case AST_DATE_DIFF: lower_date_diff(f, n); break; + case AST_UNTIL: lower_until(f, n); break; + case AST_UNLESS: lower_unless(f, n); break; + case AST_UNLESS_EXPR: lower_unless_expr(f, n); break; + + /* handled by assign */ + case AST_DOT: break; + } + + assert(loc_as_int(n->l) > 0); +} + +static void lower_global_var(struct fn *f, struct ast *n) { + n->l = build_global_loc(globals++); + struct ast *expr = var_expr(n); + lower(f, expr); + output_insn(f, MOVE, n->l, expr->l, null_loc(), 0); +} + +static void add_proc(struct ast *n) { + size_t idx = vec_len(&fns); + n->l = build_global_loc(idx); + struct fn f = {.name = proc_id(n), + .idx = idx, + .sp = 0, + .insns = vec_create(sizeof(struct insn))}; + + vect_append(struct fn, fns, &f); +} + +static void add_func(struct ast *n) { + size_t idx = vec_len(&fns); + n->l = build_global_loc(idx); + struct fn f = {.name = func_id(n), + .idx = idx, + .sp = 0, + .insns = vec_create(sizeof(struct insn))}; + + vect_append(struct fn, fns, &f); +} + +static void lower_proc_def(struct ast *d) +{ + struct fn *f = vec_at(&fns, d->l.o); + assert(f); + + lower_list(f, proc_formals(d)); + lower_list(f, proc_vars(d)); + lower_list(f, proc_body(d)); + + if (d->t == TYPE_VOID) + output_insn(f, STOP, null_loc(), null_loc(), null_loc(), 0); +} + +static void lower_func_def(struct ast *d) +{ + struct fn *f = vec_at(&fns, d->l.o); + assert(f); + + lower_list(f, func_formals(d)); + lower_list(f, func_vars(d)); + lower_list(f, func_body(d)); +} + +#ifdef DEBUG +static void dump_loc(struct loc l) +{ + if (is_null_loc(l)) { + printf("null, "); + return; + } + + bool local = l.l; + if (local) { + printf("l"); + } else { + printf("g"); + } + + size_t val = l.o; + printf("%zd, ", val); +} + +static void dump_val(int64_t v) +{ + printf("%lli", (long long)v); +} + +static void dump_insn(struct insn i, size_t addr) +{ + printf("//%8zd: ", addr); +#define DUMP(x) case x: printf(#x); break; + switch (i.k) { + DUMP(TODAY); + DUMP(CALL); + DUMP(MOVE); + DUMP(ADD); + DUMP(SUB); + DUMP(MUL); + DUMP(DIV); + DUMP(ARG); + DUMP(STOP); + DUMP(RETVAL); + DUMP(PRINT_DATE); + DUMP(PRINT_INT); + DUMP(PRINT_STRING); + DUMP(PRINT_NEWLINE); + DUMP(PRINT_SPACE); + DUMP(PRINT_BOOL); + DUMP(DATE_ADD); + DUMP(DATE_SUB); + DUMP(DATE_DIFF); + DUMP(STORE_DAY); + DUMP(STORE_MONTH); + DUMP(STORE_YEAR); + DUMP(LOAD_DAY); + DUMP(LOAD_MONTH); + DUMP(LOAD_YEAR); + DUMP(LOAD_WEEKDAY); + DUMP(LOAD_WEEKNUM); + DUMP(RET); + DUMP(CONST); + DUMP(EQ); + DUMP(LT); + DUMP(NEG); + DUMP(B); + DUMP(BZ); + DUMP(J); + DUMP(LABEL); + } +#undef DUMP + + printf(" "); + + dump_loc(i.o); + dump_loc(i.i0); + dump_loc(i.i1); + dump_val(i.v); + + printf("\n"); +} + +static void dump_insns(struct fn *f) +{ + size_t addr = 0; + foreach_vec(ii, f->insns) { + struct insn i = vect_at(struct insn, f->insns, ii); + dump_insn(i, addr); + addr++; + } +} +#endif /* DEBUG */ + +struct fn *find_fn(size_t idx) +{ + return vec_at(&fns, idx); +} + +int lower_ast(struct ast *tree) +{ + fns = vec_create(sizeof(struct fn)); + + struct fn main = {.name = "main", + .idx = 0, + .sp = 0, + .insns = vec_create(sizeof(struct insn))}; + + vect_append(struct fn, fns, &main); + + foreach_node(n, tree) { + switch (n->k) { + case AST_PROC_DEF: add_proc(n); break; + case AST_FUNC_DEF: add_func(n); break; + default: + } + } + + struct fn *f = &vect_at(struct fn, fns, 0); + foreach_node(n, tree) { + switch (n->k) { + case AST_VAR_DEF: lower_global_var(f, n); break; + case AST_PROC_DEF: lower_proc_def(n); break; + case AST_FUNC_DEF: lower_func_def(n); break; + default: lower(f, n); + } + } + + output_insn(f, STOP, null_loc(), null_loc(), null_loc(), 0); + +#ifdef DEBUG + foreach_vec(fi, fns) { + struct fn *f = vec_at(&fns, fi); + printf("// %s (%zd):\n", f->name, f->idx); + dump_insns(f); + } +#endif + return 0; +} + +static void destroy_fn(struct fn *f) +{ + vec_destroy(&f->insns); +} + +void destroy_lowering() +{ + foreach_vec(fi, fns) { + struct fn *f = vec_at(&fns, fi); + destroy_fn(f); + } + + vec_destroy(&fns); +} diff --git a/src/parser.y b/src/parser.y index 3b69b06..7aacd39 100644 --- a/src/parser.y +++ b/src/parser.y @@ -7,6 +7,7 @@ #include #include +#include #define FOREACH_TOKEN(M) \ M(LPAREN) \ @@ -57,7 +58,7 @@ %parse-param {void *scanner} {struct parser* parser} %union { - struct ast_node *node; + struct ast *node; ph_date_t num; int64_t snum; char *str; @@ -101,6 +102,44 @@ %token PRINT "print" %token END "end" +%nterm program +%nterm definition +%nterm definitions +%nterm opt_definitions +%nterm variable_definition +%nterm variable_definitions +%nterm opt_variable_definitions +%nterm procedure_definition +%nterm function_definition +%nterm formal_arg +%nterm formals +%nterm lvalue +%nterm rvalue +%nterm print_item +%nterm print_items +%nterm opt_formals +%nterm statement +%nterm statement_list +%nterm opt_arguments +%nterm arguments +%nterm print_statement +%nterm unless_statement +%nterm unless_expression +%nterm until_statement +%nterm assignment +%nterm procedure_call +%nterm function_call +%nterm simple_expr +%nterm atom +%nterm term +%nterm factor +%nterm expression +%nterm ident + +/* special case */ +%nterm return +%nterm opt_return + %{ /** Modifies the signature of yylex to fit our parser better. */ @@ -145,11 +184,11 @@ static void yyerror(YYLTYPE *yylloc, void *lexer, %% statement_list - : statement "," statement_list + : statement "," statement_list { $$ = $1; $$->n = $3; } | statement definitions - : definition definitions + : definition definitions { $$ = $1; $$->n = $2; } | definition definition @@ -158,74 +197,102 @@ definition | variable_definition variable_definition - : "var" IDENT "=" expression + : "var" IDENT "=" expression { + $$ = gen_var_def($[IDENT], $[expression], src_loc(@$)); + } variable_definitions - : variable_definition variable_definitions + : variable_definition variable_definitions { $$ = $1; $$->n = $2; } | variable_definition opt_variable_definitions : variable_definitions - | + | { $$ = NULL; } return - : "return" IDENT + : "return" IDENT { $$ = $2; } opt_return : return - | + | { $$ = NULL; } function_definition : "function" FUNC_IDENT "{" opt_formals "}" - return opt_variable_definitions "is" rvalue "end" "function" + return opt_variable_definitions "is" rvalue "end" "function" { + struct ast *ret = gen_return($[rvalue], $[rvalue]->loc); + $$ = gen_func_def($[FUNC_IDENT], + $[return], + $[opt_formals], + $[opt_variable_definitions], + ret, + src_loc(@$)); + } procedure_definition : "procedure" PROC_IDENT "{" opt_formals "}" opt_return opt_variable_definitions "is" statement_list - "end" "procedure" + "end" "procedure" { + $$ = gen_proc_def($[PROC_IDENT], + $[opt_return], + $[opt_formals], + $[opt_variable_definitions], + $[statement_list], + src_loc(@$)); + } formals - : formal_arg "," formal_arg + : formal_arg "," formals { $$ = $1; $$->n = $3; } | formal_arg opt_formals : formals - | + | { $$ = NULL; } formal_arg - : IDENT "[" IDENT "]" + : IDENT "[" IDENT "]" { + $$ = gen_formal_def($1, $3, src_loc(@$)); + } procedure_call - : PROC_IDENT "(" opt_arguments ")" + : PROC_IDENT "(" opt_arguments ")" { + $$ = gen_proc_call($[PROC_IDENT], $[opt_arguments], src_loc(@$)); + } arguments - : expression "," arguments + : expression "," arguments { $$ = $1; $$->n = $3; } | expression opt_arguments : arguments - | + | { $$ = NULL; } assignment - : lvalue "=" rvalue + : lvalue "=" rvalue { + $$ = gen_assign($[lvalue], $[rvalue], src_loc(@$)); + } + +ident + : IDENT { + $$ = gen_id($[IDENT], src_loc(@$)); + } lvalue - : IDENT - | IDENT "." IDENT + : ident + | ident "." IDENT { $$ = gen_dot($1, $3, src_loc(@$)); } rvalue : expression | unless_expression print_statement - : "print" print_items + : "print" print_items { $$ = gen_print($[print_items], src_loc(@$)); } print_items - : print_item "&" print_items + : print_item "&" print_items { $$ = $1; $$->n = $3; } | print_item print_item - : STRING + : STRING { $$ = gen_const_string($[STRING], src_loc(@$)); } | expression statement @@ -234,56 +301,98 @@ statement | print_statement | until_statement | unless_statement - | "return" expression + | "return" expression { $$ = gen_return($[expression], src_loc(@$)); } until_statement - : "do" statement_list "until" expression + : "do" statement_list "until" expression { + $$ = gen_until($[statement_list], $[expression], src_loc(@$)); + } unless_statement - : "do" statement_list "unless" expression "done" - | "do" statement_list "unless" expression "otherwise" statement_list "done" + : "do" statement_list "unless" expression "done" { + $$ = gen_unless($[statement_list], $[expression], NULL, src_loc(@$)); + } + | "do" statement_list "unless" expression "otherwise" statement_list "done" { + $$ = gen_unless($2, $[expression], $6, src_loc(@$)); + } expression : simple_expr - | expression "=" simple_expr - | expression "<" simple_expr + | expression "=" simple_expr { + $$ = gen_eq($1, $[simple_expr], src_loc(@$)); + } + | expression "<" simple_expr { + $$ = gen_lt($1, $[simple_expr], src_loc(@$)); + } simple_expr : term - | simple_expr "+" term - | simple_expr "-" term + | simple_expr "+" term { + $$ = gen_add($1, $[term], src_loc(@$)); + } + | simple_expr "-" term { + $$ = gen_sub($1, $[term], src_loc(@$)); + } term : factor - | term "*" factor - | term "/" factor + | term "*" factor { + $$ = gen_mul($1, $[factor], src_loc(@$)); + } + | term "/" factor { + $$ = gen_div($1, $[factor], src_loc(@$)); + } factor - : "+" atom - | "-" atom + : "+" atom { + $$ = gen_pos($[atom], src_loc(@$)); + } + | "-" atom { + $$ = gen_neg($[atom], src_loc(@$)); + } | atom atom - : IDENT - | IDENT "'" IDENT - | INT_LITERAL - | DATE_LITERAL + : ident + | ident "'" IDENT { + $$ = gen_attr($[ident], $[IDENT], src_loc(@$)); + } + | INT_LITERAL { + $$ = gen_const_int($[INT_LITERAL], src_loc(@$)); + } + | DATE_LITERAL { + $$ = gen_const_date($[DATE_LITERAL], src_loc(@$)); + } | function_call | procedure_call - | "(" expression ")" + | "(" expression ")" { $$ = $2; } function_call - : FUNC_IDENT "(" opt_arguments ")" + : FUNC_IDENT "(" opt_arguments ")" { + $$ = gen_func_call($[FUNC_IDENT], $[opt_arguments], src_loc(@$)); + } unless_expression - : "do" expression "unless" expression "otherwise" expression "done" + : "do" expression "unless" expression "otherwise" expression "done" { + $$ = gen_unless_expr($2, $4, $6, src_loc(@$)); + } opt_definitions : definitions - | {} + | { $$ = NULL; } program - : opt_definitions statement_list + : opt_definitions statement_list { + if ($1) { + ast_last($1)->n = $2; + $$ = $1; + } + else { + $$ = $2; + } + + parser->tree = $$; + } | error { /* make sure to catch parse errors */ parser->failed = true; @@ -293,44 +402,6 @@ program #include "gen_lexer.inc" -static void dump_yychar(struct parser *p, int yychar, YYSTYPE yylval, YYLTYPE yylloc) -{ - struct src_loc loc = src_loc(yylloc); - printf("%s:%d:%d: ", p->fname, loc.first_line, loc.first_col); - -#define PRINT_NAME(token) case token: printf(#token " "); break; - switch (yychar) { - FOREACH_TOKEN(PRINT_NAME); - default: printf("Unknown yychar\n"); return; - } - - char date_str[11] = {0}; - switch (yychar) { - case INT_LITERAL: printf("(%lld)", (long long int)yylval.snum); break; - case IDENT: printf("(%s)", yylval.str); break; - case FUNC_IDENT: printf("(%s)", yylval.str); break; - case PROC_IDENT: printf("(%s)", yylval.str); break; - case DATE_LITERAL: - date_to_string(date_str, yylval.num); - printf("(%s)", date_str); - break; - } - - printf("\n"); -} - -static void dump_lex(struct parser *p) -{ - int yychar; - YYSTYPE yylval; - YYLTYPE yylloc = {1, 1, 1, 1}; - - /* run lexer until we reach the end of the file */ - while ((yychar = yylex(&yylval, &yylloc, p->lexer, p)) != YYEOF) { - dump_yychar(p, yychar, yylval, yylloc); - } -} - static struct src_loc src_loc(YYLTYPE yylloc) { struct src_loc loc; @@ -376,12 +447,6 @@ void parse(struct parser *p, const char *fname, const char *buf) yylex_init(&p->lexer); - yy_scan_string(buf, p->lexer); - - // debugging, remember to reset yy_scan_string once the actual parser - // runs - dump_lex(p); - yy_scan_string(buf, p->lexer); yyparse(p->lexer, p); } diff --git a/src/scope.c b/src/scope.c new file mode 100644 index 0000000..290dfd6 --- /dev/null +++ b/src/scope.c @@ -0,0 +1,153 @@ +#include +#include + +#include +#include +#include + +struct scope { + size_t id; + struct scope *parent; + + const char *fname; + const char *buf; + + struct vec visible; +}; + +struct vec scopes = {0}; + +struct scope *create_scope() +{ + static size_t counter = 0; + if (vec_uninit(scopes)) { + scopes = vec_create(sizeof(struct scope *)); + } + + struct scope *scope = calloc(1, sizeof(struct scope)); + if (!scope) + return NULL; + + scope->visible = vec_create(sizeof(struct ast *)); + scope->id = counter++; + + vect_append(struct scope *, scopes, &scope); + return scope; +} + +void scope_add_scope(struct scope *parent, struct scope *scope) +{ + scope->parent = parent; + scope->fname = parent->fname; + scope->buf = parent->buf; +} + +void scope_set_file(struct scope *scope, const char *fname, const char *buf) +{ + scope->fname = fname; + scope->buf = buf; +} + +struct ast *scope_find(struct scope *scope, char *id) +{ + /* not particularly fast, but good enough for now */ + foreach_vec(vi, scope->visible) { + struct ast *n = vect_at(struct ast *, scope->visible, vi); + if (same_id(n->id, id)) + return n; + } + + return NULL; +} + +struct ast *file_scope_find(struct scope *scope, char *id) +{ + struct ast *exists = scope_find(scope, id); + if (exists) + return exists; + + if (scope->parent) + return file_scope_find(scope->parent, id); + + return NULL; +} + +int scope_add_var(struct scope *scope, struct ast *var) +{ + struct ast *exists = scope_find(scope, var_id(var)); + if (exists) { + semantic_error(scope, var, "var redefined"); + semantic_error(scope, exists, "previously here"); + return -1; + } + + vect_append(struct ast *, scope->visible, &var); + return 0; +} + +int scope_add_formal(struct scope *scope, struct ast *formal) +{ + struct ast *exists = scope_find(scope, formal_id(formal)); + if (exists) { + semantic_error(scope, formal, "formal redefined"); + semantic_error(scope, exists, "previously here"); + return -1; + } + + vect_append(struct ast *, scope->visible, &formal); + return 0; +} + +int scope_add_proc(struct scope *scope, struct ast *proc) +{ + struct ast *exists = scope_find(scope, proc_id(proc)); + if (exists) { + semantic_error(scope, proc, "proc redefined"); + semantic_error(scope, exists, "previously here"); + return -1; + } + + vect_append(struct ast *, scope->visible, &proc); + return 0; +} + +int scope_add_func(struct scope *scope, struct ast *func) +{ + struct ast *exists = scope_find(scope, func_id(func)); + if (exists) { + semantic_error(scope, func, "func redefined"); + semantic_error(scope, exists, "previously here"); + return -1; + } + + vect_append(struct ast *, scope->visible, &func); + return 0; +} + +static void destroy_scope(struct scope *scope) +{ + vec_destroy(&scope->visible); + free(scope); +} + +void destroy_scopes() +{ + foreach_vec(si, scopes) { + struct scope *s = vect_at(struct scope *, scopes, si); + destroy_scope(s); + } + + vec_destroy(&scopes); +} + +void semantic_error(struct scope *scope, struct ast *n, const char *msg, ...) +{ + va_list args; + va_start(args, msg); + struct src_issue issue; + issue.loc = n->loc; + issue.fname = scope->fname; + issue.buf = scope->buf; + vsrc_issue(issue, msg, args); + va_end(args); +} diff --git a/src/vec.c b/src/vec.c new file mode 100644 index 0000000..3fc2d7d --- /dev/null +++ b/src/vec.c @@ -0,0 +1,75 @@ +#include +#include +#include + +#include + +struct vec vec_create(size_t ns) +{ + return (struct vec) { + .n = 0, + .s = 1, + .ns = ns, + .buf = malloc(ns), + }; +} + +size_t vec_len(struct vec *v) +{ + return v->n; +} + +void *vec_at(struct vec *v, size_t i) +{ + assert(i < v->n && "out of vector bounds"); + return v->buf + i * v->ns; +} + +void *vec_back(struct vec *v) +{ + assert(v->n); + return v->buf + (v->n - 1) * v->ns; +} + +void *vec_pop(struct vec *v) +{ + assert(v->n && "attempting to pop empty vector"); + v->n--; + return v->buf + v->n * v->ns; +} + +void vec_append(struct vec *v, void *n) +{ + v->n++; + if (v->n >= v->s) { + v->s *= 2; + v->buf = realloc(v->buf, v->s * v->ns); + } + + void *p = vec_at(v, v->n - 1); + memcpy(p, n, v->ns); +} + +void vec_reset(struct vec *v) +{ + v->n = 0; +} + +void vec_reserve(struct vec *v, size_t n) +{ + if (v->n >= n) + return; + + v->n = n; + v->s = n; + v->buf = realloc(v->buf, v->s * v->ns); +} + +void vec_destroy(struct vec *v) { + free(v->buf); +} + +void vec_sort(struct vec *v, vec_comp_t comp) +{ + qsort(v->buf, v->n, v->ns, comp); +} -- cgit v1.2.3