aboutsummaryrefslogtreecommitdiff
path: root/src/parser.y
diff options
context:
space:
mode:
authorKimplul <kimi.h.kuparinen@gmail.com>2024-12-03 22:04:38 +0200
committerKimplul <kimi.h.kuparinen@gmail.com>2024-12-03 22:04:38 +0200
commit2253da61e9b3dd5408bed182ea08e5270156c17e (patch)
tree298bb06e681ec5366faa539906cae6e805fe5862 /src/parser.y
downloadfwd-2253da61e9b3dd5408bed182ea08e5270156c17e.tar.gz
fwd-2253da61e9b3dd5408bed182ea08e5270156c17e.zip
initial commit
+ Lots of code copied from ek, so didn't have to start from scratch, but might mean there are some quirks here and there that made sense in ek but not necessarily here.
Diffstat (limited to 'src/parser.y')
-rw-r--r--src/parser.y460
1 files changed, 460 insertions, 0 deletions
diff --git a/src/parser.y b/src/parser.y
new file mode 100644
index 0000000..ccff211
--- /dev/null
+++ b/src/parser.y
@@ -0,0 +1,460 @@
+/* SPDX-License-Identifier: copyleft-next-0.3.1 */
+/* Copyright 2024 Kim Kuparinen < kimi.h.kuparinen@gmail.com > */
+
+%{
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <assert.h>
+
+#include <fwd/parser.h>
+
+%}
+
+%locations
+
+%define parse.trace
+%define parse.error verbose
+%define api.pure full
+%define lr.type ielr
+
+%lex-param {void *scanner} {struct parser *parser}
+%parse-param {void *scanner} {struct parser* parser}
+
+%union {
+ struct ast *node;
+ struct type *type;
+ double dbl;
+ long long integer;
+ char *str;
+};
+
+%token <integer> INT
+%token <integer> CHAR
+%token <integer> BOOL
+%token <dbl> FLOAT
+%token <str> STRING
+%token <str> ID
+%token <str> APPLY
+
+%token QUESTION "?"
+%token SQUOTE "'"
+%token TO "="
+%token ELLIPSIS "..."
+%token SEMICOLON ";"
+%token COLON ":"
+%token BANG "!"
+%token SIZEOF "sizeof"
+%token STAR "*"
+%token DIV "/"
+%token REM "%"
+%token MINUS "-"
+%token PLUS "+"
+%token XOR "^"
+%token AND "&"
+%token TILDE "~"
+%token LT "<"
+%token GT ">"
+%token LE "<="
+%token GE ">="
+%token NE "!="
+%token EQ "=="
+%token LSHIFT "<<"
+%token RSHIFT ">>"
+%token COMMA ","
+%token LPAREN "("
+%token RPAREN ")"
+%token LBRACE "{"
+%token RBRACE "}"
+%token LBRACKET "["
+%token RBRACKET "]"
+%token AS "as"
+%token DOT "."
+%token SCOPE "::"
+%token FATARROW "=>"
+
+%right "[" "]"
+/* precedence */
+%left ","
+%right "=" "+=" "-=" "*=" "/=" "%=" "<<=" ">>="
+%left "==" "!="
+%left "<" ">" "<=" ">="
+%left "^"
+%left "&"
+%left "<<" ">>"
+%left "+" "-"
+%left "*" "/" "%"
+%left "as" "sizeof"
+%right "'" "!" "~"
+%left "." "=>" "(" ")"
+%left "::"
+
+%nterm <node> top unit proc call closure var expr statement body
+%nterm <node> vars exprs statements closures
+%nterm <node> opt_vars opt_exprs opt_statements
+%nterm <node> rev_vars rev_exprs rev_closures rev_statements
+%nterm <node> let unop binop construct
+
+%nterm <type> type rev_types types opt_types
+
+
+%{
+
+/** Modifies the signature of yylex to fit our parser better. */
+#define YY_DECL int yylex(YYSTYPE *yylval, YYLTYPE *yylloc, \
+ void *yyscanner, struct parser *parser)
+
+/**
+ * Declare yylex.
+ *
+ * @param yylval Bison current value.
+ * @param yylloc Bison location info.
+ * @param yyscanner Flex scanner.
+ * @param parser Current parser state.
+ * @return \c 0 when succesful, \c 1 otherwise.
+ * More info on yylex() can be found in the flex manual.
+ */
+YY_DECL;
+
+/**
+ * Gobble tokens until we reach the next interesting feature.
+ * Interesting features are generally new statements.
+ * Mainly intended for trying to get to a sensible
+ * location to continue parser after an error has occured.
+ *
+ * @param yylval Current parser value.
+ * @param yylloc Parser location info.
+ * @param scanner Lex scanner.
+ * @param parser Current parser.
+ * @return \c 0 on success, non-zero otherwise.
+ */
+static int next_interesting_feature(YYSTYPE *yylval, YYLTYPE *yylloc,
+ void *scanner, struct parser *parser);
+
+/**
+ * Convert bison location info to our own source location info.
+ *
+ * @param yylloc Bison location info.
+ * @return Internal location info.
+ */
+static struct src_loc src_loc(YYLTYPE yylloc);
+
+/**
+ * Print parsing error.
+ * Automatically called by bison.
+ *
+ * @param yylloc Location of error.
+ * @param lexer Lexer.
+ * @param parser Parser state.
+ * @param msg Message to print.
+ */
+static void yyerror(YYLTYPE *yylloc, void *lexer,
+ struct parser *parser, const char *msg);
+
+/**
+ * Try to convert escape code to its actual value.
+ * I.e. '\n' -> 0x0a.
+ *
+ * @param c Escape character without backslash.
+ * @return Corresponding value.
+ */
+static char match_escape(char c);
+
+/**
+ * Similar to strdup() but skips quotation marks that would
+ * otherwise be included.
+ * I.e. "something" -> something.
+ *
+ * @param s String to clone, with quotation marks surrounding it.
+ * @return Identical string but without quotation marks around it.
+ */
+static char *strip(const char *s);
+
+%}
+
+%start input;
+%%
+var
+ : ID { $$ = gen_var($1, src_loc(@$)); }
+
+rev_vars
+ : rev_vars "," var { $$ = $3; $$->n = $1; }
+ | var
+
+vars
+ : rev_vars { $$ = reverse_ast_list($1); }
+ | rev_vars "," { $$ = reverse_ast_list($1); }
+
+opt_vars
+ : vars
+ | {$$ = NULL;}
+
+proc
+ : ID "(" opt_vars ")" body {
+ $$ = gen_proc($[ID], $[opt_vars], NULL, $[body], src_loc(@$));
+ }
+
+binop
+ : expr "+" expr { $$ = gen_binop(AST_ADD, $1, $3, src_loc(@$)); }
+ | expr "-" expr { $$ = gen_binop(AST_SUB, $1, $3, src_loc(@$)); }
+ | expr "*" expr { $$ = gen_binop(AST_MUL, $1, $3, src_loc(@$)); }
+ | expr "/" expr { $$ = gen_binop(AST_DIV, $1, $3, src_loc(@$)); }
+ | expr "%" expr { $$ = gen_binop(AST_REM, $1, $3, src_loc(@$)); }
+ | expr "<" expr { $$ = gen_comparison(AST_LT, $1, $3, src_loc(@$)); }
+ | expr ">" expr { $$ = gen_comparison(AST_GT, $1, $3, src_loc(@$)); }
+ | expr "<<" expr { $$ = gen_binop(AST_LSHIFT, $1, $3, src_loc(@$)); }
+ | expr ">>" expr { $$ = gen_binop(AST_RSHIFT, $1, $3, src_loc(@$)); }
+ | expr "<=" expr { $$ = gen_comparison(AST_LE, $1, $3, src_loc(@$)); }
+ | expr ">=" expr { $$ = gen_comparison(AST_GE, $1, $3, src_loc(@$)); }
+ | expr "!=" expr { $$ = gen_comparison(AST_NE, $1, $3, src_loc(@$)); }
+ | expr "==" expr { $$ = gen_comparison(AST_EQ, $1, $3, src_loc(@$)); }
+
+unop
+ : "-" expr { $$ = gen_unop(AST_NEG, $2, src_loc(@$)); }
+ | "!" expr { $$ = gen_unop(AST_LNOT, $2, src_loc(@$)); }
+
+type
+ : ID { $$ = tgen_id($[ID], src_loc(@$)); }
+ | APPLY "[" opt_types "]" {
+ $$ = tgen_construct($[APPLY], $[opt_types], src_loc(@$));
+ }
+
+rev_types
+ : rev_types "," type { $$ = $3; $3->n = $$; }
+ | type
+
+types
+ : rev_types { $$ = reverse_type_list($1); }
+
+opt_types
+ : types
+ | { $$ = NULL; }
+
+construct
+ : APPLY "{" opt_exprs "}" {
+ $$ = gen_init($[APPLY], NULL, $[opt_exprs], src_loc(@$));
+ }
+ | APPLY "[" opt_types "]" "{" opt_exprs "}" {
+ $$ = gen_init($[APPLY], $[opt_types], $[opt_exprs], src_loc(@$));
+ }
+
+expr
+ : expr "." ID { $$ = gen_dot($3, $1, src_loc(@$)); }
+ | STRING { $$ = gen_const_str(strip($1), src_loc(@$)); }
+ | FLOAT { $$ = gen_const_float($1, src_loc(@$)); }
+ | BOOL { $$ = gen_const_bool($1, src_loc(@$)); }
+ | CHAR { $$ = gen_const_char($1, src_loc(@$)); }
+ | INT { $$ = gen_const_int($1, src_loc(@$)); }
+ | ID { $$ = gen_id($1, src_loc(@$)); }
+ | "(" expr ")" { $$ = $2; }
+ | construct
+ | binop
+ | unop
+
+rev_exprs
+ : rev_exprs "," expr { $$ = $3; $3->n = $1; }
+ | expr
+
+exprs
+ : rev_exprs { $$ = reverse_ast_list($1); }
+
+opt_exprs
+ : exprs
+ | { $$ = NULL; }
+
+closure
+ : "=>" opt_vars body { $$ = gen_closure($[opt_vars], $[body], src_loc(@$)); }
+
+rev_closures
+ : rev_closures closure { $$ = $2; $2->n = $1; }
+ | closure
+
+closures
+ : rev_closures { $$ = reverse_ast_list($1); }
+
+call
+ : expr "(" opt_exprs ")" ";" {
+ $$ = gen_call($1, $[opt_exprs], src_loc(@$));
+ }
+ | expr "(" opt_exprs ")" "=>" opt_vars ";" {
+ /* the rest of the function body is our closure, gets fixed up
+ * later */
+ struct ast *closure = gen_closure($[opt_vars], NULL, src_loc(@$));
+ ast_append(&$[opt_exprs], closure);
+ $$ = gen_call($[expr], $[opt_exprs], src_loc(@$));
+ }
+ | expr "(" opt_exprs ")" closures {
+ ast_append(&$[opt_exprs], $[closures]);
+ $$ = gen_call($[expr], $[opt_exprs], src_loc(@$));
+ }
+
+let
+ : expr "=>" ID ";" { $$ = gen_let($3, $1, src_loc(@$)); }
+
+statement
+ : call
+ | let
+ | ";" { $$ = gen_empty(src_loc(@$)); }
+
+rev_statements
+ : rev_statements statement { $$ = $2; $2->n = $1; }
+ | statement
+
+statements
+ : rev_statements { $$ = reverse_ast_list($1); fix_closures($$); }
+
+opt_statements
+ : statements
+ | { $$ = NULL; }
+
+body
+ : "{" opt_statements "}" { $$ = gen_block($2, src_loc(@$)); }
+
+top
+ : proc
+ | error {
+ $$ = gen_empty(src_loc(@$));
+ parser->failed = true;
+ /* ignore any content inside a top level thing and just move onto
+ * the next one */
+ if (next_interesting_feature(&yylval, &yylloc, scanner, parser)) {
+ YYABORT;
+ }
+ yyclearin;
+ yyerrok;
+ }
+
+unit
+ : top { $$ = $1; }
+ | unit top { $$ = $2; $2->n = $1; }
+
+input
+ : unit { parser->tree = reverse_ast_list($1); }
+ | /* empty */
+
+%%
+
+#include "gen_lexer.inc"
+
+/* I'm not convinced this is foolproof quite yet, more testing would be nice. */
+static int next_interesting_feature(YYSTYPE *yylval, YYLTYPE *yylloc,
+ void *scanner, struct parser *parser)
+{
+ size_t depth = 0;
+ while (1) {
+ int ret = yylex(yylval, yylloc, scanner, parser);
+ if (ret == LBRACE) {
+ depth++;
+ continue;
+ }
+
+ if (ret == RBRACE && depth > 0)
+ depth--;
+
+ if (ret == RBRACE && depth == 0)
+ return 0;
+
+ if (ret == SEMICOLON && depth == 0)
+ return 0;
+
+ /* return fatal error and parser should abort */
+ if (ret == YYEOF)
+ /* some error for unmatched braces would be cool I think */
+ return 1;
+ }
+}
+
+
+static struct src_loc src_loc(YYLTYPE yylloc)
+{
+ struct src_loc loc;
+ loc.first_line = yylloc.first_line;
+ loc.last_line = yylloc.last_line;
+ loc.first_col = yylloc.first_column;
+ loc.last_col = yylloc.last_column;
+ return loc;
+}
+
+static void yyerror(YYLTYPE *yylloc, void *lexer,
+ struct parser *parser, const char *msg)
+{
+ (void)lexer;
+
+ struct src_issue issue;
+ issue.level = SRC_ERROR;
+ issue.loc = src_loc(*yylloc);
+ issue.fctx.fbuf = parser->buf;
+ issue.fctx.fname = parser->fname;
+ src_issue(issue, msg);
+}
+
+static char match_escape(char c)
+{
+ switch (c) {
+ case '\'': return '\'';
+ case '\\': return '\\';
+ case 'a': return '\a';
+ case 'b': return '\b';
+ case 'f': return '\f';
+ case 'n': return '\n';
+ case 'r': return '\r';
+ case 't': return '\t';
+ case 'v': return '\v';
+ }
+
+ return c;
+}
+
+static char *strip(const char *str)
+{
+ const size_t len = strlen(str) + 1;
+ char *buf = malloc(len);
+ if (!buf) {
+ /* should probably try to handle the error in some way... */
+ internal_error("failed allocating buffer for string clone");
+ free((void *)str);
+ return NULL;
+ }
+
+ /* skip quotation marks */
+ size_t j = 0;
+ for (size_t i = 1; i < len - 2; ++i) {
+ char c = str[i];
+
+ if (c == '\\')
+ c = match_escape(str[++i]);
+
+ buf[j++] = c;
+ }
+
+ buf[j] = 0;
+ free((void *)str);
+ return buf;
+
+}
+
+struct parser *create_parser()
+{
+ return calloc(1, sizeof(struct parser));
+}
+
+void destroy_parser(struct parser *p)
+{
+ yylex_destroy(p->lexer);
+ free(p);
+}
+
+void parse(struct parser *p, const char *fname, const char *buf)
+{
+ p->fname = fname;
+ p->buf = buf;
+
+ p->comment_nesting = 0;
+
+ p->failed = false;
+
+ yylex_init(&p->lexer);
+ yy_scan_string(buf, p->lexer);
+ yyparse(p->lexer, p);
+}