aboutsummaryrefslogtreecommitdiff
path: root/src/parser.y
diff options
context:
space:
mode:
Diffstat (limited to 'src/parser.y')
-rw-r--r--src/parser.y460
1 files changed, 460 insertions, 0 deletions
diff --git a/src/parser.y b/src/parser.y
new file mode 100644
index 0000000..ccff211
--- /dev/null
+++ b/src/parser.y
@@ -0,0 +1,460 @@
+/* SPDX-License-Identifier: copyleft-next-0.3.1 */
+/* Copyright 2024 Kim Kuparinen < kimi.h.kuparinen@gmail.com > */
+
+%{
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <assert.h>
+
+#include <fwd/parser.h>
+
+%}
+
+%locations
+
+%define parse.trace
+%define parse.error verbose
+%define api.pure full
+%define lr.type ielr
+
+%lex-param {void *scanner} {struct parser *parser}
+%parse-param {void *scanner} {struct parser* parser}
+
+%union {
+ struct ast *node;
+ struct type *type;
+ double dbl;
+ long long integer;
+ char *str;
+};
+
+%token <integer> INT
+%token <integer> CHAR
+%token <integer> BOOL
+%token <dbl> FLOAT
+%token <str> STRING
+%token <str> ID
+%token <str> APPLY
+
+%token QUESTION "?"
+%token SQUOTE "'"
+%token TO "="
+%token ELLIPSIS "..."
+%token SEMICOLON ";"
+%token COLON ":"
+%token BANG "!"
+%token SIZEOF "sizeof"
+%token STAR "*"
+%token DIV "/"
+%token REM "%"
+%token MINUS "-"
+%token PLUS "+"
+%token XOR "^"
+%token AND "&"
+%token TILDE "~"
+%token LT "<"
+%token GT ">"
+%token LE "<="
+%token GE ">="
+%token NE "!="
+%token EQ "=="
+%token LSHIFT "<<"
+%token RSHIFT ">>"
+%token COMMA ","
+%token LPAREN "("
+%token RPAREN ")"
+%token LBRACE "{"
+%token RBRACE "}"
+%token LBRACKET "["
+%token RBRACKET "]"
+%token AS "as"
+%token DOT "."
+%token SCOPE "::"
+%token FATARROW "=>"
+
+%right "[" "]"
+/* precedence */
+%left ","
+%right "=" "+=" "-=" "*=" "/=" "%=" "<<=" ">>="
+%left "==" "!="
+%left "<" ">" "<=" ">="
+%left "^"
+%left "&"
+%left "<<" ">>"
+%left "+" "-"
+%left "*" "/" "%"
+%left "as" "sizeof"
+%right "'" "!" "~"
+%left "." "=>" "(" ")"
+%left "::"
+
+%nterm <node> top unit proc call closure var expr statement body
+%nterm <node> vars exprs statements closures
+%nterm <node> opt_vars opt_exprs opt_statements
+%nterm <node> rev_vars rev_exprs rev_closures rev_statements
+%nterm <node> let unop binop construct
+
+%nterm <type> type rev_types types opt_types
+
+
+%{
+
+/** Modifies the signature of yylex to fit our parser better. */
+#define YY_DECL int yylex(YYSTYPE *yylval, YYLTYPE *yylloc, \
+ void *yyscanner, struct parser *parser)
+
+/**
+ * Declare yylex.
+ *
+ * @param yylval Bison current value.
+ * @param yylloc Bison location info.
+ * @param yyscanner Flex scanner.
+ * @param parser Current parser state.
+ * @return \c 0 when succesful, \c 1 otherwise.
+ * More info on yylex() can be found in the flex manual.
+ */
+YY_DECL;
+
+/**
+ * Gobble tokens until we reach the next interesting feature.
+ * Interesting features are generally new statements.
+ * Mainly intended for trying to get to a sensible
+ * location to continue parser after an error has occured.
+ *
+ * @param yylval Current parser value.
+ * @param yylloc Parser location info.
+ * @param scanner Lex scanner.
+ * @param parser Current parser.
+ * @return \c 0 on success, non-zero otherwise.
+ */
+static int next_interesting_feature(YYSTYPE *yylval, YYLTYPE *yylloc,
+ void *scanner, struct parser *parser);
+
+/**
+ * Convert bison location info to our own source location info.
+ *
+ * @param yylloc Bison location info.
+ * @return Internal location info.
+ */
+static struct src_loc src_loc(YYLTYPE yylloc);
+
+/**
+ * Print parsing error.
+ * Automatically called by bison.
+ *
+ * @param yylloc Location of error.
+ * @param lexer Lexer.
+ * @param parser Parser state.
+ * @param msg Message to print.
+ */
+static void yyerror(YYLTYPE *yylloc, void *lexer,
+ struct parser *parser, const char *msg);
+
+/**
+ * Try to convert escape code to its actual value.
+ * I.e. '\n' -> 0x0a.
+ *
+ * @param c Escape character without backslash.
+ * @return Corresponding value.
+ */
+static char match_escape(char c);
+
+/**
+ * Similar to strdup() but skips quotation marks that would
+ * otherwise be included.
+ * I.e. "something" -> something.
+ *
+ * @param s String to clone, with quotation marks surrounding it.
+ * @return Identical string but without quotation marks around it.
+ */
+static char *strip(const char *s);
+
+%}
+
+%start input;
+%%
+var
+ : ID { $$ = gen_var($1, src_loc(@$)); }
+
+rev_vars
+ : rev_vars "," var { $$ = $3; $$->n = $1; }
+ | var
+
+vars
+ : rev_vars { $$ = reverse_ast_list($1); }
+ | rev_vars "," { $$ = reverse_ast_list($1); }
+
+opt_vars
+ : vars
+ | {$$ = NULL;}
+
+proc
+ : ID "(" opt_vars ")" body {
+ $$ = gen_proc($[ID], $[opt_vars], NULL, $[body], src_loc(@$));
+ }
+
+binop
+ : expr "+" expr { $$ = gen_binop(AST_ADD, $1, $3, src_loc(@$)); }
+ | expr "-" expr { $$ = gen_binop(AST_SUB, $1, $3, src_loc(@$)); }
+ | expr "*" expr { $$ = gen_binop(AST_MUL, $1, $3, src_loc(@$)); }
+ | expr "/" expr { $$ = gen_binop(AST_DIV, $1, $3, src_loc(@$)); }
+ | expr "%" expr { $$ = gen_binop(AST_REM, $1, $3, src_loc(@$)); }
+ | expr "<" expr { $$ = gen_comparison(AST_LT, $1, $3, src_loc(@$)); }
+ | expr ">" expr { $$ = gen_comparison(AST_GT, $1, $3, src_loc(@$)); }
+ | expr "<<" expr { $$ = gen_binop(AST_LSHIFT, $1, $3, src_loc(@$)); }
+ | expr ">>" expr { $$ = gen_binop(AST_RSHIFT, $1, $3, src_loc(@$)); }
+ | expr "<=" expr { $$ = gen_comparison(AST_LE, $1, $3, src_loc(@$)); }
+ | expr ">=" expr { $$ = gen_comparison(AST_GE, $1, $3, src_loc(@$)); }
+ | expr "!=" expr { $$ = gen_comparison(AST_NE, $1, $3, src_loc(@$)); }
+ | expr "==" expr { $$ = gen_comparison(AST_EQ, $1, $3, src_loc(@$)); }
+
+unop
+ : "-" expr { $$ = gen_unop(AST_NEG, $2, src_loc(@$)); }
+ | "!" expr { $$ = gen_unop(AST_LNOT, $2, src_loc(@$)); }
+
+type
+ : ID { $$ = tgen_id($[ID], src_loc(@$)); }
+ | APPLY "[" opt_types "]" {
+ $$ = tgen_construct($[APPLY], $[opt_types], src_loc(@$));
+ }
+
+rev_types
+ : rev_types "," type { $$ = $3; $3->n = $$; }
+ | type
+
+types
+ : rev_types { $$ = reverse_type_list($1); }
+
+opt_types
+ : types
+ | { $$ = NULL; }
+
+construct
+ : APPLY "{" opt_exprs "}" {
+ $$ = gen_init($[APPLY], NULL, $[opt_exprs], src_loc(@$));
+ }
+ | APPLY "[" opt_types "]" "{" opt_exprs "}" {
+ $$ = gen_init($[APPLY], $[opt_types], $[opt_exprs], src_loc(@$));
+ }
+
+expr
+ : expr "." ID { $$ = gen_dot($3, $1, src_loc(@$)); }
+ | STRING { $$ = gen_const_str(strip($1), src_loc(@$)); }
+ | FLOAT { $$ = gen_const_float($1, src_loc(@$)); }
+ | BOOL { $$ = gen_const_bool($1, src_loc(@$)); }
+ | CHAR { $$ = gen_const_char($1, src_loc(@$)); }
+ | INT { $$ = gen_const_int($1, src_loc(@$)); }
+ | ID { $$ = gen_id($1, src_loc(@$)); }
+ | "(" expr ")" { $$ = $2; }
+ | construct
+ | binop
+ | unop
+
+rev_exprs
+ : rev_exprs "," expr { $$ = $3; $3->n = $1; }
+ | expr
+
+exprs
+ : rev_exprs { $$ = reverse_ast_list($1); }
+
+opt_exprs
+ : exprs
+ | { $$ = NULL; }
+
+closure
+ : "=>" opt_vars body { $$ = gen_closure($[opt_vars], $[body], src_loc(@$)); }
+
+rev_closures
+ : rev_closures closure { $$ = $2; $2->n = $1; }
+ | closure
+
+closures
+ : rev_closures { $$ = reverse_ast_list($1); }
+
+call
+ : expr "(" opt_exprs ")" ";" {
+ $$ = gen_call($1, $[opt_exprs], src_loc(@$));
+ }
+ | expr "(" opt_exprs ")" "=>" opt_vars ";" {
+ /* the rest of the function body is our closure, gets fixed up
+ * later */
+ struct ast *closure = gen_closure($[opt_vars], NULL, src_loc(@$));
+ ast_append(&$[opt_exprs], closure);
+ $$ = gen_call($[expr], $[opt_exprs], src_loc(@$));
+ }
+ | expr "(" opt_exprs ")" closures {
+ ast_append(&$[opt_exprs], $[closures]);
+ $$ = gen_call($[expr], $[opt_exprs], src_loc(@$));
+ }
+
+let
+ : expr "=>" ID ";" { $$ = gen_let($3, $1, src_loc(@$)); }
+
+statement
+ : call
+ | let
+ | ";" { $$ = gen_empty(src_loc(@$)); }
+
+rev_statements
+ : rev_statements statement { $$ = $2; $2->n = $1; }
+ | statement
+
+statements
+ : rev_statements { $$ = reverse_ast_list($1); fix_closures($$); }
+
+opt_statements
+ : statements
+ | { $$ = NULL; }
+
+body
+ : "{" opt_statements "}" { $$ = gen_block($2, src_loc(@$)); }
+
+top
+ : proc
+ | error {
+ $$ = gen_empty(src_loc(@$));
+ parser->failed = true;
+ /* ignore any content inside a top level thing and just move onto
+ * the next one */
+ if (next_interesting_feature(&yylval, &yylloc, scanner, parser)) {
+ YYABORT;
+ }
+ yyclearin;
+ yyerrok;
+ }
+
+unit
+ : top { $$ = $1; }
+ | unit top { $$ = $2; $2->n = $1; }
+
+input
+ : unit { parser->tree = reverse_ast_list($1); }
+ | /* empty */
+
+%%
+
+#include "gen_lexer.inc"
+
+/* I'm not convinced this is foolproof quite yet, more testing would be nice. */
+static int next_interesting_feature(YYSTYPE *yylval, YYLTYPE *yylloc,
+ void *scanner, struct parser *parser)
+{
+ size_t depth = 0;
+ while (1) {
+ int ret = yylex(yylval, yylloc, scanner, parser);
+ if (ret == LBRACE) {
+ depth++;
+ continue;
+ }
+
+ if (ret == RBRACE && depth > 0)
+ depth--;
+
+ if (ret == RBRACE && depth == 0)
+ return 0;
+
+ if (ret == SEMICOLON && depth == 0)
+ return 0;
+
+ /* return fatal error and parser should abort */
+ if (ret == YYEOF)
+ /* some error for unmatched braces would be cool I think */
+ return 1;
+ }
+}
+
+
+static struct src_loc src_loc(YYLTYPE yylloc)
+{
+ struct src_loc loc;
+ loc.first_line = yylloc.first_line;
+ loc.last_line = yylloc.last_line;
+ loc.first_col = yylloc.first_column;
+ loc.last_col = yylloc.last_column;
+ return loc;
+}
+
+static void yyerror(YYLTYPE *yylloc, void *lexer,
+ struct parser *parser, const char *msg)
+{
+ (void)lexer;
+
+ struct src_issue issue;
+ issue.level = SRC_ERROR;
+ issue.loc = src_loc(*yylloc);
+ issue.fctx.fbuf = parser->buf;
+ issue.fctx.fname = parser->fname;
+ src_issue(issue, msg);
+}
+
+static char match_escape(char c)
+{
+ switch (c) {
+ case '\'': return '\'';
+ case '\\': return '\\';
+ case 'a': return '\a';
+ case 'b': return '\b';
+ case 'f': return '\f';
+ case 'n': return '\n';
+ case 'r': return '\r';
+ case 't': return '\t';
+ case 'v': return '\v';
+ }
+
+ return c;
+}
+
+static char *strip(const char *str)
+{
+ const size_t len = strlen(str) + 1;
+ char *buf = malloc(len);
+ if (!buf) {
+ /* should probably try to handle the error in some way... */
+ internal_error("failed allocating buffer for string clone");
+ free((void *)str);
+ return NULL;
+ }
+
+ /* skip quotation marks */
+ size_t j = 0;
+ for (size_t i = 1; i < len - 2; ++i) {
+ char c = str[i];
+
+ if (c == '\\')
+ c = match_escape(str[++i]);
+
+ buf[j++] = c;
+ }
+
+ buf[j] = 0;
+ free((void *)str);
+ return buf;
+
+}
+
+struct parser *create_parser()
+{
+ return calloc(1, sizeof(struct parser));
+}
+
+void destroy_parser(struct parser *p)
+{
+ yylex_destroy(p->lexer);
+ free(p);
+}
+
+void parse(struct parser *p, const char *fname, const char *buf)
+{
+ p->fname = fname;
+ p->buf = buf;
+
+ p->comment_nesting = 0;
+
+ p->failed = false;
+
+ yylex_init(&p->lexer);
+ yy_scan_string(buf, p->lexer);
+ yyparse(p->lexer, p);
+}