diff options
Diffstat (limited to 'src/parser.y')
-rw-r--r-- | src/parser.y | 460 |
1 files changed, 460 insertions, 0 deletions
diff --git a/src/parser.y b/src/parser.y new file mode 100644 index 0000000..ccff211 --- /dev/null +++ b/src/parser.y @@ -0,0 +1,460 @@ +/* SPDX-License-Identifier: copyleft-next-0.3.1 */ +/* Copyright 2024 Kim Kuparinen < kimi.h.kuparinen@gmail.com > */ + +%{ + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <assert.h> + +#include <fwd/parser.h> + +%} + +%locations + +%define parse.trace +%define parse.error verbose +%define api.pure full +%define lr.type ielr + +%lex-param {void *scanner} {struct parser *parser} +%parse-param {void *scanner} {struct parser* parser} + +%union { + struct ast *node; + struct type *type; + double dbl; + long long integer; + char *str; +}; + +%token <integer> INT +%token <integer> CHAR +%token <integer> BOOL +%token <dbl> FLOAT +%token <str> STRING +%token <str> ID +%token <str> APPLY + +%token QUESTION "?" +%token SQUOTE "'" +%token TO "=" +%token ELLIPSIS "..." +%token SEMICOLON ";" +%token COLON ":" +%token BANG "!" +%token SIZEOF "sizeof" +%token STAR "*" +%token DIV "/" +%token REM "%" +%token MINUS "-" +%token PLUS "+" +%token XOR "^" +%token AND "&" +%token TILDE "~" +%token LT "<" +%token GT ">" +%token LE "<=" +%token GE ">=" +%token NE "!=" +%token EQ "==" +%token LSHIFT "<<" +%token RSHIFT ">>" +%token COMMA "," +%token LPAREN "(" +%token RPAREN ")" +%token LBRACE "{" +%token RBRACE "}" +%token LBRACKET "[" +%token RBRACKET "]" +%token AS "as" +%token DOT "." +%token SCOPE "::" +%token FATARROW "=>" + +%right "[" "]" +/* precedence */ +%left "," +%right "=" "+=" "-=" "*=" "/=" "%=" "<<=" ">>=" +%left "==" "!=" +%left "<" ">" "<=" ">=" +%left "^" +%left "&" +%left "<<" ">>" +%left "+" "-" +%left "*" "/" "%" +%left "as" "sizeof" +%right "'" "!" "~" +%left "." "=>" "(" ")" +%left "::" + +%nterm <node> top unit proc call closure var expr statement body +%nterm <node> vars exprs statements closures +%nterm <node> opt_vars opt_exprs opt_statements +%nterm <node> rev_vars rev_exprs rev_closures rev_statements +%nterm <node> let unop binop construct + +%nterm <type> type rev_types types opt_types + + +%{ + +/** Modifies the signature of yylex to fit our parser better. */ +#define YY_DECL int yylex(YYSTYPE *yylval, YYLTYPE *yylloc, \ + void *yyscanner, struct parser *parser) + +/** + * Declare yylex. + * + * @param yylval Bison current value. + * @param yylloc Bison location info. + * @param yyscanner Flex scanner. + * @param parser Current parser state. + * @return \c 0 when succesful, \c 1 otherwise. + * More info on yylex() can be found in the flex manual. + */ +YY_DECL; + +/** + * Gobble tokens until we reach the next interesting feature. + * Interesting features are generally new statements. + * Mainly intended for trying to get to a sensible + * location to continue parser after an error has occured. + * + * @param yylval Current parser value. + * @param yylloc Parser location info. + * @param scanner Lex scanner. + * @param parser Current parser. + * @return \c 0 on success, non-zero otherwise. + */ +static int next_interesting_feature(YYSTYPE *yylval, YYLTYPE *yylloc, + void *scanner, struct parser *parser); + +/** + * Convert bison location info to our own source location info. + * + * @param yylloc Bison location info. + * @return Internal location info. + */ +static struct src_loc src_loc(YYLTYPE yylloc); + +/** + * Print parsing error. + * Automatically called by bison. + * + * @param yylloc Location of error. + * @param lexer Lexer. + * @param parser Parser state. + * @param msg Message to print. + */ +static void yyerror(YYLTYPE *yylloc, void *lexer, + struct parser *parser, const char *msg); + +/** + * Try to convert escape code to its actual value. + * I.e. '\n' -> 0x0a. + * + * @param c Escape character without backslash. + * @return Corresponding value. + */ +static char match_escape(char c); + +/** + * Similar to strdup() but skips quotation marks that would + * otherwise be included. + * I.e. "something" -> something. + * + * @param s String to clone, with quotation marks surrounding it. + * @return Identical string but without quotation marks around it. + */ +static char *strip(const char *s); + +%} + +%start input; +%% +var + : ID { $$ = gen_var($1, src_loc(@$)); } + +rev_vars + : rev_vars "," var { $$ = $3; $$->n = $1; } + | var + +vars + : rev_vars { $$ = reverse_ast_list($1); } + | rev_vars "," { $$ = reverse_ast_list($1); } + +opt_vars + : vars + | {$$ = NULL;} + +proc + : ID "(" opt_vars ")" body { + $$ = gen_proc($[ID], $[opt_vars], NULL, $[body], src_loc(@$)); + } + +binop + : expr "+" expr { $$ = gen_binop(AST_ADD, $1, $3, src_loc(@$)); } + | expr "-" expr { $$ = gen_binop(AST_SUB, $1, $3, src_loc(@$)); } + | expr "*" expr { $$ = gen_binop(AST_MUL, $1, $3, src_loc(@$)); } + | expr "/" expr { $$ = gen_binop(AST_DIV, $1, $3, src_loc(@$)); } + | expr "%" expr { $$ = gen_binop(AST_REM, $1, $3, src_loc(@$)); } + | expr "<" expr { $$ = gen_comparison(AST_LT, $1, $3, src_loc(@$)); } + | expr ">" expr { $$ = gen_comparison(AST_GT, $1, $3, src_loc(@$)); } + | expr "<<" expr { $$ = gen_binop(AST_LSHIFT, $1, $3, src_loc(@$)); } + | expr ">>" expr { $$ = gen_binop(AST_RSHIFT, $1, $3, src_loc(@$)); } + | expr "<=" expr { $$ = gen_comparison(AST_LE, $1, $3, src_loc(@$)); } + | expr ">=" expr { $$ = gen_comparison(AST_GE, $1, $3, src_loc(@$)); } + | expr "!=" expr { $$ = gen_comparison(AST_NE, $1, $3, src_loc(@$)); } + | expr "==" expr { $$ = gen_comparison(AST_EQ, $1, $3, src_loc(@$)); } + +unop + : "-" expr { $$ = gen_unop(AST_NEG, $2, src_loc(@$)); } + | "!" expr { $$ = gen_unop(AST_LNOT, $2, src_loc(@$)); } + +type + : ID { $$ = tgen_id($[ID], src_loc(@$)); } + | APPLY "[" opt_types "]" { + $$ = tgen_construct($[APPLY], $[opt_types], src_loc(@$)); + } + +rev_types + : rev_types "," type { $$ = $3; $3->n = $$; } + | type + +types + : rev_types { $$ = reverse_type_list($1); } + +opt_types + : types + | { $$ = NULL; } + +construct + : APPLY "{" opt_exprs "}" { + $$ = gen_init($[APPLY], NULL, $[opt_exprs], src_loc(@$)); + } + | APPLY "[" opt_types "]" "{" opt_exprs "}" { + $$ = gen_init($[APPLY], $[opt_types], $[opt_exprs], src_loc(@$)); + } + +expr + : expr "." ID { $$ = gen_dot($3, $1, src_loc(@$)); } + | STRING { $$ = gen_const_str(strip($1), src_loc(@$)); } + | FLOAT { $$ = gen_const_float($1, src_loc(@$)); } + | BOOL { $$ = gen_const_bool($1, src_loc(@$)); } + | CHAR { $$ = gen_const_char($1, src_loc(@$)); } + | INT { $$ = gen_const_int($1, src_loc(@$)); } + | ID { $$ = gen_id($1, src_loc(@$)); } + | "(" expr ")" { $$ = $2; } + | construct + | binop + | unop + +rev_exprs + : rev_exprs "," expr { $$ = $3; $3->n = $1; } + | expr + +exprs + : rev_exprs { $$ = reverse_ast_list($1); } + +opt_exprs + : exprs + | { $$ = NULL; } + +closure + : "=>" opt_vars body { $$ = gen_closure($[opt_vars], $[body], src_loc(@$)); } + +rev_closures + : rev_closures closure { $$ = $2; $2->n = $1; } + | closure + +closures + : rev_closures { $$ = reverse_ast_list($1); } + +call + : expr "(" opt_exprs ")" ";" { + $$ = gen_call($1, $[opt_exprs], src_loc(@$)); + } + | expr "(" opt_exprs ")" "=>" opt_vars ";" { + /* the rest of the function body is our closure, gets fixed up + * later */ + struct ast *closure = gen_closure($[opt_vars], NULL, src_loc(@$)); + ast_append(&$[opt_exprs], closure); + $$ = gen_call($[expr], $[opt_exprs], src_loc(@$)); + } + | expr "(" opt_exprs ")" closures { + ast_append(&$[opt_exprs], $[closures]); + $$ = gen_call($[expr], $[opt_exprs], src_loc(@$)); + } + +let + : expr "=>" ID ";" { $$ = gen_let($3, $1, src_loc(@$)); } + +statement + : call + | let + | ";" { $$ = gen_empty(src_loc(@$)); } + +rev_statements + : rev_statements statement { $$ = $2; $2->n = $1; } + | statement + +statements + : rev_statements { $$ = reverse_ast_list($1); fix_closures($$); } + +opt_statements + : statements + | { $$ = NULL; } + +body + : "{" opt_statements "}" { $$ = gen_block($2, src_loc(@$)); } + +top + : proc + | error { + $$ = gen_empty(src_loc(@$)); + parser->failed = true; + /* ignore any content inside a top level thing and just move onto + * the next one */ + if (next_interesting_feature(&yylval, &yylloc, scanner, parser)) { + YYABORT; + } + yyclearin; + yyerrok; + } + +unit + : top { $$ = $1; } + | unit top { $$ = $2; $2->n = $1; } + +input + : unit { parser->tree = reverse_ast_list($1); } + | /* empty */ + +%% + +#include "gen_lexer.inc" + +/* I'm not convinced this is foolproof quite yet, more testing would be nice. */ +static int next_interesting_feature(YYSTYPE *yylval, YYLTYPE *yylloc, + void *scanner, struct parser *parser) +{ + size_t depth = 0; + while (1) { + int ret = yylex(yylval, yylloc, scanner, parser); + if (ret == LBRACE) { + depth++; + continue; + } + + if (ret == RBRACE && depth > 0) + depth--; + + if (ret == RBRACE && depth == 0) + return 0; + + if (ret == SEMICOLON && depth == 0) + return 0; + + /* return fatal error and parser should abort */ + if (ret == YYEOF) + /* some error for unmatched braces would be cool I think */ + return 1; + } +} + + +static struct src_loc src_loc(YYLTYPE yylloc) +{ + struct src_loc loc; + loc.first_line = yylloc.first_line; + loc.last_line = yylloc.last_line; + loc.first_col = yylloc.first_column; + loc.last_col = yylloc.last_column; + return loc; +} + +static void yyerror(YYLTYPE *yylloc, void *lexer, + struct parser *parser, const char *msg) +{ + (void)lexer; + + struct src_issue issue; + issue.level = SRC_ERROR; + issue.loc = src_loc(*yylloc); + issue.fctx.fbuf = parser->buf; + issue.fctx.fname = parser->fname; + src_issue(issue, msg); +} + +static char match_escape(char c) +{ + switch (c) { + case '\'': return '\''; + case '\\': return '\\'; + case 'a': return '\a'; + case 'b': return '\b'; + case 'f': return '\f'; + case 'n': return '\n'; + case 'r': return '\r'; + case 't': return '\t'; + case 'v': return '\v'; + } + + return c; +} + +static char *strip(const char *str) +{ + const size_t len = strlen(str) + 1; + char *buf = malloc(len); + if (!buf) { + /* should probably try to handle the error in some way... */ + internal_error("failed allocating buffer for string clone"); + free((void *)str); + return NULL; + } + + /* skip quotation marks */ + size_t j = 0; + for (size_t i = 1; i < len - 2; ++i) { + char c = str[i]; + + if (c == '\\') + c = match_escape(str[++i]); + + buf[j++] = c; + } + + buf[j] = 0; + free((void *)str); + return buf; + +} + +struct parser *create_parser() +{ + return calloc(1, sizeof(struct parser)); +} + +void destroy_parser(struct parser *p) +{ + yylex_destroy(p->lexer); + free(p); +} + +void parse(struct parser *p, const char *fname, const char *buf) +{ + p->fname = fname; + p->buf = buf; + + p->comment_nesting = 0; + + p->failed = false; + + yylex_init(&p->lexer); + yy_scan_string(buf, p->lexer); + yyparse(p->lexer, p); +} |