/* SPDX-License-Identifier: copyleft-next-0.3.1 */ /* Copyright 2024 Kim Kuparinen < kimi.h.kuparinen@gmail.com > */ %{ #include #include #include #include #include %} %locations %define parse.trace %define parse.error verbose %define api.pure full %define lr.type ielr %lex-param {void *scanner} {struct parser *parser} %parse-param {void *scanner} {struct parser* parser} %union { struct ast *node; struct type *type; double dbl; long long integer; char *str; }; %token INT %token CHAR %token BOOL %token FLOAT %token STRING %token ID %token APPLY %token QUESTION "?" %token SQUOTE "'" %token TO "=" %token ELLIPSIS "..." %token SEMICOLON ";" %token COLON ":" %token BANG "!" %token SIZEOF "sizeof" %token STAR "*" %token DIV "/" %token REM "%" %token MINUS "-" %token PLUS "+" %token XOR "^" %token AND "&" %token BAR "|" %token TILDE "~" %token LT "<" %token GT ">" %token LE "<=" %token GE ">=" %token NE "!=" %token EQ "==" %token LSHIFT "<<" %token RSHIFT ">>" %token COMMA "," %token LPAREN "(" %token RPAREN ")" %token LBRACE "{" %token RBRACE "}" %token LBRACKET "[" %token RBRACKET "]" %token IF "if" %token ELSE "else" %token NIL "nil" %token PUB "pub" %token MUT "mut" %token CONTINUE "continue" %token IMPORT "import" %token ERROR "error" %token DOT "." %token SCOPE "::" %token FATARROW "=>" %right "[" "]" /* precedence */ %left "," %right "=" "+=" "-=" "*=" "/=" "%=" "<<=" ">>=" %left "==" "!=" %left "<" ">" "<=" ">=" %left "^" %left "&" %left "<<" ">>" %left "+" "-" %left "*" "/" "%" %left "as" "sizeof" %right "'" "!" "~" %left "." "=>" "(" ")" %left "::" %nterm top unit proc proc_decl call closure var expr statement body %nterm vars exprs statements closures trailing_closure %nterm opt_vars opt_exprs opt_statements opt_trailing_closure %nterm rev_vars rev_exprs rev_closures rev_statements %nterm let if nil unop binop construct import %nterm struct struct_cont trait %nterm type_param type_params opt_type_params %nterm behaviour behaviours opt_behaviours %nterm member members opt_members %nterm type rev_types types opt_types %{ /** Modifies the signature of yylex to fit our parser better. */ #define YY_DECL int yylex(YYSTYPE *yylval, YYLTYPE *yylloc, \ void *yyscanner, struct parser *parser) /** * Declare yylex. * * @param yylval Bison current value. * @param yylloc Bison location info. * @param yyscanner Flex scanner. * @param parser Current parser state. * @return \c 0 when succesful, \c 1 otherwise. * More info on yylex() can be found in the flex manual. */ YY_DECL; /** * Gobble tokens until we reach the next interesting feature. * Interesting features are generally new statements. * Mainly intended for trying to get to a sensible * location to continue parser after an error has occured. * * @param yylval Current parser value. * @param yylloc Parser location info. * @param scanner Lex scanner. * @param parser Current parser. * @return \c 0 on success, non-zero otherwise. */ static int next_interesting_feature(YYSTYPE *yylval, YYLTYPE *yylloc, void *scanner, struct parser *parser); /** * Convert bison location info to our own source location info. * * @param yylloc Bison location info. * @return Internal location info. */ static struct src_loc src_loc(YYLTYPE yylloc); /** * Print parsing error. * Automatically called by bison. * * @param yylloc Location of error. * @param lexer Lexer. * @param parser Parser state. * @param msg Message to print. */ static void yyerror(YYLTYPE *yylloc, void *lexer, struct parser *parser, const char *msg); /** * Try to convert escape code to its actual value. * I.e. '\n' -> 0x0a. * * @param c Escape character without backslash. * @return Corresponding value. */ static char match_escape(char c); /** * Similar to strdup() but skips quotation marks that would * otherwise be included. * I.e. "something" -> something. * * @param s String to clone, with quotation marks surrounding it. * @return Identical string but without quotation marks around it. */ static char *strip(const char *s); %} %start input; %% var : type ID { $$ = gen_var($2, $1, src_loc(@$)); } rev_vars : rev_vars "," var { $$ = $3; $$->n = $1; } | rev_vars "|" var { $$ = $3; $$->n = $1; opt_group($1, $3); } | var vars : rev_vars { $$ = reverse_ast_list($1); } | rev_vars "," { $$ = reverse_ast_list($1); } opt_vars : vars | {$$ = NULL;} proc : ID "(" opt_vars ")" body { $$ = gen_proc($[ID], $[opt_vars], NULL, $[body], src_loc(@$)); } proc_decl : ID "(" opt_vars ")" ";" { $$ = gen_proc($[ID], $[opt_vars], NULL, NULL, src_loc(@$)); } binop : expr "+" expr { $$ = gen_binop(AST_ADD, $1, $3, src_loc(@$)); } | expr "-" expr { $$ = gen_binop(AST_SUB, $1, $3, src_loc(@$)); } | expr "*" expr { $$ = gen_binop(AST_MUL, $1, $3, src_loc(@$)); } | expr "/" expr { $$ = gen_binop(AST_DIV, $1, $3, src_loc(@$)); } | expr "%" expr { $$ = gen_binop(AST_REM, $1, $3, src_loc(@$)); } | expr "<" expr { $$ = gen_comparison(AST_LT, $1, $3, src_loc(@$)); } | expr ">" expr { $$ = gen_comparison(AST_GT, $1, $3, src_loc(@$)); } | expr "<<" expr { $$ = gen_binop(AST_LSHIFT, $1, $3, src_loc(@$)); } | expr ">>" expr { $$ = gen_binop(AST_RSHIFT, $1, $3, src_loc(@$)); } | expr "<=" expr { $$ = gen_comparison(AST_LE, $1, $3, src_loc(@$)); } | expr ">=" expr { $$ = gen_comparison(AST_GE, $1, $3, src_loc(@$)); } | expr "!=" expr { $$ = gen_comparison(AST_NE, $1, $3, src_loc(@$)); } | expr "==" expr { $$ = gen_comparison(AST_EQ, $1, $3, src_loc(@$)); } unop : "-" expr { $$ = gen_unop(AST_NEG, $2, src_loc(@$)); } | "!" expr { $$ = gen_unop(AST_LNOT, $2, src_loc(@$)); } type : ID { $$ = tgen_id($[ID], src_loc(@$)); } | APPLY "[" opt_types "]" { $$ = tgen_construct($[APPLY], $[opt_types], src_loc(@$)); } | "(" opt_types ")" { $$ = tgen_closure($2, src_loc(@$)); } | "&&" type { if ($2->k == TYPE_CLOSURE) { $$ = $2; $$->k = TYPE_PURE_CLOSURE; } else { $$ = tgen_ref($2, src_loc(@$)); } /* heh */ $$ = tgen_ref($$, src_loc(@$)); } | "&" type { if ($2->k == TYPE_CLOSURE) { $$ = $2; $$->k = TYPE_PURE_CLOSURE; } else { $$ = tgen_ref($2, src_loc(@$)); } } | "*" type { if ($2->k == TYPE_CLOSURE) { $$ = $2; $$->k = TYPE_FUNC_PTR; } else { $$ = tgen_ptr($2, src_loc(@$)); } } rev_types : rev_types "," type { $$ = $3; $$->n = $1; } | type types : rev_types { $$ = reverse_type_list($1); } opt_types : types | { $$ = NULL; } construct : APPLY "{" opt_exprs "}" { $$ = gen_init($[APPLY], NULL, $[opt_exprs], src_loc(@$)); } | APPLY "[" opt_types "]" "{" opt_exprs "}" { $$ = gen_init($[APPLY], $[opt_types], $[opt_exprs], src_loc(@$)); } expr : expr "." ID { $$ = gen_dot($3, $1, src_loc(@$)); } | STRING { $$ = gen_const_str(strip($1), src_loc(@$)); } | FLOAT { $$ = gen_const_float($1, src_loc(@$)); } | BOOL { $$ = gen_const_bool($1, src_loc(@$)); } | CHAR { $$ = gen_const_char($1, src_loc(@$)); } | INT { $$ = gen_const_int($1, src_loc(@$)); } | ID { $$ = gen_id($1, src_loc(@$)); } | "(" expr ")" { $$ = $2; } | expr "&" { $$ = gen_ref($1, src_loc(@$)); } | expr "*" { $$ = gen_deref($1, src_loc(@$)); } | construct | binop | unop rev_exprs : rev_exprs "," expr { $$ = $3; $3->n = $1; } | expr exprs : rev_exprs { $$ = reverse_ast_list($1); } opt_exprs : exprs | { $$ = NULL; } closure : "=>" opt_vars body { $$ = gen_closure($[opt_vars], $[body], src_loc(@$)); } | "&" "=>" opt_vars body { $$ = gen_closure($[opt_vars], $[body], src_loc(@$)); ast_set_flags($$, AST_FLAG_NOMOVES); } rev_closures : rev_closures closure { $$ = $2; $2->n = $1; } | closure trailing_closure : "=>" opt_vars ";" { $$ = gen_closure($[opt_vars], NULL, src_loc(@$));} | "&" "=>" opt_vars ";" { $$ = gen_closure($[opt_vars], NULL, src_loc(@$)); ast_set_flags($$, AST_FLAG_NOMOVES); } opt_trailing_closure : trailing_closure | { $$ = NULL; } closures : rev_closures opt_trailing_closure { if ($[opt_trailing_closure]) { $[opt_trailing_closure]->n = $[rev_closures]; $$ = reverse_ast_list($[opt_trailing_closure]); } else { $$ = reverse_ast_list($[rev_closures]); } } call : expr "(" opt_exprs ")" ";" { $$ = gen_call($1, $[opt_exprs], src_loc(@$)); } | expr "(" opt_exprs ")" "=>" opt_vars ";" { /* the rest of the function body is our closure, gets fixed up * later */ struct ast *closure = gen_closure($[opt_vars], NULL, src_loc(@$)); ast_append(&$[opt_exprs], closure); $$ = gen_call($[expr], $[opt_exprs], src_loc(@$)); } | expr "(" opt_exprs ")" closures { ast_append(&$[opt_exprs], $[closures]); $$ = gen_call($[expr], $[opt_exprs], src_loc(@$)); } let : expr "=>" var ";" { $$ = gen_let($3, $1, src_loc(@$)); } if : "if" expr body { $$ = gen_if($2, $3, NULL, src_loc(@$)); } | "if" expr body "else" body { $$ = gen_if($2, $3, $5, src_loc(@$)); } | "if" expr body "else" if { $$ = gen_if($2, $3, $5, src_loc(@$)); } nil : "nil" ID body "=>" var { $$ = gen_nil($2, $3, $5, src_loc(@$)); } statement : call | body | let | nil | if | ";" { $$ = gen_empty(src_loc(@$)); } rev_statements : rev_statements statement { $$ = $2; $2->n = $1; } | statement statements : rev_statements { $$ = reverse_ast_list($1); fix_closures($$); } opt_statements : statements | { $$ = NULL; } body : "{" opt_statements "}" { $$ = gen_block($2, src_loc(@$)); } behaviour : APPLY ";" { $$ = gen_trait_apply($1, src_loc(@$)); } | proc_decl ";" | proc behaviours : behaviours behaviour { $$ = $1; $1->n = $2; } | behaviour opt_behaviours : behaviours | { $$ = NULL; } member : var ";" | behaviour members : members member { $$ = $1; $1->n = $2; } | member opt_members : members | { $$ = NULL; } type_param : ID ID { struct type *t = tgen_id($1, src_loc(@1)); $$ = gen_var($2, t, src_loc(@$)); } type_params : type_param "," type_params { $$ = $1; $1->n = $3; } | type_param opt_type_params : type_params | { $$ = NULL; } struct : ID "[" opt_type_params "]" "{" opt_members "}" { $$ = gen_struct($1, $3, $6, src_loc(@$)); } struct_cont : "continue" ID "[" opt_type_params "]" "{" opt_behaviours "}" { $$ = gen_struct_cont($2, $4, $7, src_loc(@$)); } trait : ID "{" opt_behaviours "}" { $$ = gen_trait($1, $3, src_loc(@$)); } import : "import" STRING { $$ = gen_import(strip($2), src_loc(@$)); } top : proc | struct | struct_cont | import | trait | "pub" proc { $$ = $2; ast_set_flags($$, AST_FLAG_PUBLIC); } | "pub" struct { $$ = $2; ast_set_flags($$, AST_FLAG_PUBLIC); } | "pub" struct_cont { $$ = $2; ast_set_flags($$, AST_FLAG_PUBLIC); } | "pub" import { $$ = $2; ast_set_flags($$, AST_FLAG_PUBLIC); } | "pub" trait { $$ = $2; ast_set_flags($$, AST_FLAG_PUBLIC); } | error { $$ = gen_empty(src_loc(@$)); parser->failed = true; /* ignore any content inside a top level thing and just move onto * the next one */ if (next_interesting_feature(&yylval, &yylloc, scanner, parser)) { YYABORT; } yyclearin; yyerrok; } unit : top { $$ = $1; } | unit top { $$ = $2; $2->n = $1; } input : unit { parser->tree = reverse_ast_list($1); } | /* empty */ %% #include "gen_lexer.inc" /* I'm not convinced this is foolproof quite yet, more testing would be nice. */ static int next_interesting_feature(YYSTYPE *yylval, YYLTYPE *yylloc, void *scanner, struct parser *parser) { size_t depth = 0; while (1) { int ret = yylex(yylval, yylloc, scanner, parser); if (ret == LBRACE) { depth++; continue; } if (ret == RBRACE && depth > 0) depth--; if (ret == RBRACE && depth == 0) return 0; if (ret == SEMICOLON && depth == 0) return 0; /* return fatal error and parser should abort */ if (ret == YYEOF) /* some error for unmatched braces would be cool I think */ return 1; } } static struct src_loc src_loc(YYLTYPE yylloc) { struct src_loc loc; loc.first_line = yylloc.first_line; loc.last_line = yylloc.last_line; loc.first_col = yylloc.first_column; loc.last_col = yylloc.last_column; return loc; } static void yyerror(YYLTYPE *yylloc, void *lexer, struct parser *parser, const char *msg) { (void)lexer; struct src_issue issue; issue.level = SRC_ERROR; issue.loc = src_loc(*yylloc); issue.fctx.fbuf = parser->buf; issue.fctx.fname = parser->fname; src_issue(issue, msg); } static char match_escape(char c) { switch (c) { case '\'': return '\''; case '\\': return '\\'; case 'a': return '\a'; case 'b': return '\b'; case 'f': return '\f'; case 'n': return '\n'; case 'r': return '\r'; case 't': return '\t'; case 'v': return '\v'; } return c; } static char *strip(const char *str) { const size_t len = strlen(str) + 1; char *buf = malloc(len); if (!buf) { /* should probably try to handle the error in some way... */ internal_error("failed allocating buffer for string clone"); free((void *)str); return NULL; } /* skip quotation marks */ size_t j = 0; for (size_t i = 1; i < len - 2; ++i) buf[j++] = str[i]; buf[j] = 0; free((void *)str); return buf; } struct parser *create_parser() { return calloc(1, sizeof(struct parser)); } void destroy_parser(struct parser *p) { yylex_destroy(p->lexer); free(p); } void parse(struct parser *p, const char *fname, const char *buf) { p->fname = fname; p->buf = buf; p->comment_nesting = 0; p->failed = false; yylex_init(&p->lexer); yy_scan_string(buf, p->lexer); yyparse(p->lexer, p); }