diff options
Diffstat (limited to 'src/parser.y')
-rw-r--r-- | src/parser.y | 290 |
1 files changed, 290 insertions, 0 deletions
diff --git a/src/parser.y b/src/parser.y new file mode 100644 index 0000000..3e0156f --- /dev/null +++ b/src/parser.y @@ -0,0 +1,290 @@ +/* SPDX-License-Identifier: copyleft-next-0.3.1 */ +/* Copyright 2023 Kim Kuparinen < kimi.h.kuparinen@gmail.com > */ + +%{ + +/* TODO: clean up this mess and I guess fix location tracking, it works for the + * parser but each ast node should also get some location data + * I'm trying something over in ast.c, but I'm not sure about it + */ + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <assert.h> + +#include <lyn/parser.h> +#include <lyn/ast.h> + +%} + +%locations + +%define parse.trace +%define parse.error verbose +%define api.pure full +%define lr.type ielr + +%lex-param {void *scanner} {struct parser *parser} +%parse-param {void *scanner} {struct parser* parser} + +%union { + struct ast *ast; + char *str; + long long integer; + double floating; +}; + +%token <str> STRING +%token <str> ID +%token <integer> INT +%token <floating> FLOAT + +%token LPAREN "(" +%token RPAREN ")" +%token LBRACE "{" +%token RBRACE "}" +%token SEMICOLON ";" +%token NL "nl" + +%nterm <ast> arg args rev_args +%nterm <ast> cmd cmds rev_cmds + +%{ + +/** Modifies the signature of yylex to fit our parser better. */ +#define YY_DECL int yylex(YYSTYPE *yylval, YYLTYPE *yylloc, \ + void *yyscanner, struct parser *parser) + +/** + * Declare yylex. + * + * @param yylval Bison current value. + * @param yylloc Bison location info. + * @param yyscanner Flex scanner. + * @param parser Current parser state. + * @return \c 0 when succesful, \c 1 otherwise. + * More info on yylex() can be found in the flex manual. + */ +YY_DECL; + +/** + * Gobble tokens until we reach the next interesting feature. + * Interesting features are generally new statements. + * Mainly intended for trying to get to a sensible + * location to continue parser after an error has occured. + * + * @param yylval Current parser value. + * @param yylloc Parser location info. + * @param scanner Lex scanner. + * @param parser Current parser. + * @return \c 0 on success, non-zero otherwise. + */ +static int next_interesting_feature(YYSTYPE *yylval, YYLTYPE *yylloc, + void *scanner, struct parser *parser); + +/** + * Convert bison location info to our own source location info. + * + * @param yylloc Bison location info. + * @return Internal location info. + */ +static struct src_loc src_loc(YYLTYPE yylloc); + +/** + * Print parsing error. + * Automatically called by bison. + * + * @param yylloc Location of error. + * @param lexer Lexer. + * @param parser Parser state. + * @param msg Message to print. + */ +static void yyerror(YYLTYPE *yylloc, void *lexer, + struct parser *parser, const char *msg); + +/** + * Try to convert escape code to its actual value. + * I.e. '\n' -> 0x0a. + * + * @param c Escape character without backslash. + * @return Corresponding value. + */ +static char match_escape(char c); + +/** + * Similar to strdup() but skips quotation marks that would + * otherwise be included. + * I.e. "something" -> something. + * + * @param s String to clone, with quotation marks surrounding it. + * @return Identical string but without quotation marks around it. + */ +static char *strip(const char *s); + +%} + +%start input; +%% + +arg + : "(" cmds ")" {$$ = gen_apply($2);} + | "{" cmds "}" {$$ = gen_list($2);} + | ID {$$ = gen_id($1);} + | STRING {$$ = gen_str($1);} + | INT {$$ = gen_int($1);} + | FLOAT {$$ = gen_float($1);} + +rev_args + : rev_args arg {$$ = $2; $$->next = $1;} + | arg + +args + : rev_args {$$ = reverse_ast_list($1);} + +sep + : sep ";" + | sep NL + | ";" + | NL + +cmd + : args {$$ = gen_cmd($1);} + +rev_cmds + : rev_cmds sep cmd {$$ = $3; $$->next = $1;} + | cmd + +cmds + : rev_cmds {$$ = reverse_ast_list($1);} + | rev_cmds sep {$$ = reverse_ast_list($1);} + | sep rev_cmds {$$ = reverse_ast_list($2);} + | sep rev_cmds sep {$$ = reverse_ast_list($2);} + | {$$ = NULL;} + +input + : cmds {parser->tree = gen_list($1);} + +%% + +#include "gen_lexer.inc" + +/* I'm not convinced this is foolproof quite yet, more testing would be nice. */ +static int next_interesting_feature(YYSTYPE *yylval, YYLTYPE *yylloc, + void *scanner, struct parser *parser) +{ + size_t depth = 0; + while (1) { + int ret = yylex(yylval, yylloc, scanner, parser); + if (ret == LBRACE) { + depth++; + continue; + } + + if (ret == RBRACE && depth > 0) + depth--; + + if (ret == RBRACE && depth == 0) + return 0; + + if (ret == SEMICOLON && depth == 0) + return 0; + + /* return fatal error and parser should abort */ + if (ret == YYEOF) + /* some error for unmatched braces would be cool I think */ + return 1; + } +} + + +static struct src_loc src_loc(YYLTYPE yylloc) +{ + struct src_loc loc; + loc.first_line = yylloc.first_line; + loc.last_line = yylloc.last_line; + loc.first_col = yylloc.first_column; + loc.last_col = yylloc.last_column; + return loc; +} + +static void yyerror(YYLTYPE *yylloc, void *lexer, + struct parser *parser, const char *msg) +{ + (void)lexer; + + struct src_issue issue; + issue.level = SRC_ERROR; + issue.loc = src_loc(*yylloc); + issue.fctx.fbuf = parser->buf; + issue.fctx.fname = parser->fname; + src_issue(issue, msg); +} + +static char match_escape(char c) +{ + switch (c) { + case '\'': return '\''; + case '\\': return '\\'; + case 'a': return '\a'; + case 'b': return '\b'; + case 'f': return '\f'; + case 'n': return '\n'; + case 'r': return '\r'; + case 't': return '\t'; + case 'v': return '\v'; + } + + return c; +} + +static char *strip(const char *str) +{ + const size_t len = strlen(str) + 1; + char *buf = malloc(len); + if (!buf) { + /* should probably try to handle the error in some way... */ + error("failed allocating buffer for string clone"); + free((void *)str); + return NULL; + } + + /* skip quotation marks */ + size_t j = 0; + for (size_t i = 1; i < len - 2; ++i) { + char c = str[i]; + + if (c == '\\') + c = match_escape(str[++i]); + + buf[j++] = c; + } + + buf[j] = 0; + free((void *)str); + return buf; + +} + +struct parser *create_parser() +{ + return calloc(1, sizeof(struct parser)); +} + +void destroy_parser(struct parser *p) +{ + yylex_destroy(p->lexer); + free(p); +} + +void parse(struct parser *p, const char *fname, const char *buf) +{ + p->fname = fname; + p->buf = buf; + + p->failed = false; + + yylex_init(&p->lexer); + yy_scan_string(buf, p->lexer); + yyparse(p->lexer, p); +} |