aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorKimplul <kimi.h.kuparinen@gmail.com>2024-10-18 22:35:27 +0300
committerKimplul <kimi.h.kuparinen@gmail.com>2024-10-23 18:25:23 +0300
commit7c5f098511b8f612a17f4ccdd8a4924c325d37e1 (patch)
tree4a97d802557bbffcadfcfd776c8e4afdb31d96aa /src
parent18262dcbecd97591dd15ee9274a81abb8c2ba1c4 (diff)
downloadlyn-7c5f098511b8f612a17f4ccdd8a4924c325d37e1.tar.gz
lyn-7c5f098511b8f612a17f4ccdd8a4924c325d37e1.zip
initial parser+lexer
+ AST might still change, also not a *huge* fan of having to reverse all arrays but I guess it's unlikely to be a significant bottleneck
Diffstat (limited to 'src')
-rw-r--r--src/ast.c53
-rw-r--r--src/debug.c108
-rw-r--r--src/lexer.l88
-rw-r--r--src/lyn.c89
-rw-r--r--src/main.c16
-rw-r--r--src/parser.y290
-rw-r--r--src/source.mk2
7 files changed, 646 insertions, 0 deletions
diff --git a/src/ast.c b/src/ast.c
new file mode 100644
index 0000000..faeae6e
--- /dev/null
+++ b/src/ast.c
@@ -0,0 +1,53 @@
+#include <stdio.h>
+#include <lyn/ast.h>
+
+/**
+ * Reverse a singly linked list of AST nodes in place.
+ *
+ * @param root Head of the list to reverse, may be \c NULL.
+ * @return Head of the reversed list, \c NULL if \p root was \c NULL.
+ */
+struct ast *reverse_ast_list(struct ast *root)
+{
+	struct ast *reversed = NULL;
+	for (struct ast *node = root; node != NULL;) {
+		/* detach the node and push it onto the front of the result */
+		struct ast *rest = node->next;
+		node->next = reversed;
+		reversed = node;
+		node = rest;
+	}
+
+	return reversed;
+}
+
+/*
+ * Print one dump entry: a "//" prefix, two spaces of indentation per
+ * nesting level, then the printf-style formatted text.
+ * depth is parenthesised so that expressions such as `d + 1` scale correctly.
+ */
+#define dump(depth, fmt, ...) \
+	do { \
+		printf("//%*s", 2 * (depth), ""); \
+		printf(fmt, ##__VA_ARGS__); \
+	} while (0)
+
+/**
+ * Print a single AST node, and for container nodes everything below it.
+ *
+ * @param depth Nesting level, controls the indentation of the output.
+ * @param ast Node to print.
+ */
+void ast_dump(int depth, struct ast *ast)
+{
+	switch (ast->kind) {
+	/* leaves print their value and are done */
+	case LYN_ID: dump(depth, "%s\n", ast->s); return;
+	case LYN_STR: dump(depth, "\"%s\"\n", ast->s); return;
+	case LYN_INT: dump(depth, "%lld\n", ast->i); return;
+	case LYN_FLOAT: dump(depth, "%f\n", ast->d); return;
+
+	/* containers print a header, their children follow below */
+	case LYN_CMD: dump(depth, "CMD\n"); break;
+	case LYN_LIST: dump(depth, "LIST\n"); break;
+	case LYN_APPLY: dump(depth, "APPLY\n"); break;
+
+	default: return;
+	}
+
+	ast_dump_list(depth + 1, ast->args);
+}
+
+/**
+ * Print every node of a linked list of AST nodes.
+ *
+ * @param depth Nesting level passed on to ast_dump() for each node.
+ * @param ast First node of the list, may be \c NULL.
+ */
+void ast_dump_list(int depth, struct ast *ast)
+{
+	for (struct ast *node = ast; node != NULL; node = node->next)
+		ast_dump(depth, node);
+}
diff --git a/src/debug.c b/src/debug.c
new file mode 100644
index 0000000..7226640
--- /dev/null
+++ b/src/debug.c
@@ -0,0 +1,108 @@
+#include <stdarg.h>
+#include <string.h>
+
+#include <lyn/debug.h>
+
+/**
+ * Map an issue_level to the label used in diagnostics.
+ *
+ * @param level issue_level to get the label for.
+ * @return \p level as a string, \c "unknown" for unrecognised values.
+ */
+const char *issue_level_str(enum issue_level level)
+{
+	if (level == SRC_INFO)
+		return "info";
+
+	if (level == SRC_WARN)
+		return "warn";
+
+	if (level == SRC_ERROR)
+		return "error";
+
+	return "unknown";
+}
+
+/**
+ * Locate the start of line \p no in a file buffer.
+ * Lines are one-indexed, \p no = \c 0 is treated like \p no = \c 1.
+ * If the buffer has fewer lines than requested, the position of the
+ * terminating null is returned.
+ *
+ * @param buf Null-terminated buffer to look in.
+ * @param no Line number whose start to look for.
+ * @return Pointer into \p buf where line \p no starts.
+ */
+static const char *find_lineno(const char *buf, size_t no)
+{
+	if (no <= 1)
+		return buf;
+
+	/* line N starts right after the (N - 1)th newline */
+	size_t newlines_left = no - 1;
+	while (*buf != '\0' && newlines_left > 0) {
+		if (*buf++ == '\n')
+			newlines_left--;
+	}
+
+	return buf;
+}
+
+/**
+ * Helper for printing out an issue.
+ * Prints a "file:line:col: level: message" header to stderr, followed by
+ * the offending source line and a marker line underlining the location.
+ *
+ * @param issue Issue context.
+ * @param fmt Format string. Follows standard printf() formatting.
+ * @param args Arguments for \p fmt.
+ */
+static void _issue(struct src_issue issue, const char *fmt, va_list args)
+{
+	/* get start and end of current line in buffer */
+	const char *line_start = find_lineno(issue.fctx.fbuf,
+			(size_t)issue.loc.first_line);
+	const char *line_end = strchr(line_start, '\n');
+	if (!line_end)
+		line_end = strchr(line_start, 0);
+
+	const int line_len = (int)(line_end - line_start);
+
+	fprintf(stderr, "%s:%i:%i: %s: ", issue.fctx.fname,
+			issue.loc.first_line,
+			issue.loc.first_col,
+			issue_level_str(issue.level));
+
+	vfprintf(stderr, fmt, args);
+	fputc('\n', stderr);
+
+	/* source line, prefixed with its line number */
+	int lineno_len = snprintf(NULL, 0, "%i", issue.loc.first_line);
+	fputc(' ', stderr);
+	fprintf(stderr, "%i | ", issue.loc.first_line);
+
+	fprintf(stderr, "%.*s\n", line_len, line_start);
+
+	/* marker line, the gutter is as wide as the line number above */
+	for (int i = 0; i < lineno_len + 2; ++i)
+		fputc(' ', stderr);
+
+	fprintf(stderr, "| ");
+
+	/* repeat tabs from the source line so the marker lines up with it,
+	 * but never look past the end of the line: the reported column may
+	 * lie beyond it */
+	for (int i = 0; i < issue.loc.first_col - 1; ++i) {
+		const int is_tab = i < line_len && line_start[i] == '\t';
+		fputc(is_tab ? '\t' : ' ', stderr);
+	}
+
+	/* always mark the start, even for empty or multi-line locations
+	 * where last_col does not lie after first_col */
+	fputc('^', stderr);
+	for (int i = issue.loc.first_col + 1; i < issue.loc.last_col; ++i)
+		fputc('~', stderr);
+
+	fputc('\n', stderr);
+}
+
+/**
+ * Report an issue at a source location.
+ *
+ * @param issue Issue context.
+ * @param err_msg Format string. Follows standard printf() formatting.
+ * @param ... Arguments for \p err_msg.
+ */
+void src_issue(struct src_issue issue, const char *err_msg, ...)
+{
+	va_list ap;
+
+	va_start(ap, err_msg);
+	_issue(issue, err_msg, ap);
+	va_end(ap);
+}
diff --git a/src/lexer.l b/src/lexer.l
new file mode 100644
index 0000000..3c611ab
--- /dev/null
+++ b/src/lexer.l
@@ -0,0 +1,88 @@
+/* SPDX-License-Identifier: copyleft-next-0.3.1 */
+/* Copyright 2023 Kim Kuparinen < kimi.h.kuparinen@gmail.com > */
+
+%option reentrant noyywrap nounput noinput nodefault
+%{
+#define FROM_LEXER
+#include <lyn/parser.h>
+#include <lyn/debug.h>
+
+/* Advance the location so that it spans the token text that was just matched. */
+static void update_yylloc(struct parser *parser, YYLTYPE *lloc, const char *text)
+{
+	(void)parser;
+
+	/* the new token starts where the previous one ended */
+	lloc->first_line = lloc->last_line;
+	lloc->first_column = lloc->last_column;
+
+	for (const char *c = text; *c != 0; ++c) {
+		if (*c != '\n') {
+			lloc->last_column++;
+			continue;
+		}
+
+		lloc->last_line++;
+		/* flex uses 1 based indexing */
+		lloc->last_column = 1;
+	}
+}
+
+#define YY_USER_ACTION update_yylloc(parser, yylloc, yytext);
+%}
+ID [^(){};[:space:]]+
+STRING \"(\\.|[^"\\])*\"
+
+HEX 0[xX][0-9a-fA-F]+
+DEC -?[0-9]+
+OCT 0[0-8]+
+BIN 0b[0-1]+
+
+INT {HEX}|{DEC}|{OCT}|{BIN}
+
+HEXF [+-]?0[xX][0-9a-fA-F]+([pP][+-]?[0-9]+)
+DECF [+-]?[0-9]+[.]([eE]?[+-]?[0-9]+)?[fF]?
+
+FLOAT {HEXF}|{DECF}
+
+%%
+"#".* {/* skip line comments */}
+
+"(" {return LPAREN;}
+")" {return RPAREN;}
+"{" {return LBRACE;}
+"}" {return RBRACE;}
+";" {return SEMICOLON;}
+"\n" {return NL;}
+
+{STRING} {
+ /* seems risky, I know, but letting the parser choose when to allocate a
+ * new string seems to help with syntax error cleanup */
+ yylval->str = strdup(yytext);
+ return STRING;
+}
+
+{INT} {
+	/* strtoull() with base 0 only recognises the 0b prefix from C23
+	 * onwards, so binary literals are converted explicitly */
+	if (yytext[0] == '0' && yytext[1] == 'b')
+		yylval->integer = strtoull(yytext + 2, 0, 2);
+	else
+		yylval->integer = strtoull(yytext, 0, 0);
+	return INT;
+}
+
+{FLOAT} {
+ yylval->floating = strtod(yytext, 0);
+ return FLOAT;
+}
+
+{ID} {
+ yylval->str = strdup(yytext);
+ return ID;
+}
+
+[^\n[:graph:]]+ {/* skip whitespace */}
+
+. {
+ struct src_issue issue;
+ issue.level = SRC_ERROR;
+ issue.loc = src_loc(*yylloc);
+ issue.fctx.fbuf = parser->buf;
+ issue.fctx.fname = parser->fname;
+ src_issue(issue, "Unexpected token: %s", yytext);
+ parser->failed = true;
+}
+%%
diff --git a/src/lyn.c b/src/lyn.c
new file mode 100644
index 0000000..2527677
--- /dev/null
+++ b/src/lyn.c
@@ -0,0 +1,89 @@
+#include <errno.h>
+#include <stdbool.h>
+#include <string.h>
+#include <stdio.h>
+#include <limits.h>
+#include <stdlib.h>
+
+#include <lyn/lyn.h>
+#include <lyn/parser.h>
+#include <lyn/debug.h>
+
+/* Create an empty interpreter state. */
+struct lyn lyn_create()
+{
+	struct lyn lyn = {};
+	return lyn;
+}
+
+/**
+ * Parse \p str and, when parsing succeeded, dump the resulting AST.
+ *
+ * @param lyn Interpreter state, currently unused.
+ * @param name Name used for \p str in diagnostics.
+ * @param str Source text to parse.
+ * @return \c 0 on success, \c 1 if parsing failed, \c -1 if no parser
+ * could be created.
+ */
+int lyn_eval_str(struct lyn *lyn, const char *name, const char *str)
+{
+	struct parser *p = create_parser();
+	if (p == NULL)
+		return -1;
+
+	parse(p, name, str);
+
+	/* copy the results out before the parser goes away */
+	bool failed = p->failed;
+	struct ast *tree = p->tree;
+	destroy_parser(p);
+
+	if (failed)
+		return 1;
+
+	ast_dump_list(0, tree);
+	return 0;
+}
+
+/**
+ * Read whole file into a buffer and return pointer to buffer.
+ * Possibly kind of silly to have both \p file and \p f.
+ * Apparently there's no standardized way to get the file name of a
+ * file pointer.
+ *
+ * @param file Name of file to read, only used in error messages.
+ * @param f File pointer.
+ * @return Pointer to null-terminated buffer with file contents, owned by
+ * the caller and released with free(). \c NULL on failure.
+ */
+static char *read_file(const char *file, FILE *f)
+{
+	if (fseek(f, 0, SEEK_END) != 0) {
+		error("failed seeking in %s", file);
+		return NULL;
+	}
+
+	/** @todo check how well standardized this actually is */
+	long s = ftell(f);
+	/* ftell() reports failure as -1, and LONG_MAX would overflow the
+	 * size computation below */
+	if (s < 0 || s == LONG_MAX) {
+		error("%s might be a directory", file);
+		return NULL;
+	}
+
+	if (fseek(f, 0, SEEK_SET) != 0) {
+		error("failed seeking in %s", file);
+		return NULL;
+	}
+
+	const size_t size = (size_t)s;
+	char *buf = malloc(size + 1);
+	if (!buf)
+		return NULL;
+
+	/* read byte-wise so that the return value is the number of bytes
+	 * that were actually read */
+	const size_t got = fread(buf, 1, size, f);
+	if (got != size && ferror(f)) {
+		error("failed reading %s", file);
+		free(buf);
+		return NULL;
+	}
+
+	/* remember terminating null */
+	buf[got] = 0;
+	return buf;
+}
+
+/**
+ * Read the file \p fname and evaluate its contents with lyn_eval_str().
+ *
+ * @param lyn Interpreter state.
+ * @param fname Name of the file to evaluate.
+ * @return Result of lyn_eval_str(), or \c -1 if the file could not be
+ * opened or read.
+ */
+int lyn_eval_file(struct lyn *lyn, const char *fname)
+{
+	FILE *f = fopen(fname, "rb");
+	if (f == NULL) {
+		error("failed opening %s: %s\n", fname, strerror(errno));
+		return -1;
+	}
+
+	/* the stream is only needed while the contents are read */
+	char *contents = read_file(fname, f);
+	fclose(f);
+
+	int ret = -1;
+	if (contents != NULL) {
+		ret = lyn_eval_str(lyn, fname, contents);
+		free(contents);
+	}
+
+	return ret;
+}
+
+/* Release an interpreter state, there is nothing to release yet. */
+void lyn_destroy(struct lyn *lyn)
+{
+	(void)lyn;
+}
diff --git a/src/main.c b/src/main.c
new file mode 100644
index 0000000..85dd445
--- /dev/null
+++ b/src/main.c
@@ -0,0 +1,16 @@
+#include <lyn/lyn.h>
+#include <lyn/debug.h>
+
+/* Entry point: evaluate the single file named on the command line. */
+int main(int argc, char *argv[])
+{
+	if (argc == 2) {
+		struct lyn lyn = lyn_create();
+		int ret = lyn_eval_file(&lyn, argv[1]);
+		lyn_destroy(&lyn);
+		return ret;
+	}
+
+	error("wrong number of arguments (should be just one for a file)");
+	return -1;
+}
diff --git a/src/parser.y b/src/parser.y
new file mode 100644
index 0000000..3e0156f
--- /dev/null
+++ b/src/parser.y
@@ -0,0 +1,290 @@
+/* SPDX-License-Identifier: copyleft-next-0.3.1 */
+/* Copyright 2023 Kim Kuparinen < kimi.h.kuparinen@gmail.com > */
+
+%{
+
+/* TODO: clean up this mess and I guess fix location tracking, it works for the
+ * parser but each ast node should also get some location data
+ * I'm trying something over in ast.c, but I'm not sure about it
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <assert.h>
+
+#include <lyn/parser.h>
+#include <lyn/ast.h>
+
+%}
+
+%locations
+
+%define parse.trace
+%define parse.error verbose
+%define api.pure full
+%define lr.type ielr
+
+%lex-param {void *scanner} {struct parser *parser}
+%parse-param {void *scanner} {struct parser* parser}
+
+%union {
+ struct ast *ast;
+ char *str;
+ long long integer;
+ double floating;
+};
+
+%token <str> STRING
+%token <str> ID
+%token <integer> INT
+%token <floating> FLOAT
+
+%token LPAREN "("
+%token RPAREN ")"
+%token LBRACE "{"
+%token RBRACE "}"
+%token SEMICOLON ";"
+%token NL "nl"
+
+%nterm <ast> arg args rev_args
+%nterm <ast> cmd cmds rev_cmds
+
+%{
+
+/** Modifies the signature of yylex to fit our parser better. */
+#define YY_DECL int yylex(YYSTYPE *yylval, YYLTYPE *yylloc, \
+ void *yyscanner, struct parser *parser)
+
+/**
+ * Declare yylex.
+ *
+ * @param yylval Bison current value.
+ * @param yylloc Bison location info.
+ * @param yyscanner Flex scanner.
+ * @param parser Current parser state.
+ * @return Kind of the next token, \c 0 (YYEOF) at end of input.
+ * More info on yylex() can be found in the flex manual.
+ */
+YY_DECL;
+
+/**
+ * Gobble tokens until we reach the next interesting feature.
+ * Interesting features are generally new statements.
+ * Mainly intended for trying to get to a sensible
+ * location to continue parsing after an error has occurred.
+ *
+ * @param yylval Current parser value.
+ * @param yylloc Parser location info.
+ * @param scanner Lex scanner.
+ * @param parser Current parser.
+ * @return \c 0 on success, non-zero otherwise.
+ */
+static int next_interesting_feature(YYSTYPE *yylval, YYLTYPE *yylloc,
+ void *scanner, struct parser *parser);
+
+/**
+ * Convert bison location info to our own source location info.
+ *
+ * @param yylloc Bison location info.
+ * @return Internal location info.
+ */
+static struct src_loc src_loc(YYLTYPE yylloc);
+
+/**
+ * Print parsing error.
+ * Automatically called by bison.
+ *
+ * @param yylloc Location of error.
+ * @param lexer Lexer.
+ * @param parser Parser state.
+ * @param msg Message to print.
+ */
+static void yyerror(YYLTYPE *yylloc, void *lexer,
+ struct parser *parser, const char *msg);
+
+/**
+ * Try to convert escape code to its actual value.
+ * I.e. '\n' -> 0x0a.
+ *
+ * @param c Escape character without backslash.
+ * @return Corresponding value.
+ */
+static char match_escape(char c);
+
+/**
+ * Similar to strdup() but skips quotation marks that would
+ * otherwise be included.
+ * I.e. "something" -> something.
+ *
+ * @param s String to clone, with quotation marks surrounding it.
+ * @return Identical string but without quotation marks around it.
+ */
+static char *strip(const char *s);
+
+%}
+
+%start input;
+%%
+
+/* A single argument: a parenthesised application, a braced list, or an
+ * identifier/literal. */
+arg
+ : "(" cmds ")" {$$ = gen_apply($2);}
+ | "{" cmds "}" {$$ = gen_list($2);}
+ | ID {$$ = gen_id($1);}
+ | STRING {$$ = gen_str($1);}
+ | INT {$$ = gen_int($1);}
+ | FLOAT {$$ = gen_float($1);}
+
+/* Arguments are collected by prepending each new one to the list, so the
+ * list ends up in reverse source order. */
+rev_args
+ : rev_args arg {$$ = $2; $$->next = $1;}
+ | arg
+
+/* Reverses rev_args so the arguments are back in source order. */
+args
+ : rev_args {$$ = reverse_ast_list($1);}
+
+/* One or more statement separators, i.e. ";" or newline in any mix. */
+sep
+ : sep ";"
+ | sep NL
+ | ";"
+ | NL
+
+/* A command is a non-empty sequence of arguments. */
+cmd
+ : args {$$ = gen_cmd($1);}
+
+/* Commands are collected in reverse order, same as rev_args. */
+rev_cmds
+ : rev_cmds sep cmd {$$ = $3; $$->next = $1;}
+ | cmd
+
+/* Separated commands in source order, with optional leading and trailing
+ * separators. No commands at all yields NULL. */
+cmds
+ : rev_cmds {$$ = reverse_ast_list($1);}
+ | rev_cmds sep {$$ = reverse_ast_list($1);}
+ | sep rev_cmds {$$ = reverse_ast_list($2);}
+ | sep rev_cmds sep {$$ = reverse_ast_list($2);}
+ | {$$ = NULL;}
+
+/* Whole input: the top-level commands are passed through gen_list() and
+ * stored as the parser's tree. */
+input
+ : cmds {parser->tree = gen_list($1);}
+
+%%
+
+#include "gen_lexer.inc"
+
+/* I'm not convinced this is foolproof quite yet, more testing would be nice. */
+/* Pulls tokens from the lexer until a ";" or "}" is seen outside of any "{"
+ * opened while skipping (returns 0), or until the input ends (returns 1). */
+static int next_interesting_feature(YYSTYPE *yylval, YYLTYPE *yylloc,
+ void *scanner, struct parser *parser)
+{
+ /* number of "{" seen during skipping that have not been closed yet */
+ size_t depth = 0;
+ while (1) {
+ int ret = yylex(yylval, yylloc, scanner, parser);
+ if (ret == LBRACE) {
+ depth++;
+ continue;
+ }
+
+ if (ret == RBRACE && depth > 0)
+ depth--;
+
+ /* also true right after the decrement above brought depth to zero,
+ * so the "}" closing the outermost skipped "{" ends the skipping */
+ if (ret == RBRACE && depth == 0)
+ return 0;
+
+ /* separators inside skipped braces are ignored */
+ if (ret == SEMICOLON && depth == 0)
+ return 0;
+
+ /* return fatal error and parser should abort */
+ if (ret == YYEOF)
+ /* some error for unmatched braces would be cool I think */
+ return 1;
+ }
+}
+
+
+/* Copy the line and column range of a bison location into a src_loc. */
+static struct src_loc src_loc(YYLTYPE yylloc)
+{
+	return (struct src_loc){
+		.first_line = yylloc.first_line,
+		.last_line = yylloc.last_line,
+		.first_col = yylloc.first_column,
+		.last_col = yylloc.last_column,
+	};
+}
+
+static void yyerror(YYLTYPE *yylloc, void *lexer,
+ struct parser *parser, const char *msg)
+{
+ (void)lexer;
+
+ struct src_issue issue;
+ issue.level = SRC_ERROR;
+ issue.loc = src_loc(*yylloc);
+ issue.fctx.fbuf = parser->buf;
+ issue.fctx.fname = parser->fname;
+ src_issue(issue, msg);
+}
+
+/* Translate the character following a backslash into the character it
+ * stands for, characters without a special meaning are returned as is. */
+static char match_escape(char c)
+{
+	/* codes[i] is the escape character, values[i] what it stands for */
+	static const char codes[] = "'\\abfnrtv";
+	static const char values[] = "'\\\a\b\f\n\r\t\v";
+
+	if (c == 0)
+		return c;
+
+	const char *hit = strchr(codes, c);
+	if (hit == NULL)
+		return c;
+
+	return values[hit - codes];
+}
+
+/*
+ * Clone str without its surrounding quotation marks and with escape
+ * sequences resolved. Takes ownership of str: it is freed in all cases.
+ * Returns a newly allocated string, or NULL if allocation failed.
+ */
+static char *strip(const char *str)
+{
+	const size_t len = strlen(str);
+	char *buf = malloc(len + 1);
+	if (!buf) {
+		/* should probably try to handle the error in some way... */
+		error("failed allocating buffer for string clone");
+		free((void *)str);
+		return NULL;
+	}
+
+	/* skip quotation marks; anything shorter than two characters has no
+	 * content between quotes, and len - 1 must not wrap around */
+	const size_t end = len >= 2 ? len - 1 : 0;
+
+	size_t j = 0;
+	for (size_t i = 1; i < end; ++i) {
+		char c = str[i];
+
+		/* only resolve an escape if the escaped character is still part
+		 * of the content, never consume the closing quotation mark */
+		if (c == '\\' && i + 1 < end)
+			c = match_escape(str[++i]);
+
+		buf[j++] = c;
+	}
+
+	buf[j] = 0;
+	free((void *)str);
+	return buf;
+}
+
+/* Allocate a zero-initialised parser, returns NULL if allocation fails. */
+struct parser *create_parser()
+{
+	struct parser *p = calloc(1, sizeof *p);
+	return p;
+}
+
+/* Release a parser and its scanner. Accepts NULL and parsers that were
+ * never passed to parse(). */
+void destroy_parser(struct parser *p)
+{
+	if (!p)
+		return;
+
+	/* create_parser() zero-initialises the parser, so the scanner only
+	 * exists once parse() has set it up */
+	if (p->lexer)
+		yylex_destroy(p->lexer);
+
+	free(p);
+}
+
+/*
+ * Parse buf, using fname for diagnostics. The outcome is stored in p:
+ * p->failed is set if scanner setup or parsing failed, otherwise the
+ * grammar stores the resulting tree in p->tree.
+ */
+void parse(struct parser *p, const char *fname, const char *buf)
+{
+	p->fname = fname;
+	p->buf = buf;
+
+	p->failed = false;
+
+	/* without a scanner there is nothing to parse */
+	if (yylex_init(&p->lexer) != 0) {
+		p->lexer = NULL;
+		p->failed = true;
+		return;
+	}
+
+	yy_scan_string(buf, p->lexer);
+
+	/* yyparse() returns non-zero on syntax errors and memory exhaustion */
+	if (yyparse(p->lexer, p) != 0)
+		p->failed = true;
+}
diff --git a/src/source.mk b/src/source.mk
new file mode 100644
index 0000000..caa476c
--- /dev/null
+++ b/src/source.mk
@@ -0,0 +1,2 @@
+SRC_LOCAL != echo src/*.c
+LYN_SOURCES := $(LYN_SOURCES) $(SRC_LOCAL)