diff options
-rw-r--r-- | .gitignore | 9 | ||||
-rw-r--r-- | Makefile | 65 | ||||
-rw-r--r-- | examples/sum.lyn | 2 | ||||
-rw-r--r-- | gen/source.mk | 9 | ||||
-rw-r--r-- | include/lyn/ast.h | 104 | ||||
-rw-r--r-- | include/lyn/debug.h | 98 | ||||
-rw-r--r-- | include/lyn/lyn.h | 12 | ||||
-rw-r--r-- | include/lyn/parser.h | 57 | ||||
-rwxr-xr-x | scripts/gen-deps | 37 | ||||
-rwxr-xr-x | scripts/license | 16 | ||||
-rw-r--r-- | scripts/makefile | 74 | ||||
-rwxr-xr-x | scripts/warn-undocumented | 7 | ||||
-rw-r--r-- | src/ast.c | 53 | ||||
-rw-r--r-- | src/debug.c | 108 | ||||
-rw-r--r-- | src/lexer.l | 88 | ||||
-rw-r--r-- | src/lyn.c | 89 | ||||
-rw-r--r-- | src/main.c | 16 | ||||
-rw-r--r-- | src/parser.y | 290 | ||||
-rw-r--r-- | src/source.mk | 2 |
19 files changed, 1135 insertions, 1 deletions
diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..02aae11 --- /dev/null +++ b/.gitignore @@ -0,0 +1,9 @@ +deps.mk +tests.mk +docs/output +reports +build +gen +lyn +!gen/source.mk +!include/lyn diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..79a05da --- /dev/null +++ b/Makefile @@ -0,0 +1,65 @@ +.PHONY: all +all: setup + $(MAKE) -f scripts/makefile + +# this kicks all unrecognised targets to the client script. +# note that trying to compile individual files, e.g. +# +# make kernel.elf +# +# will not work, you would need +# +# make -f scripts/makefile kernel.elf +# +# instead +.DEFAULT: setup + $(MAKE) -f scripts/makefile $< + +.PHONY: analyze +analyze: setup + CC='gcc -fanalyzer' SKIP_ANALYZER='-fno-analyzer' $(MAKE) CROSS_COMPILE= + +.PHONY: setup +setup: + @echo -n > deps.mk + @./scripts/gen-deps -p LYN -c COMPILE_LYN -b lyn "$(LYN_SOURCES)" + +CLEANUP := build deps.mk lyn +CLEANUP_CMD := +LYN_SOURCES := + +include src/source.mk + +.PHONY: check +check: all + $(MAKE) -C tests -k check + +.PHONY: format +format: + find src include -iname '*.[ch]' |\ + xargs uncrustify -c uncrustify.conf --no-backup -F - + +.PHONY: license +license: + find src include -iname '*.[ch]' |\ + xargs ./scripts/license + +.PHONY: docs +docs: + find src include -iname '*.[ch]' -not -path */gen/* |\ + xargs ./scripts/warn-undocumented + doxygen docs/doxygen.conf + +RM = rm + +.PHONY: clean +clean: + $(RM) -rf $(CLEANUP) + $(MAKE) -C tests clean + +.PHONY: clean_docs +clean_docs: + $(RM) -rf docs/output + +.PHONY: clean_all +clean_all: clean clean_docs diff --git a/examples/sum.lyn b/examples/sum.lyn index a4dcf61..55338f6 100644 --- a/examples/sum.lyn +++ b/examples/sum.lyn @@ -1,6 +1,6 @@ syntax for {init cond post body} { eval init - while (eval cond) { + while {eval cond} { eval body eval post } diff --git a/gen/source.mk b/gen/source.mk new file mode 100644 index 0000000..9138011 --- /dev/null +++ b/gen/source.mk @@ -0,0 +1,9 @@ 
+gen/gen_parser.c: src/parser.y gen/gen_lexer.inc + bison -Wcounterexamples -o gen/gen_parser.c src/parser.y + +gen/gen_lexer.inc: src/lexer.l + flex -o gen/gen_lexer.inc src/lexer.l + +build/gen/parser.o: gen/gen_parser.c + mkdir -p build/gen + $(COMPILE_LYN) $(SKIP_ANALYZER) -c gen/gen_parser.c -o build/gen/parser.o diff --git a/include/lyn/ast.h b/include/lyn/ast.h new file mode 100644 index 0000000..fd3b98b --- /dev/null +++ b/include/lyn/ast.h @@ -0,0 +1,104 @@ +#ifndef LYN_AST_H +#define LYN_AST_H + +#include <stdlib.h> +#include <string.h> + +enum kind { + LYN_ID, LYN_STR, LYN_INT, LYN_FLOAT, LYN_CMD, LYN_LIST, LYN_APPLY +}; + +struct ast { + enum kind kind; + union { + long long i; + double d; + const char *s; + struct ast *args; + }; + struct ast *next; +}; + +static inline struct ast *gen_id(const char *s) +{ + struct ast *id = calloc(1, sizeof(struct ast)); + if (!id) + return NULL; + + id->kind = LYN_ID; + id->s = strdup(s); + return id; +} + +static inline struct ast *gen_str(const char *s) +{ + struct ast *str = calloc(1, sizeof(struct ast)); + if (!str) + return NULL; + + str->kind = LYN_STR; + str->s = strdup(s); + return str; +} + +static inline struct ast *gen_int(long long i) +{ + struct ast *integer = calloc(1, sizeof(struct ast)); + if (!integer) + return NULL; + + integer->kind = LYN_INT; + integer->i = i; + return integer; +} + +static inline struct ast *gen_float(double d) +{ + struct ast *floating = calloc(1, sizeof(struct ast)); + if (!floating) + return NULL; + + floating->kind = LYN_FLOAT; + floating->d = d; + return floating; +} + +static inline struct ast *gen_apply(struct ast *cmds) +{ + struct ast *apply = calloc(1, sizeof(struct ast)); + if (!apply) + return NULL; + + apply->kind = LYN_APPLY; + apply->args = cmds; + return apply; +} + +static inline struct ast *gen_list(struct ast *cmds) +{ + struct ast *list = calloc(1, sizeof(struct ast)); + if (!list) + return NULL; + + list->kind = LYN_LIST; + list->args = cmds; + return list; 
+} + +static inline struct ast *gen_cmd(struct ast *args) +{ + struct ast *cmd = calloc(1, sizeof(struct ast)); + if (!cmd) + return NULL; + + cmd->kind = LYN_CMD; + cmd->args = args; + return cmd; +} + +void ast_dump(int depth, struct ast *ast); +void ast_dump_list(int depth, struct ast *ast); + +struct ast *reverse_ast_list(struct ast *ast); + +#endif /* LYN_AST_H */ diff --git a/include/lyn/debug.h b/include/lyn/debug.h new file mode 100644 index 0000000..3f78f17 --- /dev/null +++ b/include/lyn/debug.h @@ -0,0 +1,98 @@ +/* SPDX-License-Identifier: copyleft-next-0.3.1 */ +/* Copyright 2023 Kim Kuparinen < kimi.h.kuparinen@gmail.com > */ + +#ifndef EK_DEBUG_H +#define EK_DEBUG_H + +/** + * @file debug.h + * + * Debugging and general information printing helpers. + */ + +#include <stdio.h> + +#if DEBUG +/** + * Print debugging message. Only active if \c DEBUG is defined, + * + * @param x Format string. Follows standard printf() formatting. + */ +#define debug(x, ...) \ + do {fprintf(stderr, "debug: " x "\n",##__VA_ARGS__);} while(0) +#else +#define debug(x, ...) +#endif + +/** + * Print error message. + * + * @param x Format string. Follows standard printf() formatting. + */ +#define error(x, ...) \ + do {fprintf(stderr, "error: " x "\n",##__VA_ARGS__);} while(0) + +/** + * Print warning message. + * + * @param x Format string. Follows standard printf() formatting. + */ +#define warn(x, ...) \ + do {fprintf(stderr, "warn: " x "\n",##__VA_ARGS__);} while(0) + +/** + * Print info message. + * + * @param x Format string. Follows standard printf() formatting. + */ +#define info(x, ...) \ + do {fprintf(stderr, "info: " x "\n",##__VA_ARGS__);} while(0) + +/** Keeps track of file name and file buffer. */ +struct file_ctx { + /** File name. */ + const char *fname; + /** File buffer. */ + const char *fbuf; +}; + +/** Represents a source location, spanning over some bit of code. */ +struct src_loc { + /** First line of interesting text. 
*/ + int first_line; + /** Last line of interesting text. */ + int last_line; + /** First column in first line of interesting text. */ + int first_col; + /** Last column in last line of interesting text. */ + int last_col; +}; + +/** Issue categorization. */ +enum issue_level { + /** Information. */ + SRC_INFO, + /** Warning. */ + SRC_WARN, + /** Error. */ + SRC_ERROR +}; + +/** Context for issue in user code. */ +struct src_issue { + /** How bad the issue is. */ + enum issue_level level; + /** Where the issue happened relative to file buffer. */ + struct src_loc loc; + /** File context issue happened in. */ + struct file_ctx fctx; +}; + +/** + * Print a source issue. + * + * @param issue Context for issue. + * @param err_msg Format string. Follows standard printf() formatting. + */ +void src_issue(struct src_issue issue, const char *err_msg, ...); +#endif /* EK_DEBUG_H */ diff --git a/include/lyn/lyn.h b/include/lyn/lyn.h new file mode 100644 index 0000000..43094ca --- /dev/null +++ b/include/lyn/lyn.h @@ -0,0 +1,12 @@ +#ifndef LYN_H +#define LYN_H + +struct lyn { +}; + +struct lyn lyn_create(); +int lyn_eval_file(struct lyn *lyn, const char *fname); +int lyn_eval_str(struct lyn *lyn, const char *name, const char *str); +void lyn_destroy(struct lyn *lyn); + +#endif /* LYN_H */ diff --git a/include/lyn/parser.h b/include/lyn/parser.h new file mode 100644 index 0000000..81ecc49 --- /dev/null +++ b/include/lyn/parser.h @@ -0,0 +1,57 @@ +/* SPDX-License-Identifier: copyleft-next-0.3.1 */ +/* Copyright 2023 Kim Kuparinen < kimi.h.kuparinen@gmail.com > */ + +#ifndef PARSER_H +#define PARSER_H + +/** + * @file parser.h + * + * Glue file to get lexer and parser to play nice. + */ + +#include <stddef.h> +#include <stdbool.h> +#include <lyn/ast.h> + +/** Stuff the parser needs to do its job. */ +struct parser { + /** Whether parsing failed or succeeded. */ + bool failed; + /** Lexer. Parser owns the lexer and is responsible for initializing + * and destroying the lexer. 
+ */ + void *lexer; + + struct ast *tree; + + /** File content in memory. */ + const char *buf; + /** Filename. */ + const char *fname; +}; + +/** + * Create new parser. + * + * @return Created parser. + */ +struct parser *create_parser(); + +/** + * Destroy parser. + * + * @param p Parser to destroy. + */ +void destroy_parser(struct parser *p); + +/** + * Run parser on buffer \p buf with name \p fname. + * + * @param p Parser to run. + * @param fname Name of file \p buf was read from. + * @param buf Contents of \p fname. + */ +void parse(struct parser *p, const char *fname, const char *buf); + +#endif /* PARSER_H */ diff --git a/scripts/gen-deps b/scripts/gen-deps new file mode 100755 index 0000000..f45707c --- /dev/null +++ b/scripts/gen-deps @@ -0,0 +1,37 @@ +#!/bin/sh + +PREFIX= +COMPILE=COMPILE +LINT=LINT +BUILD=build/ + +while getopts "p:c:b:l:" opt; do + case "$opt" in + p) PREFIX="$OPTARG"_;; + c) COMPILE="$OPTARG";; + l) LINT="$OPTARG";; + b) BUILD=build/"$OPTARG";; + *) echo "unrecognised option -$OPTARG" >&2; exit 1;; + esac +done + +shift $((OPTIND - 1)) + +# create all subdirectories +mkdir -p $(echo "${@}" | tr ' ' '\n' | sed "s|[^/]*$||;s|^|${BUILD}/|" | uniq) + +for s in ${@} +do + obj="${BUILD}/${s%.*}.o" + lint="${obj}.l" + dep="${obj}.d" + + echo "${PREFIX}OBJS += ${obj}" >> deps.mk + echo "${PREFIX}LINTS += ${lint}" >> deps.mk + echo "${dep}:" >> deps.mk + echo "-include ${dep}" >> deps.mk + echo "${obj}: ${s}" >> deps.mk + echo " \$(${COMPILE}) -c ${s} -o ${obj}" >> deps.mk + echo "${lint}: ${s}" >> deps.mk + echo " \$(${LINT}) -c ${s} -o /dev/null" >> deps.mk +done diff --git a/scripts/license b/scripts/license new file mode 100755 index 0000000..53bd5da --- /dev/null +++ b/scripts/license @@ -0,0 +1,16 @@ +#!/bin/sh + +SPDX="/* SPDX-License-Identifier: copyleft-next-0.3.1 */" + +for f in "$@" +do + if [ "$(head -1 "$f")" != "${SPDX}" ] + then + sed -i "1i${SPDX}\n" "$f" + fi + + if ! 
grep 'Copyright' "$f" > /dev/null + then + echo "Missing copyright info in $f" + fi +done diff --git a/scripts/makefile b/scripts/makefile new file mode 100644 index 0000000..dd0f7fd --- /dev/null +++ b/scripts/makefile @@ -0,0 +1,74 @@ +# this could be done better +RELEASE ?= 0 +OPTFLAGS != [ "$(RELEASE)" != "0" ] \ + && echo "-O2 -flto=auto" \ + || echo "-O0" + +DEBUG ?= 1 +DEBUGFLAGS != [ "$(DEBUG)" != "0" ] \ + && echo "-DDEBUG=1" \ + || echo "-DNDEBUG=1" + +ASSERT ?= 1 +ASSERTFLAGS != [ "$(ASSERT)" != "0" ] \ + && echo "-DASSERT=1" \ + || echo + +DEPFLAGS = -MT $@ -MMD -MP -MF $@.d +LINTFLAGS := -fsyntax-only +PREPROCESS := -E + +LLVM ?= 0 +BUILD := build + +all: lyn + +include gen/source.mk + +# default values, overwrite if/when needed +CROSS_COMPILE := + +OBJCOPY != [ "$(LLVM)" != "0" ] \ + && echo llvm-objcopy \ + || echo $(CROSS_COMPILE)objcopy + +COMPILER != [ -n "$(CROSS_COMPILE)" ] \ + && { \ + [ "$(LLVM)" != "0" ] \ + && echo clang --target="$(CROSS_COMPILE)" \ + || echo $(CROSS_COMPILE)gcc \ + ; \ + } \ + || echo $(CC) + + +OBFLAGS := -g +WARNFLAGS := -Wall -Wextra + +COMPILE_FLAGS := $(CFLAGS) $(WARNFLAGS) $(OPTFLAGS) $(OBFLAGS) $(ASSERTFLAGS) \ + $(DEBUGFLAGS) + +INCLUDE_FLAGS := -I include + +COMPILE = $(COMPILER) \ + $(COMPILE_FLAGS) $(DEPFLAGS) $(INCLUDE_FLAGS) + +LINT = $(COMPILER) \ + $(COMPILE_FLAGS) $(LINTFLAGS) $(INCLUDE_FLAGS) + +UBSAN ?= 0 +UBSAN_FLAGS != [ "$(UBSAN)" != "0" ] \ + && echo -fsanitize=undefined \ + || echo + +COMPILE_LYN = $(COMPILE) $(LYN_FLAGS) $(UBSAN_FLAGS) + +-include deps.mk + +lyn: $(LYN_OBJS) build/gen/parser.o + $(COMPILE_LYN) $(LYN_OBJS) build/gen/parser.o -o $@ + + +# might lint some common things twice +.PHONY: lint +lint: $(LYN_LINTS) diff --git a/scripts/warn-undocumented b/scripts/warn-undocumented new file mode 100755 index 0000000..db22249 --- /dev/null +++ b/scripts/warn-undocumented @@ -0,0 +1,7 @@ +#!/bin/sh +# look through all files for either @file or \file +for file in "$@" +do + grep -c '[@\]file' "$file" |\ + awk -F':' 
"\$1 == 0 {print \"Undocumented file: $file\"}" +done diff --git a/src/ast.c b/src/ast.c new file mode 100644 index 0000000..faeae6e --- /dev/null +++ b/src/ast.c @@ -0,0 +1,53 @@ +#include <stdio.h> +#include <lyn/ast.h> + +struct ast *reverse_ast_list(struct ast *root) +{ + struct ast *new_root = NULL; + while (root) { + struct ast *next = root->next; + root->next = new_root; + new_root = root; + root = next; + } + + return new_root; +} + +#define dump(depth, fmt, ...) \ + do { \ + printf("//%*s", 2 * depth, ""); \ + printf(fmt,##__VA_ARGS__); \ + } while (0) + +void ast_dump(int depth, struct ast *ast) +{ + switch (ast->kind) { + case LYN_ID: dump(depth, "%s\n", ast->s); return; + case LYN_STR: dump(depth, "\"%s\"\n", ast->s); return; + case LYN_INT: dump(depth, "%lld\n", ast->i); return; + case LYN_FLOAT: dump(depth, "%f\n", ast->d); return; + case LYN_CMD: + dump(depth, "CMD\n"); + ast_dump_list(depth + 1, ast->args); + return; + + case LYN_LIST: + dump(depth, "LIST\n"); + ast_dump_list(depth + 1, ast->args); + return; + + case LYN_APPLY: + dump(depth, "APPLY\n"); + ast_dump_list(depth + 1, ast->args); + return; + } +} + +void ast_dump_list(int depth, struct ast *ast) +{ + while (ast) { + ast_dump(depth, ast); + ast = ast->next; + } +} diff --git a/src/debug.c b/src/debug.c new file mode 100644 index 0000000..7226640 --- /dev/null +++ b/src/debug.c @@ -0,0 +1,108 @@ +#include <stdarg.h> +#include <string.h> + +#include <lyn/debug.h> + +/** + * Get string representation of issue_level. + * + * @param level issue_level to get string representation for. + * @return \p level as a string. + */ +const char *issue_level_str(enum issue_level level) +{ + switch (level) { + case SRC_INFO: return "info"; + case SRC_WARN: return "warn"; + case SRC_ERROR: return "error"; + } + + return "unknown"; +} + +/** + * Find position in file buffer where line number \p no + * starts. 
Lines are assumed to be one-indexed, with + * \p no = \c 0 and \p no = \c 1 both considered the first line. + * + * @param buf Buffer to look in. + * @param no Line number whose start to look for. + * @return Pointer to location in buffer where line number \p no + * starts. + */ +static const char *find_lineno(const char *buf, size_t no) +{ + if (no == 0 || no == 1) + return buf; + + char c; + while ((c = *buf)) { + buf++; + + if (c == '\n') + no--; + + if (no == 1) + break; + } + + return buf; +} + +/** + * Helper for printing out an issue. + * + * @param issue Issue context. + * @param fmt Format string. Follows standard printf() formatting. + * @param args Arguments for \p fmt. + */ +static void _issue(struct src_issue issue, const char *fmt, va_list args) +{ + /* get start and end of current line in buffer */ + const char *line_start = find_lineno(issue.fctx.fbuf, + (size_t)issue.loc.first_line); + const char *line_end = strchr(line_start, '\n'); + if (!line_end) + line_end = strchr(line_start, 0); + + const int line_len = (int)(line_end - line_start); + + fprintf(stderr, "%s:%i:%i: %s: ", issue.fctx.fname, + issue.loc.first_line, + issue.loc.first_col, + issue_level_str(issue.level)); + + vfprintf(stderr, fmt, args); + fputc('\n', stderr); + + int lineno_len = snprintf(NULL, 0, "%i", issue.loc.first_line); + fputc(' ', stderr); + fprintf(stderr, "%i | ", issue.loc.first_line); + + fprintf(stderr, "%.*s\n", line_len, line_start); + + for (int i = 0; i < lineno_len + 2; ++i) + fputc(' ', stderr); + + fprintf(stderr, "| "); + + for (int i = 0; i < issue.loc.first_col - 1; ++i) + fputc(line_start[i] == '\t' ? '\t' : ' ', stderr); + + for (int i = issue.loc.first_col; i < issue.loc.last_col; ++i) { + if (i == issue.loc.first_col) + fputc('^', stderr); + else + fputc('~', stderr); + } + + fputc('\n', stderr); +} + +void src_issue(struct src_issue issue, const char *err_msg, ...) 
+{ + va_list args; + va_start(args, err_msg); + _issue(issue, err_msg, args); + va_end(args); +} diff --git a/src/lexer.l b/src/lexer.l new file mode 100644 index 0000000..3c611ab --- /dev/null +++ b/src/lexer.l @@ -0,0 +1,88 @@ +/* SPDX-License-Identifier: copyleft-next-0.3.1 */ +/* Copyright 2023 Kim Kuparinen < kimi.h.kuparinen@gmail.com > */ + +%option reentrant noyywrap nounput noinput nodefault +%{ +#define FROM_LEXER +#include <lyn/parser.h> +#include <lyn/debug.h> + +static void update_yylloc(struct parser *parser, YYLTYPE *lloc, const char *text) +{ + (void)parser; + + lloc->first_line = lloc->last_line; + lloc->first_column = lloc->last_column; + + for (size_t i = 0; text[i] != 0; ++i) { + if (text[i] == '\n') { + lloc->last_line++; + /* flex uses 1 based indexing */ + lloc->last_column = 1; + } else { + lloc->last_column++; + } + } +} + +#define YY_USER_ACTION update_yylloc(parser, yylloc, yytext); +%} +ID [^(){};[:space:]]+ +STRING \"(\\.|[^"\\])*\" + +HEX 0[xX][0-9a-fA-F]+ +DEC -?[0-9]+ +OCT 0[0-7]+ +BIN 0b[0-1]+ + +INT {HEX}|{DEC}|{OCT}|{BIN} + +HEXF [+-]?0[xX][0-9a-fA-F]+([pP][+-]?[0-9]+) +DECF [+-]?[0-9]+[.]([eE]?[+-]?[0-9]+)?[fF]? + +FLOAT {HEXF}|{DECF} + +%% +"#".* {/* skip line comments */} + +"(" {return LPAREN;} +")" {return RPAREN;} +"{" {return LBRACE;} +"}" {return RBRACE;} +";" {return SEMICOLON;} +"\n" {return NL;} + +{STRING} { + /* seems risky, I know, but letting the parser choose when to allocate a + * new string seems to help with syntax error cleanup */ + yylval->str = strdup(yytext); + return STRING; +} + +{INT} { + yylval->integer = (yytext[0] == '0' && yytext[1] == 'b') ? strtoull(yytext + 2, 0, 2) : strtoull(yytext, 0, 0); /* base 0 only parses a 0b prefix since C23 */ + return INT; +} + +{FLOAT} { + yylval->floating = strtod(yytext, 0); + return FLOAT; +} + +{ID} { + yylval->str = strdup(yytext); + return ID; +} + +[^\n[:graph:]]+ {/* skip whitespace */} + +. 
{ + struct src_issue issue; + issue.level = SRC_ERROR; + issue.loc = src_loc(*yylloc); + issue.fctx.fbuf = parser->buf; + issue.fctx.fname = parser->fname; + src_issue(issue, "Unexpected token: %s", yytext); + parser->failed = true; +} +%% diff --git a/src/lyn.c b/src/lyn.c new file mode 100644 index 0000000..2527677 --- /dev/null +++ b/src/lyn.c @@ -0,0 +1,89 @@ +#include <errno.h> +#include <stdbool.h> +#include <string.h> +#include <stdio.h> +#include <limits.h> +#include <stdlib.h> + +#include <lyn/lyn.h> +#include <lyn/parser.h> +#include <lyn/debug.h> + +struct lyn lyn_create() +{ + return (struct lyn){}; +} + +int lyn_eval_str(struct lyn *lyn, const char *name, const char *str) +{ + struct parser *p = create_parser(); + if (!p) + return -1; + + parse(p, name, str); + struct ast *ast = p->tree; + bool failed = p->failed; + destroy_parser(p); + + if (!failed) { + ast_dump_list(0, ast); + } + + return failed; +} + +/** + * Read whole file into a buffer and return pointer to buffer. + * Possibly kind of silly to have both \p file and \p f. + * Apparently there's no standardized way to get the file name of a + * file pointer. + * + * @param file Name of file to read. + * @param f File pointer. + * @return Pointer to buffer with file contents. 
+ */ +static char *read_file(const char *file, FILE *f) +{ + fseek(f, 0, SEEK_END); + /* ftell() reports failure as -1; LONG_MAX is a directory heuristic, @todo check how well standardized this actually is */ + long s = ftell(f); + if (s < 0 || s == LONG_MAX) { + error("%s might be a directory", file); + return NULL; + } + + fseek(f, 0, SEEK_SET); + + char *buf = malloc((size_t)(s + 1)); + if (!buf) + return NULL; + + size_t got = fread(buf, 1, (size_t)s, f); + /* terminate after the bytes actually read so a short read never exposes uninitialized memory */ + buf[got] = 0; + return buf; +} + +int lyn_eval_file(struct lyn *lyn, const char *fname) +{ + FILE *f = fopen(fname, "rb"); + if (!f) { + error("failed opening %s: %s", fname, strerror(errno)); + return -1; + } + + char *buf = read_file(fname, f); + fclose(f); + + if (!buf) + return -1; + + int ret = lyn_eval_str(lyn, fname, buf); + free(buf); + + return ret; +} + +void lyn_destroy(struct lyn *lyn) +{ +} diff --git a/src/main.c b/src/main.c new file mode 100644 index 0000000..85dd445 --- /dev/null +++ b/src/main.c @@ -0,0 +1,16 @@ +#include <lyn/lyn.h> +#include <lyn/debug.h> + +int main(int argc, char *argv[]) +{ + if (argc != 2) { + error("wrong number of arguments (should be just one for a file)"); + return -1; + } + + struct lyn lyn = lyn_create(); + int ret = lyn_eval_file(&lyn, argv[1]); + lyn_destroy(&lyn); + + return ret; +} diff --git a/src/parser.y b/src/parser.y new file mode 100644 index 0000000..3e0156f --- /dev/null +++ b/src/parser.y @@ -0,0 +1,290 @@ +/* SPDX-License-Identifier: copyleft-next-0.3.1 */ +/* Copyright 2023 Kim Kuparinen < kimi.h.kuparinen@gmail.com > */ + +%{ + +/* TODO: clean up this mess and I guess fix location tracking, it works for the + * parser but each ast node should also get some location data + * I'm trying something over in ast.c, but I'm not sure about it + */ + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <assert.h> + +#include <lyn/parser.h> +#include <lyn/ast.h> + +%} + +%locations + +%define parse.trace +%define parse.error verbose +%define api.pure full +%define lr.type ielr + 
+%lex-param {void *scanner} {struct parser *parser} +%parse-param {void *scanner} {struct parser* parser} + +%union { + struct ast *ast; + char *str; + long long integer; + double floating; +}; + +%token <str> STRING +%token <str> ID +%token <integer> INT +%token <floating> FLOAT + +%token LPAREN "(" +%token RPAREN ")" +%token LBRACE "{" +%token RBRACE "}" +%token SEMICOLON ";" +%token NL "nl" + +%nterm <ast> arg args rev_args +%nterm <ast> cmd cmds rev_cmds + +%{ + +/** Modifies the signature of yylex to fit our parser better. */ +#define YY_DECL int yylex(YYSTYPE *yylval, YYLTYPE *yylloc, \ + void *yyscanner, struct parser *parser) + +/** + * Declare yylex. + * + * @param yylval Bison current value. + * @param yylloc Bison location info. + * @param yyscanner Flex scanner. + * @param parser Current parser state. + * @return \c 0 when succesful, \c 1 otherwise. + * More info on yylex() can be found in the flex manual. + */ +YY_DECL; + +/** + * Gobble tokens until we reach the next interesting feature. + * Interesting features are generally new statements. + * Mainly intended for trying to get to a sensible + * location to continue parser after an error has occured. + * + * @param yylval Current parser value. + * @param yylloc Parser location info. + * @param scanner Lex scanner. + * @param parser Current parser. + * @return \c 0 on success, non-zero otherwise. + */ +static int next_interesting_feature(YYSTYPE *yylval, YYLTYPE *yylloc, + void *scanner, struct parser *parser); + +/** + * Convert bison location info to our own source location info. + * + * @param yylloc Bison location info. + * @return Internal location info. + */ +static struct src_loc src_loc(YYLTYPE yylloc); + +/** + * Print parsing error. + * Automatically called by bison. + * + * @param yylloc Location of error. + * @param lexer Lexer. + * @param parser Parser state. + * @param msg Message to print. 
+ */ +static void yyerror(YYLTYPE *yylloc, void *lexer, + struct parser *parser, const char *msg); + +/** + * Try to convert escape code to its actual value. + * I.e. '\n' -> 0x0a. + * + * @param c Escape character without backslash. + * @return Corresponding value. + */ +static char match_escape(char c); + +/** + * Similar to strdup() but skips quotation marks that would + * otherwise be included. + * I.e. "something" -> something. + * + * @param s String to clone, with quotation marks surrounding it. + * @return Identical string but without quotation marks around it. + */ +static char *strip(const char *s); + +%} + +%start input; +%% + +arg + : "(" cmds ")" {$$ = gen_apply($2);} + | "{" cmds "}" {$$ = gen_list($2);} + | ID {$$ = gen_id($1); free($1); /* gen_id() keeps its own copy */} + | STRING {$$ = gen_str($1); free($1); /* gen_str() keeps its own copy */} + | INT {$$ = gen_int($1);} + | FLOAT {$$ = gen_float($1);} + +rev_args + : rev_args arg {$$ = $2; $$->next = $1;} + | arg + +args + : rev_args {$$ = reverse_ast_list($1);} + +sep + : sep ";" + | sep NL + | ";" + | NL + +cmd + : args {$$ = gen_cmd($1);} + +rev_cmds + : rev_cmds sep cmd {$$ = $3; $$->next = $1;} + | cmd + +cmds + : rev_cmds {$$ = reverse_ast_list($1);} + | rev_cmds sep {$$ = reverse_ast_list($1);} + | sep rev_cmds {$$ = reverse_ast_list($2);} + | sep rev_cmds sep {$$ = reverse_ast_list($2);} + | {$$ = NULL;} + +input + : cmds {parser->tree = gen_list($1);} + +%% + +#include "gen_lexer.inc" + +/* I'm not convinced this is foolproof quite yet, more testing would be nice. 
+ */ +static int next_interesting_feature(YYSTYPE *yylval, YYLTYPE *yylloc, + void *scanner, struct parser *parser) +{ + size_t depth = 0; + while (1) { + int ret = yylex(yylval, yylloc, scanner, parser); + if (ret == LBRACE) { + depth++; + continue; + } + + if (ret == RBRACE && depth > 0) + depth--; + + if (ret == RBRACE && depth == 0) + return 0; + + if (ret == SEMICOLON && depth == 0) + return 0; + + /* return fatal error and parser should abort */ + if (ret == YYEOF) + /* some error for unmatched braces would be cool I think */ + return 1; + } +} + + +static struct src_loc src_loc(YYLTYPE yylloc) +{ + struct src_loc loc; + loc.first_line = yylloc.first_line; + loc.last_line = yylloc.last_line; + loc.first_col = yylloc.first_column; + loc.last_col = yylloc.last_column; + return loc; +} + +static void yyerror(YYLTYPE *yylloc, void *lexer, + struct parser *parser, const char *msg) +{ + (void)lexer; + parser->failed = true; /* a syntax error must make lyn_eval_str() report failure */ + struct src_issue issue; + issue.level = SRC_ERROR; + issue.loc = src_loc(*yylloc); + issue.fctx.fbuf = parser->buf; + issue.fctx.fname = parser->fname; + src_issue(issue, "%s", msg); /* msg is text, never a format string */ +} + +static char match_escape(char c) +{ + switch (c) { + case '\'': return '\''; + case '\\': return '\\'; + case 'a': return '\a'; + case 'b': return '\b'; + case 'f': return '\f'; + case 'n': return '\n'; + case 'r': return '\r'; + case 't': return '\t'; + case 'v': return '\v'; + } + + return c; +} + +static char *strip(const char *str) +{ + const size_t len = strlen(str) + 1; + char *buf = malloc(len); + if (!buf) { + /* should probably try to handle the error in some way... 
*/ + error("failed allocating buffer for string clone"); + free((void *)str); + return NULL; + } + + /* skip quotation marks */ + size_t j = 0; + for (size_t i = 1; i < len - 2; ++i) { + char c = str[i]; + + if (c == '\\') + c = match_escape(str[++i]); + + buf[j++] = c; + } + + buf[j] = 0; + free((void *)str); + return buf; + +} + +struct parser *create_parser() +{ + return calloc(1, sizeof(struct parser)); +} + +void destroy_parser(struct parser *p) +{ + yylex_destroy(p->lexer); + free(p); +} + +void parse(struct parser *p, const char *fname, const char *buf) +{ + p->fname = fname; + p->buf = buf; + + p->failed = false; + + yylex_init(&p->lexer); + yy_scan_string(buf, p->lexer); + yyparse(p->lexer, p); +} diff --git a/src/source.mk b/src/source.mk new file mode 100644 index 0000000..caa476c --- /dev/null +++ b/src/source.mk @@ -0,0 +1,2 @@ +SRC_LOCAL != echo src/*.c +LYN_SOURCES := $(LYN_SOURCES) $(SRC_LOCAL) |