diff options
author | Kimplul <kimi.h.kuparinen@gmail.com> | 2024-02-08 21:19:38 +0200 |
---|---|---|
committer | Kimplul <kimi.h.kuparinen@gmail.com> | 2024-02-08 21:19:38 +0200 |
commit | c035571d85e3d756804519d82de8b354f3910b29 (patch) | |
tree | 89714e13753d200d6e9a04f0d19d6dd1f7a55b2e /src | |
download | posthaste-c035571d85e3d756804519d82de8b354f3910b29.tar.gz posthaste-c035571d85e3d756804519d82de8b354f3910b29.zip |
project work phase 1
Diffstat (limited to 'src')
-rw-r--r-- | src/core.c | 68 | ||||
-rw-r--r-- | src/date.c | 72 | ||||
-rw-r--r-- | src/debug.c | 81 | ||||
-rw-r--r-- | src/lexer.l | 191 | ||||
-rw-r--r-- | src/main.c | 28 | ||||
-rw-r--r-- | src/parser.y | 243 | ||||
-rw-r--r-- | src/source.mk | 4 |
7 files changed, 687 insertions, 0 deletions
diff --git a/src/core.c b/src/core.c
new file mode 100644
index 0000000..7eca8bd
--- /dev/null
+++ b/src/core.c
@@ -0,0 +1,94 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <limits.h>
+#include <errno.h>
+
+#include <posthaste/debug.h>
+#include <posthaste/parser.h>
+#include <posthaste/core.h>
+
+/**
+ * Read whole file into a freshly allocated, null-terminated buffer.
+ * Possibly kind of silly to have both \p fname and \p f.
+ * Apparently there's no standardized way to get the file name of a
+ * file pointer.
+ *
+ * @param fname Name of file to read, only used in error messages.
+ * @param f File pointer to read from.
+ * @return Pointer to buffer with file contents, or \c NULL on failure.
+ * The caller is responsible for freeing the buffer.
+ */
+static char *read_file(const char *fname, FILE *f)
+{
+	if (fseek(f, 0, SEEK_END) != 0) {
+		error("failed seeking in %s: %s\n", fname, strerror(errno));
+		return NULL;
+	}
+
+	long s = ftell(f);
+	if (s == LONG_MAX) {
+		/** @todo should probably do this via fstat or something */
+		error("%s might be a directory", fname);
+		return NULL;
+	}
+
+	/* ftell signals failure with -1, which must never reach malloc */
+	if (s < 0 || fseek(f, 0, SEEK_SET) != 0) {
+		error("failed seeking in %s: %s\n", fname, strerror(errno));
+		return NULL;
+	}
+
+	char *buf = malloc((size_t)s + 1);
+	if (!buf)
+		return NULL;
+
+	/* read byte-wise so that a short read is distinguishable */
+	if (fread(buf, 1, (size_t)s, f) != (size_t)s) {
+		error("failed reading %s\n", fname);
+		free(buf);
+		return NULL;
+	}
+
+	/* remember terminating null */
+	buf[s] = 0;
+	return buf;
+}
+
+/**
+ * Read file \p fname and run it through the parser.
+ *
+ * @param fname Name of file to run.
+ * @return \c 0 when successful, \c -1 otherwise.
+ */
+int run(const char *fname)
+{
+	FILE *f = fopen(fname, "rb");
+	if (!f) {
+		error("failed opening %s: %s\n", fname, strerror(errno));
+		return -1;
+	}
+
+	char *buf = read_file(fname, f);
+	fclose(f);
+
+	if (!buf)
+		return -1;
+
+	struct parser *p = create_parser();
+	if (!p) {
+		/* the buffer would otherwise leak on this path */
+		free(buf);
+		return -1;
+	}
+
+	parse(p, fname, buf);
+	int ret = p->failed ? -1 : 0;
+
+	/* eventually do other stuff as well */
+
+	/* the parser refers to buf, so tear it down before freeing buf */
+	destroy_parser(p);
+	free(buf);
+	return ret;
+}
diff --git a/src/date.c b/src/date.c
new file mode 100644
index 0000000..7e6bc77
--- /dev/null
+++ b/src/date.c
@@ -0,0 +1,133 @@
+#include <time.h>
+
+#include <posthaste/date.h>
+
+/*
+ * A date is packed into one integer: the day lives in bits 0-4, the
+ * month in bits 5-8 and the year in the bits from 9 upwards.
+ */
+
+/**
+ * Build a date from a string of the form \c YYYY-MM-DD.
+ * Only the digit positions are read, the separators are not checked.
+ * The result is not necessarily a real calendar date, see date_valid().
+ *
+ * @param str String to convert, at least 11 characters long.
+ * @return Packed date.
+ */
+ph_date_t date_from_string(const char str[static 11])
+{
+	unsigned year = (str[0] - '0') * 1000
+		+ (str[1] - '0') * 100
+		+ (str[2] - '0') * 10
+		+ (str[3] - '0');
+
+	unsigned month = (str[5] - '0') * 10
+		+ (str[6] - '0');
+
+	unsigned day = (str[8] - '0') * 10
+		+ (str[9] - '0');
+
+	return date_from_numbers(year, month, day);
+}
+
+/**
+ * Pack \p year, \p month and \p day into a date.
+ * A month or day too large for its bit field is stored as \c 0, so
+ * that date_valid() rejects the result.
+ *
+ * @param year Year.
+ * @param month Month, \c 1 being January.
+ * @param day Day of month, starting from \c 1.
+ * @return Packed date.
+ */
+ph_date_t date_from_numbers(unsigned year, unsigned month, unsigned day)
+{
+	/* a field that doesn't fit would spill into its neighbour and alias
+	 * some other date, e.g. day 33 of month 1 would read back as day 1 */
+	if (month > 0xf)
+		month = 0;
+
+	if (day > 0x1f)
+		day = 0;
+
+	return year << 9 | month << 5 | day;
+}
+
+/**
+ * Unpack \p date into its components.
+ * Any of the output pointers may be \c NULL, in which case that
+ * component is skipped.
+ *
+ * @param date Date to unpack.
+ * @param year Where to store the year.
+ * @param month Where to store the month.
+ * @param day Where to store the day of month.
+ */
+void date_split(ph_date_t date, unsigned *year, unsigned *month, unsigned *day)
+{
+	if (year)
+		*year = date >> 9;
+
+	if (month)
+		*month = (date >> 5) & 0xf;
+
+	if (day)
+		*day = date & 0x1f;
+}
+
+/**
+ * Format \p date as \c YYYY-MM-DD.
+ *
+ * @param str Buffer to write to, at least 11 characters long.
+ * Always null-terminated on return.
+ * @param date Date to format.
+ */
+void date_to_string(char str[static 11], ph_date_t date)
+{
+	unsigned year, month, day;
+	date_split(date, &year, &month, &day);
+
+	str[0] = '0' + (year / 1000) % 10;
+	str[1] = '0' + (year / 100) % 10;
+	str[2] = '0' + (year / 10) % 10;
+	str[3] = '0' + year % 10;
+	str[4] = '-';
+	str[5] = '0' + (month / 10) % 10;
+	str[6] = '0' + month % 10;
+	str[7] = '-';
+	str[8] = '0' + (day / 10) % 10;
+	str[9] = '0' + day % 10;
+	str[10] = '\0';
+}
+
+/**
+ * Check that \p date is a real date in the Gregorian calendar.
+ * The check is done by hand rather than through mktime(), which counts
+ * years from 1900 and depends on the local time zone.
+ *
+ * @param date Date to check.
+ * @return \c true if \p date exists, \c false otherwise.
+ */
+bool date_valid(ph_date_t date)
+{
+	static const unsigned days_in_month[12] = {
+		31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31
+	};
+
+	unsigned year, month, day;
+	date_split(date, &year, &month, &day);
+
+	if (month < 1 || month > 12 || day < 1)
+		return false;
+
+	unsigned last_day = days_in_month[month - 1];
+
+	/* February gains a day every fourth year, except for centuries
+	 * not divisible by 400 */
+	bool leap = (year % 4 == 0 && year % 100 != 0) || year % 400 == 0;
+	if (month == 2 && leap)
+		last_day = 29;
+
+	return day <= last_day;
+}
diff --git a/src/debug.c b/src/debug.c
new file mode 100644
index 0000000..c146719
--- /dev/null
+++ b/src/debug.c
@@ -0,0 +1,102 @@
+#include <stdarg.h>
+#include <stdio.h>
+#include <string.h>
+
+#include <posthaste/debug.h>
+
+/**
+ * Find position in file buffer where line number \p no
+ * starts. Lines are assumed to be one-indexed, with
+ * \p no = \c 0 and \p no = \c 1 both considered the first line.
+ * If the buffer has fewer lines than \p no, the end of the buffer
+ * is returned.
+ *
+ * @param buf Buffer to look in.
+ * @param no Line number whose start to look for.
+ * @return Pointer to location in buffer where line number \p no
+ * starts.
+ */
+static const char *find_lineno(const char *buf, size_t no)
+{
+	if (no == 0 || no == 1)
+		return buf;
+
+	char c;
+	while ((c = *buf)) {
+		buf++;
+
+		if (c == '\n')
+			no--;
+
+		if (no == 1)
+			break;
+	}
+
+	return buf;
+}
+
+/**
+ * Print an issue to \c stderr: first the location and message, then
+ * the source line the issue starts on, and finally a marker line
+ * pointing out the columns in question.
+ *
+ * @param issue Where the issue is, i.e. file name, buffer and location.
+ * @param msg printf-style format string for the message.
+ * @param args Arguments for \p msg.
+ */
+void vsrc_issue(struct src_issue issue, const char *msg, va_list args)
+{
+	const char *line_start = find_lineno(issue.buf, issue.loc.first_line);
+	const char *line_end = strchr(line_start, '\n');
+	if (!line_end)
+		line_end = strchr(line_start, 0);
+
+	int line_len = (int)(line_end - line_start);
+
+	fprintf(stderr, "%s:%i:%i: ", issue.fname,
+	        issue.loc.first_line,
+	        issue.loc.first_col);
+
+	vfprintf(stderr, msg, args);
+	fputc('\n', stderr);
+
+	/* the width of the line number decides how wide the gutter is */
+	int lineno_len = snprintf(NULL, 0, "%i", issue.loc.first_line);
+	fputc(' ', stderr);
+	fprintf(stderr, "%i | ", issue.loc.first_line);
+
+	fprintf(stderr, "%.*s\n", line_len, line_start);
+
+	for (int i = 0; i < lineno_len + 2; ++i)
+		fputc(' ', stderr);
+
+	fprintf(stderr, "| ");
+
+	/* copy tabs from the source line so that the marker lines up
+	 * whatever the tab width, but never read past the end of the line */
+	for (int i = 0; i < issue.loc.first_col - 1; ++i) {
+		char c = (i < line_len && line_start[i] == '\t') ? '\t' : ' ';
+		fputc(c, stderr);
+	}
+
+	/* always mark the first column, even when the range is empty */
+	fputc('^', stderr);
+	for (int i = issue.loc.first_col + 1; i < issue.loc.last_col; ++i)
+		fputc('~', stderr);
+
+	fputc('\n', stderr);
+}
+
+/**
+ * Variadic convenience wrapper around vsrc_issue().
+ *
+ * @param issue Where the issue is.
+ * @param msg printf-style format string for the message.
+ */
+void src_issue(struct src_issue issue, const char *msg, ...)
+{
+	va_list args;
+	va_start(args, msg);
+	vsrc_issue(issue, msg, args);
+	va_end(args);
+}
diff --git a/src/lexer.l b/src/lexer.l
new file mode 100644
index 0000000..6f58bd5
--- /dev/null
+++ b/src/lexer.l
@@ -0,0 +1,242 @@
+%option reentrant noyywrap nounput noinput nodefault
+%{
+
+#include <stdio.h>
+#include <stdarg.h>
+#include <stdint.h>
+#include <string.h>
+
+#include <posthaste/parser.h>
+#include <posthaste/debug.h>
+
+/** Largest magnitude an integer literal is allowed to have. */
+#define LEX_INT_LIMIT INT64_C(1000000000000)
+
+/**
+ * Advance \p lloc past \p text.
+ * The previous end of \p lloc becomes the new start, and the new end
+ * is found by counting the lines and columns in \p text.
+ *
+ * @param parser Current parser state, unused.
+ * @param lloc Location to update.
+ * @param text Text that was just matched.
+ */
+static void update_yylloc(struct parser *parser, YYLTYPE *lloc, const char *text)
+{
+	(void)parser;
+
+	lloc->first_line = lloc->last_line;
+	lloc->first_column = lloc->last_column;
+
+	for (size_t i = 0; text[i] != 0; ++i) {
+		if (text[i] == '\n') {
+			lloc->last_line++;
+			/* flex uses 1 based indexing */
+			lloc->last_column = 1;
+		} else {
+			lloc->last_column++;
+		}
+	}
+}
+
+/**
+ * Report a lexing error and mark the parser as failed.
+ *
+ * @param p Current parser state.
+ * @param loc Location of the error.
+ * @param msg printf-style format string for the message.
+ */
+static void lex_fail(struct parser *p, struct src_loc loc, const char *msg, ...)
+{
+	va_list args;
+	va_start(args, msg);
+
+	struct src_issue issue;
+	issue.loc = loc;
+	issue.fname = p->fname;
+	issue.buf = p->buf;
+	vsrc_issue(issue, msg, args);
+
+	va_end(args);
+
+	p->failed = true;
+}
+
+/**
+ * Convert a date literal into a date, reporting an error if the
+ * literal is not a valid date.
+ *
+ * @param p Current parser state.
+ * @param loc Location of the literal.
+ * @param date Text of the literal.
+ * @return The date, or \c 0 if the literal is not a valid date.
+ */
+static ph_date_t lex_date(struct parser *p, struct src_loc loc, const char *date)
+{
+	ph_date_t d = date_from_string(date);
+	if (!date_valid(d)) {
+		lex_fail(p, loc, "Not a valid date.");
+		return 0;
+	}
+
+	return d;
+}
+
+/**
+ * Convert an integer literal into an integer, reporting an error if
+ * the literal is too large.
+ * The literal may start with a minus sign and may use \c ' as a digit
+ * separator.
+ *
+ * @param p Current parser state.
+ * @param loc Location of the literal.
+ * @param num Text of the literal.
+ * @return Value of the literal, or \c 0 if it is too large.
+ */
+static int64_t lex_int(struct parser *p, struct src_loc loc, const char *num)
+{
+	bool neg = num[0] == '-';
+
+	/* jump over minus sign */
+	if (neg)
+		num += 1;
+
+	int64_t sum = 0;
+	int64_t power = 1;
+	size_t n = strlen(num);
+	/* at most 18 digits are summed, so neither sum nor power overflows */
+	if (n > 18) {
+		lex_fail(p, loc, "Literal integer too large");
+		return 0;
+	}
+
+	for (size_t i = 0; i < n; ++i) {
+		/* jump over separators */
+		char c = num[n - i - 1];
+		if (c == '\'')
+			continue;
+
+		/* decimal value at i:th least significant place */
+		int64_t d = c - '0';
+		sum += d * power;
+		power *= 10;
+	}
+
+	if (sum > LEX_INT_LIMIT) {
+		lex_fail(p, loc, "Literal integer too large");
+		return 0;
+	}
+
+	if (neg)
+		sum = -sum;
+
+	return sum;
+}
+
+#define YY_USER_ACTION update_yylloc(parser, yylloc, yytext);
+%}
+
+IDENT		[a-z][a-zA-Z0-9_]+
+FUNC_IDENT	[A-Z][a-z0-9_]+
+PROC_IDENT	[A-Z]{2}[A-Z0-9_]*
+
+DATE_LITERAL	[0-9]{4}-[0-9]{2}-[0-9]{2}
+INT_LITERAL	-?[0-9]+('[0-9][0-9][0-9]+)*
+STRING		\"(\\.|[^"\\])*\"
+
+%x SC_COMMENT
+
+%%
+"(%"	{BEGIN(SC_COMMENT);}
+<SC_COMMENT>{
+	"(%"	{parser->comment_nesting += 1;}
+	"%)"	{
+		if (parser->comment_nesting)
+			parser->comment_nesting -= 1;
+		else
+			BEGIN(INITIAL);
+	}
+
+	/* a lone ( or % doesn't open or close anything, but has to be
+	 * matched by some rule to keep the lexer from jamming */
+	[^(%\n]+	{}
+	[(]	{}
+	[%]	{}
+	\n	{}
+
+	<<EOF>>	{
+		lex_fail(parser, src_loc(*yylloc), "Unterminated comment");
+		yyterminate();
+	}
+}
+
+"("	{return LPAREN;}
+")"	{return RPAREN;}
+"["	{return LSQUARE;}
+"]"	{return RSQUARE;}
+"{"	{return LCURLY;}
+"}"	{return RCURLY;}
+
+"'"	{return APOSTROPHE;}
+"&"	{return AMPERSAND;}
+","	{return COMMA;}
+"."	{return DOT;}
+"="	{return EQ;}
+"<"	{return LT;}
+"+"	{return PLUS;}
+"-"	{return MINUS;}
+"*"	{return MULT;}
+"/"	{return DIV;}
+
+"var"	{return VAR;}
+"is"	{return IS;}
+"unless"	{return UNLESS;}
+"otherwise"	{return OTHERWISE;}
+"until"	{return UNTIL;}
+"do"	{return DO;}
+"done"	{return DONE;}
+"procedure"	{return PROCEDURE;}
+"function"	{return FUNCTION;}
+"return"	{return RETURN;}
+"print"	{return PRINT;}
+"end"	{return END;}
+
+{STRING}	{
+	/* NOTE(review): yytext is only valid until the next call to yylex,
+	 * so this (and the identifiers below) likely needs a copy once the
+	 * real parser keeps tokens around */
+	yylval->str = yytext;
+	return STRING;
+}
+
+{DATE_LITERAL}	{
+	yylval->num = lex_date(parser, src_loc(*yylloc), yytext);
+	return DATE_LITERAL;
+}
+
+{INT_LITERAL}	{
+	yylval->snum = lex_int(parser, src_loc(*yylloc), yytext);
+	return INT_LITERAL;
+}
+
+{IDENT}	{
+	yylval->str = yytext;
+	return IDENT;
+}
+
+{FUNC_IDENT}	{
+	yylval->str = yytext;
+	return FUNC_IDENT;
+}
+
+{PROC_IDENT}	{
+	yylval->str = yytext;
+	return PROC_IDENT;
+}
+
+[[:space:]]+	{/* skip whitespace */}
+
+.	{
+	lex_fail(parser, src_loc(*yylloc), "Unexpected token: %s", yytext);
+}
+%%
diff --git a/src/main.c b/src/main.c
new file mode 100644
index 0000000..a40d841
--- /dev/null
+++ b/src/main.c
@@ -0,0 +1,37 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <posthaste/core.h>
+
+/**
+ * Print usage instructions.
+ *
+ * @param f File to print to.
+ * @param pname Name the program was invoked with.
+ */
+static void usage(FILE *f, const char *pname)
+{
+	fprintf(f, "Usage:\n %s <filename>\n", pname);
+}
+
+/**
+ * Main entry to posthaste.
+ * Checks command line and drives the rest of the language.
+ *
+ * Feels kind of weird documenting main, but doxygen warns about not
+ * doing it so whatever.
+ *
+ * @param argc Number of command line arguments.
+ * @param argv Array of command line arguments.
+ * @return \c 0 when successful, non-zero otherwise.
+ */
+int main(int argc, char *argv[])
+{
+	if (argc != 2) {
+		/* argv[0] is allowed to be missing when argc is 0 */
+		usage(stderr, argc > 0 ? argv[0] : "posthaste");
+		return EXIT_FAILURE;
+	}
+
+	/* exit statuses are truncated to a byte, so don't leak -1 */
+	return run(argv[1]) == 0 ? EXIT_SUCCESS : EXIT_FAILURE;
+}
diff --git a/src/parser.y b/src/parser.y
new file mode 100644
index 0000000..8a4ddb5
--- /dev/null
+++ b/src/parser.y
@@ -0,0 +1,300 @@
+%{
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdint.h>
+
+#include <posthaste/parser.h>
+#include <posthaste/date.h>
+
+/** Applies \p M to every token kind, used for printing token names. */
+#define FOREACH_TOKEN(M) \
+	M(LPAREN) \
+	M(RPAREN) \
+	M(LSQUARE) \
+	M(RSQUARE) \
+	M(LCURLY) \
+	M(RCURLY) \
+	M(APOSTROPHE) \
+	M(AMPERSAND) \
+	M(COMMA) \
+	M(DOT) \
+	M(EQ) \
+	M(LT) \
+	M(PLUS) \
+	M(MINUS) \
+	M(MULT) \
+	M(DIV) \
+	M(VAR) \
+	M(IS) \
+	M(UNLESS) \
+	M(OTHERWISE) \
+	M(UNTIL) \
+	M(DO) \
+	M(DONE) \
+	M(PROCEDURE) \
+	M(FUNCTION) \
+	M(RETURN) \
+	M(PRINT) \
+	M(END) \
+	M(STRING) \
+	M(DATE_LITERAL) \
+	M(INT_LITERAL) \
+	M(IDENT) \
+	M(FUNC_IDENT) \
+	M(PROC_IDENT)
+
+%}
+
+%locations
+
+%define parse.trace
+%define parse.error verbose
+%define api.pure full
+%define lr.type ielr
+
+%lex-param {void *scanner} {struct parser *parser}
+%parse-param {void *scanner} {struct parser* parser}
+
+%union {
+	struct ast_node *node;
+	ph_date_t num;
+	int64_t snum;
+	char *str;
+};
+
+%token <str> STRING
+%token <num> DATE_LITERAL
+%token <snum> INT_LITERAL
+%token <str> IDENT;
+%token <str> FUNC_IDENT;
+%token <str> PROC_IDENT;
+
+%token LPAREN "("
+%token RPAREN ")"
+%token LSQUARE "["
+%token RSQUARE "]"
+%token LCURLY "{"
+%token RCURLY "}"
+
+%token APOSTROPHE "'"
+%token AMPERSAND "&"
+%token COMMA ","
+%token DOT "."
+%token EQ "="
+%token LT "<"
+%token PLUS "+"
+%token MINUS "-"
+%token MULT "*"
+%token DIV "/"
+
+%token VAR "var"
+%token IS "is"
+%token UNLESS "unless"
+%token OTHERWISE "otherwise"
+%token UNTIL "until"
+%token DO "do"
+%token DONE "done"
+%token PROCEDURE "procedure"
+%token FUNCTION "function"
+%token RETURN "return"
+%token PRINT "print"
+%token END "end"
+
+%{
+
+/** Modifies the signature of yylex to fit our parser better. */
+#define YY_DECL int yylex(YYSTYPE *yylval, YYLTYPE *yylloc, \
+	void *yyscanner, struct parser *parser)
+
+/**
+ * Declare yylex.
+ *
+ * @param yylval Bison current value.
+ * @param yylloc Bison location info.
+ * @param yyscanner Flex scanner.
+ * @param parser Current parser state.
+ * @return Kind of the token that was read, \c 0 at the end of input.
+ * More info on yylex() can be found in the flex manual.
+ */
+YY_DECL;
+
+/**
+ * Convert bison location info to our own source location info.
+ *
+ * @param yylloc Bison location info.
+ * @return Internal location info.
+ */
+static struct src_loc src_loc(YYLTYPE yylloc);
+
+/**
+ * Print parsing error and mark the parser as failed.
+ * Automatically called by bison.
+ *
+ * @param yylloc Location of error.
+ * @param lexer Lexer.
+ * @param parser Parser state.
+ * @param msg Message to print.
+ */
+static void yyerror(YYLTYPE *yylloc, void *lexer,
+		struct parser *parser, const char *msg);
+
+%}
+
+%start input;
+%%
+
+input: /* empty */
+
+%%
+
+#include "gen_lexer.inc"
+
+/**
+ * Print one token to \c stdout, along with its location and, for
+ * integers, dates and identifiers, its value.
+ *
+ * @param p Current parser state.
+ * @param yychar Kind of the token.
+ * @param yylval Value of the token.
+ * @param yylloc Location of the token.
+ */
+static void dump_yychar(struct parser *p, int yychar, YYSTYPE yylval, YYLTYPE yylloc)
+{
+	struct src_loc loc = src_loc(yylloc);
+	printf("%s:%d:%d: ", p->fname, loc.first_line, loc.first_col);
+
+#define PRINT_NAME(token) case token: printf(#token " "); break;
+	switch (yychar) {
+	FOREACH_TOKEN(PRINT_NAME);
+	default: printf("Unknown yychar\n"); return;
+	}
+#undef PRINT_NAME
+
+	char date_str[11] = {0};
+	switch (yychar) {
+	case INT_LITERAL: printf("(%lld)", (long long int)yylval.snum); break;
+	case IDENT: printf("(%s)", yylval.str); break;
+	case FUNC_IDENT: printf("(%s)", yylval.str); break;
+	case PROC_IDENT: printf("(%s)", yylval.str); break;
+	case DATE_LITERAL:
+		date_to_string(date_str, yylval.num);
+		printf("(%s)", date_str);
+		break;
+	default:
+		/* nothing more to print for the other tokens */
+		break;
+	}
+
+	printf("\n");
+}
+
+/**
+ * Run the lexer over the whole input and print each token.
+ * Debugging aid.
+ *
+ * @param p Current parser state, with the lexer already set up.
+ */
+static void dump_lex(struct parser *p)
+{
+	int yychar;
+	/* not every token sets a value, so start from a known state */
+	YYSTYPE yylval = {0};
+	YYLTYPE yylloc = {1, 1, 1, 1};
+
+	/* run lexer until we reach the end of the file */
+	while ((yychar = yylex(&yylval, &yylloc, p->lexer, p)) != YYEOF) {
+		dump_yychar(p, yychar, yylval, yylloc);
+	}
+}
+
+static struct src_loc src_loc(YYLTYPE yylloc)
+{
+	struct src_loc loc;
+	loc.first_line = yylloc.first_line;
+	loc.last_line = yylloc.last_line;
+	loc.first_col = yylloc.first_column;
+	loc.last_col = yylloc.last_column;
+	return loc;
+}
+
+static void yyerror(YYLTYPE *yylloc, void *lexer,
+		struct parser *parser, const char *msg)
+{
+	(void)lexer;
+
+	struct src_issue issue;
+	issue.loc = src_loc(*yylloc);
+	issue.fname = parser->fname;
+	issue.buf = parser->buf;
+
+	/* msg is data, not a format string */
+	src_issue(issue, "%s", msg);
+
+	/* lex_fail() does the same for lexing errors */
+	parser->failed = true;
+}
+
+/**
+ * Allocate a parser with all fields zeroed.
+ *
+ * @return New parser, or \c NULL if out of memory.
+ * Should be released with destroy_parser().
+ */
+struct parser *create_parser(void)
+{
+	return calloc(1, sizeof(struct parser));
+}
+
+/**
+ * Release \p p and the lexer it owns.
+ * Does not free the file name or buffer given to parse().
+ *
+ * @param p Parser to destroy, may be \c NULL.
+ */
+void destroy_parser(struct parser *p)
+{
+	if (!p)
+		return;
+
+	/* the lexer only exists once parse() has been called */
+	if (p->lexer)
+		yylex_destroy(p->lexer);
+
+	free(p);
+}
+
+/**
+ * Parse \p buf.
+ * For now this only runs the lexer and prints the tokens it finds.
+ * Whether something went wrong can be read from \c p->failed.
+ *
+ * @param p Parser to use.
+ * @param fname Name of file \p buf was read from, for messages.
+ * @param buf Null-terminated file contents, must outlive the parse.
+ */
+void parse(struct parser *p, const char *fname, const char *buf)
+{
+	p->fname = fname;
+	p->buf = buf;
+
+	p->comment_nesting = 0;
+
+	p->failed = false;
+
+	if (yylex_init(&p->lexer) != 0) {
+		fprintf(stderr, "failed initializing lexer for %s\n", fname);
+		p->lexer = NULL;
+		p->failed = true;
+		return;
+	}
+
+	yy_scan_string(buf, p->lexer);
+
+	// debugging, remember to reset yy_scan_string once the actual parser
+	// runs
+	dump_lex(p);
+
+	// yyparse(p->lexer, p);
+}
diff --git a/src/source.mk b/src/source.mk
new file mode 100644
index 0000000..6a8d30f
--- /dev/null
+++ b/src/source.mk
@@ -0,0 +1,4 @@
+SRCS != echo src/*.c
+
+# rules for gen_parser.c are in scripts/makefile
+POSTHASTE_SOURCES += $(SRCS) gen/gen_parser.c