aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
author: Kimplul <kimi.h.kuparinen@gmail.com>, 2024-02-08 21:19:38 +0200
committer: Kimplul <kimi.h.kuparinen@gmail.com>, 2024-02-08 21:19:38 +0200
commit: c035571d85e3d756804519d82de8b354f3910b29 (patch)
tree: 89714e13753d200d6e9a04f0d19d6dd1f7a55b2e /src
download: posthaste-c035571d85e3d756804519d82de8b354f3910b29.tar.gz
download: posthaste-c035571d85e3d756804519d82de8b354f3910b29.zip
project work phase 1
Diffstat (limited to 'src')
-rw-r--r-- src/core.c | 68
-rw-r--r-- src/date.c | 72
-rw-r--r-- src/debug.c | 81
-rw-r--r-- src/lexer.l | 191
-rw-r--r-- src/main.c | 28
-rw-r--r-- src/parser.y | 243
-rw-r--r-- src/source.mk | 4
7 files changed, 687 insertions, 0 deletions
diff --git a/src/core.c b/src/core.c
new file mode 100644
index 0000000..7eca8bd
--- /dev/null
+++ b/src/core.c
@@ -0,0 +1,68 @@
+#include <stdlib.h>
+#include <string.h>
+#include <limits.h>
+#include <errno.h>
+
+#include <posthaste/debug.h>
+#include <posthaste/parser.h>
+#include <posthaste/core.h>
+
+/**
+ * Read whole file into a buffer and return pointer to buffer.
+ * Possibly kind of silly to have both \p fname and \p f.
+ * Apparently there's no standardized way to get the file name of a
+ * file pointer.
+ *
+ * The returned buffer is allocated with malloc() and null-terminated,
+ * the caller is responsible for passing it to free().
+ *
+ * @param fname Name of file to read, only used in error messages.
+ * @param f File pointer, has to be seekable.
+ * @return Pointer to buffer with file contents, or \c NULL if the size
+ * of the file couldn't be determined, the buffer couldn't be allocated
+ * or the file couldn't be read completely.
+ */
+static char *read_file(const char *fname, FILE *f)
+{
+	if (fseek(f, 0, SEEK_END) != 0) {
+		error("failed seeking in %s: %s\n", fname, strerror(errno));
+		return NULL;
+	}
+
+	/* ftell() reports failure with a negative value, which must not
+	 * reach malloc() or the index of the terminating null below.
+	 * LONG_MAX is kept as the existing 'probably a directory' check. */
+	long s = ftell(f);
+	if (s < 0 || s == LONG_MAX) {
+		/** @todo should probably do this via fstat or something */
+		error("%s might be a directory", fname);
+		return NULL;
+	}
+
+	if (fseek(f, 0, SEEK_SET) != 0) {
+		error("failed seeking in %s: %s\n", fname, strerror(errno));
+		return NULL;
+	}
+
+	size_t size = (size_t)s;
+	char *buf = malloc(size + 1);
+	if (!buf)
+		return NULL;
+
+	/* read byte-sized items so that a short read is visible in the
+	 * return value instead of silently leaving garbage in the buffer */
+	if (fread(buf, 1, size, f) != size) {
+		error("failed reading %s\n", fname);
+		free(buf);
+		return NULL;
+	}
+
+	/* remember terminating null */
+	buf[size] = 0;
+	return buf;
+}
+
+/**
+ * Read the file \p fname into memory and hand it to the parser.
+ *
+ * @param fname Name of file to process.
+ * @return \c 0 when the file was read and the parser didn't flag a
+ * failure, \c -1 otherwise.
+ */
+int run(const char *fname)
+{
+	FILE *f = fopen(fname, "rb");
+	if (!f) {
+		error("failed opening %s: %s\n", fname, strerror(errno));
+		return -1;
+	}
+
+	char *buf = read_file(fname, f);
+	fclose(f);
+
+	if (!buf)
+		return -1;
+
+	struct parser *p = create_parser();
+	if (!p) {
+		/* the file contents are ours, don't leak them on this path */
+		free(buf);
+		return -1;
+	}
+
+	parse(p, fname, buf);
+	int ret = p->failed ? -1 : 0;
+
+	/* eventually do other stuff as well */
+
+	/* the parser keeps a pointer to buf, so get rid of the parser first */
+	destroy_parser(p);
+	free(buf);
+	return ret;
+}
diff --git a/src/date.c b/src/date.c
new file mode 100644
index 0000000..7e6bc77
--- /dev/null
+++ b/src/date.c
@@ -0,0 +1,72 @@
+#include <time.h>
+
+#include <posthaste/date.h>
+
+/**
+ * Convert a string shaped like \c YYYY-MM-DD into a packed date.
+ * Only the digit positions are read; the characters at indexes 4 and 7
+ * are skipped and nothing is checked to actually be a digit.
+ *
+ * @param str String to convert, at least 11 characters long.
+ * @return Packed date built by date_from_numbers().
+ */
+ph_date_t date_from_string(const char str[static 11])
+{
+	/* YYYY */
+	unsigned year = 0;
+	for (int i = 0; i < 4; ++i)
+		year = year * 10 + (str[i] - '0');
+
+	/* MM */
+	int month_tens = str[5] - '0';
+	int month_ones = str[6] - '0';
+	unsigned month = month_tens * 10 + month_ones;
+
+	/* DD */
+	int day_tens = str[8] - '0';
+	int day_ones = str[9] - '0';
+	unsigned day = day_tens * 10 + day_ones;
+
+	return date_from_numbers(year, month, day);
+}
+
+/**
+ * Pack a date into a single integer.
+ * The day lives in bits 0-4, the month in bits 5-8 and the year in the
+ * bits from 9 upwards, matching what date_split() extracts.
+ *
+ * A month or day that doesn't fit its bit field would spill into the
+ * neighbouring field and turn an invalid date into some other, possibly
+ * valid, date (17 as month would become month 1 of the next odd year).
+ * Such components are stored as \c 0, which is never a valid month or
+ * day, so date_valid() rejects the result.
+ *
+ * @param year Year of date.
+ * @param month Month of date, 1-12 for valid dates.
+ * @param day Day of date, 1-31 for valid dates.
+ * @return Packed date.
+ */
+ph_date_t date_from_numbers(unsigned year, unsigned month, unsigned day)
+{
+	if (month > 0xf)
+		month = 0;
+
+	if (day > 0x1f)
+		day = 0;
+
+	return (year << 9) | (month << 5) | day;
+}
+
+/**
+ * Unpack a date into its components.
+ * Any of the output pointers may be \c NULL, in which case that
+ * component is skipped.
+ *
+ * @param date Packed date.
+ * @param year Where to store the year (bits 9 and up).
+ * @param month Where to store the month (bits 5-8).
+ * @param day Where to store the day (bits 0-4).
+ */
+void date_split(ph_date_t date, unsigned *year, unsigned *month, unsigned *day)
+{
+	const unsigned y = date >> 9;
+	const unsigned m = (date >> 5) & 0xf;
+	const unsigned d = date & 0x1f;
+
+	if (year)
+		*year = y;
+
+	if (month)
+		*month = m;
+
+	if (day)
+		*day = d;
+}
+
+/**
+ * Format a packed date as \c YYYY-MM-DD.
+ * Only the four lowest decimal digits of the year are written.
+ *
+ * @param str Buffer to write to, at least 11 characters long. Always
+ * null-terminated afterwards.
+ * @param date Packed date to format.
+ */
+void date_to_string(char str[static 11], ph_date_t date)
+{
+	unsigned year, month, day;
+	date_split(date, &year, &month, &day);
+
+	char *out = str;
+
+	/* thousands, hundreds, tens and ones of the year */
+	for (unsigned div = 1000; div > 0; div /= 10)
+		*out++ = '0' + (year / div) % 10;
+
+	*out++ = '-';
+	*out++ = '0' + (month / 10) % 10;
+	*out++ = '0' + month % 10;
+
+	*out++ = '-';
+	*out++ = '0' + (day / 10) % 10;
+	*out++ = '0' + day % 10;
+
+	*out = '\0';
+}
+
+/**
+ * Check that a packed date exists in the (proleptic) Gregorian calendar.
+ *
+ * This used to go through mktime(), but \c tm_year counts years from
+ * 1900, so storing the year as-is made the leap year check look at
+ * year + 1900: 2000-02-29 was rejected and 2100-02-29 accepted. mktime()
+ * also depends on the range of \c time_t and uses \c -1 both as an
+ * error and as a legitimate time, so the calendar rules are spelled out
+ * here instead.
+ *
+ * @param date Packed date to check.
+ * @return \c true if the month is 1-12 and the day exists in that month
+ * of that year, \c false otherwise.
+ */
+bool date_valid(ph_date_t date)
+{
+	static const unsigned char month_days[12] = {
+		31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31
+	};
+
+	unsigned year, month, day;
+	date_split(date, &year, &month, &day);
+
+	if (month < 1 || month > 12)
+		return false;
+
+	unsigned last_day = month_days[month - 1];
+
+	/* every fourth year is a leap year, except for centuries that
+	 * aren't divisible by 400 */
+	bool leap = (year % 4 == 0 && year % 100 != 0) || year % 400 == 0;
+	if (month == 2 && leap)
+		last_day = 29;
+
+	return day >= 1 && day <= last_day;
+}
diff --git a/src/debug.c b/src/debug.c
new file mode 100644
index 0000000..c146719
--- /dev/null
+++ b/src/debug.c
@@ -0,0 +1,81 @@
+#include <stdarg.h>
+#include <string.h>
+
+#include <posthaste/debug.h>
+
+/**
+ * Locate the start of line \p no in a file buffer.
+ * Line numbers are one-indexed, and \p no = \c 0 is treated the same as
+ * \p no = \c 1, i.e. the very beginning of the buffer. If the buffer
+ * has fewer lines than \p no, the terminating null is returned.
+ *
+ * @param buf Null-terminated buffer to look in.
+ * @param no Line number whose start to look for.
+ * @return Pointer to location in buffer where line number \p no
+ * starts.
+ */
+static const char *find_lineno(const char *buf, size_t no)
+{
+	const char *cursor = buf;
+	size_t remaining = no;
+
+	/* each newline we step over brings us one line closer */
+	while (remaining > 1 && *cursor != '\0') {
+		if (*cursor == '\n')
+			remaining--;
+
+		cursor++;
+	}
+
+	return cursor;
+}
+
+/**
+ * Print an issue in the source to \c stderr, roughly in the form
+ *
+ * \verbatim
+ * file:line:col: message
+ *  12 | offending source line
+ *     |           ^~~~~~
+ * \endverbatim
+ *
+ * @param issue Where the issue is: file name, file buffer and location.
+ * @param msg printf-style format string for the message.
+ * @param args Arguments for \p msg.
+ */
+void vsrc_issue(struct src_issue issue, const char *msg, va_list args)
+{
+	const char *line_start = find_lineno(issue.buf, issue.loc.first_line);
+	const char *line_end = strchr(line_start, '\n');
+	if (!line_end)
+		line_end = strchr(line_start, 0);
+
+	int line_len = line_end - line_start;
+
+	fprintf(stderr, "%s:%i:%i: ", issue.fname,
+			issue.loc.first_line,
+			issue.loc.first_col);
+
+	vfprintf(stderr, msg, args);
+	fputc('\n', stderr);
+
+	int lineno_len = snprintf(NULL, 0, "%i", issue.loc.first_line);
+	fputc(' ', stderr);
+	fprintf(stderr, "%i | ", issue.loc.first_line);
+
+	fprintf(stderr, "%.*s\n", line_len, line_start);
+
+	/* line up the gutter with the one printed above */
+	for (int i = 0; i < lineno_len + 2; ++i)
+		fputc(' ', stderr);
+
+	fprintf(stderr, "| ");
+
+	/* repeat tabs from the source line so the marker ends up under the
+	 * right column regardless of tab width. A column past the end of
+	 * the line must not make us read outside the line (or the buffer),
+	 * so anything beyond it is padded with plain spaces. */
+	for (int i = 0; i < issue.loc.first_col - 1; ++i) {
+		int is_tab = i < line_len && line_start[i] == '\t';
+		fputc(is_tab ? '\t' : ' ', stderr);
+	}
+
+	for (int i = issue.loc.first_col; i < issue.loc.last_col; ++i) {
+		if (i == issue.loc.first_col)
+			fputc('^', stderr);
+		else
+			fputc('~', stderr);
+	}
+
+	fputc('\n', stderr);
+}
+
+/**
+ * Variadic front end for vsrc_issue().
+ *
+ * @param issue Where the issue is.
+ * @param msg printf-style format string for the message.
+ * @param ... Arguments for \p msg.
+ */
+void src_issue(struct src_issue issue, const char *msg, ...)
+{
+	va_list ap;
+
+	va_start(ap, msg);
+	vsrc_issue(issue, msg, ap);
+	va_end(ap);
+}
diff --git a/src/lexer.l b/src/lexer.l
new file mode 100644
index 0000000..6f58bd5
--- /dev/null
+++ b/src/lexer.l
@@ -0,0 +1,191 @@
+%option reentrant noyywrap nounput noinput nodefault
+%{
+
+#include <stdio.h>
+#include <stdarg.h>
+
+#include <posthaste/parser.h>
+#include <posthaste/debug.h>
+
+/*
+ * Advance the location past the text that was just matched: the new
+ * token starts where the previous one ended, and its end is found by
+ * walking the text while counting lines and columns.
+ */
+static void update_yylloc(struct parser *parser, YYLTYPE *lloc, const char *text)
+{
+	(void)parser;
+
+	lloc->first_line = lloc->last_line;
+	lloc->first_column = lloc->last_column;
+
+	for (const char *c = text; *c != '\0'; ++c) {
+		if (*c != '\n') {
+			lloc->last_column++;
+			continue;
+		}
+
+		lloc->last_line++;
+		/* flex uses 1 based indexing */
+		lloc->last_column = 1;
+	}
+}
+
+/*
+ * Report a lexing error at loc and mark the parser as failed.
+ * msg is a printf-style format string.
+ */
+static void lex_fail(struct parser *p, struct src_loc loc, const char *msg, ...)
+{
+	struct src_issue issue = {
+		.loc = loc,
+		.fname = p->fname,
+		.buf = p->buf,
+	};
+
+	va_list ap;
+	va_start(ap, msg);
+	vsrc_issue(issue, msg, ap);
+	va_end(ap);
+
+	p->failed = true;
+}
+
+/*
+ * Convert the text of a date literal into a packed date.
+ * Dates that don't exist are reported through lex_fail() and
+ * turned into 0.
+ */
+static ph_date_t lex_date(struct parser *p, struct src_loc loc, const char *date)
+{
+	ph_date_t parsed = date_from_string(date);
+	if (date_valid(parsed))
+		return parsed;
+
+	lex_fail(p, loc, "Not a valid date.");
+	return 0;
+}
+
+/*
+ * Convert the text of an integer literal into its value.
+ * Accepts an optional leading minus sign and ' separators between
+ * digits. Literals that are too long or whose magnitude exceeds
+ * 1000000000000 are reported through lex_fail() and turned into 0.
+ */
+static int64_t lex_int(struct parser *p, struct src_loc loc, const char *num)
+{
+	/* largest magnitude a literal may have */
+	const int64_t limit = 1000000000000;
+
+	/* jump over minus sign */
+	const char *digits = num;
+	bool neg = *digits == '-';
+	if (neg)
+		digits++;
+
+	/* 18 decimal digits always fit in an int64_t, so no need to worry
+	 * about overflow while summing below */
+	if (strlen(digits) > 18) {
+		lex_fail(p, loc, "Literal integer too large");
+		return 0;
+	}
+
+	int64_t sum = 0;
+	for (const char *c = digits; *c != '\0'; ++c) {
+		/* jump over separators */
+		if (*c == '\'')
+			continue;
+
+		/* shift what we have one decimal place and add the new digit */
+		sum = sum * 10 + (*c - '0');
+	}
+
+	if (sum > limit) {
+		lex_fail(p, loc, "Literal integer too large");
+		return 0;
+	}
+
+	return neg ? -sum : sum;
+}
+
+#define YY_USER_ACTION update_yylloc(parser, yylloc, yytext);
+%}
+
+/* Identifiers are told apart by case:
+ * IDENT: lowercase letter followed by one or more letters, digits or
+ * underscores.
+ * FUNC_IDENT: uppercase letter followed by one or more lowercase
+ * letters, digits or underscores.
+ * PROC_IDENT: two uppercase letters followed by any number of uppercase
+ * letters, digits or underscores. */
+IDENT [a-z][a-zA-Z0-9_]+
+FUNC_IDENT [A-Z][a-z0-9_]+
+PROC_IDENT [A-Z]{2}[A-Z0-9_]*
+
+/* DATE_LITERAL only fixes the shape NNNN-NN-NN, whether the date exists is
+ * checked in lex_date().
+ * INT_LITERAL is an optional minus sign and digits, optionally followed
+ * by groups of digits that each start with a ' separator.
+ * NOTE(review): a group is three *or more* digits as written, confirm
+ * that exactly three wasn't intended.
+ * STRING is double-quoted; a backslash escapes any character except a
+ * newline, unescaped newlines are allowed inside the string. */
+DATE_LITERAL [0-9]{4}-[0-9]{2}-[0-9]{2}
+INT_LITERAL -?[0-9]+('[0-9][0-9][0-9]+)*
+STRING \"(\\.|[^"\\])*\"
+
+/* exclusive start condition, only the comment rules apply inside comments */
+%x SC_COMMENT
+
+%%
+"(%" {BEGIN(SC_COMMENT);}
+<SC_COMMENT>{
+ /* comments nest, comment_nesting counts how many inner comments
+  * still have to be closed before the outermost one ends */
+ "(%" {parser->comment_nesting += 1;}
+ "%)" {
+ if (parser->comment_nesting)
+ parser->comment_nesting -= 1;
+ else
+ BEGIN(INITIAL);
+ }
+
+ /* magic to avoid lexer jamming on open braces */
+ "*"+ {}
+ [^(%\n]+ {}
+ [(] {}
+ /* a lone percent sign that doesn't close the comment; without this
+  * rule nothing matches it and, as the default rule is disabled, the
+  * scanner jams. "%)" still wins when applicable as it's longer. */
+ [%] {}
+ \n {}
+}
+
+ /* punctuation and operators
+  * NOTE(review): a minus sign directly followed by digits is lexed as
+  * one negative INT_LITERAL rather than MINUS, as the longer match
+  * wins, confirm that this is what the grammar expects */
+"(" {return LPAREN;}
+")" {return RPAREN;}
+"[" {return LSQUARE;}
+"]" {return RSQUARE;}
+"{" {return LCURLY;}
+"}" {return RCURLY;}
+
+"'" {return APOSTROPHE;}
+"&" {return AMPERSAND;}
+"," {return COMMA;}
+"." {return DOT;}
+"=" {return EQ;}
+"<" {return LT;}
+"+" {return PLUS;}
+"-" {return MINUS;}
+"*" {return MULT;}
+"/" {return DIV;}
+
+ /* keywords, these have to come before the IDENT rule: when two rules
+  * match text of the same length flex picks the one listed first */
+"var" {return VAR;}
+"is" {return IS;}
+"unless" {return UNLESS;}
+"otherwise" {return OTHERWISE;}
+"until" {return UNTIL;}
+"do" {return DO;}
+"done" {return DONE;}
+"procedure" {return PROCEDURE;}
+"function" {return FUNCTION;}
+"return" {return RETURN;}
+"print" {return PRINT;}
+"end" {return END;}
+
+{STRING} {
+ /* NOTE(review): yytext points into the scanner's own buffer and is
+  * only valid until the next yylex() call. Fine while the tokens are
+  * just dumped, but the text has to be copied once the parser keeps
+  * tokens around (same goes for the identifiers below). The quotes
+  * are still part of the text. */
+ yylval->str = yytext;
+ return STRING;
+}
+
+{DATE_LITERAL} {
+ /* invalid dates are reported by lex_date() but still produce a token */
+ yylval->num = lex_date(parser, src_loc(*yylloc), yytext);
+ return DATE_LITERAL;
+}
+
+{INT_LITERAL} {
+ /* too large literals are reported by lex_int() but still produce a token */
+ yylval->snum = lex_int(parser, src_loc(*yylloc), yytext);
+ return INT_LITERAL;
+}
+
+{IDENT} {
+ yylval->str = yytext;
+ return IDENT;
+}
+
+{FUNC_IDENT} {
+ yylval->str = yytext;
+ return FUNC_IDENT;
+}
+
+{PROC_IDENT} {
+ yylval->str = yytext;
+ return PROC_IDENT;
+}
+
+[[:space:]]+ {/* skip whitespace */}
+
+. {
+ /* anything not matched above: report it and keep scanning, no token
+  * is returned for the offending character */
+ lex_fail(parser, src_loc(*yylloc), "Unexpected token: %s", yytext);
+}
+%%
diff --git a/src/main.c b/src/main.c
new file mode 100644
index 0000000..a40d841
--- /dev/null
+++ b/src/main.c
@@ -0,0 +1,28 @@
+#include <stdio.h>
+#include <posthaste/core.h>
+
+/**
+ * Print a short usage message.
+ *
+ * @param f Stream to print to.
+ * @param pname Name the program was invoked with.
+ */
+static void usage(FILE *f, const char *pname)
+{
+	(void)fprintf(f, "Usage:\n %s <filename>\n", pname);
+}
+
+/**
+ * Main entry to posthaste.
+ * Checks command line and drives the rest of the language.
+ *
+ * Feels kind of weird documenting main, but doxygen warns about not
+ * doing it so whatever.
+ *
+ * @param argc Number of command line arguments.
+ * @param argv Array of command line arguments.
+ * @return \c 0 when successful, non-zero otherwise.
+ */
+int main(int argc, char *argv[])
+{
+	if (argc != 2) {
+		/* argc is allowed to be 0, in which case argv[0] is a null
+		 * pointer and must not be handed to printf's %s */
+		const char *pname = (argc > 0 && argv[0]) ? argv[0] : "posthaste";
+		usage(stderr, pname);
+		return -1;
+	}
+
+	return run(argv[1]);
+}
diff --git a/src/parser.y b/src/parser.y
new file mode 100644
index 0000000..8a4ddb5
--- /dev/null
+++ b/src/parser.y
@@ -0,0 +1,243 @@
+%{
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdint.h>
+
+#include <posthaste/parser.h>
+#include <posthaste/date.h>
+
+/**
+ * Apply the macro \p M to the name of every token the lexer can return.
+ * Used for generating per-token code, such as the switch that prints
+ * token names in dump_yychar(). Has to be kept in sync with the
+ * \c \%token declarations by hand.
+ */
+#define FOREACH_TOKEN(M) \
+ M(LPAREN) \
+ M(RPAREN) \
+ M(LSQUARE) \
+ M(RSQUARE) \
+ M(LCURLY) \
+ M(RCURLY) \
+ M(APOSTROPHE) \
+ M(AMPERSAND) \
+ M(COMMA) \
+ M(DOT) \
+ M(EQ) \
+ M(LT) \
+ M(PLUS) \
+ M(MINUS) \
+ M(MULT) \
+ M(DIV) \
+ M(VAR) \
+ M(IS) \
+ M(UNLESS) \
+ M(OTHERWISE) \
+ M(UNTIL) \
+ M(DO) \
+ M(DONE) \
+ M(PROCEDURE) \
+ M(FUNCTION) \
+ M(RETURN) \
+ M(PRINT) \
+ M(END) \
+ M(STRING) \
+ M(DATE_LITERAL) \
+ M(INT_LITERAL) \
+ M(IDENT) \
+ M(FUNC_IDENT) \
+ M(PROC_IDENT)
+
+%}
+
+/* track source locations (YYLTYPE) for every token */
+%locations
+
+%define parse.trace
+%define parse.error verbose
+/* pure parser: the value and location are passed to yylex() as arguments
+ * instead of going through globals, see YY_DECL */
+%define api.pure full
+%define lr.type ielr
+
+/* extra arguments handed to yylex() and yyparse(), respectively */
+%lex-param {void *scanner} {struct parser *parser}
+%parse-param {void *scanner} {struct parser* parser}
+
+/* semantic value of a token or rule */
+%union {
+ struct ast_node *node;
+ ph_date_t num; /* packed date of a DATE_LITERAL */
+ int64_t snum; /* value of an INT_LITERAL */
+ char *str; /* text of strings and identifiers */
+};
+
+/* tokens that carry a value, the tag selects the %union member */
+%token <str> STRING
+%token <num> DATE_LITERAL
+%token <snum> INT_LITERAL
+%token <str> IDENT;
+%token <str> FUNC_IDENT;
+%token <str> PROC_IDENT;
+
+/* tokens without a value; the quoted aliases are what shows up in
+ * verbose error messages */
+%token LPAREN "("
+%token RPAREN ")"
+%token LSQUARE "["
+%token RSQUARE "]"
+%token LCURLY "{"
+%token RCURLY "}"
+
+%token APOSTROPHE "'"
+%token AMPERSAND "&"
+%token COMMA ","
+%token DOT "."
+%token EQ "="
+%token LT "<"
+%token PLUS "+"
+%token MINUS "-"
+%token MULT "*"
+%token DIV "/"
+
+%token VAR "var"
+%token IS "is"
+%token UNLESS "unless"
+%token OTHERWISE "otherwise"
+%token UNTIL "until"
+%token DO "do"
+%token DONE "done"
+%token PROCEDURE "procedure"
+%token FUNCTION "function"
+%token RETURN "return"
+%token PRINT "print"
+%token END "end"
+
+%{
+
+/** Modifies the signature of yylex to fit our parser better. */
+#define YY_DECL int yylex(YYSTYPE *yylval, YYLTYPE *yylloc, \
+ void *yyscanner, struct parser *parser)
+
+/**
+ * Declare yylex.
+ *
+ * @param yylval Bison current value.
+ * @param yylloc Bison location info.
+ * @param yyscanner Flex scanner.
+ * @param parser Current parser state.
+ * @return The kind of the next token, or \c YYEOF once the end of the
+ * input has been reached. Lexing errors are reported through
+ * \c parser->failed, not through the return value.
+ * More info on yylex() can be found in the flex manual.
+ */
+YY_DECL;
+
+/**
+ * Convert bison location info to our own source location info.
+ *
+ * @param yylloc Bison location info.
+ * @return Internal location info.
+ */
+static struct src_loc src_loc(YYLTYPE yylloc);
+
+/**
+ * Print parsing error.
+ * Automatically called by bison.
+ *
+ * @param yylloc Location of error.
+ * @param lexer Lexer.
+ * @param parser Parser state.
+ * @param msg Message to print.
+ */
+static void yyerror(YYLTYPE *yylloc, void *lexer,
+ struct parser *parser, const char *msg);
+
+%}
+
+%start input;
+%%
+
+input: /* empty, placeholder: parse() only dumps the tokens for now */
+
+%%
+
+#include "gen_lexer.inc"
+
+/**
+ * Print one token to \c stdout as <tt>file:line:col: NAME (value)</tt>.
+ * The value is only printed for integers, dates and identifiers.
+ *
+ * @param p Parser state, used for the file name.
+ * @param yychar Kind of token.
+ * @param yylval Value of token.
+ * @param yylloc Location of token.
+ */
+static void dump_yychar(struct parser *p, int yychar, YYSTYPE yylval, YYLTYPE yylloc)
+{
+	struct src_loc where = src_loc(yylloc);
+	printf("%s:%d:%d: ", p->fname, where.first_line, where.first_col);
+
+	/* one case per known token that prints the name of the token */
+#define PRINT_NAME(token) case token: printf(#token " "); break;
+	switch (yychar) {
+	FOREACH_TOKEN(PRINT_NAME);
+	default: printf("Unknown yychar\n"); return;
+	}
+
+	if (yychar == INT_LITERAL) {
+		printf("(%lld)", (long long int)yylval.snum);
+	} else if (yychar == DATE_LITERAL) {
+		char date_str[11] = {0};
+		date_to_string(date_str, yylval.num);
+		printf("(%s)", date_str);
+	} else if (yychar == IDENT || yychar == FUNC_IDENT
+			|| yychar == PROC_IDENT) {
+		printf("(%s)", yylval.str);
+	}
+
+	printf("\n");
+}
+
+/**
+ * Run the lexer over the whole input and print every token it returns.
+ *
+ * @param p Parser state with an initialized lexer.
+ */
+static void dump_lex(struct parser *p)
+{
+	int yychar;
+	/* tokens without a value leave yylval alone, start from a known
+	 * state so we never pass an uninitialized value around */
+	YYSTYPE yylval = {0};
+	YYLTYPE yylloc = {1, 1, 1, 1};
+
+	/* run lexer until we reach the end of the file */
+	while ((yychar = yylex(&yylval, &yylloc, p->lexer, p)) != YYEOF) {
+		dump_yychar(p, yychar, yylval, yylloc);
+	}
+}
+
+static struct src_loc src_loc(YYLTYPE yylloc)
+{
+	/* same information, just with our own field names */
+	return (struct src_loc){
+		.first_line = yylloc.first_line,
+		.last_line = yylloc.last_line,
+		.first_col = yylloc.first_column,
+		.last_col = yylloc.last_column,
+	};
+}
+
+static void yyerror(YYLTYPE *yylloc, void *lexer,
+		struct parser *parser, const char *msg)
+{
+	(void)lexer;
+
+	struct src_issue issue;
+	issue.loc = src_loc(*yylloc);
+	issue.fname = parser->fname;
+	issue.buf = parser->buf;
+
+	/* msg is generated text, not a format string: print it verbatim so
+	 * that a stray percent sign in it can't be taken as a conversion */
+	src_issue(issue, "%s", msg);
+
+	/* same as for lexing errors, the failure has to be recorded for
+	 * whoever looks at the parser afterwards */
+	parser->failed = true;
+}
+
+/**
+ * Allocate a zero-initialized parser.
+ *
+ * @return New parser to be freed with destroy_parser(), or \c NULL if
+ * the allocation failed.
+ */
+struct parser *create_parser(void)
+{
+	return calloc(1, sizeof(struct parser));
+}
+
+/**
+ * Free a parser and the lexer it owns.
+ *
+ * @param p Parser to destroy. May be \c NULL, in which case nothing
+ * happens.
+ */
+void destroy_parser(struct parser *p)
+{
+	if (!p)
+		return;
+
+	/* the lexer only exists once parse() has been run, a freshly
+	 * created parser has it zeroed out */
+	if (p->lexer)
+		yylex_destroy(p->lexer);
+
+	free(p);
+}
+
+/**
+ * Run the parser on a buffer. For now this only sets up the lexer and
+ * dumps the tokens it produces. Whether something went wrong is
+ * recorded in \c p->failed.
+ *
+ * @param p Parser to use.
+ * @param fname Name of the file the buffer came from, used in messages.
+ * @param buf Null-terminated contents of the file. Has to stay alive
+ * as long as \p p refers to it.
+ */
+void parse(struct parser *p, const char *fname, const char *buf)
+{
+	p->fname = fname;
+	p->buf = buf;
+
+	p->comment_nesting = 0;
+
+	p->failed = false;
+
+	/* don't leak the lexer of a previous run */
+	if (p->lexer) {
+		yylex_destroy(p->lexer);
+		p->lexer = NULL;
+	}
+
+	/* yylex_init() returns non-zero when it can't set up the scanner */
+	if (yylex_init(&p->lexer) != 0) {
+		fprintf(stderr, "%s: failed initializing lexer\n", fname);
+		p->lexer = NULL;
+		p->failed = true;
+		return;
+	}
+
+	yy_scan_string(buf, p->lexer);
+
+	// debugging, remember to reset yy_scan_string once the actual parser
+	// runs
+	dump_lex(p);
+
+	// yyparse(p->lexer, p);
+}
diff --git a/src/source.mk b/src/source.mk
new file mode 100644
index 0000000..6a8d30f
--- /dev/null
+++ b/src/source.mk
@@ -0,0 +1,4 @@
+# collect all C sources in src/ by letting the shell expand the glob,
+# '!=' assigns the output of the command
+SRCS != echo src/*.c
+
+# rules for gen_parser.c are in scripts/makefile
+POSTHASTE_SOURCES += $(SRCS) gen/gen_parser.c