aboutsummaryrefslogtreecommitdiff
path: root/src/lexer.l
diff options
context:
space:
mode:
Diffstat (limited to 'src/lexer.l')
-rw-r--r-- src/lexer.l 191
1 files changed, 191 insertions, 0 deletions
diff --git a/src/lexer.l b/src/lexer.l
new file mode 100644
index 0000000..6f58bd5
--- /dev/null
+++ b/src/lexer.l
@@ -0,0 +1,191 @@
+%option reentrant noyywrap nounput noinput nodefault
+%{
+
+#include <stdio.h>
+#include <stdarg.h>
+
+#include <posthaste/parser.h>
+#include <posthaste/debug.h>
+
+/*
+ * Move the location in lloc past the text that was just matched.
+ *
+ * The old end position becomes the new start position, after which the end
+ * position is advanced one character of text at a time. A newline moves the
+ * end to the start of the following line.
+ */
+static void update_yylloc(struct parser *parser, YYLTYPE *lloc, const char *text)
+{
+	/* the parser itself is not needed for location tracking */
+	(void)parser;
+
+	/* this match begins where the previous one stopped */
+	lloc->first_column = lloc->last_column;
+	lloc->first_line = lloc->last_line;
+
+	for (const char *c = text; *c != '\0'; ++c) {
+		if (*c != '\n') {
+			lloc->last_column++;
+			continue;
+		}
+
+		/* flex uses 1 based indexing */
+		lloc->last_line++;
+		lloc->last_column = 1;
+	}
+}
+
+/*
+ * Report a lexing problem at loc and mark the parser as failed.
+ *
+ * The issue is tagged with the parser's file name and buffer, and msg along
+ * with the variadic arguments is handed to vsrc_issue().
+ */
+static void lex_fail(struct parser *p, struct src_loc loc, const char *msg, ...)
+{
+	struct src_issue report;
+	report.buf = p->buf;
+	report.fname = p->fname;
+	report.loc = loc;
+
+	va_list ap;
+	va_start(ap, msg);
+	vsrc_issue(report, msg, ap);
+	va_end(ap);
+
+	/* remember that lexing did not go cleanly */
+	p->failed = true;
+}
+
+/*
+ * Convert the text of a date literal with date_from_string().
+ *
+ * If date_valid() rejects the result, an error is reported at loc and 0 is
+ * returned in place of the date.
+ */
+static ph_date_t lex_date(struct parser *p, struct src_loc loc, const char *date)
+{
+	ph_date_t parsed = date_from_string(date);
+	if (date_valid(parsed))
+		return parsed;
+
+	lex_fail(p, loc, "Not a valid date.");
+	return 0;
+}
+
+/*
+ * Convert the text of an integer literal into its value.
+ *
+ * num is expected to be text matched by INT_LITERAL: an optional leading
+ * '-', decimal digits, and ' characters separating groups of digits. The
+ * separators carry no value and are ignored.
+ *
+ * Returns the value of the literal. If its magnitude is greater than
+ * 1000000000000, an error is reported at loc through lex_fail() and 0 is
+ * returned instead.
+ */
+static int64_t lex_int(struct parser *p, struct src_loc loc, const char *num)
+{
+	/* largest magnitude a literal is allowed to have */
+	const int64_t limit = 1000000000000;
+
+	bool neg = num[0] == '-';
+
+	/* jump over minus sign */
+	if (neg)
+		num += 1;
+
+	int64_t sum = 0;
+	for (size_t i = 0; num[i] != '\0'; ++i) {
+		/* jump over separators */
+		if (num[i] == '\'')
+			continue;
+
+		/* most significant digit first, so shift and append */
+		sum = sum * 10 + (num[i] - '0');
+
+		/*
+		 * Checking after every digit means sum never grows past
+		 * 10 * limit + 9, so it can't overflow no matter how long the
+		 * literal is. It also makes the decision depend on the value
+		 * rather than on how many characters (leading zeros,
+		 * separators) were used to write it.
+		 */
+		if (sum > limit) {
+			lex_fail(p, loc, "Literal integer too large");
+			return 0;
+		}
+	}
+
+	return neg ? -sum : sum;
+}
+
+/*
+ * flex runs YY_USER_ACTION right before the action of every matched rule,
+ * so the location in yylloc is advanced past yytext for each match.
+ * NOTE(review): parser and yylloc have to be visible inside yylex for this
+ * to compile; presumably they come from its declaration -- confirm.
+ */
+#define YY_USER_ACTION update_yylloc(parser, yylloc, yytext);
+%}
+
+/*
+ * The three kinds of identifiers are told apart by their first characters.
+ * IDENT: a lowercase letter followed by one or more letters, digits or
+ * underscores, so always at least two characters.
+ */
+IDENT [a-z][a-zA-Z0-9_]+
+/* one uppercase letter followed by one or more lowercase letters, digits or underscores */
+FUNC_IDENT [A-Z][a-z0-9_]+
+/* two uppercase letters followed by any number of uppercase letters, digits or underscores */
+PROC_IDENT [A-Z]{2}[A-Z0-9_]*
+
+/* four digits, two digits and two digits joined by dashes; only the shape is checked here */
+DATE_LITERAL [0-9]{4}-[0-9]{2}-[0-9]{2}
+/*
+ * Optional minus sign, digits, then any number of groups made of a ' and
+ * digits.
+ * NOTE(review): [0-9][0-9][0-9]+ accepts three or more digits per group;
+ * confirm that exactly three wasn't the intention.
+ */
+INT_LITERAL -?[0-9]+('[0-9][0-9][0-9]+)*
+/*
+ * Double quoted text in which a backslash escapes the character after it.
+ * Unescaped newlines are accepted inside the quotes.
+ */
+STRING \"(\\.|[^"\\])*\"
+
+/* exclusive start condition: only rules tagged with SC_COMMENT are active in it */
+%x SC_COMMENT
+
+%%
+	/*
+	 * Comments are written (% like this %) and may be nested. The first
+	 * "(%" switches to SC_COMMENT, further openers are counted in
+	 * parser->comment_nesting, and the "%)" that is seen while the count
+	 * is zero switches back to normal lexing.
+	 */
+"(%" {BEGIN(SC_COMMENT);}
+<SC_COMMENT>{
+	"(%" {parser->comment_nesting += 1;}
+	"%)" {
+		if (parser->comment_nesting)
+			parser->comment_nesting -= 1;
+		else
+			BEGIN(INITIAL);
+	}
+
+	/*
+	 * Everything else inside a comment is thrown away. '(' and '%' are
+	 * kept out of the bulk rule so that it can't swallow the start of a
+	 * "(%" or "%)", and each has a single character rule of its own so
+	 * that one which doesn't form a delimiter can't jam the lexer
+	 * (%option nodefault leaves no fallback rule to pick it up).
+	 */
+	[^(%\n]+ {}
+	[(] {}
+	[%] {}
+	\n {}
+
+	/* running out of input here means some "(%" was never closed */
+	<<EOF>> {
+		lex_fail(parser, src_loc(*yylloc), "Unterminated comment");
+		yyterminate();
+	}
+}
+
+ /* brackets */
+"(" {return LPAREN;}
+")" {return RPAREN;}
+"[" {return LSQUARE;}
+"]" {return RSQUARE;}
+"{" {return LCURLY;}
+"}" {return RCURLY;}
+
+ /* single character punctuation and operators */
+"'" {return APOSTROPHE;}
+"&" {return AMPERSAND;}
+"," {return COMMA;}
+"." {return DOT;}
+"=" {return EQ;}
+"<" {return LT;}
+"+" {return PLUS;}
+"-" {return MINUS;}
+"*" {return MULT;}
+"/" {return DIV;}
+
+ /*
+  * Reserved words. Each of these is also matched by {IDENT}; flex picks
+  * the longest match and, when two rules match the same length, the rule
+  * listed first, so these have to stay above the {IDENT} rule. Longer
+  * names that merely start with a reserved word still lex as IDENT.
+  */
+"var" {return VAR;}
+"is" {return IS;}
+"unless" {return UNLESS;}
+"otherwise" {return OTHERWISE;}
+"until" {return UNTIL;}
+"do" {return DO;}
+"done" {return DONE;}
+"procedure" {return PROCEDURE;}
+"function" {return FUNCTION;}
+"return" {return RETURN;}
+"print" {return PRINT;}
+"end" {return END;}
+
+ /*
+  * Tokens that carry a value. The string and identifier rules hand the
+  * matched text to the parser as a pointer to yytext.
+  * NOTE(review): flex only keeps yytext valid until the next token is
+  * scanned -- confirm that the parser copies the text before then.
+  */
+
+ /* the text is passed on as matched, surrounding quotes and escapes included */
+{STRING} {
+ yylval->str = yytext;
+ return STRING;
+}
+
+ /* the pattern only checks the shape, lex_date() reports invalid dates */
+{DATE_LITERAL} {
+ yylval->num = lex_date(parser, src_loc(*yylloc), yytext);
+ return DATE_LITERAL;
+}
+
+ /* lex_int() drops separators and reports literals that are too large */
+{INT_LITERAL} {
+ yylval->snum = lex_int(parser, src_loc(*yylloc), yytext);
+ return INT_LITERAL;
+}
+
+ /* the three identifier kinds differ only in the token that is returned */
+{IDENT} {
+ yylval->str = yytext;
+ return IDENT;
+}
+
+{FUNC_IDENT} {
+ yylval->str = yytext;
+ return FUNC_IDENT;
+}
+
+{PROC_IDENT} {
+ yylval->str = yytext;
+ return PROC_IDENT;
+}
+
+ /* whitespace, newlines included, only separates tokens */
+[[:space:]]+ {/* skip whitespace */}
+
+ /*
+  * Any single character that no rule above matched. The error is
+  * reported and the parser marked as failed by lex_fail(), but no token
+  * is returned, so lexing carries on with the next character.
+  */
+. {
+ lex_fail(parser, src_loc(*yylloc), "Unexpected token: %s", yytext);
+}
+%%