diff options
Diffstat (limited to 'src/lexer.l')
-rw-r--r-- | src/lexer.l | 191 |
1 files changed, 191 insertions, 0 deletions
%option reentrant noyywrap nounput noinput nodefault
%{
/*
 * Flex scanner for the posthaste parser.
 *
 * The rule actions below use `parser`, `yylval` and `yylloc` without
 * declaring them. NOTE(review): presumably they are parameters of the yylex()
 * prototype (YY_DECL) set up by <posthaste/parser.h> -- confirm there.
 */

#include <stdarg.h>
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

#include <posthaste/parser.h>
#include <posthaste/debug.h>

/* Largest magnitude accepted for an integer literal. */
static const int64_t lex_int_limit = 1000000000000;

/*
 * Advance the location in `lloc` over the token `text`.
 *
 * The new span starts where the previous one ended. Each newline in `text`
 * increments last_line and resets last_column to 1; every other character
 * increments last_column. `parser` is currently unused.
 */
static void update_yylloc(struct parser *parser, YYLTYPE *lloc, const char *text)
{
    (void)parser;

    lloc->first_line = lloc->last_line;
    lloc->first_column = lloc->last_column;

    for (const char *c = text; *c != '\0'; ++c) {
        if (*c == '\n') {
            lloc->last_line++;
            /* flex uses 1 based indexing */
            lloc->last_column = 1;
        } else {
            lloc->last_column++;
        }
    }
}

/*
 * Report a lexing error at `loc` and mark the parser as failed.
 *
 * `msg` and the variadic arguments are forwarded to vsrc_issue() together
 * with the parser's file name and buffer. NOTE(review): `msg` is presumably
 * a printf-style format string -- confirm against vsrc_issue().
 */
static void lex_fail(struct parser *p, struct src_loc loc, const char *msg, ...)
{
    /* designated initializer: fields not listed here are zeroed */
    struct src_issue issue = {
        .loc = loc,
        .fname = p->fname,
        .buf = p->buf,
    };

    va_list args;
    va_start(args, msg);
    vsrc_issue(issue, msg, args);
    va_end(args);

    p->failed = true;
}

/*
 * Convert the text of a date literal into a ph_date_t.
 *
 * Returns the converted date, or 0 after reporting an error through
 * lex_fail() when date_valid() rejects the result.
 */
static ph_date_t lex_date(struct parser *p, struct src_loc loc, const char *date)
{
    ph_date_t d = date_from_string(date);
    if (date_valid(d))
        return d;

    lex_fail(p, loc, "Not a valid date.");
    return 0;
}

/*
 * Convert the text of an integer literal into an int64_t.
 *
 * `num` is expected to have the shape accepted by INT_LITERAL: an optional
 * leading '-', decimal digits, and "'" separators, which are skipped.
 *
 * Returns the value, or 0 after reporting an error through lex_fail() when
 * the magnitude exceeds lex_int_limit.
 */
static int64_t lex_int(struct parser *p, struct src_loc loc, const char *num)
{
    bool neg = num[0] == '-';

    /* jump over minus sign */
    if (neg)
        num += 1;

    int64_t sum = 0;
    for (; *num != '\0'; ++num) {
        /* jump over separators */
        if (*num == '\'')
            continue;

        /*
         * sum is at most lex_int_limit here, so sum * 10 + 9 cannot
         * overflow int64_t. Checking the value on every digit rejects by
         * magnitude rather than by text length, so leading zeros and
         * separators do not count against the literal.
         */
        sum = sum * 10 + (*num - '0');
        if (sum > lex_int_limit) {
            lex_fail(p, loc, "Literal integer too large");
            return 0;
        }
    }

    return neg ? -sum : sum;
}

/* flex runs this before the action of every matched rule */
#define YY_USER_ACTION update_yylloc(parser, yylloc, yytext);
%}

/*
 * Identifier classes are told apart by letter case:
 *   IDENT       lowercase letter, then one or more letters, digits or '_'
 *   FUNC_IDENT  uppercase letter, then one or more lowercase letters,
 *               digits or '_'
 *   PROC_IDENT  two uppercase letters, then any number of uppercase letters,
 *               digits or '_'
 * Every identifier is therefore at least two characters long.
 */
IDENT [a-z][a-zA-Z0-9_]+
FUNC_IDENT [A-Z][a-z0-9_]+
PROC_IDENT [A-Z]{2}[A-Z0-9_]*

/*
 * NOTE(review): in INT_LITERAL each group after a "'" separator accepts
 * three or more digits, not exactly three -- confirm this is intended.
 */
DATE_LITERAL [0-9]{4}-[0-9]{2}-[0-9]{2}
INT_LITERAL -?[0-9]+('[0-9][0-9][0-9]+)*
STRING \"(\\.|[^"\\])*\"

%x SC_COMMENT

%%
    /*
     * Comments are written (% ... %) and may nest. parser->comment_nesting
     * counts the comments opened inside the outermost one.
     */
"(%" {BEGIN(SC_COMMENT);}
<SC_COMMENT>{
    "(%" {parser->comment_nesting += 1;}
    "%)" {
        if (parser->comment_nesting)
            parser->comment_nesting -= 1;
        else
            BEGIN(INITIAL);
    }

    /*
     * Skip the comment body. A '(' or '%' that does not start "(%" or
     * "%)" needs a rule of its own: SC_COMMENT is exclusive and the
     * scanner is built with nodefault, so any unmatched character would
     * abort the scanner.
     */
    [^(%\n]+ {}
    [(%] {}
    \n {}
}

    /* end of input inside a comment: report it instead of ending silently */
<SC_COMMENT><<EOF>> {
    lex_fail(parser, src_loc(*yylloc), "Unterminated comment");
    yyterminate();
}

"(" {return LPAREN;}
")" {return RPAREN;}
"[" {return LSQUARE;}
"]" {return RSQUARE;}
"{" {return LCURLY;}
"}" {return RCURLY;}

"'" {return APOSTROPHE;}
"&" {return AMPERSAND;}
"," {return COMMA;}
"." {return DOT;}
"=" {return EQ;}
"<" {return LT;}
"+" {return PLUS;}
"-" {return MINUS;}
"*" {return MULT;}
"/" {return DIV;}

    /*
     * Keywords are listed before {IDENT}: when both match the same text,
     * flex picks the rule that appears first.
     */
"var" {return VAR;}
"is" {return IS;}
"unless" {return UNLESS;}
"otherwise" {return OTHERWISE;}
"until" {return UNTIL;}
"do" {return DO;}
"done" {return DONE;}
"procedure" {return PROCEDURE;}
"function" {return FUNCTION;}
"return" {return RETURN;}
"print" {return PRINT;}
"end" {return END;}

    /*
     * NOTE(review): the rules below that assign yytext to yylval->str hand
     * out a pointer into flex's own buffer, whose contents are only valid
     * until the next token is scanned. Presumably the parser copies the
     * text right away -- verify against the grammar actions.
     */
{STRING} {
    yylval->str = yytext;
    return STRING;
}

{DATE_LITERAL} {
    yylval->num = lex_date(parser, src_loc(*yylloc), yytext);
    return DATE_LITERAL;
}

{INT_LITERAL} {
    yylval->snum = lex_int(parser, src_loc(*yylloc), yytext);
    return INT_LITERAL;
}

{IDENT} {
    yylval->str = yytext;
    return IDENT;
}

{FUNC_IDENT} {
    yylval->str = yytext;
    return FUNC_IDENT;
}

{PROC_IDENT} {
    yylval->str = yytext;
    return PROC_IDENT;
}

[[:space:]]+ {/* skip whitespace */}

    /* anything else: report it and keep scanning (no token is returned) */
. {
    lex_fail(parser, src_loc(*yylloc), "Unexpected token: %s", yytext);
}
%%