diff options
Diffstat (limited to 'src/lexer.l')
-rw-r--r-- | src/lexer.l | 166 |
1 files changed, 166 insertions, 0 deletions
diff --git a/src/lexer.l b/src/lexer.l new file mode 100644 index 0000000..fe748e2 --- /dev/null +++ b/src/lexer.l @@ -0,0 +1,166 @@ +/* SPDX-License-Identifier: copyleft-next-0.3.1 */ +/* Copyright 2024 Kim Kuparinen < kimi.h.kuparinen@gmail.com > */ + +%option reentrant noyywrap nounput noinput nodefault +%{ +#define FROM_LEXER +#include <fwd/parser.h> +#include <fwd/debug.h> + +static void update_yylloc(struct parser *parser, YYLTYPE *lloc, const char *text) +{ + (void)parser; + + lloc->first_line = lloc->last_line; + lloc->first_column = lloc->last_column; + + for (size_t i = 0; text[i] != 0; ++i) { + if (text[i] == '\n') { + lloc->last_line++; + /* flex uses 1 based indexing */ + lloc->last_column = 1; + } else { + lloc->last_column++; + } + } +} + +#define YY_USER_ACTION update_yylloc(parser, yylloc, yytext); +%} + +HEX 0[xX][0-9a-fA-F]+ +DEC -?[0-9]+ +OCT 0[0-8]+ +BIN 0b[0-1]+ + +INT {HEX}|{DEC}|{OCT}|{BIN} + +HEXF [+-]?0[xX][0-9a-fA-F]+([pP][+-]?[0-9]+) +DECF [+-]?[0-9]+[.]([eE]?[+-]?[0-9]+)?[fF]? + +ID [_a-zA-Z][_a-zA-Z0-9]* +APPLY {ID}! + +STRING \"(\\.|[^"\\])*\" + +%x SC_COMMENT + +%% +"//".* {/* skip line comments */} + +"/*" {BEGIN(SC_COMMENT);} +<SC_COMMENT>{ + "/*" {++parser->comment_nesting;} + "*"+"/" { + if (parser->comment_nesting) + --parser->comment_nesting; + else + BEGIN(INITIAL); + } + + "*"+ {} + [^/*\n]+ {} + [/] {} + \n {} +} + +"::" {return SCOPE;} +"(" {return LPAREN;} +")" {return RPAREN;} +"{" {return LBRACE;} +"}" {return RBRACE;} +"[" {return LBRACKET;} +"]" {return RBRACKET;} +"." {return DOT;} +"," {return COMMA;} +";" {return SEMICOLON;} +":" {return COLON;} +"!" {return BANG;} + +"+" {return PLUS;} +"-" {return MINUS;} +"*" {return STAR;} +"/" {return DIV;} +"%" {return REM;} +"^" {return XOR;} + +"true" { + yylval->integer = 1; + return BOOL; +} + +"false" { + yylval->integer = 0; + return BOOL; +} + +'[^'\\]' { + /* regular character constant, 'a' */ + yylval->integer = yytext[1]; + return CHAR; +} + +'\\.' { + /* escaped character constant */ + yylval->integer = match_escape(yytext[2]); + return CHAR; +} + +"?" {return QUESTION;} +"'" {return SQUOTE;} + +"&" {return AND;} + +"~" {return TILDE;} +"=" {return TO;} +"<" {return LT;} +">" {return GT;} +"<=" {return LE;} +">=" {return GE;} +"!=" {return NE;} +"==" {return EQ;} + +"=>" {return FATARROW;} + +"<<" {return LSHIFT;} +">>" {return RSHIFT;} + +{STRING} { + /* seems risky, I know, but letting the parser choose when to allocate a + * new string seems to help with syntax error cleanup */ + yylval->str = strdup(yytext); + return STRING; +} + +{INT} { + yylval->integer = strtoll(yytext, 0, 0); + return INT; +} + +{ID} { + yylval->str = strdup(yytext); + return ID; +} + +{APPLY} { + /* strip trailing '!' */ + char *s = yytext + strlen(yytext); + s[-1] = '\0'; + + yylval->str = strdup(yytext); + return APPLY; +} + + +[[:space:]]+ {/* skip whitespace */} + +. { + struct src_issue issue; + issue.level = SRC_ERROR; + issue.loc = src_loc(*yylloc); + issue.fctx.fbuf = parser->buf; + issue.fctx.fname = parser->fname; + src_issue(issue, "Unexpected token: %s", yytext); + parser->failed = true; +} +%% |