diff options
author | Kimplul <kimi.h.kuparinen@gmail.com> | 2024-12-03 22:04:38 +0200 |
---|---|---|
committer | Kimplul <kimi.h.kuparinen@gmail.com> | 2024-12-03 22:04:38 +0200 |
commit | 2253da61e9b3dd5408bed182ea08e5270156c17e (patch) | |
tree | 298bb06e681ec5366faa539906cae6e805fe5862 /src/lexer.l | |
download | fwd-2253da61e9b3dd5408bed182ea08e5270156c17e.tar.gz fwd-2253da61e9b3dd5408bed182ea08e5270156c17e.zip |
initial commit
+ Lots of code was copied from ek so that I didn't have to start from scratch,
but this might mean there are some quirks here and there that made sense in ek
but not necessarily here.
Diffstat (limited to 'src/lexer.l')
-rw-r--r-- | src/lexer.l | 166 |
1 files changed, 166 insertions, 0 deletions
/* SPDX-License-Identifier: copyleft-next-0.3.1 */
/* Copyright 2024 Kim Kuparinen < kimi.h.kuparinen@gmail.com > */

%option reentrant noyywrap nounput noinput nodefault
%{
#define FROM_LEXER
#include <fwd/parser.h>
#include <fwd/debug.h>

#include <errno.h>
#include <stdlib.h>
#include <string.h>

/*
 * Advance the token location past `text`.
 *
 * The new token starts where the previous one ended; every '\n' in `text`
 * bumps the line counter and resets the column, any other byte bumps the
 * column. Invoked for every matched rule through YY_USER_ACTION.
 */
static void update_yylloc(struct parser *parser, YYLTYPE *lloc, const char *text)
{
	(void)parser;

	lloc->first_line = lloc->last_line;
	lloc->first_column = lloc->last_column;

	for (size_t i = 0; text[i] != 0; ++i) {
		if (text[i] == '\n') {
			lloc->last_line++;
			/* flex uses 1 based indexing */
			lloc->last_column = 1;
		} else {
			lloc->last_column++;
		}
	}
}

/*
 * Mark the parse as failed and build an error-level issue that points at
 * `lloc` in the buffer currently being scanned. The caller passes the result
 * to src_issue() together with the message.
 */
static struct src_issue lexer_error(struct parser *parser, YYLTYPE *lloc)
{
	struct src_issue issue;
	issue.level = SRC_ERROR;
	issue.loc = src_loc(*lloc);
	issue.fctx.fbuf = parser->buf;
	issue.fctx.fname = parser->fname;

	parser->failed = true;
	return issue;
}

#define YY_USER_ACTION update_yylloc(parser, yylloc, yytext);
%}

/* Integer literals. Octal digits are 0-7 only; something like 08 still
 * matches DEC and is rejected when the INT rule converts it. */
HEX	0[xX][0-9a-fA-F]+
DEC	-?[0-9]+
OCT	0[0-7]+
BIN	0b[0-1]+

INT	{HEX}|{DEC}|{OCT}|{BIN}

/* Floating point literals; no rule in this file references these yet. */
HEXF	[+-]?0[xX][0-9a-fA-F]+([pP][+-]?[0-9]+)
DECF	[+-]?[0-9]+[.]([eE]?[+-]?[0-9]+)?[fF]?

ID	[_a-zA-Z][_a-zA-Z0-9]*
APPLY	{ID}!

STRING	\"(\\.|[^"\\])*\"

%x SC_COMMENT

%%
"//".*	{/* skip line comments */}

"/*"	{BEGIN(SC_COMMENT);}
<SC_COMMENT>{
	"/*"	{++parser->comment_nesting;}
	"*"+"/"	{
		/* block comments nest; only the outermost terminator
		 * returns to normal scanning */
		if (parser->comment_nesting)
			--parser->comment_nesting;
		else
			BEGIN(INITIAL);
	}

	"*"+	{}
	[^/*\n]+	{}
	[/]	{}
	\n	{}
}

<SC_COMMENT><<EOF>>	{
	/* input ended inside a block comment; report it instead of
	 * silently accepting the file */
	src_issue(lexer_error(parser, yylloc), "Unterminated comment");
	parser->comment_nesting = 0;
	BEGIN(INITIAL);
	yyterminate();
}

"::"	{return SCOPE;}
"("	{return LPAREN;}
")"	{return RPAREN;}
"{"	{return LBRACE;}
"}"	{return RBRACE;}
"["	{return LBRACKET;}
"]"	{return RBRACKET;}
"."	{return DOT;}
","	{return COMMA;}
";"	{return SEMICOLON;}
":"	{return COLON;}
"!"	{return BANG;}

"+"	{return PLUS;}
"-"	{return MINUS;}
"*"	{return STAR;}
"/"	{return DIV;}
"%"	{return REM;}
"^"	{return XOR;}

"true"	{
	yylval->integer = 1;
	return BOOL;
}

"false"	{
	yylval->integer = 0;
	return BOOL;
}

'[^'\\]'	{
	/* regular character constant, 'a' */
	yylval->integer = yytext[1];
	return CHAR;
}

'\\.'	{
	/* escaped character constant */
	yylval->integer = match_escape(yytext[2]);
	return CHAR;
}

"?"	{return QUESTION;}
"'"	{return SQUOTE;}

"&"	{return AND;}

"~"	{return TILDE;}
"="	{return TO;}
"<"	{return LT;}
">"	{return GT;}
"<="	{return LE;}
">="	{return GE;}
"!="	{return NE;}
"=="	{return EQ;}

"=>"	{return FATARROW;}

"<<"	{return LSHIFT;}
">>"	{return RSHIFT;}

{STRING}	{
	/* seems risky, I know, but letting the parser choose when to allocate a
	 * new string seems to help with syntax error cleanup */
	yylval->str = strdup(yytext);
	return STRING;
}

{INT}	{
	/* strtoll() with base 0 handles decimal, 0x hex and leading-0 octal,
	 * but only accepts a 0b prefix since C23, so binary literals are
	 * converted explicitly in base 2 */
	const char *digits = yytext;
	int base = 0;
	if (yytext[0] == '0' && yytext[1] == 'b') {
		digits = yytext + 2;
		base = 2;
	}

	char *end = NULL;
	errno = 0;
	yylval->integer = strtoll(digits, &end, base);

	/* leftover characters (e.g. the 8 in 08) or an out-of-range value
	 * mean the literal does not have the value that was written */
	if (*end != '\0' || errno == ERANGE)
		src_issue(lexer_error(parser, yylloc),
				"Invalid integer literal: %s", yytext);

	/* still hand a token to the parser so it can keep going and
	 * report further errors */
	return INT;
}

{ID}	{
	yylval->str = strdup(yytext);
	return ID;
}

{APPLY}	{
	/* strip trailing '!' (shortening yytext in place is allowed) */
	yytext[yyleng - 1] = '\0';

	yylval->str = strdup(yytext);
	return APPLY;
}


[[:space:]]+	{/* skip whitespace */}

.	{
	src_issue(lexer_error(parser, yylloc), "Unexpected token: %s", yytext);
}
%%