initial commit

+ Lots of code was copied from ek so I didn't have to start from scratch, but that might mean there are some quirks here and there that made sense in ek but not necessarily here.
author: Kimplul <kimi.h.kuparinen@gmail.com> 2024-12-03 22:04:38 +0200
committer: Kimplul <kimi.h.kuparinen@gmail.com> 2024-12-03 22:04:38 +0200
commit: 2253da61e9b3dd5408bed182ea08e5270156c17e (patch)
tree: 298bb06e681ec5366faa539906cae6e805fe5862 /src/lexer.l
download: fwd-2253da61e9b3dd5408bed182ea08e5270156c17e.tar.gz
fwd-2253da61e9b3dd5408bed182ea08e5270156c17e.zip
1 files changed, 166 insertions, 0 deletions
diff --git a/src/lexer.l b/src/lexer.l
new file mode 100644
index 0000000..fe748e2
--- /dev/null
+++ b/src/lexer.l
@@ -0,0 +1,166 @@
+/* SPDX-License-Identifier: copyleft-next-0.3.1 */
+/* Copyright 2024 Kim Kuparinen < kimi.h.kuparinen@gmail.com > */
+
+%option reentrant noyywrap nounput noinput nodefault
+%{
+#define FROM_LEXER
+#include <fwd/parser.h>
+#include <fwd/debug.h>
+
+/*
+ * Advance a token location past the text that was just matched.
+ *
+ * The new span begins where the previous one ended. The end position then
+ * moves forward one column per byte of text; a newline instead moves to the
+ * next line and puts the column back to 1.
+ *
+ * parser: not used by this function.
+ * lloc:   location to update in place.
+ * text:   NUL-terminated text of the current match.
+ */
+static void update_yylloc(struct parser *parser, YYLTYPE *lloc, const char *text)
+{
+	(void)parser;
+
+	/* the new token starts where the previous one stopped */
+	lloc->first_line = lloc->last_line;
+	lloc->first_column = lloc->last_column;
+
+	for (const char *c = text; *c != '\0'; c++) {
+		if (*c != '\n') {
+			lloc->last_column++;
+			continue;
+		}
+
+		/* columns are 1-based, so a fresh line starts at column 1 */
+		lloc->last_line++;
+		lloc->last_column = 1;
+	}
+}
+
+/* flex runs YY_USER_ACTION before the action of every matched rule; here it
+ * keeps the token location in step with the text that was consumed */
+#define YY_USER_ACTION update_yylloc(parser, yylloc, yytext);
+%}
+
+/*
+ * Integer literals, combined into INT. Octal digits are 0-7 only; DEC accepts
+ * any run of digits, so a string such as 08 still matches INT through DEC.
+ */
+HEX		0[xX][0-9a-fA-F]+
+DEC		-?[0-9]+
+OCT		0[0-7]+
+BIN		0b[0-1]+
+
+INT		{HEX}|{DEC}|{OCT}|{BIN}
+
+/* floating point literals; not referenced by any rule in this file */
+HEXF		[+-]?0[xX][0-9a-fA-F]+([pP][+-]?[0-9]+)
+DECF		[+-]?[0-9]+[.]([eE]?[+-]?[0-9]+)?[fF]?
+
+/* identifiers; APPLY is an identifier immediately followed by '!' */
+ID		[_a-zA-Z][_a-zA-Z0-9]*
+APPLY		{ID}!
+
+/* double-quoted text; a backslash takes the following character with it, so
+ * an escaped quote does not end the string */
+STRING		\"(\\.|[^"\\])*\"
+
+%x SC_COMMENT
+
+%%
+"//".* {/* skip line comments */}
+
+"/*"	{BEGIN(SC_COMMENT);}
+<SC_COMMENT>{
+	"/*"	{++parser->comment_nesting;}
+	"*"+"/"	{
+		/* closes the innermost open comment; comment mode is only left
+		 * once no nested comment remains open */
+		if (parser->comment_nesting)
+			--parser->comment_nesting;
+		else
+			BEGIN(INITIAL);
+	}
+
+	"*"+ {}
+	[^/*\n]+ {}
+	[/] {}
+	\n {}
+
+	<<EOF>> {
+		/* the input ended inside a block comment: report it rather than
+		 * silently accepting the file, and reset the comment state so
+		 * nothing stale is left behind in the parser */
+		struct src_issue issue = {
+			.level = SRC_ERROR,
+			.loc = src_loc(*yylloc),
+			.fctx.fbuf = parser->buf,
+			.fctx.fname = parser->fname,
+		};
+		src_issue(issue, "Unterminated comment, expected %s", "*/");
+		parser->failed = true;
+		parser->comment_nesting = 0;
+		BEGIN(INITIAL);
+		yyterminate();
+	}
+}
+
+	/* punctuation and delimiters; each rule only returns its token */
+"::"		{return SCOPE;}
+"("		{return LPAREN;}
+")"		{return RPAREN;}
+"{"		{return LBRACE;}
+"}"		{return RBRACE;}
+"["		{return LBRACKET;}
+"]"		{return RBRACKET;}
+"."		{return DOT;}
+","		{return COMMA;}
+";"		{return SEMICOLON;}
+":"		{return COLON;}
+"!"		{return BANG;}
+
+	/* arithmetic and bitwise operators */
+"+"		{return PLUS;}
+"-"		{return MINUS;}
+"*"		{return STAR;}
+"/"		{return DIV;}
+"%"		{return REM;}
+"^"		{return XOR;}
+
+	/* boolean literals: both produce BOOL, told apart by yylval->integer */
+"true" {
+	yylval->integer = 1;
+	return BOOL;
+}
+
+"false" {
+	yylval->integer = 0;
+	return BOOL;
+}
+
+'[^'\\]'	{
+	/* regular character constant, 'a': the value is the single byte
+	 * between the quotes.
+	 * NOTE(review): yytext is plain char, so bytes above 0x7f may come out
+	 * negative where char is signed -- confirm that is acceptable */
+	yylval->integer = yytext[1];
+	return CHAR;
+}
+
+'\\.' {
+	/* escaped character constant: the character after the backslash is
+	 * translated by match_escape(), which is defined outside this file */
+	yylval->integer = match_escape(yytext[2]);
+	return CHAR;
+}
+
+	/* a lone quote is SQUOTE; a complete character constant is a longer
+	 * match and is therefore picked up by the CHAR rules instead */
+"?"		{return QUESTION;}
+"'"		{return SQUOTE;}
+
+"&"		{return AND;}
+
+	/* assignment and comparison; the two-character operators win over
+	 * their one-character prefixes because flex prefers the longest match */
+"~"		{return TILDE;}
+"="		{return TO;}
+"<"		{return LT;}
+">"		{return GT;}
+"<="		{return LE;}
+">="		{return GE;}
+"!="		{return NE;}
+"=="		{return EQ;}
+
+"=>"		{return FATARROW;}
+
+	/* shifts */
+"<<"		{return LSHIFT;}
+">>"		{return RSHIFT;}
+
+{STRING} {
+	/* seems risky, I know, but letting the parser choose when to allocate a
+	 * new string seems to help with syntax error cleanup */
+	/* the copy is the matched text verbatim: it still includes the
+	 * surrounding quotes and any escape sequences are left unprocessed.
+	 * NOTE(review): the result of strdup() is not checked and the copy is
+	 * presumably released by whoever consumes the token -- confirm */
+	yylval->str = strdup(yytext);
+	return STRING;
+}
+
+{INT} {
+	/* With base 0, strtoll() detects decimal, 0x hex and leading-0 octal by
+	 * itself, but the "0b" prefix is only understood by C23 libraries;
+	 * elsewhere "0b101" would stop at the 'b' and yield 0. Binary literals
+	 * are therefore converted explicitly, skipping the prefix. */
+	if (yytext[0] == '0' && yytext[1] == 'b')
+		yylval->integer = strtoll(yytext + 2, NULL, 2);
+	else
+		yylval->integer = strtoll(yytext, NULL, 0);
+
+	return INT;
+}
+
+{ID} {
+	/* heap copy of the identifier, handed over through yylval */
+	yylval->str = strdup(yytext);
+	return ID;
+}
+
+{APPLY} {
+	/* Copy the identifier without its trailing '!'. The '!' is only hidden
+	 * while strdup() runs and is put back afterwards, so the scanner's
+	 * buffer is left exactly as it was read.
+	 *
+	 * NOTE(review): because the longest match wins, "a!=b" is scanned as
+	 * APPLY("a") followed by TO rather than ID NE ID -- confirm this is
+	 * intended. */
+	char *bang = yytext + yyleng - 1;
+	*bang = '\0';
+	yylval->str = strdup(yytext);
+	*bang = '!';
+
+	return APPLY;
+}
+
+
+[[:space:]]+	{/* skip whitespace */}
+
+. {
+	/* Any character no other rule recognises: report it, mark the parse as
+	 * failed and keep scanning (no token is returned). The designated
+	 * initializer zeroes every member not listed, so no indeterminate data
+	 * is passed along with the issue. */
+	struct src_issue issue = {
+		.level = SRC_ERROR,
+		.loc = src_loc(*yylloc),
+		.fctx.fbuf = parser->buf,
+		.fctx.fname = parser->fname,
+	};
+	src_issue(issue, "Unexpected token: %s", yytext);
+	parser->failed = true;
+}
+%%
author	Kimplul <kimi.h.kuparinen@gmail.com>	2024-12-03 22:04:38 +0200
committer	Kimplul <kimi.h.kuparinen@gmail.com>	2024-12-03 22:04:38 +0200
commit	2253da61e9b3dd5408bed182ea08e5270156c17e (patch)
tree	298bb06e681ec5366faa539906cae6e805fe5862 /src/lexer.l
download	fwd-2253da61e9b3dd5408bed182ea08e5270156c17e.tar.gz fwd-2253da61e9b3dd5408bed182ea08e5270156c17e.zip