%option reentrant noyywrap nounput noinput nodefault

%{
/*
 * Flex scanner: splits source text into the tokens returned to the parser
 * and keeps the token location (yylloc) up to date for diagnostics.
 */

/* Standard headers for the names used directly in this file. */
#include <stdarg.h>	/* va_list, va_start, va_end */
#include <stdbool.h>	/* bool, true */
#include <stdint.h>	/* int64_t */
#include <string.h>	/* strdup, strlen, size_t */

/*
 * NOTE(review): the project header(s) declaring struct parser, struct
 * src_loc, struct src_issue, vsrc_issue(), src_loc(), ph_date_t,
 * date_from_string(), date_valid(), YYLTYPE, the token names and the
 * yylex() prototype that supplies `parser`, `yylval` and `yylloc` must be
 * included here as well; their names could not be determined from this file.
 */

/* Largest magnitude accepted for an integer literal. */
static const int64_t max_int_literal = 1000000000000;

/*
 * Advance the location in `lloc` over `text`.
 *
 * The previous end position becomes the new start position, then the end
 * position is moved forward one character at a time. A newline increments
 * the line and resets the column to 1. `parser` is unused.
 */
static void update_yylloc(struct parser *parser, YYLTYPE *lloc, const char *text)
{
	(void)parser;

	lloc->first_line = lloc->last_line;
	lloc->first_column = lloc->last_column;

	for (size_t i = 0; text[i] != 0; ++i) {
		if (text[i] == '\n') {
			lloc->last_line++;
			/* flex uses 1 based indexing */
			lloc->last_column = 1;
		} else {
			lloc->last_column++;
		}
	}
}

/*
 * Report a lexing problem at `loc` and mark the parser as failed.
 *
 * `msg` is a printf-style format string; it and the variadic arguments are
 * forwarded to vsrc_issue() together with the file name and buffer stored in
 * `p`. Scanning is not stopped here, only `p->failed` is set.
 */
static void lex_fail(struct parser *p, struct src_loc loc, const char *msg, ...)
{
	/* designated initializer so that any field not named here is zeroed
	 * instead of being left indeterminate */
	struct src_issue issue = {
		.loc = loc,
		.fname = p->fname,
		.buf = p->buf,
	};

	va_list args;
	va_start(args, msg);
	vsrc_issue(issue, msg, args);
	va_end(args);

	p->failed = true;
}

/*
 * Convert the text of a date literal into a ph_date_t.
 *
 * Returns the converted date, or reports an error through lex_fail() and
 * returns 0 when date_valid() rejects the result.
 */
static ph_date_t lex_date(struct parser *p, struct src_loc loc, const char *date)
{
	ph_date_t d = date_from_string(date);
	if (!date_valid(d)) {
		lex_fail(p, loc, "Not a valid date.");
		return 0;
	}

	return d;
}

/*
 * Convert the text of an integer literal into an int64_t.
 *
 * `num` is an optional leading '-', followed by decimal digits that may be
 * grouped with ' separators. Literals whose magnitude exceeds
 * max_int_literal are reported through lex_fail() and yield 0.
 */
static int64_t lex_int(struct parser *p, struct src_loc loc, const char *num)
{
	bool neg = num[0] == '-';
	/* jump over minus sign */
	if (neg)
		num += 1;

	int64_t sum = 0;
	for (size_t i = 0; num[i] != '\0'; ++i) {
		char c = num[i];
		/* jump over separators */
		if (c == '\'')
			continue;

		/*
		 * sum is at most max_int_literal before this step, so the
		 * multiplication cannot overflow. Checking the value rather
		 * than the text length keeps leading zeros and separators
		 * from triggering a bogus "too large" error.
		 */
		sum = sum * 10 + (c - '0');
		if (sum > max_int_literal) {
			lex_fail(p, loc, "Literal integer too large");
			return 0;
		}
	}

	return neg ? -sum : sum;
}

/* Run before every rule action so yylloc always describes yytext. */
#define YY_USER_ACTION update_yylloc(parser, yylloc, yytext);
%}

IDENT		[a-z][a-zA-Z0-9_]+
FUNC_IDENT	[A-Z][a-z0-9_]+
PROC_IDENT	[A-Z]{2}[A-Z0-9_]*
DATE_LITERAL	[0-9]{4}-[0-9]{2}-[0-9]{2}
/* NOTE(review): the trailing + binds to the last [0-9] only, so a separator
 * may be followed by three OR MORE digits; confirm that is intended. */
INT_LITERAL	-?[0-9]+('[0-9][0-9][0-9]+)*
STRING		\"(\\.|[^"\\])*\"

%x SC_COMMENT

%%

"(%"	{BEGIN(SC_COMMENT);}

<SC_COMMENT>{
	"(%"	{parser->comment_nesting += 1;}
	"%)"	{
		/* close one nested level, or leave comment mode when the
		 * outermost comment ends */
		if (parser->comment_nesting)
			parser->comment_nesting -= 1;
		else
			BEGIN(INITIAL);
	}

	/* The scanner is built with nodefault, so every character that can
	 * appear inside a comment needs a rule of its own: plain text, a
	 * lone open paren, a lone percent sign and a newline. */
	[^(%\n]+	{}
	[(]	{}
	[%]	{}
	\n	{}
}

<SC_COMMENT><<EOF>>	{
	/* input ended while a comment was still open */
	lex_fail(parser, src_loc(*yylloc), "Unterminated comment");
	yyterminate();
}

"("	{return LPAREN;}
")"	{return RPAREN;}
"["	{return LSQUARE;}
"]"	{return RSQUARE;}
"{"	{return LCURLY;}
"}"	{return RCURLY;}

"'"	{return APOSTROPHE;}
"&"	{return AMPERSAND;}
","	{return COMMA;}
"."	{return DOT;}
"="	{return EQ;}
"<"	{return LT;}
"+"	{return PLUS;}
"-"	{return MINUS;}
"*"	{return MULT;}
"/"	{return DIV;}

"var"		{return VAR;}
"is"		{return IS;}
"unless"	{return UNLESS;}
"otherwise"	{return OTHERWISE;}
"until"		{return UNTIL;}
"do"		{return DO;}
"done"		{return DONE;}
"procedure"	{return PROCEDURE;}
"function"	{return FUNCTION;}
"return"	{return RETURN;}
"print"		{return PRINT;}
"end"		{return END;}

{STRING}	{
	/* skip quotation marks: copy from after the opening quote, then
	 * overwrite the closing quote with the terminator */
	yylval->str = strdup(yytext + 1);
	yylval->str[strlen(yylval->str) - 1] = '\0';
	return STRING;
}

{DATE_LITERAL}	{
	yylval->num = lex_date(parser, src_loc(*yylloc), yytext);
	return DATE_LITERAL;
}

{INT_LITERAL}	{
	yylval->snum = lex_int(parser, src_loc(*yylloc), yytext);
	return INT_LITERAL;
}

{IDENT}	{
	yylval->str = strdup(yytext);
	return IDENT;
}

{FUNC_IDENT}	{
	yylval->str = strdup(yytext);
	return FUNC_IDENT;
}

{PROC_IDENT}	{
	yylval->str = strdup(yytext);
	return PROC_IDENT;
}

[[:space:]]+	{/* skip whitespace */}

.	{
	/* report the stray character and keep scanning */
	lex_fail(parser, src_loc(*yylloc), "Unexpected token: %s", yytext);
}
%%