/* SPDX-License-Identifier: copyleft-next-0.3.1 */
/* Copyright 2024 Kim Kuparinen < kimi.h.kuparinen@gmail.com > */

%{

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <assert.h>

#include <fwd/parser.h>

%}

%locations

%define parse.trace
%define parse.error verbose
%define api.pure full
%define lr.type ielr

%lex-param {void *scanner} {struct parser *parser}
%parse-param {void *scanner} {struct parser* parser}

%union {
	struct ast *node;
	struct type *type;
	double dbl;
	long long integer;
	char *str;
};

%token <integer> INT
%token <integer> CHAR
%token <integer> BOOL
%token <dbl> FLOAT
%token <str> STRING
%token <str> ID
%token <str> APPLY

%token QUESTION "?"
%token SQUOTE "'"
%token TO "="
%token ELLIPSIS "..."
%token SEMICOLON ";"
%token COLON ":"
%token BANG "!"
%token SIZEOF "sizeof"
%token STAR "*"
%token DIV "/"
%token REM "%"
%token MINUS "-"
%token PLUS "+"
%token XOR "^"
%token AND "&"
%token BAR "|"
%token TILDE "~"
%token LT "<"
%token GT ">"
%token LE "<="
%token GE ">="
%token NE "!="
%token EQ "=="
%token LSHIFT "<<"
%token RSHIFT ">>"
%token COMMA ","
%token LPAREN "("
%token RPAREN ")"
%token LBRACE "{"
%token RBRACE "}"
%token LBRACKET "["
%token RBRACKET "]"
%token IF "if"
%token ELSE "else"
%token NIL "nil"
%token PUB "pub"
%token MUT "mut"
%token CONTINUE "continue"
%token IMPORT "import"
%token ERROR "error"
%token DOT "."
%token SCOPE "::"
%token FATARROW "=>"

%right "[" "]"
/* precedence */
%left ","
%right "=" "+=" "-=" "*=" "/=" "%=" "<<=" ">>="
%left "==" "!="
%left "<" ">" "<=" ">="
%left "^"
%left "&"
%left "<<" ">>"
%left "+" "-"
%left "*" "/" "%"
%left "as" "sizeof"
%right "'" "!" "~"
%left "." "=>" "(" ")"
%left "::"

%nterm <node> top unit proc proc_decl call closure var expr statement body
%nterm <node> vars exprs statements closures trailing_closure
%nterm <node> opt_vars opt_exprs opt_statements opt_trailing_closure
%nterm <node> rev_vars rev_exprs rev_closures rev_statements
%nterm <node> let if nil unop binop construct import

%nterm <node> struct struct_cont trait
%nterm <node> type_param type_params opt_type_params
%nterm <node> behaviour behaviours opt_behaviours
%nterm <node> member members opt_members

%nterm <type> type rev_types types opt_types



%{

/** Modifies the signature of yylex to fit our parser better. */
#define YY_DECL int yylex(YYSTYPE *yylval, YYLTYPE *yylloc, \
	                  void *yyscanner, struct parser *parser)

/**
 * Declare yylex.
 *
 * @param yylval Bison current value.
 * @param yylloc Bison location info.
 * @param yyscanner Flex scanner.
 * @param parser Current parser state.
 * @return \c 0 when succesful, \c 1 otherwise.
 * More info on yylex() can be found in the flex manual.
 */
YY_DECL;

/**
 * Gobble tokens until we reach the next interesting feature.
 * Interesting features are generally new statements.
 * Mainly intended for trying to get to a sensible
 * location to continue parser after an error has occured.
 *
 * @param yylval Current parser value.
 * @param yylloc Parser location info.
 * @param scanner Lex scanner.
 * @param parser Current parser.
 * @return \c 0 on success, non-zero otherwise.
 */
static int next_interesting_feature(YYSTYPE *yylval, YYLTYPE *yylloc,
                             void *scanner, struct parser *parser);

/**
 * Convert bison location info to our own source location info.
 *
 * @param yylloc Bison location info.
 * @return Internal location info.
 */
static struct src_loc src_loc(YYLTYPE yylloc);

/**
 * Print parsing error.
 * Automatically called by bison.
 *
 * @param yylloc Location of error.
 * @param lexer Lexer.
 * @param parser Parser state.
 * @param msg Message to print.
 */
static void yyerror(YYLTYPE *yylloc, void *lexer,
		struct parser *parser, const char *msg);

/**
 * Try to convert escape code to its actual value.
 * I.e. '\n' -> 0x0a.
 *
 * @param c Escape character without backslash.
 * @return Corresponding value.
 */
static char match_escape(char c);

/**
 * Similar to strdup() but skips quotation marks that would
 * otherwise be included.
 * I.e. "something" -> something.
 *
 * @param s String to clone, with quotation marks surrounding it.
 * @return Identical string but without quotation marks around it.
 */
static char *strip(const char *s);

%}

%start input;
%%
var
	: type ID { $$ = gen_var($2, $1, src_loc(@$)); }

rev_vars
	: rev_vars "," var { $$ = $3; $$->n = $1; }
	| rev_vars "|" var { $$ = $3; $$->n = $1; opt_group($1, $3); }
	| var

vars
	: rev_vars { $$ = reverse_ast_list($1); }
	| rev_vars "," { $$ = reverse_ast_list($1); }

opt_vars
	: vars
	| {$$ = NULL;}

proc
	: ID "(" opt_vars ")" body {
		$$ = gen_proc($[ID], $[opt_vars], NULL, $[body], src_loc(@$));
	}

proc_decl
	: ID "(" opt_vars ")" ";" {
		$$ = gen_proc($[ID], $[opt_vars], NULL, NULL, src_loc(@$));
	}

binop
	: expr "+"  expr { $$ = gen_binop(AST_ADD, $1, $3, src_loc(@$));  }
	| expr "-"  expr { $$ = gen_binop(AST_SUB, $1, $3, src_loc(@$));  }
	| expr "*"  expr { $$ = gen_binop(AST_MUL, $1, $3, src_loc(@$));  }
	| expr "/"  expr { $$ = gen_binop(AST_DIV, $1, $3, src_loc(@$));  }
	| expr "%"  expr { $$ = gen_binop(AST_REM, $1, $3, src_loc(@$));  }
	| expr "<"  expr { $$ = gen_comparison(AST_LT, $1, $3, src_loc(@$));  }
	| expr ">"  expr { $$ = gen_comparison(AST_GT, $1, $3, src_loc(@$));  }
	| expr "<<" expr { $$ = gen_binop(AST_LSHIFT, $1, $3, src_loc(@$));  }
	| expr ">>" expr { $$ = gen_binop(AST_RSHIFT, $1, $3, src_loc(@$));  }
	| expr "<=" expr { $$ = gen_comparison(AST_LE, $1, $3, src_loc(@$));  }
	| expr ">=" expr { $$ = gen_comparison(AST_GE, $1, $3, src_loc(@$));  }
	| expr "!=" expr { $$ = gen_comparison(AST_NE, $1, $3, src_loc(@$));  }
	| expr "==" expr { $$ = gen_comparison(AST_EQ, $1, $3, src_loc(@$));  }

unop
	: "-" expr { $$ = gen_unop(AST_NEG, $2, src_loc(@$));  }
	| "!" expr { $$ = gen_unop(AST_LNOT, $2, src_loc(@$));  }

type
	: ID { $$ = tgen_id($[ID], src_loc(@$)); }
	| APPLY "[" opt_types "]" {
		$$ = tgen_construct($[APPLY], $[opt_types], src_loc(@$));
	}
	| "(" opt_types ")" { $$ = tgen_closure($2, src_loc(@$)); }
	| "&&" type {
		if ($2->k == TYPE_CLOSURE) {
			$$ = $2; $$->k = TYPE_PURE_CLOSURE;
		} else {
			$$ = tgen_ref($2, src_loc(@$));
		}

		/* heh */
		$$ = tgen_ref($$, src_loc(@$));
	}
	| "&" type {
		if ($2->k == TYPE_CLOSURE) {
			$$ = $2; $$->k = TYPE_PURE_CLOSURE;
		} else {
			$$ = tgen_ref($2, src_loc(@$));
		}
	}
	| "*" type {
		if ($2->k == TYPE_CLOSURE) {
			$$ = $2; $$->k = TYPE_FUNC_PTR;
		} else {
			$$ = tgen_ptr($2, src_loc(@$));
		}
	}

rev_types
	: rev_types "," type { $$ = $3; $$->n = $1; }
	| type

types
	: rev_types { $$ = reverse_type_list($1); }

opt_types
	: types
	| { $$ = NULL; }

construct
	: APPLY "{" opt_exprs "}" {
		$$ = gen_init($[APPLY], NULL, $[opt_exprs], src_loc(@$));
	}
	| APPLY "[" opt_types "]" "{" opt_exprs "}" {
		$$ = gen_init($[APPLY], $[opt_types], $[opt_exprs], src_loc(@$));
	}

expr
	: expr "." ID { $$ = gen_dot($3, $1, src_loc(@$)); }
	| STRING { $$ = gen_const_str(strip($1), src_loc(@$)); }
	| FLOAT { $$ = gen_const_float($1, src_loc(@$)); }
	| BOOL { $$ = gen_const_bool($1, src_loc(@$)); }
	| CHAR { $$ = gen_const_char($1, src_loc(@$)); }
	| INT { $$ = gen_const_int($1, src_loc(@$)); }
	| ID { $$ = gen_id($1, src_loc(@$)); }
	| "(" expr ")" { $$ = $2; }
	| expr "&" { $$ = gen_ref($1, src_loc(@$)); }
	| expr "*" { $$ = gen_deref($1, src_loc(@$)); }
	| construct
	| binop
	| unop

rev_exprs
	: rev_exprs "," expr { $$ = $3; $3->n = $1; }
	| expr

exprs
	: rev_exprs { $$ = reverse_ast_list($1); }

opt_exprs
	: exprs
	| { $$ = NULL; }

closure
	: "=>" opt_vars body {
		$$ = gen_closure($[opt_vars], $[body], src_loc(@$));
	}
	| "&" "=>" opt_vars body {
		$$ = gen_closure($[opt_vars], $[body], src_loc(@$));
		ast_set_flags($$, AST_FLAG_NOMOVES);
	}

rev_closures
	: rev_closures closure { $$ = $2; $2->n = $1; }
	| closure

trailing_closure
	: "=>" opt_vars ";" { $$ = gen_closure($[opt_vars], NULL, src_loc(@$));}
	| "&" "=>" opt_vars ";" {
		$$ = gen_closure($[opt_vars], NULL, src_loc(@$));
		ast_set_flags($$, AST_FLAG_NOMOVES);
	}

opt_trailing_closure
	: trailing_closure
	| { $$ = NULL; }

closures
	: rev_closures opt_trailing_closure {
		if ($[opt_trailing_closure]) {
			$[opt_trailing_closure]->n = $[rev_closures];
			$$ = reverse_ast_list($[opt_trailing_closure]);
		} else {
			$$ = reverse_ast_list($[rev_closures]);
		}
	}

call
	: expr "(" opt_exprs ")" ";" {
		$$ = gen_call($1, $[opt_exprs], src_loc(@$));
	}
	| expr "(" opt_exprs ")" "=>" opt_vars ";" {
		/* the rest of the function body is our closure, gets fixed up
		 * later */
		struct ast *closure = gen_closure($[opt_vars], NULL, src_loc(@$));
		ast_append(&$[opt_exprs], closure);
		$$ = gen_call($[expr], $[opt_exprs], src_loc(@$));
	}
	| expr "(" opt_exprs ")" closures {
		ast_append(&$[opt_exprs], $[closures]);
		$$ = gen_call($[expr], $[opt_exprs], src_loc(@$));
	}

let
	: expr "=>" var ";" { $$ = gen_let($3, $1, src_loc(@$)); }

if
	: "if" expr body { $$ = gen_if($2, $3, NULL, src_loc(@$)); }
	| "if" expr body "else" body { $$ = gen_if($2, $3, $5, src_loc(@$)); }
	| "if" expr body "else" if { $$ = gen_if($2, $3, $5, src_loc(@$)); }

nil
	: "nil" ID body "=>" var {
		$$ = gen_nil($2, $3, $5, src_loc(@$));
	}

statement
	: call
	| body
	| let
	| nil
	| if
	| ";" { $$ = gen_empty(src_loc(@$)); }

rev_statements
	: rev_statements statement { $$ = $2; $2->n = $1; }
	| statement

statements
	: rev_statements { $$ = reverse_ast_list($1); fix_closures($$); }

opt_statements
	: statements
	| { $$ = NULL; }

body
	: "{" opt_statements "}" {
		$$ = gen_block($2, src_loc(@$));
	}

behaviour
	: APPLY ";" { $$ = gen_trait_apply($1, src_loc(@$)); }
	| proc_decl ";"
	| proc

behaviours
	: behaviours behaviour { $$ = $1; $1->n = $2; }
	| behaviour

opt_behaviours
	: behaviours
	| { $$ = NULL; }

member
	: var ";"
	| behaviour

members
	: members member { $$ = $1; $1->n = $2; }
	| member

opt_members
	: members
	| { $$ = NULL; }

type_param
	: ID ID {
		struct type *t = tgen_id($1, src_loc(@1));
		$$ = gen_var($2, t, src_loc(@$));
	}

type_params
	: type_param "," type_params {
		$$ = $1; $1->n = $3;
	}
	| type_param

opt_type_params
	: type_params
	| { $$ = NULL; }

struct
	: ID "[" opt_type_params "]" "{" opt_members "}" {
		$$ = gen_struct($1, $3, $6, src_loc(@$));
	}

struct_cont
	: "continue" ID "[" opt_type_params "]" "{" opt_behaviours "}" {
		$$ = gen_struct_cont($2, $4, $7, src_loc(@$));
	}

trait
	: ID "{" opt_behaviours "}" {
		$$ = gen_trait($1, $3, src_loc(@$));
	}

import
	: "import" STRING {
		$$ = gen_import(strip($2), src_loc(@$));
	}

top
	: proc
	| struct
	| struct_cont
	| import
	| trait
	| "pub" proc        { $$ = $2; ast_set_flags($$, AST_FLAG_PUBLIC); }
	| "pub" struct      { $$ = $2; ast_set_flags($$, AST_FLAG_PUBLIC); }
	| "pub" struct_cont { $$ = $2; ast_set_flags($$, AST_FLAG_PUBLIC); }
	| "pub" import      { $$ = $2; ast_set_flags($$, AST_FLAG_PUBLIC); }
	| "pub" trait       { $$ = $2; ast_set_flags($$, AST_FLAG_PUBLIC); }
	| error {
	    $$ = gen_empty(src_loc(@$));
	    parser->failed = true;
	    /* ignore any content inside a top level thing and just move onto
	     * the next one */
	    if (next_interesting_feature(&yylval, &yylloc, scanner, parser)) {
		YYABORT;
	    }
	    yyclearin;
	    yyerrok;
	}

unit
	: top { $$ = $1; }
	| unit top { $$ = $2; $2->n = $1; }

input
	: unit { parser->tree = reverse_ast_list($1); }
	| /* empty */

%%

#include "gen_lexer.inc"

/* I'm not convinced this is foolproof quite yet, more testing would be nice. */
static int next_interesting_feature(YYSTYPE *yylval, YYLTYPE *yylloc,
				void *scanner, struct parser *parser)
{
	size_t depth = 0;
	while (1) {
		int ret = yylex(yylval, yylloc, scanner, parser);
		if (ret == LBRACE) {
			depth++;
			continue;
		}

		if (ret == RBRACE && depth > 0)
			depth--;

		if (ret == RBRACE && depth == 0)
			return 0;

		if (ret == SEMICOLON && depth == 0)
			return 0;

		/* return fatal error and parser should abort */
		if (ret == YYEOF)
			/* some error for unmatched braces would be cool I think */
			return 1;
	}
}


static struct src_loc src_loc(YYLTYPE yylloc)
{
	struct src_loc loc;
	loc.first_line = yylloc.first_line;
	loc.last_line = yylloc.last_line;
	loc.first_col = yylloc.first_column;
	loc.last_col = yylloc.last_column;
	return loc;
}

static void yyerror(YYLTYPE *yylloc, void *lexer,
		struct parser *parser, const char *msg)
{
	(void)lexer;

	struct src_issue issue;
	issue.level = SRC_ERROR;
	issue.loc = src_loc(*yylloc);
	issue.fctx.fbuf = parser->buf;
	issue.fctx.fname = parser->fname;
	src_issue(issue, msg);
}

static char match_escape(char c)
{
	switch (c) {
	case '\'': return '\'';
	case '\\': return '\\';
	case 'a': return '\a';
	case 'b': return '\b';
	case 'f': return '\f';
	case 'n': return '\n';
	case 'r': return '\r';
	case 't': return '\t';
	case 'v': return '\v';
	}

	return c;
}

static char *strip(const char *str)
{
	const size_t len = strlen(str) + 1;
	char *buf = malloc(len);
	if (!buf) {
		/* should probably try to handle the error in some way... */
		internal_error("failed allocating buffer for string clone");
		free((void *)str);
		return NULL;
	}

	/* skip quotation marks */
	size_t j = 0;
	for (size_t i = 1; i < len - 2; ++i)
		buf[j++] = str[i];

	buf[j] = 0;
	free((void *)str);
	return buf;

}

struct parser *create_parser()
{
	return calloc(1, sizeof(struct parser));
}

void destroy_parser(struct parser *p)
{
	yylex_destroy(p->lexer);
	free(p);
}

void parse(struct parser *p, const char *fname, const char *buf)
{
	p->fname = fname;
	p->buf = buf;

	p->comment_nesting = 0;

	p->failed = false;

	yylex_init(&p->lexer);
	yy_scan_string(buf, p->lexer);
	yyparse(p->lexer, p);
}