use crate::error::Error;
use crate::lexer::{Token, TokenKind};
use crate::syntax::{Expr, VarName};
enum Peek<'a> {
Eof,
Tok(&'a Token),
}
fn peek(tokens: &[Token], pos: usize) -> Peek<'_> {
tokens.get(pos).map_or(Peek::Eof, Peek::Tok)
}
pub fn parse(tokens: &[Token]) -> Result<Expr, Error> {
let (expr, end_pos) = parse_expr(tokens, 0)?;
tokens.get(end_pos).map_or(Ok(expr), |tok| {
Err(Error::UnexpectedToken {
at: tok.at(),
expected: "end of input",
found: format!("{}", tok.kind()),
})
})
}
fn parse_expr(tokens: &[Token], pos: usize) -> Result<(Expr, usize), Error> {
parse_seq(tokens, pos)
}
fn parse_right_expr(tokens: &[Token], pos: usize) -> Result<(Expr, usize), Error> {
match peek(tokens, pos) {
Peek::Eof => Err(Error::UnexpectedEnd {
expected: "expression",
}),
Peek::Tok(tok) => dispatch_right(tokens, pos, tok),
}
}
fn dispatch_right(tokens: &[Token], pos: usize, tok: &Token) -> Result<(Expr, usize), Error> {
match tok.kind() {
TokenKind::Lambda => parse_lambda(tokens, pos),
TokenKind::KwLet => parse_let(tokens, pos),
TokenKind::KwFix => parse_fix(tokens, pos),
TokenKind::Ident(_) | TokenKind::LParen | TokenKind::KwRef | TokenKind::Bang => {
parse_assign(tokens, pos)
}
TokenKind::KwIn
| TokenKind::Dot
| TokenKind::Equals
| TokenKind::RParen
| TokenKind::Assign
| TokenKind::Semicolon => Err(Error::UnexpectedToken {
at: tok.at(),
expected: "expression",
found: format!("{}", tok.kind()),
}),
}
}
fn parse_lambda(tokens: &[Token], pos: usize) -> Result<(Expr, usize), Error> {
let after_lambda = expect_kind(tokens, pos, &TokenKind::Lambda, "`\\`")?;
let (param, after_ident) = expect_ident(tokens, after_lambda)?;
let after_dot = expect_kind(tokens, after_ident, &TokenKind::Dot, "`.`")?;
let (body, after_body) = parse_expr(tokens, after_dot)?;
Ok((Expr::lam(param, body), after_body))
}
fn parse_let(tokens: &[Token], pos: usize) -> Result<(Expr, usize), Error> {
let after_let = expect_kind(tokens, pos, &TokenKind::KwLet, "keyword `let`")?;
let (name, after_name) = expect_ident(tokens, after_let)?;
let after_eq = expect_kind(tokens, after_name, &TokenKind::Equals, "`=`")?;
let (value, after_value) = parse_expr(tokens, after_eq)?;
let after_in = expect_kind(tokens, after_value, &TokenKind::KwIn, "keyword `in`")?;
let (body, after_body) = parse_expr(tokens, after_in)?;
Ok((Expr::bind(name, value, body), after_body))
}
fn parse_fix(tokens: &[Token], pos: usize) -> Result<(Expr, usize), Error> {
let after_fix = expect_kind(tokens, pos, &TokenKind::KwFix, "keyword `fix`")?;
let (name, after_name) = expect_ident(tokens, after_fix)?;
let after_dot = expect_kind(tokens, after_name, &TokenKind::Dot, "`.`")?;
let (body, after_body) = parse_expr(tokens, after_dot)?;
Ok((Expr::fix(name, body), after_body))
}
fn parse_seq(tokens: &[Token], pos: usize) -> Result<(Expr, usize), Error> {
let (first, after_first) = parse_right_expr(tokens, pos)?;
parse_seq_tail(tokens, after_first, first)
}
fn parse_seq_tail(tokens: &[Token], pos: usize, lhs: Expr) -> Result<(Expr, usize), Error> {
let has_semi = matches!(
peek(tokens, pos),
Peek::Tok(t) if matches!(t.kind(), TokenKind::Semicolon)
);
if has_semi {
let (next, after_next) = parse_right_expr(tokens, pos + 1)?;
parse_seq_tail(tokens, after_next, Expr::seq(lhs, next))
} else {
Ok((lhs, pos))
}
}
fn parse_assign(tokens: &[Token], pos: usize) -> Result<(Expr, usize), Error> {
let (lhs, after_lhs) = parse_app(tokens, pos)?;
let has_assign = matches!(
peek(tokens, after_lhs),
Peek::Tok(t) if matches!(t.kind(), TokenKind::Assign)
);
if has_assign {
let (rhs, after_rhs) = parse_right_expr(tokens, after_lhs + 1)?;
Ok((Expr::assign(lhs, rhs), after_rhs))
} else {
Ok((lhs, after_lhs))
}
}
fn parse_app(tokens: &[Token], pos: usize) -> Result<(Expr, usize), Error> {
let (head, after_head) = parse_atom(tokens, pos)?;
parse_app_tail(tokens, after_head, head)
}
fn parse_app_tail(tokens: &[Token], pos: usize, lhs: Expr) -> Result<(Expr, usize), Error> {
let can_apply = starts_atom(tokens, pos);
if can_apply {
let (arg, after_arg) = parse_atom(tokens, pos)?;
parse_app_tail(tokens, after_arg, Expr::app(lhs, arg))
} else {
Ok((lhs, pos))
}
}
fn starts_atom(tokens: &[Token], pos: usize) -> bool {
match peek(tokens, pos) {
Peek::Eof => false,
Peek::Tok(tok) => matches!(
tok.kind(),
TokenKind::Ident(_) | TokenKind::LParen | TokenKind::KwRef | TokenKind::Bang
),
}
}
fn parse_atom(tokens: &[Token], pos: usize) -> Result<(Expr, usize), Error> {
match peek(tokens, pos) {
Peek::Eof => Err(Error::UnexpectedEnd {
expected: "atom (identifier, `(`, `ref`, or `!`)",
}),
Peek::Tok(tok) => dispatch_atom(tokens, pos, tok),
}
}
fn dispatch_atom(tokens: &[Token], pos: usize, tok: &Token) -> Result<(Expr, usize), Error> {
match tok.kind() {
TokenKind::Ident(name) => Ok((Expr::Var(name.clone()), pos + 1)),
TokenKind::LParen => parse_paren_inner(tokens, pos + 1),
TokenKind::KwRef => parse_ref_prefix(tokens, pos + 1),
TokenKind::Bang => parse_bang_prefix(tokens, pos + 1),
TokenKind::Lambda
| TokenKind::KwLet
| TokenKind::KwFix
| TokenKind::KwIn
| TokenKind::Dot
| TokenKind::Equals
| TokenKind::RParen
| TokenKind::Assign
| TokenKind::Semicolon => Err(Error::UnexpectedToken {
at: tok.at(),
expected: "atom (identifier, `(`, `ref`, or `!`)",
found: format!("{}", tok.kind()),
}),
}
}
fn parse_paren_inner(tokens: &[Token], pos: usize) -> Result<(Expr, usize), Error> {
let (inner, after_inner) = parse_expr(tokens, pos)?;
let after_close = expect_kind(tokens, after_inner, &TokenKind::RParen, "`)`")?;
Ok((inner, after_close))
}
fn parse_ref_prefix(tokens: &[Token], pos: usize) -> Result<(Expr, usize), Error> {
let (inner, after_inner) = parse_atom(tokens, pos)?;
Ok((Expr::alloc(inner), after_inner))
}
fn parse_bang_prefix(tokens: &[Token], pos: usize) -> Result<(Expr, usize), Error> {
let (inner, after_inner) = parse_atom(tokens, pos)?;
Ok((Expr::deref(inner), after_inner))
}
fn expect_ident(tokens: &[Token], pos: usize) -> Result<(VarName, usize), Error> {
match peek(tokens, pos) {
Peek::Eof => Err(Error::UnexpectedEnd {
expected: "identifier",
}),
Peek::Tok(tok) => dispatch_expect_ident(tok, pos),
}
}
fn dispatch_expect_ident(tok: &Token, pos: usize) -> Result<(VarName, usize), Error> {
match tok.kind() {
TokenKind::Ident(name) => Ok((name.clone(), pos + 1)),
TokenKind::Lambda
| TokenKind::KwLet
| TokenKind::KwIn
| TokenKind::KwFix
| TokenKind::KwRef
| TokenKind::Dot
| TokenKind::Equals
| TokenKind::LParen
| TokenKind::RParen
| TokenKind::Bang
| TokenKind::Assign
| TokenKind::Semicolon => Err(Error::UnexpectedToken {
at: tok.at(),
expected: "identifier",
found: format!("{}", tok.kind()),
}),
}
}
fn expect_kind(
tokens: &[Token],
pos: usize,
expected: &TokenKind,
name: &'static str,
) -> Result<usize, Error> {
match peek(tokens, pos) {
Peek::Eof => Err(Error::UnexpectedEnd { expected: name }),
Peek::Tok(tok) => {
let kind_matches = tok.kind() == expected;
if kind_matches {
Ok(pos + 1)
} else {
Err(Error::UnexpectedToken {
at: tok.at(),
expected: name,
found: format!("{}", tok.kind()),
})
}
}
}
}
#[cfg(test)]
mod tests {
use super::*;
use crate::lexer::lex;
fn parse_str(src: &str) -> Result<Expr, Error> {
let tokens = lex(src)?;
parse(&tokens)
}
#[test]
fn ref_and_deref() -> Result<(), Error> {
let e = parse_str(r"!ref x")?;
let expected = Expr::deref(Expr::alloc(Expr::var("x")));
(e == expected).then_some(()).ok_or(Error::UnexpectedEnd {
expected: "ref/deref AST",
})
}
#[test]
fn assign_is_lowest_in_app() -> Result<(), Error> {
let e = parse_str(r"f x := g y")?;
let expected = Expr::assign(
Expr::app(Expr::var("f"), Expr::var("x")),
Expr::app(Expr::var("g"), Expr::var("y")),
);
(e == expected).then_some(()).ok_or(Error::UnexpectedEnd {
expected: "assign AST",
})
}
#[test]
fn semicolon_is_left_associative() -> Result<(), Error> {
let e = parse_str(r"x ; y ; z")?;
let expected = Expr::seq(Expr::seq(Expr::var("x"), Expr::var("y")), Expr::var("z"));
(e == expected).then_some(()).ok_or(Error::UnexpectedEnd {
expected: "seq AST",
})
}
}