use crate::error::Error;
use crate::lexer::{Token, TokenKind};
use crate::syntax::{Expr, VarName};
enum Peek<'a> {
Eof,
Tok(&'a Token),
}
fn peek(tokens: &[Token], pos: usize) -> Peek<'_> {
tokens.get(pos).map_or(Peek::Eof, Peek::Tok)
}
pub fn parse(tokens: &[Token]) -> Result<Expr, Error> {
let (expr, end_pos) = parse_expr(tokens, 0)?;
tokens.get(end_pos).map_or(Ok(expr), |tok| {
Err(Error::UnexpectedToken {
at: tok.at(),
expected: "end of input",
found: format!("{}", tok.kind()),
})
})
}
fn parse_expr(tokens: &[Token], pos: usize) -> Result<(Expr, usize), Error> {
parse_seq(tokens, pos)
}
fn parse_right_expr(tokens: &[Token], pos: usize) -> Result<(Expr, usize), Error> {
match peek(tokens, pos) {
Peek::Eof => Err(Error::UnexpectedEnd {
expected: "expression",
}),
Peek::Tok(tok) => dispatch_right(tokens, pos, tok),
}
}
fn dispatch_right(tokens: &[Token], pos: usize, tok: &Token) -> Result<(Expr, usize), Error> {
match tok.kind() {
TokenKind::Lambda => parse_lambda(tokens, pos),
TokenKind::KwLet => parse_let(tokens, pos),
TokenKind::KwFix => parse_fix(tokens, pos),
TokenKind::KwExtend => parse_extend(tokens, pos),
TokenKind::KwThrow => parse_throw(tokens, pos),
TokenKind::KwTry => parse_try_catch(tokens, pos),
TokenKind::Ident(_)
| TokenKind::LParen
| TokenKind::KwRef
| TokenKind::Bang
| TokenKind::LBrace => parse_assign(tokens, pos),
TokenKind::KwIn
| TokenKind::KwCatch
| TokenKind::Dot
| TokenKind::Equals
| TokenKind::RParen
| TokenKind::RBrace
| TokenKind::Comma
| TokenKind::Assign
| TokenKind::Semicolon => Err(Error::UnexpectedToken {
at: tok.at(),
expected: "expression",
found: format!("{}", tok.kind()),
}),
}
}
fn parse_lambda(tokens: &[Token], pos: usize) -> Result<(Expr, usize), Error> {
let after_lambda = expect_kind(tokens, pos, &TokenKind::Lambda, "`\\`")?;
let (param, after_ident) = expect_ident(tokens, after_lambda)?;
let after_dot = expect_kind(tokens, after_ident, &TokenKind::Dot, "`.`")?;
let (body, after_body) = parse_expr(tokens, after_dot)?;
Ok((Expr::lam(param, body), after_body))
}
fn parse_let(tokens: &[Token], pos: usize) -> Result<(Expr, usize), Error> {
let after_let = expect_kind(tokens, pos, &TokenKind::KwLet, "keyword `let`")?;
let (name, after_name) = expect_ident(tokens, after_let)?;
let after_eq = expect_kind(tokens, after_name, &TokenKind::Equals, "`=`")?;
let (value, after_value) = parse_expr(tokens, after_eq)?;
let after_in = expect_kind(tokens, after_value, &TokenKind::KwIn, "keyword `in`")?;
let (body, after_body) = parse_expr(tokens, after_in)?;
Ok((Expr::bind(name, value, body), after_body))
}
fn parse_fix(tokens: &[Token], pos: usize) -> Result<(Expr, usize), Error> {
let after_fix = expect_kind(tokens, pos, &TokenKind::KwFix, "keyword `fix`")?;
let (name, after_name) = expect_ident(tokens, after_fix)?;
let after_dot = expect_kind(tokens, after_name, &TokenKind::Dot, "`.`")?;
let (body, after_body) = parse_expr(tokens, after_dot)?;
Ok((Expr::fix(name, body), after_body))
}
fn parse_extend(tokens: &[Token], pos: usize) -> Result<(Expr, usize), Error> {
let after_kw = expect_kind(tokens, pos, &TokenKind::KwExtend, "keyword `extend`")?;
let (parent, parent_end) = parse_atom(tokens, after_kw)?;
let body_start = expect_kind(tokens, parent_end, &TokenKind::LBrace, "`{`")?;
let (entries, body_end) = parse_property_list(tokens, body_start)?;
Ok((Expr::object_with_proto(entries, parent), body_end))
}
fn parse_throw(tokens: &[Token], pos: usize) -> Result<(Expr, usize), Error> {
let after_kw = expect_kind(tokens, pos, &TokenKind::KwThrow, "keyword `throw`")?;
let (inner, after_inner) = parse_right_expr(tokens, after_kw)?;
Ok((Expr::throw(inner), after_inner))
}
fn parse_try_catch(tokens: &[Token], pos: usize) -> Result<(Expr, usize), Error> {
let after_try = expect_kind(tokens, pos, &TokenKind::KwTry, "keyword `try`")?;
let (body, body_end) = parse_expr(tokens, after_try)?;
let after_catch = expect_kind(tokens, body_end, &TokenKind::KwCatch, "keyword `catch`")?;
let (param, param_end) = expect_ident(tokens, after_catch)?;
let after_dot = expect_kind(tokens, param_end, &TokenKind::Dot, "`.`")?;
let (handler, handler_end) = parse_expr(tokens, after_dot)?;
Ok((Expr::try_catch(body, param, handler), handler_end))
}
fn parse_seq(tokens: &[Token], pos: usize) -> Result<(Expr, usize), Error> {
let (first, after_first) = parse_right_expr(tokens, pos)?;
parse_seq_tail(tokens, after_first, first)
}
fn parse_seq_tail(tokens: &[Token], pos: usize, lhs: Expr) -> Result<(Expr, usize), Error> {
let has_semi = matches!(
peek(tokens, pos),
Peek::Tok(t) if matches!(t.kind(), TokenKind::Semicolon)
);
if has_semi {
let (next, after_next) = parse_right_expr(tokens, pos + 1)?;
parse_seq_tail(tokens, after_next, Expr::seq(lhs, next))
} else {
Ok((lhs, pos))
}
}
fn parse_assign(tokens: &[Token], pos: usize) -> Result<(Expr, usize), Error> {
let (lhs, after_lhs) = parse_app(tokens, pos)?;
let has_assign = matches!(
peek(tokens, after_lhs),
Peek::Tok(t) if matches!(t.kind(), TokenKind::Assign)
);
if has_assign {
let (rhs, after_rhs) = parse_right_expr(tokens, after_lhs + 1)?;
Ok((Expr::assign(lhs, rhs), after_rhs))
} else {
Ok((lhs, after_lhs))
}
}
fn parse_app(tokens: &[Token], pos: usize) -> Result<(Expr, usize), Error> {
let (head, after_head) = parse_atom(tokens, pos)?;
parse_app_tail(tokens, after_head, head)
}
fn parse_app_tail(tokens: &[Token], pos: usize, lhs: Expr) -> Result<(Expr, usize), Error> {
let can_apply = starts_atom(tokens, pos);
if can_apply {
let (arg, after_arg) = parse_atom(tokens, pos)?;
parse_app_tail(tokens, after_arg, Expr::app(lhs, arg))
} else {
Ok((lhs, pos))
}
}
fn starts_atom(tokens: &[Token], pos: usize) -> bool {
match peek(tokens, pos) {
Peek::Eof => false,
Peek::Tok(tok) => matches!(
tok.kind(),
TokenKind::Ident(_)
| TokenKind::LParen
| TokenKind::KwRef
| TokenKind::Bang
| TokenKind::LBrace
),
}
}
fn parse_atom(tokens: &[Token], pos: usize) -> Result<(Expr, usize), Error> {
let (base, after_base) = parse_base_atom(tokens, pos)?;
parse_field_chain(tokens, after_base, base)
}
fn parse_field_chain(tokens: &[Token], pos: usize, base: Expr) -> Result<(Expr, usize), Error> {
let has_dot = matches!(
peek(tokens, pos),
Peek::Tok(t) if matches!(t.kind(), TokenKind::Dot)
);
if has_dot {
let (name, after_name) = expect_ident(tokens, pos + 1)?;
parse_field_chain(tokens, after_name, Expr::field(base, name))
} else {
Ok((base, pos))
}
}
fn parse_base_atom(tokens: &[Token], pos: usize) -> Result<(Expr, usize), Error> {
match peek(tokens, pos) {
Peek::Eof => Err(Error::UnexpectedEnd {
expected: "atom (identifier, `(`, `ref`, `!`, or `{`)",
}),
Peek::Tok(tok) => dispatch_base_atom(tokens, pos, tok),
}
}
fn dispatch_base_atom(tokens: &[Token], pos: usize, tok: &Token) -> Result<(Expr, usize), Error> {
match tok.kind() {
TokenKind::Ident(name) => Ok((Expr::Var(name.clone()), pos + 1)),
TokenKind::LParen => parse_paren_inner(tokens, pos + 1),
TokenKind::KwRef => parse_ref_prefix(tokens, pos + 1),
TokenKind::Bang => parse_bang_prefix(tokens, pos + 1),
TokenKind::LBrace => parse_object_literal(tokens, pos + 1),
TokenKind::Lambda
| TokenKind::KwLet
| TokenKind::KwFix
| TokenKind::KwIn
| TokenKind::KwExtend
| TokenKind::KwThrow
| TokenKind::KwTry
| TokenKind::KwCatch
| TokenKind::Dot
| TokenKind::Equals
| TokenKind::RParen
| TokenKind::RBrace
| TokenKind::Comma
| TokenKind::Assign
| TokenKind::Semicolon => Err(Error::UnexpectedToken {
at: tok.at(),
expected: "atom (identifier, `(`, `ref`, `!`, or `{`)",
found: format!("{}", tok.kind()),
}),
}
}
fn parse_paren_inner(tokens: &[Token], pos: usize) -> Result<(Expr, usize), Error> {
let (inner, after_inner) = parse_expr(tokens, pos)?;
let after_close = expect_kind(tokens, after_inner, &TokenKind::RParen, "`)`")?;
Ok((inner, after_close))
}
fn parse_ref_prefix(tokens: &[Token], pos: usize) -> Result<(Expr, usize), Error> {
let (inner, after_inner) = parse_atom(tokens, pos)?;
Ok((Expr::alloc(inner), after_inner))
}
fn parse_bang_prefix(tokens: &[Token], pos: usize) -> Result<(Expr, usize), Error> {
let (inner, after_inner) = parse_atom(tokens, pos)?;
Ok((Expr::deref(inner), after_inner))
}
fn parse_object_literal(tokens: &[Token], pos: usize) -> Result<(Expr, usize), Error> {
let (entries, after_close) = parse_property_list(tokens, pos)?;
Ok((Expr::object(entries), after_close))
}
fn parse_property_list(
tokens: &[Token],
pos: usize,
) -> Result<(Vec<(VarName, Expr)>, usize), Error> {
match peek(tokens, pos) {
Peek::Eof => Err(Error::UnexpectedEnd { expected: "`}`" }),
Peek::Tok(tok) => dispatch_property_list(tokens, pos, tok),
}
}
fn dispatch_property_list(
tokens: &[Token],
pos: usize,
tok: &Token,
) -> Result<(Vec<(VarName, Expr)>, usize), Error> {
match tok.kind() {
TokenKind::RBrace => Ok((Vec::new(), pos + 1)),
TokenKind::Ident(_) => {
let (first, after_first) = parse_property(tokens, pos)?;
parse_property_tail(tokens, after_first, vec![first])
}
TokenKind::Lambda
| TokenKind::KwLet
| TokenKind::KwIn
| TokenKind::KwFix
| TokenKind::KwRef
| TokenKind::KwExtend
| TokenKind::KwThrow
| TokenKind::KwTry
| TokenKind::KwCatch
| TokenKind::Dot
| TokenKind::Equals
| TokenKind::LParen
| TokenKind::RParen
| TokenKind::LBrace
| TokenKind::Comma
| TokenKind::Bang
| TokenKind::Assign
| TokenKind::Semicolon => Err(Error::UnexpectedToken {
at: tok.at(),
expected: "property name or `}`",
found: format!("{}", tok.kind()),
}),
}
}
fn parse_property_tail(
tokens: &[Token],
pos: usize,
acc: Vec<(VarName, Expr)>,
) -> Result<(Vec<(VarName, Expr)>, usize), Error> {
match peek(tokens, pos) {
Peek::Eof => Err(Error::UnexpectedEnd {
expected: "`,` or `}`",
}),
Peek::Tok(tok) => match tok.kind() {
TokenKind::Comma => {
let (next, after_next) = parse_property(tokens, pos + 1)?;
parse_property_tail(tokens, after_next, push_property(acc, next))
}
TokenKind::RBrace => Ok((acc, pos + 1)),
TokenKind::Ident(_)
| TokenKind::Lambda
| TokenKind::KwLet
| TokenKind::KwIn
| TokenKind::KwFix
| TokenKind::KwRef
| TokenKind::KwExtend
| TokenKind::KwThrow
| TokenKind::KwTry
| TokenKind::KwCatch
| TokenKind::Dot
| TokenKind::Equals
| TokenKind::LParen
| TokenKind::LBrace
| TokenKind::RParen
| TokenKind::Bang
| TokenKind::Assign
| TokenKind::Semicolon => Err(Error::UnexpectedToken {
at: tok.at(),
expected: "`,` or `}`",
found: format!("{}", tok.kind()),
}),
},
}
}
fn parse_property(tokens: &[Token], pos: usize) -> Result<((VarName, Expr), usize), Error> {
let (name, after_name) = expect_ident(tokens, pos)?;
let after_eq = expect_kind(tokens, after_name, &TokenKind::Equals, "`=`")?;
let (value, after_value) = parse_right_expr(tokens, after_eq)?;
Ok(((name, value), after_value))
}
fn push_property(acc: Vec<(VarName, Expr)>, entry: (VarName, Expr)) -> Vec<(VarName, Expr)> {
acc.into_iter().chain(std::iter::once(entry)).collect()
}
fn expect_ident(tokens: &[Token], pos: usize) -> Result<(VarName, usize), Error> {
match peek(tokens, pos) {
Peek::Eof => Err(Error::UnexpectedEnd {
expected: "identifier",
}),
Peek::Tok(tok) => dispatch_expect_ident(tok, pos),
}
}
fn dispatch_expect_ident(tok: &Token, pos: usize) -> Result<(VarName, usize), Error> {
match tok.kind() {
TokenKind::Ident(name) => Ok((name.clone(), pos + 1)),
TokenKind::Lambda
| TokenKind::KwLet
| TokenKind::KwIn
| TokenKind::KwFix
| TokenKind::KwRef
| TokenKind::KwExtend
| TokenKind::KwThrow
| TokenKind::KwTry
| TokenKind::KwCatch
| TokenKind::Dot
| TokenKind::Equals
| TokenKind::LParen
| TokenKind::RParen
| TokenKind::LBrace
| TokenKind::RBrace
| TokenKind::Comma
| TokenKind::Bang
| TokenKind::Assign
| TokenKind::Semicolon => Err(Error::UnexpectedToken {
at: tok.at(),
expected: "identifier",
found: format!("{}", tok.kind()),
}),
}
}
fn expect_kind(
tokens: &[Token],
pos: usize,
expected: &TokenKind,
name: &'static str,
) -> Result<usize, Error> {
match peek(tokens, pos) {
Peek::Eof => Err(Error::UnexpectedEnd { expected: name }),
Peek::Tok(tok) => {
let kind_matches = tok.kind() == expected;
if kind_matches {
Ok(pos + 1)
} else {
Err(Error::UnexpectedToken {
at: tok.at(),
expected: name,
found: format!("{}", tok.kind()),
})
}
}
}
}
#[cfg(test)]
mod tests {
use super::*;
use crate::lexer::lex;
fn parse_str(src: &str) -> Result<Expr, Error> {
let tokens = lex(src)?;
parse(&tokens)
}
#[test]
fn empty_object_literal() -> Result<(), Error> {
let e = parse_str("{}")?;
let expected = Expr::object(vec![]);
(e == expected).then_some(()).ok_or(Error::UnexpectedEnd {
expected: "empty-object AST",
})
}
#[test]
fn object_with_one_property() -> Result<(), Error> {
let e = parse_str("{ foo = \\x. x }")?;
let expected = Expr::object(vec![(VarName::from("foo"), Expr::lam("x", Expr::var("x")))]);
(e == expected).then_some(()).ok_or(Error::UnexpectedEnd {
expected: "one-property AST",
})
}
#[test]
fn field_access_chain() -> Result<(), Error> {
let e = parse_str("a.b.c")?;
let expected = Expr::field(Expr::field(Expr::var("a"), "b"), "c");
(e == expected).then_some(()).ok_or(Error::UnexpectedEnd {
expected: "field chain AST",
})
}
#[test]
fn extend_with_properties() -> Result<(), Error> {
let e = parse_str("extend p { foo = \\x. x }")?;
let expected = Expr::object_with_proto(
vec![(VarName::from("foo"), Expr::lam("x", Expr::var("x")))],
Expr::var("p"),
);
(e == expected).then_some(()).ok_or(Error::UnexpectedEnd {
expected: "extend AST",
})
}
#[test]
fn throw_expr() -> Result<(), Error> {
let e = parse_str("throw x")?;
let expected = Expr::throw(Expr::var("x"));
(e == expected).then_some(()).ok_or(Error::UnexpectedEnd {
expected: "throw AST",
})
}
#[test]
fn try_catch_expr() -> Result<(), Error> {
let e = parse_str("try x catch e. e")?;
let expected = Expr::try_catch(Expr::var("x"), "e", Expr::var("e"));
(e == expected).then_some(()).ok_or(Error::UnexpectedEnd {
expected: "try/catch AST",
})
}
}