lambda-ref-cat 0.1.0

Lambda calculus with mutable reference cells and a pure-functional mark-sweep garbage collector, built on comp-cat-rs. Spike 2 of a web-engine reformulation targeting Tauri.
Documentation
//! Recursive-descent parser for the extended lambda calculus surface syntax.
//!
//! The grammar is:
//!
//! ```text
//! expr        ::= seq_expr
//! seq_expr    ::= right_expr (";" right_expr)*
//! right_expr  ::= lambda | let | fix | assign_expr
//! lambda      ::= "\" ident "." expr
//! let         ::= "let" ident "=" expr "in" expr
//! fix         ::= "fix" ident "." expr
//! assign_expr ::= app_expr (":=" right_expr)?
//! app_expr    ::= atom atom*
//! atom        ::= ident | "(" expr ")" | "ref" atom | "!" atom
//! ```
//!
//! Precedence, from loosest to tightest: `;`, `:=`, application, prefix
//! `ref`/`!`, atoms.  Inside a lambda/let/fix body the parser admits a full
//! `expr` (so `;` is consumed), but the right-hand side of `:=` admits only
//! a `right_expr`, which keeps `;` strictly above `:=`.

use crate::error::Error;
use crate::lexer::{Token, TokenKind};
use crate::syntax::{Expr, VarName};

/// Look-ahead result for the parser.
enum Peek<'a> {
    Eof,
    Tok(&'a Token),
}

fn peek(tokens: &[Token], pos: usize) -> Peek<'_> {
    tokens.get(pos).map_or(Peek::Eof, Peek::Tok)
}

/// Parse a full expression from a token slice.
///
/// # Errors
///
/// Returns [`Error::UnexpectedToken`] if extra tokens trail after the
/// expression, [`Error::UnexpectedEnd`] if input is exhausted mid-production,
/// or other parse errors propagated from sub-productions.
///
/// # Examples
///
/// ```
/// # fn main() -> Result<(), lambda_ref_cat::error::Error> {
/// use lambda_ref_cat::lexer::lex;
/// use lambda_ref_cat::parser::parse;
///
/// let tokens = lex("ref (\\x. x)")?;
/// let expr = parse(&tokens)?;
/// assert_eq!(format!("{expr}"), "(ref (\\x. x))");
/// # Ok(())
/// # }
/// ```
///
/// [`Error::UnexpectedToken`]: crate::error::Error::UnexpectedToken
/// [`Error::UnexpectedEnd`]: crate::error::Error::UnexpectedEnd
pub fn parse(tokens: &[Token]) -> Result<Expr, Error> {
    let (expr, end_pos) = parse_expr(tokens, 0)?;
    tokens.get(end_pos).map_or(Ok(expr), |tok| {
        Err(Error::UnexpectedToken {
            at: tok.at(),
            expected: "end of input",
            found: format!("{}", tok.kind()),
        })
    })
}

fn parse_expr(tokens: &[Token], pos: usize) -> Result<(Expr, usize), Error> {
    parse_seq(tokens, pos)
}

fn parse_right_expr(tokens: &[Token], pos: usize) -> Result<(Expr, usize), Error> {
    match peek(tokens, pos) {
        Peek::Eof => Err(Error::UnexpectedEnd {
            expected: "expression",
        }),
        Peek::Tok(tok) => dispatch_right(tokens, pos, tok),
    }
}

fn dispatch_right(tokens: &[Token], pos: usize, tok: &Token) -> Result<(Expr, usize), Error> {
    match tok.kind() {
        TokenKind::Lambda => parse_lambda(tokens, pos),
        TokenKind::KwLet => parse_let(tokens, pos),
        TokenKind::KwFix => parse_fix(tokens, pos),
        TokenKind::Ident(_) | TokenKind::LParen | TokenKind::KwRef | TokenKind::Bang => {
            parse_assign(tokens, pos)
        }
        TokenKind::KwIn
        | TokenKind::Dot
        | TokenKind::Equals
        | TokenKind::RParen
        | TokenKind::Assign
        | TokenKind::Semicolon => Err(Error::UnexpectedToken {
            at: tok.at(),
            expected: "expression",
            found: format!("{}", tok.kind()),
        }),
    }
}

fn parse_lambda(tokens: &[Token], pos: usize) -> Result<(Expr, usize), Error> {
    let after_lambda = expect_kind(tokens, pos, &TokenKind::Lambda, "`\\`")?;
    let (param, after_ident) = expect_ident(tokens, after_lambda)?;
    let after_dot = expect_kind(tokens, after_ident, &TokenKind::Dot, "`.`")?;
    let (body, after_body) = parse_expr(tokens, after_dot)?;
    Ok((Expr::lam(param, body), after_body))
}

fn parse_let(tokens: &[Token], pos: usize) -> Result<(Expr, usize), Error> {
    let after_let = expect_kind(tokens, pos, &TokenKind::KwLet, "keyword `let`")?;
    let (name, after_name) = expect_ident(tokens, after_let)?;
    let after_eq = expect_kind(tokens, after_name, &TokenKind::Equals, "`=`")?;
    let (value, after_value) = parse_expr(tokens, after_eq)?;
    let after_in = expect_kind(tokens, after_value, &TokenKind::KwIn, "keyword `in`")?;
    let (body, after_body) = parse_expr(tokens, after_in)?;
    Ok((Expr::bind(name, value, body), after_body))
}

fn parse_fix(tokens: &[Token], pos: usize) -> Result<(Expr, usize), Error> {
    let after_fix = expect_kind(tokens, pos, &TokenKind::KwFix, "keyword `fix`")?;
    let (name, after_name) = expect_ident(tokens, after_fix)?;
    let after_dot = expect_kind(tokens, after_name, &TokenKind::Dot, "`.`")?;
    let (body, after_body) = parse_expr(tokens, after_dot)?;
    Ok((Expr::fix(name, body), after_body))
}

fn parse_seq(tokens: &[Token], pos: usize) -> Result<(Expr, usize), Error> {
    let (first, after_first) = parse_right_expr(tokens, pos)?;
    parse_seq_tail(tokens, after_first, first)
}

fn parse_seq_tail(tokens: &[Token], pos: usize, lhs: Expr) -> Result<(Expr, usize), Error> {
    let has_semi = matches!(
        peek(tokens, pos),
        Peek::Tok(t) if matches!(t.kind(), TokenKind::Semicolon)
    );
    if has_semi {
        let (next, after_next) = parse_right_expr(tokens, pos + 1)?;
        parse_seq_tail(tokens, after_next, Expr::seq(lhs, next))
    } else {
        Ok((lhs, pos))
    }
}

fn parse_assign(tokens: &[Token], pos: usize) -> Result<(Expr, usize), Error> {
    let (lhs, after_lhs) = parse_app(tokens, pos)?;
    let has_assign = matches!(
        peek(tokens, after_lhs),
        Peek::Tok(t) if matches!(t.kind(), TokenKind::Assign)
    );
    if has_assign {
        let (rhs, after_rhs) = parse_right_expr(tokens, after_lhs + 1)?;
        Ok((Expr::assign(lhs, rhs), after_rhs))
    } else {
        Ok((lhs, after_lhs))
    }
}

fn parse_app(tokens: &[Token], pos: usize) -> Result<(Expr, usize), Error> {
    let (head, after_head) = parse_atom(tokens, pos)?;
    parse_app_tail(tokens, after_head, head)
}

fn parse_app_tail(tokens: &[Token], pos: usize, lhs: Expr) -> Result<(Expr, usize), Error> {
    let can_apply = starts_atom(tokens, pos);
    if can_apply {
        let (arg, after_arg) = parse_atom(tokens, pos)?;
        parse_app_tail(tokens, after_arg, Expr::app(lhs, arg))
    } else {
        Ok((lhs, pos))
    }
}

fn starts_atom(tokens: &[Token], pos: usize) -> bool {
    match peek(tokens, pos) {
        Peek::Eof => false,
        Peek::Tok(tok) => matches!(
            tok.kind(),
            TokenKind::Ident(_) | TokenKind::LParen | TokenKind::KwRef | TokenKind::Bang
        ),
    }
}

fn parse_atom(tokens: &[Token], pos: usize) -> Result<(Expr, usize), Error> {
    match peek(tokens, pos) {
        Peek::Eof => Err(Error::UnexpectedEnd {
            expected: "atom (identifier, `(`, `ref`, or `!`)",
        }),
        Peek::Tok(tok) => dispatch_atom(tokens, pos, tok),
    }
}

fn dispatch_atom(tokens: &[Token], pos: usize, tok: &Token) -> Result<(Expr, usize), Error> {
    match tok.kind() {
        TokenKind::Ident(name) => Ok((Expr::Var(name.clone()), pos + 1)),
        TokenKind::LParen => parse_paren_inner(tokens, pos + 1),
        TokenKind::KwRef => parse_ref_prefix(tokens, pos + 1),
        TokenKind::Bang => parse_bang_prefix(tokens, pos + 1),
        TokenKind::Lambda
        | TokenKind::KwLet
        | TokenKind::KwFix
        | TokenKind::KwIn
        | TokenKind::Dot
        | TokenKind::Equals
        | TokenKind::RParen
        | TokenKind::Assign
        | TokenKind::Semicolon => Err(Error::UnexpectedToken {
            at: tok.at(),
            expected: "atom (identifier, `(`, `ref`, or `!`)",
            found: format!("{}", tok.kind()),
        }),
    }
}

fn parse_paren_inner(tokens: &[Token], pos: usize) -> Result<(Expr, usize), Error> {
    let (inner, after_inner) = parse_expr(tokens, pos)?;
    let after_close = expect_kind(tokens, after_inner, &TokenKind::RParen, "`)`")?;
    Ok((inner, after_close))
}

fn parse_ref_prefix(tokens: &[Token], pos: usize) -> Result<(Expr, usize), Error> {
    let (inner, after_inner) = parse_atom(tokens, pos)?;
    Ok((Expr::alloc(inner), after_inner))
}

fn parse_bang_prefix(tokens: &[Token], pos: usize) -> Result<(Expr, usize), Error> {
    let (inner, after_inner) = parse_atom(tokens, pos)?;
    Ok((Expr::deref(inner), after_inner))
}

fn expect_ident(tokens: &[Token], pos: usize) -> Result<(VarName, usize), Error> {
    match peek(tokens, pos) {
        Peek::Eof => Err(Error::UnexpectedEnd {
            expected: "identifier",
        }),
        Peek::Tok(tok) => dispatch_expect_ident(tok, pos),
    }
}

fn dispatch_expect_ident(tok: &Token, pos: usize) -> Result<(VarName, usize), Error> {
    match tok.kind() {
        TokenKind::Ident(name) => Ok((name.clone(), pos + 1)),
        TokenKind::Lambda
        | TokenKind::KwLet
        | TokenKind::KwIn
        | TokenKind::KwFix
        | TokenKind::KwRef
        | TokenKind::Dot
        | TokenKind::Equals
        | TokenKind::LParen
        | TokenKind::RParen
        | TokenKind::Bang
        | TokenKind::Assign
        | TokenKind::Semicolon => Err(Error::UnexpectedToken {
            at: tok.at(),
            expected: "identifier",
            found: format!("{}", tok.kind()),
        }),
    }
}

fn expect_kind(
    tokens: &[Token],
    pos: usize,
    expected: &TokenKind,
    name: &'static str,
) -> Result<usize, Error> {
    match peek(tokens, pos) {
        Peek::Eof => Err(Error::UnexpectedEnd { expected: name }),
        Peek::Tok(tok) => {
            let kind_matches = tok.kind() == expected;
            if kind_matches {
                Ok(pos + 1)
            } else {
                Err(Error::UnexpectedToken {
                    at: tok.at(),
                    expected: name,
                    found: format!("{}", tok.kind()),
                })
            }
        }
    }
}

#[cfg(test)]
mod tests {
    use super::*;
    use crate::lexer::lex;

    fn parse_str(src: &str) -> Result<Expr, Error> {
        let tokens = lex(src)?;
        parse(&tokens)
    }

    #[test]
    fn ref_and_deref() -> Result<(), Error> {
        let e = parse_str(r"!ref x")?;
        // ref x is atom; !atom is atom; whole thing is just the deref of the ref
        let expected = Expr::deref(Expr::alloc(Expr::var("x")));
        (e == expected).then_some(()).ok_or(Error::UnexpectedEnd {
            expected: "ref/deref AST",
        })
    }

    #[test]
    fn assign_is_lowest_in_app() -> Result<(), Error> {
        let e = parse_str(r"f x := g y")?;
        // (f x) := (g y)
        let expected = Expr::assign(
            Expr::app(Expr::var("f"), Expr::var("x")),
            Expr::app(Expr::var("g"), Expr::var("y")),
        );
        (e == expected).then_some(()).ok_or(Error::UnexpectedEnd {
            expected: "assign AST",
        })
    }

    #[test]
    fn semicolon_is_left_associative() -> Result<(), Error> {
        let e = parse_str(r"x ; y ; z")?;
        let expected = Expr::seq(Expr::seq(Expr::var("x"), Expr::var("y")), Expr::var("z"));
        (e == expected).then_some(()).ok_or(Error::UnexpectedEnd {
            expected: "seq AST",
        })
    }
}