Skip to main content

lambda_ref_cat/
parser.rs

1//! Recursive-descent parser for the extended lambda calculus surface syntax.
2//!
3//! The grammar is:
4//!
5//! ```text
6//! expr        ::= seq_expr
7//! seq_expr    ::= right_expr (";" right_expr)*
8//! right_expr  ::= lambda | let | fix | assign_expr
9//! lambda      ::= "\" ident "." expr
10//! let         ::= "let" ident "=" expr "in" expr
11//! fix         ::= "fix" ident "." expr
12//! assign_expr ::= app_expr (":=" right_expr)?
13//! app_expr    ::= atom atom*
14//! atom        ::= ident | "(" expr ")" | "ref" atom | "!" atom
15//! ```
16//!
17//! Precedence, from loosest to tightest: `;`, `:=`, application, prefix
18//! `ref`/`!`, atoms.  Inside a lambda/let/fix body the parser admits a full
19//! `expr` (so `;` is consumed), but the right-hand side of `:=` admits only
20//! a `right_expr`, which keeps `;` strictly above `:=`.
21
22use crate::error::Error;
23use crate::lexer::{Token, TokenKind};
24use crate::syntax::{Expr, VarName};
25
26/// Look-ahead result for the parser.
27enum Peek<'a> {
28    Eof,
29    Tok(&'a Token),
30}
31
32fn peek(tokens: &[Token], pos: usize) -> Peek<'_> {
33    tokens.get(pos).map_or(Peek::Eof, Peek::Tok)
34}
35
36/// Parse a full expression from a token slice.
37///
38/// # Errors
39///
40/// Returns [`Error::UnexpectedToken`] if extra tokens trail after the
41/// expression, [`Error::UnexpectedEnd`] if input is exhausted mid-production,
42/// or other parse errors propagated from sub-productions.
43///
44/// # Examples
45///
46/// ```
47/// # fn main() -> Result<(), lambda_ref_cat::error::Error> {
48/// use lambda_ref_cat::lexer::lex;
49/// use lambda_ref_cat::parser::parse;
50///
51/// let tokens = lex("ref (\\x. x)")?;
52/// let expr = parse(&tokens)?;
53/// assert_eq!(format!("{expr}"), "(ref (\\x. x))");
54/// # Ok(())
55/// # }
56/// ```
57///
58/// [`Error::UnexpectedToken`]: crate::error::Error::UnexpectedToken
59/// [`Error::UnexpectedEnd`]: crate::error::Error::UnexpectedEnd
60pub fn parse(tokens: &[Token]) -> Result<Expr, Error> {
61    let (expr, end_pos) = parse_expr(tokens, 0)?;
62    tokens.get(end_pos).map_or(Ok(expr), |tok| {
63        Err(Error::UnexpectedToken {
64            at: tok.at(),
65            expected: "end of input",
66            found: format!("{}", tok.kind()),
67        })
68    })
69}
70
71fn parse_expr(tokens: &[Token], pos: usize) -> Result<(Expr, usize), Error> {
72    parse_seq(tokens, pos)
73}
74
75fn parse_right_expr(tokens: &[Token], pos: usize) -> Result<(Expr, usize), Error> {
76    match peek(tokens, pos) {
77        Peek::Eof => Err(Error::UnexpectedEnd {
78            expected: "expression",
79        }),
80        Peek::Tok(tok) => dispatch_right(tokens, pos, tok),
81    }
82}
83
84fn dispatch_right(tokens: &[Token], pos: usize, tok: &Token) -> Result<(Expr, usize), Error> {
85    match tok.kind() {
86        TokenKind::Lambda => parse_lambda(tokens, pos),
87        TokenKind::KwLet => parse_let(tokens, pos),
88        TokenKind::KwFix => parse_fix(tokens, pos),
89        TokenKind::Ident(_) | TokenKind::LParen | TokenKind::KwRef | TokenKind::Bang => {
90            parse_assign(tokens, pos)
91        }
92        TokenKind::KwIn
93        | TokenKind::Dot
94        | TokenKind::Equals
95        | TokenKind::RParen
96        | TokenKind::Assign
97        | TokenKind::Semicolon => Err(Error::UnexpectedToken {
98            at: tok.at(),
99            expected: "expression",
100            found: format!("{}", tok.kind()),
101        }),
102    }
103}
104
105fn parse_lambda(tokens: &[Token], pos: usize) -> Result<(Expr, usize), Error> {
106    let after_lambda = expect_kind(tokens, pos, &TokenKind::Lambda, "`\\`")?;
107    let (param, after_ident) = expect_ident(tokens, after_lambda)?;
108    let after_dot = expect_kind(tokens, after_ident, &TokenKind::Dot, "`.`")?;
109    let (body, after_body) = parse_expr(tokens, after_dot)?;
110    Ok((Expr::lam(param, body), after_body))
111}
112
113fn parse_let(tokens: &[Token], pos: usize) -> Result<(Expr, usize), Error> {
114    let after_let = expect_kind(tokens, pos, &TokenKind::KwLet, "keyword `let`")?;
115    let (name, after_name) = expect_ident(tokens, after_let)?;
116    let after_eq = expect_kind(tokens, after_name, &TokenKind::Equals, "`=`")?;
117    let (value, after_value) = parse_expr(tokens, after_eq)?;
118    let after_in = expect_kind(tokens, after_value, &TokenKind::KwIn, "keyword `in`")?;
119    let (body, after_body) = parse_expr(tokens, after_in)?;
120    Ok((Expr::bind(name, value, body), after_body))
121}
122
123fn parse_fix(tokens: &[Token], pos: usize) -> Result<(Expr, usize), Error> {
124    let after_fix = expect_kind(tokens, pos, &TokenKind::KwFix, "keyword `fix`")?;
125    let (name, after_name) = expect_ident(tokens, after_fix)?;
126    let after_dot = expect_kind(tokens, after_name, &TokenKind::Dot, "`.`")?;
127    let (body, after_body) = parse_expr(tokens, after_dot)?;
128    Ok((Expr::fix(name, body), after_body))
129}
130
131fn parse_seq(tokens: &[Token], pos: usize) -> Result<(Expr, usize), Error> {
132    let (first, after_first) = parse_right_expr(tokens, pos)?;
133    parse_seq_tail(tokens, after_first, first)
134}
135
136fn parse_seq_tail(tokens: &[Token], pos: usize, lhs: Expr) -> Result<(Expr, usize), Error> {
137    let has_semi = matches!(
138        peek(tokens, pos),
139        Peek::Tok(t) if matches!(t.kind(), TokenKind::Semicolon)
140    );
141    if has_semi {
142        let (next, after_next) = parse_right_expr(tokens, pos + 1)?;
143        parse_seq_tail(tokens, after_next, Expr::seq(lhs, next))
144    } else {
145        Ok((lhs, pos))
146    }
147}
148
149fn parse_assign(tokens: &[Token], pos: usize) -> Result<(Expr, usize), Error> {
150    let (lhs, after_lhs) = parse_app(tokens, pos)?;
151    let has_assign = matches!(
152        peek(tokens, after_lhs),
153        Peek::Tok(t) if matches!(t.kind(), TokenKind::Assign)
154    );
155    if has_assign {
156        let (rhs, after_rhs) = parse_right_expr(tokens, after_lhs + 1)?;
157        Ok((Expr::assign(lhs, rhs), after_rhs))
158    } else {
159        Ok((lhs, after_lhs))
160    }
161}
162
163fn parse_app(tokens: &[Token], pos: usize) -> Result<(Expr, usize), Error> {
164    let (head, after_head) = parse_atom(tokens, pos)?;
165    parse_app_tail(tokens, after_head, head)
166}
167
168fn parse_app_tail(tokens: &[Token], pos: usize, lhs: Expr) -> Result<(Expr, usize), Error> {
169    let can_apply = starts_atom(tokens, pos);
170    if can_apply {
171        let (arg, after_arg) = parse_atom(tokens, pos)?;
172        parse_app_tail(tokens, after_arg, Expr::app(lhs, arg))
173    } else {
174        Ok((lhs, pos))
175    }
176}
177
178fn starts_atom(tokens: &[Token], pos: usize) -> bool {
179    match peek(tokens, pos) {
180        Peek::Eof => false,
181        Peek::Tok(tok) => matches!(
182            tok.kind(),
183            TokenKind::Ident(_) | TokenKind::LParen | TokenKind::KwRef | TokenKind::Bang
184        ),
185    }
186}
187
188fn parse_atom(tokens: &[Token], pos: usize) -> Result<(Expr, usize), Error> {
189    match peek(tokens, pos) {
190        Peek::Eof => Err(Error::UnexpectedEnd {
191            expected: "atom (identifier, `(`, `ref`, or `!`)",
192        }),
193        Peek::Tok(tok) => dispatch_atom(tokens, pos, tok),
194    }
195}
196
197fn dispatch_atom(tokens: &[Token], pos: usize, tok: &Token) -> Result<(Expr, usize), Error> {
198    match tok.kind() {
199        TokenKind::Ident(name) => Ok((Expr::Var(name.clone()), pos + 1)),
200        TokenKind::LParen => parse_paren_inner(tokens, pos + 1),
201        TokenKind::KwRef => parse_ref_prefix(tokens, pos + 1),
202        TokenKind::Bang => parse_bang_prefix(tokens, pos + 1),
203        TokenKind::Lambda
204        | TokenKind::KwLet
205        | TokenKind::KwFix
206        | TokenKind::KwIn
207        | TokenKind::Dot
208        | TokenKind::Equals
209        | TokenKind::RParen
210        | TokenKind::Assign
211        | TokenKind::Semicolon => Err(Error::UnexpectedToken {
212            at: tok.at(),
213            expected: "atom (identifier, `(`, `ref`, or `!`)",
214            found: format!("{}", tok.kind()),
215        }),
216    }
217}
218
219fn parse_paren_inner(tokens: &[Token], pos: usize) -> Result<(Expr, usize), Error> {
220    let (inner, after_inner) = parse_expr(tokens, pos)?;
221    let after_close = expect_kind(tokens, after_inner, &TokenKind::RParen, "`)`")?;
222    Ok((inner, after_close))
223}
224
225fn parse_ref_prefix(tokens: &[Token], pos: usize) -> Result<(Expr, usize), Error> {
226    let (inner, after_inner) = parse_atom(tokens, pos)?;
227    Ok((Expr::alloc(inner), after_inner))
228}
229
230fn parse_bang_prefix(tokens: &[Token], pos: usize) -> Result<(Expr, usize), Error> {
231    let (inner, after_inner) = parse_atom(tokens, pos)?;
232    Ok((Expr::deref(inner), after_inner))
233}
234
235fn expect_ident(tokens: &[Token], pos: usize) -> Result<(VarName, usize), Error> {
236    match peek(tokens, pos) {
237        Peek::Eof => Err(Error::UnexpectedEnd {
238            expected: "identifier",
239        }),
240        Peek::Tok(tok) => dispatch_expect_ident(tok, pos),
241    }
242}
243
244fn dispatch_expect_ident(tok: &Token, pos: usize) -> Result<(VarName, usize), Error> {
245    match tok.kind() {
246        TokenKind::Ident(name) => Ok((name.clone(), pos + 1)),
247        TokenKind::Lambda
248        | TokenKind::KwLet
249        | TokenKind::KwIn
250        | TokenKind::KwFix
251        | TokenKind::KwRef
252        | TokenKind::Dot
253        | TokenKind::Equals
254        | TokenKind::LParen
255        | TokenKind::RParen
256        | TokenKind::Bang
257        | TokenKind::Assign
258        | TokenKind::Semicolon => Err(Error::UnexpectedToken {
259            at: tok.at(),
260            expected: "identifier",
261            found: format!("{}", tok.kind()),
262        }),
263    }
264}
265
266fn expect_kind(
267    tokens: &[Token],
268    pos: usize,
269    expected: &TokenKind,
270    name: &'static str,
271) -> Result<usize, Error> {
272    match peek(tokens, pos) {
273        Peek::Eof => Err(Error::UnexpectedEnd { expected: name }),
274        Peek::Tok(tok) => {
275            let kind_matches = tok.kind() == expected;
276            if kind_matches {
277                Ok(pos + 1)
278            } else {
279                Err(Error::UnexpectedToken {
280                    at: tok.at(),
281                    expected: name,
282                    found: format!("{}", tok.kind()),
283                })
284            }
285        }
286    }
287}
288
289#[cfg(test)]
290mod tests {
291    use super::*;
292    use crate::lexer::lex;
293
294    fn parse_str(src: &str) -> Result<Expr, Error> {
295        let tokens = lex(src)?;
296        parse(&tokens)
297    }
298
299    #[test]
300    fn ref_and_deref() -> Result<(), Error> {
301        let e = parse_str(r"!ref x")?;
302        // ref x is atom; !atom is atom; whole thing is just the deref of the ref
303        let expected = Expr::deref(Expr::alloc(Expr::var("x")));
304        (e == expected).then_some(()).ok_or(Error::UnexpectedEnd {
305            expected: "ref/deref AST",
306        })
307    }
308
309    #[test]
310    fn assign_is_lowest_in_app() -> Result<(), Error> {
311        let e = parse_str(r"f x := g y")?;
312        // (f x) := (g y)
313        let expected = Expr::assign(
314            Expr::app(Expr::var("f"), Expr::var("x")),
315            Expr::app(Expr::var("g"), Expr::var("y")),
316        );
317        (e == expected).then_some(()).ok_or(Error::UnexpectedEnd {
318            expected: "assign AST",
319        })
320    }
321
322    #[test]
323    fn semicolon_is_left_associative() -> Result<(), Error> {
324        let e = parse_str(r"x ; y ; z")?;
325        let expected = Expr::seq(Expr::seq(Expr::var("x"), Expr::var("y")), Expr::var("z"));
326        (e == expected).then_some(()).ok_or(Error::UnexpectedEnd {
327            expected: "seq AST",
328        })
329    }
330}