Skip to main content

lambda_throw_cat/
parser.rs

1//! Recursive-descent parser for the throw-extended lambda calculus.
2//!
3//! Grammar:
4//!
5//! ```text
6//! expr        ::= seq_expr
7//! seq_expr    ::= right_expr (";" right_expr)*
8//! right_expr  ::= lambda | let | fix | extend | throw_expr | try_expr | assign_expr
9//! lambda      ::= "\" ident "." expr
10//! let         ::= "let" ident "=" expr "in" expr
11//! fix         ::= "fix" ident "." expr
12//! extend      ::= "extend" atom object_lit
13//! throw_expr  ::= "throw" right_expr
14//! try_expr    ::= "try" right_expr "catch" ident "." right_expr
15//! assign_expr ::= app_expr (":=" right_expr)?
16//! app_expr    ::= atom atom*
17//! atom        ::= base_atom ("." ident)*
18//! base_atom   ::= ident | "(" expr ")" | "ref" atom | "!" atom | object_lit
19//! object_lit  ::= "{" props? "}"
20//! props       ::= property ("," property)*
21//! property    ::= ident "=" right_expr
22//! ```
23//!
24//! Precedence, loosest to tightest: `;`, `:=`, application, field access,
25//! prefix `ref`/`!`, atoms.  The dot token is overloaded three ways: as the
26//! lambda head separator (after `\ident`), the catch-arm separator (after
27//! `catch ident`), and the field-access infix (after an atom).  The parser
28//! disambiguates by syntactic position.
29
30use crate::error::Error;
31use crate::lexer::{Token, TokenKind};
32use crate::syntax::{Expr, VarName};
33
34enum Peek<'a> {
35    Eof,
36    Tok(&'a Token),
37}
38
39fn peek(tokens: &[Token], pos: usize) -> Peek<'_> {
40    tokens.get(pos).map_or(Peek::Eof, Peek::Tok)
41}
42
43/// Parse a full expression from a token slice.
44///
45/// # Errors
46///
47/// Returns parse errors when the token stream does not match the grammar.
48///
49/// # Examples
50///
51/// ```
52/// # fn main() -> Result<(), lambda_throw_cat::error::Error> {
53/// use lambda_throw_cat::lexer::lex;
54/// use lambda_throw_cat::parser::parse;
55///
56/// let tokens = lex("try throw x catch e. e")?;
57/// let expr = parse(&tokens)?;
58/// assert_eq!(format!("{expr}"), "(try (throw x) catch e. e)");
59/// # Ok(())
60/// # }
61/// ```
62pub fn parse(tokens: &[Token]) -> Result<Expr, Error> {
63    let (expr, end_pos) = parse_expr(tokens, 0)?;
64    tokens.get(end_pos).map_or(Ok(expr), |tok| {
65        Err(Error::UnexpectedToken {
66            at: tok.at(),
67            expected: "end of input",
68            found: format!("{}", tok.kind()),
69        })
70    })
71}
72
73fn parse_expr(tokens: &[Token], pos: usize) -> Result<(Expr, usize), Error> {
74    parse_seq(tokens, pos)
75}
76
77fn parse_right_expr(tokens: &[Token], pos: usize) -> Result<(Expr, usize), Error> {
78    match peek(tokens, pos) {
79        Peek::Eof => Err(Error::UnexpectedEnd {
80            expected: "expression",
81        }),
82        Peek::Tok(tok) => dispatch_right(tokens, pos, tok),
83    }
84}
85
86fn dispatch_right(tokens: &[Token], pos: usize, tok: &Token) -> Result<(Expr, usize), Error> {
87    match tok.kind() {
88        TokenKind::Lambda => parse_lambda(tokens, pos),
89        TokenKind::KwLet => parse_let(tokens, pos),
90        TokenKind::KwFix => parse_fix(tokens, pos),
91        TokenKind::KwExtend => parse_extend(tokens, pos),
92        TokenKind::KwThrow => parse_throw(tokens, pos),
93        TokenKind::KwTry => parse_try_catch(tokens, pos),
94        TokenKind::Ident(_)
95        | TokenKind::LParen
96        | TokenKind::KwRef
97        | TokenKind::Bang
98        | TokenKind::LBrace => parse_assign(tokens, pos),
99        TokenKind::KwIn
100        | TokenKind::KwCatch
101        | TokenKind::Dot
102        | TokenKind::Equals
103        | TokenKind::RParen
104        | TokenKind::RBrace
105        | TokenKind::Comma
106        | TokenKind::Assign
107        | TokenKind::Semicolon => Err(Error::UnexpectedToken {
108            at: tok.at(),
109            expected: "expression",
110            found: format!("{}", tok.kind()),
111        }),
112    }
113}
114
115fn parse_lambda(tokens: &[Token], pos: usize) -> Result<(Expr, usize), Error> {
116    let after_lambda = expect_kind(tokens, pos, &TokenKind::Lambda, "`\\`")?;
117    let (param, after_ident) = expect_ident(tokens, after_lambda)?;
118    let after_dot = expect_kind(tokens, after_ident, &TokenKind::Dot, "`.`")?;
119    let (body, after_body) = parse_expr(tokens, after_dot)?;
120    Ok((Expr::lam(param, body), after_body))
121}
122
123fn parse_let(tokens: &[Token], pos: usize) -> Result<(Expr, usize), Error> {
124    let after_let = expect_kind(tokens, pos, &TokenKind::KwLet, "keyword `let`")?;
125    let (name, after_name) = expect_ident(tokens, after_let)?;
126    let after_eq = expect_kind(tokens, after_name, &TokenKind::Equals, "`=`")?;
127    let (value, after_value) = parse_expr(tokens, after_eq)?;
128    let after_in = expect_kind(tokens, after_value, &TokenKind::KwIn, "keyword `in`")?;
129    let (body, after_body) = parse_expr(tokens, after_in)?;
130    Ok((Expr::bind(name, value, body), after_body))
131}
132
133fn parse_fix(tokens: &[Token], pos: usize) -> Result<(Expr, usize), Error> {
134    let after_fix = expect_kind(tokens, pos, &TokenKind::KwFix, "keyword `fix`")?;
135    let (name, after_name) = expect_ident(tokens, after_fix)?;
136    let after_dot = expect_kind(tokens, after_name, &TokenKind::Dot, "`.`")?;
137    let (body, after_body) = parse_expr(tokens, after_dot)?;
138    Ok((Expr::fix(name, body), after_body))
139}
140
141fn parse_extend(tokens: &[Token], pos: usize) -> Result<(Expr, usize), Error> {
142    let after_kw = expect_kind(tokens, pos, &TokenKind::KwExtend, "keyword `extend`")?;
143    let (parent, parent_end) = parse_atom(tokens, after_kw)?;
144    let body_start = expect_kind(tokens, parent_end, &TokenKind::LBrace, "`{`")?;
145    let (entries, body_end) = parse_property_list(tokens, body_start)?;
146    Ok((Expr::object_with_proto(entries, parent), body_end))
147}
148
149fn parse_throw(tokens: &[Token], pos: usize) -> Result<(Expr, usize), Error> {
150    let after_kw = expect_kind(tokens, pos, &TokenKind::KwThrow, "keyword `throw`")?;
151    let (inner, after_inner) = parse_right_expr(tokens, after_kw)?;
152    Ok((Expr::throw(inner), after_inner))
153}
154
155fn parse_try_catch(tokens: &[Token], pos: usize) -> Result<(Expr, usize), Error> {
156    let after_try = expect_kind(tokens, pos, &TokenKind::KwTry, "keyword `try`")?;
157    // Body uses parse_expr so sequences can flow through, stopping at `catch`.
158    let (body, body_end) = parse_expr(tokens, after_try)?;
159    let after_catch = expect_kind(tokens, body_end, &TokenKind::KwCatch, "keyword `catch`")?;
160    let (param, param_end) = expect_ident(tokens, after_catch)?;
161    let after_dot = expect_kind(tokens, param_end, &TokenKind::Dot, "`.`")?;
162    // Handler also uses parse_expr, matching lambda/let/fix body convention.
163    let (handler, handler_end) = parse_expr(tokens, after_dot)?;
164    Ok((Expr::try_catch(body, param, handler), handler_end))
165}
166
167fn parse_seq(tokens: &[Token], pos: usize) -> Result<(Expr, usize), Error> {
168    let (first, after_first) = parse_right_expr(tokens, pos)?;
169    parse_seq_tail(tokens, after_first, first)
170}
171
172fn parse_seq_tail(tokens: &[Token], pos: usize, lhs: Expr) -> Result<(Expr, usize), Error> {
173    let has_semi = matches!(
174        peek(tokens, pos),
175        Peek::Tok(t) if matches!(t.kind(), TokenKind::Semicolon)
176    );
177    if has_semi {
178        let (next, after_next) = parse_right_expr(tokens, pos + 1)?;
179        parse_seq_tail(tokens, after_next, Expr::seq(lhs, next))
180    } else {
181        Ok((lhs, pos))
182    }
183}
184
185fn parse_assign(tokens: &[Token], pos: usize) -> Result<(Expr, usize), Error> {
186    let (lhs, after_lhs) = parse_app(tokens, pos)?;
187    let has_assign = matches!(
188        peek(tokens, after_lhs),
189        Peek::Tok(t) if matches!(t.kind(), TokenKind::Assign)
190    );
191    if has_assign {
192        let (rhs, after_rhs) = parse_right_expr(tokens, after_lhs + 1)?;
193        Ok((Expr::assign(lhs, rhs), after_rhs))
194    } else {
195        Ok((lhs, after_lhs))
196    }
197}
198
199fn parse_app(tokens: &[Token], pos: usize) -> Result<(Expr, usize), Error> {
200    let (head, after_head) = parse_atom(tokens, pos)?;
201    parse_app_tail(tokens, after_head, head)
202}
203
204fn parse_app_tail(tokens: &[Token], pos: usize, lhs: Expr) -> Result<(Expr, usize), Error> {
205    let can_apply = starts_atom(tokens, pos);
206    if can_apply {
207        let (arg, after_arg) = parse_atom(tokens, pos)?;
208        parse_app_tail(tokens, after_arg, Expr::app(lhs, arg))
209    } else {
210        Ok((lhs, pos))
211    }
212}
213
214fn starts_atom(tokens: &[Token], pos: usize) -> bool {
215    match peek(tokens, pos) {
216        Peek::Eof => false,
217        Peek::Tok(tok) => matches!(
218            tok.kind(),
219            TokenKind::Ident(_)
220                | TokenKind::LParen
221                | TokenKind::KwRef
222                | TokenKind::Bang
223                | TokenKind::LBrace
224        ),
225    }
226}
227
228fn parse_atom(tokens: &[Token], pos: usize) -> Result<(Expr, usize), Error> {
229    let (base, after_base) = parse_base_atom(tokens, pos)?;
230    parse_field_chain(tokens, after_base, base)
231}
232
233fn parse_field_chain(tokens: &[Token], pos: usize, base: Expr) -> Result<(Expr, usize), Error> {
234    let has_dot = matches!(
235        peek(tokens, pos),
236        Peek::Tok(t) if matches!(t.kind(), TokenKind::Dot)
237    );
238    if has_dot {
239        let (name, after_name) = expect_ident(tokens, pos + 1)?;
240        parse_field_chain(tokens, after_name, Expr::field(base, name))
241    } else {
242        Ok((base, pos))
243    }
244}
245
246fn parse_base_atom(tokens: &[Token], pos: usize) -> Result<(Expr, usize), Error> {
247    match peek(tokens, pos) {
248        Peek::Eof => Err(Error::UnexpectedEnd {
249            expected: "atom (identifier, `(`, `ref`, `!`, or `{`)",
250        }),
251        Peek::Tok(tok) => dispatch_base_atom(tokens, pos, tok),
252    }
253}
254
255fn dispatch_base_atom(tokens: &[Token], pos: usize, tok: &Token) -> Result<(Expr, usize), Error> {
256    match tok.kind() {
257        TokenKind::Ident(name) => Ok((Expr::Var(name.clone()), pos + 1)),
258        TokenKind::LParen => parse_paren_inner(tokens, pos + 1),
259        TokenKind::KwRef => parse_ref_prefix(tokens, pos + 1),
260        TokenKind::Bang => parse_bang_prefix(tokens, pos + 1),
261        TokenKind::LBrace => parse_object_literal(tokens, pos + 1),
262        TokenKind::Lambda
263        | TokenKind::KwLet
264        | TokenKind::KwFix
265        | TokenKind::KwIn
266        | TokenKind::KwExtend
267        | TokenKind::KwThrow
268        | TokenKind::KwTry
269        | TokenKind::KwCatch
270        | TokenKind::Dot
271        | TokenKind::Equals
272        | TokenKind::RParen
273        | TokenKind::RBrace
274        | TokenKind::Comma
275        | TokenKind::Assign
276        | TokenKind::Semicolon => Err(Error::UnexpectedToken {
277            at: tok.at(),
278            expected: "atom (identifier, `(`, `ref`, `!`, or `{`)",
279            found: format!("{}", tok.kind()),
280        }),
281    }
282}
283
284fn parse_paren_inner(tokens: &[Token], pos: usize) -> Result<(Expr, usize), Error> {
285    let (inner, after_inner) = parse_expr(tokens, pos)?;
286    let after_close = expect_kind(tokens, after_inner, &TokenKind::RParen, "`)`")?;
287    Ok((inner, after_close))
288}
289
290fn parse_ref_prefix(tokens: &[Token], pos: usize) -> Result<(Expr, usize), Error> {
291    let (inner, after_inner) = parse_atom(tokens, pos)?;
292    Ok((Expr::alloc(inner), after_inner))
293}
294
295fn parse_bang_prefix(tokens: &[Token], pos: usize) -> Result<(Expr, usize), Error> {
296    let (inner, after_inner) = parse_atom(tokens, pos)?;
297    Ok((Expr::deref(inner), after_inner))
298}
299
300fn parse_object_literal(tokens: &[Token], pos: usize) -> Result<(Expr, usize), Error> {
301    // The `{` has already been consumed; we are at the first property or `}`.
302    let (entries, after_close) = parse_property_list(tokens, pos)?;
303    Ok((Expr::object(entries), after_close))
304}
305
306fn parse_property_list(
307    tokens: &[Token],
308    pos: usize,
309) -> Result<(Vec<(VarName, Expr)>, usize), Error> {
310    match peek(tokens, pos) {
311        Peek::Eof => Err(Error::UnexpectedEnd { expected: "`}`" }),
312        Peek::Tok(tok) => dispatch_property_list(tokens, pos, tok),
313    }
314}
315
316fn dispatch_property_list(
317    tokens: &[Token],
318    pos: usize,
319    tok: &Token,
320) -> Result<(Vec<(VarName, Expr)>, usize), Error> {
321    match tok.kind() {
322        TokenKind::RBrace => Ok((Vec::new(), pos + 1)),
323        TokenKind::Ident(_) => {
324            let (first, after_first) = parse_property(tokens, pos)?;
325            parse_property_tail(tokens, after_first, vec![first])
326        }
327        TokenKind::Lambda
328        | TokenKind::KwLet
329        | TokenKind::KwIn
330        | TokenKind::KwFix
331        | TokenKind::KwRef
332        | TokenKind::KwExtend
333        | TokenKind::KwThrow
334        | TokenKind::KwTry
335        | TokenKind::KwCatch
336        | TokenKind::Dot
337        | TokenKind::Equals
338        | TokenKind::LParen
339        | TokenKind::RParen
340        | TokenKind::LBrace
341        | TokenKind::Comma
342        | TokenKind::Bang
343        | TokenKind::Assign
344        | TokenKind::Semicolon => Err(Error::UnexpectedToken {
345            at: tok.at(),
346            expected: "property name or `}`",
347            found: format!("{}", tok.kind()),
348        }),
349    }
350}
351
352fn parse_property_tail(
353    tokens: &[Token],
354    pos: usize,
355    acc: Vec<(VarName, Expr)>,
356) -> Result<(Vec<(VarName, Expr)>, usize), Error> {
357    match peek(tokens, pos) {
358        Peek::Eof => Err(Error::UnexpectedEnd {
359            expected: "`,` or `}`",
360        }),
361        Peek::Tok(tok) => match tok.kind() {
362            TokenKind::Comma => {
363                let (next, after_next) = parse_property(tokens, pos + 1)?;
364                parse_property_tail(tokens, after_next, push_property(acc, next))
365            }
366            TokenKind::RBrace => Ok((acc, pos + 1)),
367            TokenKind::Ident(_)
368            | TokenKind::Lambda
369            | TokenKind::KwLet
370            | TokenKind::KwIn
371            | TokenKind::KwFix
372            | TokenKind::KwRef
373            | TokenKind::KwExtend
374            | TokenKind::KwThrow
375            | TokenKind::KwTry
376            | TokenKind::KwCatch
377            | TokenKind::Dot
378            | TokenKind::Equals
379            | TokenKind::LParen
380            | TokenKind::LBrace
381            | TokenKind::RParen
382            | TokenKind::Bang
383            | TokenKind::Assign
384            | TokenKind::Semicolon => Err(Error::UnexpectedToken {
385                at: tok.at(),
386                expected: "`,` or `}`",
387                found: format!("{}", tok.kind()),
388            }),
389        },
390    }
391}
392
393fn parse_property(tokens: &[Token], pos: usize) -> Result<((VarName, Expr), usize), Error> {
394    let (name, after_name) = expect_ident(tokens, pos)?;
395    let after_eq = expect_kind(tokens, after_name, &TokenKind::Equals, "`=`")?;
396    let (value, after_value) = parse_right_expr(tokens, after_eq)?;
397    Ok(((name, value), after_value))
398}
399
400fn push_property(acc: Vec<(VarName, Expr)>, entry: (VarName, Expr)) -> Vec<(VarName, Expr)> {
401    acc.into_iter().chain(std::iter::once(entry)).collect()
402}
403
404fn expect_ident(tokens: &[Token], pos: usize) -> Result<(VarName, usize), Error> {
405    match peek(tokens, pos) {
406        Peek::Eof => Err(Error::UnexpectedEnd {
407            expected: "identifier",
408        }),
409        Peek::Tok(tok) => dispatch_expect_ident(tok, pos),
410    }
411}
412
413fn dispatch_expect_ident(tok: &Token, pos: usize) -> Result<(VarName, usize), Error> {
414    match tok.kind() {
415        TokenKind::Ident(name) => Ok((name.clone(), pos + 1)),
416        TokenKind::Lambda
417        | TokenKind::KwLet
418        | TokenKind::KwIn
419        | TokenKind::KwFix
420        | TokenKind::KwRef
421        | TokenKind::KwExtend
422        | TokenKind::KwThrow
423        | TokenKind::KwTry
424        | TokenKind::KwCatch
425        | TokenKind::Dot
426        | TokenKind::Equals
427        | TokenKind::LParen
428        | TokenKind::RParen
429        | TokenKind::LBrace
430        | TokenKind::RBrace
431        | TokenKind::Comma
432        | TokenKind::Bang
433        | TokenKind::Assign
434        | TokenKind::Semicolon => Err(Error::UnexpectedToken {
435            at: tok.at(),
436            expected: "identifier",
437            found: format!("{}", tok.kind()),
438        }),
439    }
440}
441
442fn expect_kind(
443    tokens: &[Token],
444    pos: usize,
445    expected: &TokenKind,
446    name: &'static str,
447) -> Result<usize, Error> {
448    match peek(tokens, pos) {
449        Peek::Eof => Err(Error::UnexpectedEnd { expected: name }),
450        Peek::Tok(tok) => {
451            let kind_matches = tok.kind() == expected;
452            if kind_matches {
453                Ok(pos + 1)
454            } else {
455                Err(Error::UnexpectedToken {
456                    at: tok.at(),
457                    expected: name,
458                    found: format!("{}", tok.kind()),
459                })
460            }
461        }
462    }
463}
464
465#[cfg(test)]
466mod tests {
467    use super::*;
468    use crate::lexer::lex;
469
470    fn parse_str(src: &str) -> Result<Expr, Error> {
471        let tokens = lex(src)?;
472        parse(&tokens)
473    }
474
475    #[test]
476    fn empty_object_literal() -> Result<(), Error> {
477        let e = parse_str("{}")?;
478        let expected = Expr::object(vec![]);
479        (e == expected).then_some(()).ok_or(Error::UnexpectedEnd {
480            expected: "empty-object AST",
481        })
482    }
483
484    #[test]
485    fn object_with_one_property() -> Result<(), Error> {
486        let e = parse_str("{ foo = \\x. x }")?;
487        let expected = Expr::object(vec![(VarName::from("foo"), Expr::lam("x", Expr::var("x")))]);
488        (e == expected).then_some(()).ok_or(Error::UnexpectedEnd {
489            expected: "one-property AST",
490        })
491    }
492
493    #[test]
494    fn field_access_chain() -> Result<(), Error> {
495        let e = parse_str("a.b.c")?;
496        let expected = Expr::field(Expr::field(Expr::var("a"), "b"), "c");
497        (e == expected).then_some(()).ok_or(Error::UnexpectedEnd {
498            expected: "field chain AST",
499        })
500    }
501
502    #[test]
503    fn extend_with_properties() -> Result<(), Error> {
504        let e = parse_str("extend p { foo = \\x. x }")?;
505        let expected = Expr::object_with_proto(
506            vec![(VarName::from("foo"), Expr::lam("x", Expr::var("x")))],
507            Expr::var("p"),
508        );
509        (e == expected).then_some(()).ok_or(Error::UnexpectedEnd {
510            expected: "extend AST",
511        })
512    }
513
514    #[test]
515    fn throw_expr() -> Result<(), Error> {
516        let e = parse_str("throw x")?;
517        let expected = Expr::throw(Expr::var("x"));
518        (e == expected).then_some(()).ok_or(Error::UnexpectedEnd {
519            expected: "throw AST",
520        })
521    }
522
523    #[test]
524    fn try_catch_expr() -> Result<(), Error> {
525        let e = parse_str("try x catch e. e")?;
526        let expected = Expr::try_catch(Expr::var("x"), "e", Expr::var("e"));
527        (e == expected).then_some(()).ok_or(Error::UnexpectedEnd {
528            expected: "try/catch AST",
529        })
530    }
531}