lambda/
lambda.rs

1//! A lambda expression parser.
2//!
3//! This example showcases the Art of the State; that is,
4//! how to use parcours's mutable state.
5//! In this example, we will use mutable state to:
6//! * store currently bound variables,
7//! * record multiple recoverable errors,
8//! * report a single non-recoverable error.
9//!
10//! You can run this example using:
11//!
12//!     cargo run --example lambda
13//!
14//! This opens an interactive prompt, in which
15//! you can try the following examples:
16//! * `|x y| x`
17//! * `|x y| y x`
18//! * `|x y| x (x y)`
19//! * `(|x| x) (|x| x)`
20//! * `|x y  x` <-- fails because `|` is not terminated
21//! * `|x| |y| x`
22//! * `|x| x y z` <-- fails because `y` and `z` are not bound
23//! * `|x x| x
24//! * `|fst snd| fst`
25//! * `|x y| || x`
26//! * `|一 二| 一`
27//! * `|0| 0` <-- fails, because identifiers may not start with a digit
28//! * `+` <-- fails, because this is not a valid term
29//!
30//! Many parsers in this example that do not access the state
31//! are generic over any state `S`, whereas those which access the state
32//! use `State`.
33use parcours::str::{matches, take_while};
34use parcours::{from_fn, lazy, Combinator, Parser};
35
36/// A lambda expression.
37#[derive(Debug)]
38enum Term<'a> {
39    /// Abstraction
40    ///
41    /// This has the shape `|x0 ... xn| t`, where
42    /// `x0` to `xn` may be a (possibly empty) sequence of identifiers, and
43    /// `t` is a term.
44    Abst(Vec<&'a str>, Box<Self>),
45    /// Application
46    ///
47    /// This has the shape `t t1 ... tn`, where
48    /// `t` is a term and
49    /// `t1` to `tn` is a non-empty sequence of terms.
50    Appl(Box<Self>, Vec<Self>),
51    /// Variable
52    ///
53    /// This is a [de Bruijn index](https://en.wikipedia.org/wiki/De_Bruijn_index)
54    /// that points to a variable bound in an abstraction.
55    Var(usize),
56}
57
58/// State of the parser.
59#[derive(Default)]
60struct State<'a> {
61    /// currently bound variables
62    vars: Vec<&'a str>,
63    /// binding errors
64    errs: Vec<Error<'a>>,
65    /// parsing error (only one, namely the first, is recorded)
66    expected: Option<(&'static str, &'a str)>,
67}
68
69#[derive(Debug)]
70enum Error<'a> {
71    UnboundVar(&'a str),
72}
73
74/// Parse whitespace and return it.
75fn space<'a, S>() -> impl Parser<&'a str, S, O = &'a str> + Clone {
76    take_while(|c, _| c.is_ascii_whitespace())
77}
78
79/// Parse an identifier, potentially followed by whitespace.
80///
81/// An identifier consists of characters that are
82/// either not in ASCII (such as 'ä') or that are alphanumeric ASCII.
83/// Furthermore, the first character of an identifier must not be a digit.
84fn ident<'a, S>() -> impl Parser<&'a str, S, O = &'a str> + Clone {
85    let pn = |c: &u8, _: &mut S| !c.is_ascii() || c.is_ascii_alphanumeric();
86    let p0 = |c: char| !c.is_ascii_digit();
87    take_while(pn)
88        .filter(move |s| s.chars().next().map(p0).unwrap_or(false))
89        .then_ignore(space())
90}
91
92/// Parse the given string, potentially followed by whitespace.
93fn token<S>(x: &str) -> impl Parser<&str, S, O = ()> + Clone {
94    matches(x).then_ignore(space())
95}
96
97/// Store the given error `s` if no other error was stored before, then fail.
98///
99/// Here, we use `from_fn` to implement some custom parsing logic
100/// that we cannot express with the normal parser combinators in parcours,
101/// in particular because we access and modify the state here.
102/// Alternatively, we could also implement the `Parser` trait manually,
103/// but this is more verbose than `from_fn`.
104///
105/// We only store an error if no other error was stored before.
106/// This is to prevent cascading errors which might be more confusing than helpful.
107fn expected<'a, O>(s: &'static str) -> impl Parser<&'a str, State<'a>, O = O> + Clone {
108    from_fn(move |input, state: &mut State| {
109        state.expected.get_or_insert((s, input));
110        None
111    })
112}
113
114/// Parse an atomic term, namely either a variable or a term enclosed by parentheses.
115fn atomic<'a>() -> impl Parser<&'a str, State<'a>, O = Term<'a>> + Clone {
116    let var = ident().map_with(|v, state: &mut State<'a>| {
117        // find the de Bruijn index of the variable
118        let db = state.vars.iter().rev().position(|b| *b == v);
119        // if the variable was not bound, then record this as error, but then continue,
120        // because such errors are not fatal parsing errors
121        Term::Var(db.unwrap_or_else(|| {
122            state.errs.push(Error::UnboundVar(v));
123            0
124        }))
125    });
126    // here, we see error reporting in action:
127    // if a term that was opened with a parenthesis is not closed by a parenthesis,
128    // we store an error message and fail
129    var.or(lazy!(term).delimited_by(token("("), token(")").or(expected("closing parenthesis"))))
130}
131
132/// Extend the state with variables, parse with `p`, then remove the variables again.
133fn with_vars<'a, I, P>(vars: Vec<&'a str>, p: P) -> impl Parser<I, State<'a>, O = P::O>
134where
135    P: Parser<I, State<'a>>,
136{
137    from_fn(|input, state: &mut State<'a>| {
138        let vars_len = vars.len();
139        state.vars.extend(vars);
140        let y = p.parse(input, state);
141        state.vars.truncate(state.vars.len() - vars_len);
142        y
143    })
144}
145
146/// Parse a term.
147fn term<'a>() -> impl Parser<&'a str, State<'a>, O = Term<'a>> {
148    let vars = ident()
149        .repeated()
150        .delimited_by(token("|"), token("|").or(expected("identifier or |")));
151    let abst = vars.and_then(|vars: Vec<_>| {
152        // we parse a term with the bound variables put into the state
153        with_vars(vars.clone(), lazy!(term)).map(|t| (Term::Abst(vars, Box::new(t))))
154    });
155    let args = atomic().repeated::<Vec<_>>();
156    let appl = atomic().then(args).map(|(head, args)| {
157        if args.is_empty() {
158            head
159        } else {
160            Term::Appl(Box::new(head), args)
161        }
162    });
163    abst.or(appl).then_ignore(space()).or(expected("term"))
164}
165
166fn handle(input: &str) {
167    let mut state = State::default();
168    let output = term().parse(input, &mut state);
169    println!("{:?}", output);
170    if let Some((e, location)) = state.expected {
171        eprintln!("Error: expected {e} at {location}");
172    }
173    for e in state.errs {
174        match e {
175            Error::UnboundVar(v) => {
176                let offset = v.as_ptr() as usize - input.as_ptr() as usize;
177                println!("Error: unbound variable \"{v}\" at byte {offset}");
178            }
179        }
180    }
181}
182
183fn main() -> std::io::Result<()> {
184    //let input = r#"(|ä y| (ä y) X0) x  "#;
185    std::io::stdin()
186        .lines()
187        .try_for_each(|line| Ok(handle(&line?)))
188}