dedukti_parse/
lib.rs

1//! Parser for the Dedukti file format
2//!
3//! This crate serves to parse Dedukti theory files.
4//! The syntax of Dedukti theories is documented
5//! [here](https://github.com/Deducteam/Dedukti/blob/aff4500e3c556f32569a016424a37230c44adf05/syntax.bnf).
6//!
7//! One of the main targets of this crate is speed:
8//! An evaluation of an older version of this crate in the article
9//! [Safe, Fast, Concurrent Proof Checking for the lambda-Pi Calculus Modulo Rewriting](https://doi.org/10.1145/3497775.3503683)
10//! showed that this parser can be more than 4x faster than the parser in Dedukti.
11//! This is relevant because the runtime of parsing
12//! can make up for nearly half the total runtime of Dedukti.
13//!
14//! This crate currently does not support the complete Dedukti syntax;
15//! in particular, commands starting with `#`, such as `#EVAL` and `#REQUIRE` are not supported.
16//! Still, the supported subset of the syntax suffices to parse many large proof corpora,
17//! such as those produced from Matita, HOL Light, and Isabelle/HOL.
18//!
19//! # Usage
20//!
21//! ## Lazy / Strict
22//!
23//! This crate supports several modes of operation:
24//!
25//! * [Strict] parsing: The whole content of the file is in memory *before* parsing.
26//! * [Lazy] parsing: The file is read bit by bit *during* parsing.
27//!
28//! Strict parsing of a whole file is faster than lazy parsing; however, it
29//! consumes more memory than lazy parsing and takes longer to get the first command.
30//!
31//! ## Scoping
32//!
33//! One important operation that is performed during parsing is [*scoping*](Scope).
34//! This operation decides how to store symbols occurring in a term.
35//! There are currently two options:
36//!
37//! 1. All symbols are unconditionally stored using strings.
38//! 2. Symbols are distinguished into
39//!    [*atoms*](term::Atom) that are either constants and variables, where
40//!    constants are saved using strings and variables as de Bruijn indices
41//!    (natural numbers that refer to the position of the binder of the variable).
42//!
43//! The first option can use `String` and `&str` as string type.
44//! However, `&str` can be only used in conjunction with strict parsing, because
45//! lazy parsing "forgets" the input string and therefore
46//! does not allow references into the input string.
47//! The second option can be used regardless of strict or lazy parsing.
48//!
49//! ## When to use what?
50//!
51//! * Use lazy parsing if you
52//!   want to wait as little as possible to get each command or
53//!   minimise your memory consumption.
54//! * Use strict parsing if you
55//!   parse a whole file and wish to reduce the total runtime of parsing.
56//! * Store symbols unconditionally using strings if
57//!   you do not care whether a symbol is a variable or not.
58//!   In that case, when doing strict parsing,
59//!   prefer `&str` over `String` as string type to reduce `String` allocations.
60//!
61//! # Example
62//!
63//! ~~~
64//! use dedukti_parse::{term, Command, Error, Lazy, Scoped, Strict, Symb};
65//!
66//! let cmds = r#"
67//!     prop: Type.
68//!     def proof : prop -> Type.
69//! "#;
70//!
71//! // strict parsing with `&str` symbols
72//! let parse = Strict::<_, Symb<&str>, &str>::new(&cmds);
73//! let parse: Result<Vec<_>, _> = parse.collect();
74//! assert_eq!(parse?.len(), 2);
75//!
76//! // strict parsing with atoms
77//! let parse = Strict::<_, term::Atom<Symb<String>>, String>::new(cmds);
78//! let parse: Result<Vec<_>, _> = parse.collect();
79//! assert_eq!(parse?.len(), 2);
80//!
81//! // lazy parsing with `String` symbols
82//! let parse = Lazy::<_, Symb<String>, String>::new(cmds.lines());
83//! let parse: Result<Vec<_>, _> = parse.collect();
84//! assert_eq!(parse?.len(), 2);
85//!
86//! // lazy parsing with atoms
87//! let parse = Lazy::<_, term::Atom<Symb<String>>, String>::new(cmds.lines());
88//! let parse: Result<Vec<_>, _> = parse.collect();
89//! assert_eq!(parse?.len(), 2);
90//!
91//! # Ok::<_, Error>(())
92//! ~~~
93#![no_std]
94
95extern crate alloc;
96
97mod cmd;
98mod lex;
99mod symb;
100pub mod term;
101
102pub use cmd::{Command, Intro, Rule};
103pub use lex::Token;
104pub use symb::Symb;
105pub use term::{Scope, Term};
106
107use alloc::string::{String, ToString};
108use alloc::vec::Vec;
109use core::borrow::Borrow;
110use logos::Logos;
111use term::Ctx;
112
113/// Parse error.
114#[derive(Debug, PartialEq)]
115pub enum Error {
116    Command(cmd::Error),
117    Term(term::Error),
118    ExpectedInput,
119}
120
121/// Strict parser.
122pub struct Strict<'s, S, A, V>
123where
124    Token<S>: Logos<'s>,
125{
126    lexer: logos::Lexer<'s, Token<S>>,
127    ctx: Ctx<A, V>,
128}
129
130/// Lazy parser.
131pub struct Lazy<I, A, V> {
132    lines: I,
133    open_comments: usize,
134    last: String,
135    state: State<String, String, A, V>,
136    ctx: Ctx<A, V>,
137    buf: Vec<Command<String, V, Term<A, V>>>,
138}
139
140impl<'s, A, V> Strict<'s, &'s str, A, V> {
141    /// Initialise the parser with the content of a Dedukti theory.
142    pub fn new(s: &'s str) -> Self {
143        Self {
144            lexer: Token::lexer(s),
145            ctx: Default::default(),
146        }
147    }
148}
149
150impl<I, A, V> Lazy<I, A, V> {
151    /// Initialise the parser with an iterator over lines of a Dedukti theory.
152    pub fn new(lines: I) -> Self {
153        Self {
154            lines,
155            open_comments: 0,
156            last: String::new(),
157            state: State::default(),
158            ctx: Default::default(),
159            buf: Vec::new(),
160        }
161    }
162}
163
164/// Hold the state of both command and term parsers.
165struct State<S, C, A, V> {
166    cmd: cmd::State<C, V, Term<A, V>>,
167    trm: term::State<S, A, V>,
168}
169
170impl<S, C, A, V> State<S, C, A, V> {
171    fn map_symb<T>(self, f: impl FnOnce(S) -> T) -> State<T, C, A, V> {
172        State {
173            cmd: self.cmd,
174            trm: self.trm.map_symb(f),
175        }
176    }
177}
178
179impl<S, C, A, V> Default for State<S, C, A, V> {
180    fn default() -> Self {
181        Self {
182            cmd: cmd::State::Init,
183            trm: term::State::Init,
184        }
185    }
186}
187
188type Open = usize;
189
190impl<S: Into<C> + Into<V>, C, A: Scope<S, V>, V: cmd::Joker> State<S, C, A, V> {
191    fn feed<I>(
192        self,
193        ctx: &mut Ctx<A, V>,
194        token: Token<S>,
195        iter: &mut I,
196    ) -> Result<(Self, Open), Error>
197    where
198        I: Iterator<Item = Token<S>>,
199    {
200        if self.cmd.expects_term() {
201            let iter = &mut core::iter::once(token).chain(iter);
202            match self.trm.parse(ctx, iter) {
203                Ok((trm, None)) => Ok((State { cmd: self.cmd, trm }, 0)),
204                Ok((trm, Some(Token::Comment(o)))) => Ok((State { cmd: self.cmd, trm }, o)),
205                Ok((term::State::ATerm(None, tm), Some(tok))) => {
206                    let cmd = self
207                        .cmd
208                        .apply(ctx.bound_mut(), tm, tok)
209                        .map_err(Error::Command)?;
210                    let trm = term::State::Init;
211                    Ok((State { cmd, trm }, 0))
212                }
213                Ok((_, Some(_tok))) => panic!("unrecognised token"),
214                Err(e) => Err(Error::Term(e)),
215            }
216        } else {
217            assert!(matches!(self.trm, term::State::Init));
218            if let Token::Comment(open) = token {
219                return Ok((self, open));
220            }
221            let cmd = self
222                .cmd
223                .parse(ctx.bound_mut(), token)
224                .map_err(Error::Command)?;
225            Ok((State { cmd, trm: self.trm }, 0))
226        }
227    }
228}
229
230impl<'s, S: Into<V>, A: Scope<S, V>, V: cmd::Joker> Iterator for Strict<'s, S, A, V>
231where
232    Token<S>: Logos<'s>,
233{
234    type Item = Result<Command<S, V, Term<A, V>>, Error>;
235    fn next(&mut self) -> Option<Self::Item> {
236        let mut state = State::<S, S, A, V>::default();
237        let mut token_seen = false;
238
239        while let Some(next) = self.lexer.next() {
240            token_seen = true;
241            match state.feed(&mut self.ctx, next, &mut self.lexer) {
242                Err(e) => return Some(Err(e)),
243                Ok((st, open)) => match st.cmd {
244                    cmd::State::Command(cmd) => return Some(Ok(cmd)),
245                    _ if open > 0 => break,
246                    _ => state = st,
247                },
248            }
249        }
250        token_seen.then(|| Err(Error::ExpectedInput))
251    }
252}
253
254impl<I, A, V> Iterator for Lazy<I, A, V>
255where
256    I: Iterator,
257    I::Item: Borrow<str>,
258    A: for<'a> Scope<&'a str, V>,
259    V: for<'a> From<&'a str> + cmd::Joker,
260{
261    type Item = Result<Command<String, V, Term<A, V>>, Error>;
262    fn next(&mut self) -> Option<Self::Item> {
263        if let Some(next) = self.buf.pop() {
264            return Some(Ok(next));
265        }
266
267        for line in &mut self.lines {
268            let mut lexer = Token::lexer(line.borrow());
269
270            // eat leading open comments
271            self.open_comments = lex::comment(&mut lexer, self.open_comments);
272            if self.open_comments > 0 {
273                continue;
274            }
275
276            let mut state = core::mem::take(&mut self.state).map_symb(|s| {
277                self.last = s;
278                &self.last as &str
279            });
280
281            while let Some(next) = lexer.next() {
282                match state.feed(&mut self.ctx, next, &mut lexer) {
283                    Err(e) => return Some(Err(e)),
284                    Ok((st, open)) => {
285                        state = if let cmd::State::Command(cmd) = st.cmd {
286                            self.buf.push(cmd);
287                            State::default()
288                        } else {
289                            st
290                        };
291                        self.open_comments = open;
292                    }
293                };
294            }
295
296            self.state = state.map_symb(|s| s.to_string());
297
298            if !self.buf.is_empty() {
299                self.buf.reverse();
300                return self.buf.pop().map(Ok);
301            }
302        }
303
304        if self.open_comments > 0 || !matches!(self.state.cmd, cmd::State::Init) {
305            Some(Err(Error::ExpectedInput))
306        } else {
307            None
308        }
309    }
310}
311
312/// A command containing scoped terms.
313pub type Scoped<S> = Command<S, S, Term<term::Atom<Symb<S>>, S>>;
314
315#[cfg(test)]
316impl<'s> Scoped<&'s str> {
317    pub fn parse_str(s: &'s str) -> Result<Self, Error> {
318        Strict::new(s).next().unwrap_or(Err(Error::ExpectedInput))
319    }
320}
321
322#[cfg(test)]
323impl Scoped<String> {
324    pub fn parse_lines<S: Borrow<str>>(lines: impl Iterator<Item = S>) -> Result<Self, Error> {
325        Lazy::new(lines).next().unwrap_or(Err(Error::ExpectedInput))
326    }
327}
328
329#[test]
330fn positive() -> Result<(), Error> {
331    Command::parse_str("prop : Type.")?;
332    Command::parse_str("imp: prop -> prop -> prop.")?;
333    Command::parse_str("def prf: prop -> Type.")?;
334    Command::parse_str("[x, y] prf (imp x y) --> prf x -> prf y.")?;
335    Command::parse_str("thm imp_refl (x: prop) : prf (imp x x) := p: prf x => p.")?;
336    Command::parse_str("[] eq _ _ --> false.")?;
337
338    Command::parse_lines("prop :\nType.".lines())?;
339    Command::parse_lines("imp : prop\n-> prop\n-> prop\n.".lines())?;
340    Command::parse_lines("(; \n ;) prop : Type. (; \n ;)".lines())?;
341    Command::parse_lines("imp : prop (; \n ;) -> (; ;) prop\n-> prop\n(; ;).".lines())?;
342    Ok(())
343}
344
345#[test]
346fn negative() {
347    use cmd::Error::*;
348    let parse_err = |s: &str| match Command::parse_str(s) {
349        Err(Error::Command(e)) => e,
350        _ => panic!("command error expected"),
351    };
352    assert_eq!(parse_err("."), ExpectedCmd);
353    assert_eq!(parse_err("x ->"), ExpectedColon);
354    assert_eq!(parse_err("def :"), ExpectedIdent);
355    assert_eq!(parse_err("def d ->"), ExpectedColonOrColonEq);
356    assert_eq!(parse_err("thm t := tm."), ExpectedColon);
357    assert_eq!(parse_err("thm t :  ty."), ExpectedColonEq);
358    assert_eq!(parse_err("thm t (->"), ExpectedIdent);
359    assert_eq!(parse_err("thm t (x ->"), ExpectedColon);
360    assert_eq!(parse_err("thm t (x : a -->"), ExpectedRPar);
361    assert_eq!(parse_err("[->"), ExpectedCommaOrRBrk);
362    assert_eq!(parse_err("[x ->"), ExpectedCommaOrRBrk);
363    assert_eq!(parse_err("[x] l."), ExpectedLongArrow);
364}