logru/textual/
parser.rs

1use std::iter::Peekable;
2
3use logos::{Logos, Span, SpannedIter};
4
5use crate::ast::{AppTerm, Query, Rule, Sym, Term, Var, VarScope};
6use crate::universe::SymbolStorage;
7
8use super::lexer::Token;
9
10struct TokenStream<'a> {
11    source: &'a str,
12    lexer: Peekable<SpannedIter<'a, Token>>,
13}
14
15impl<'a> TokenStream<'a> {
16    pub fn new(source: &'a str) -> Self {
17        let lexer = Token::lexer(source).spanned().peekable();
18
19        Self { source, lexer }
20    }
21
22    #[allow(unused)]
23    pub fn peek(&mut self) -> Option<(Result<Token, ()>, Span)> {
24        self.lexer.peek().cloned()
25    }
26
27    pub fn next(&mut self) -> Option<(Result<Token, ()>, Span)> {
28        self.lexer.next()
29    }
30
31    pub fn advance(&mut self) {
32        self.lexer.next();
33    }
34
35    pub fn peek_token(&mut self) -> Option<Result<Token, ()>> {
36        self.lexer.peek().map(|(tok, _)| tok).cloned()
37    }
38
39    #[allow(unused)]
40    pub fn next_token(&mut self) -> Option<Result<Token, ()>> {
41        self.lexer.next().map(|(tok, _)| tok)
42    }
43
44    pub fn slice(&self, span: Span) -> &str {
45        &self.source[span]
46    }
47
48    pub fn eof(&self) -> Span {
49        self.source.len()..self.source.len()
50    }
51}
52
53/// A parse error originating from [`Parser`].
54#[derive(Debug)]
55pub struct ParseError {
56    /// The range in the source text where the error occurred.
57    pub span: Span,
58    /// The type of error that occurred.
59    pub kind: ParseErrorKind,
60}
61
62impl ParseError {
63    pub fn new(span: Span, kind: ParseErrorKind) -> Self {
64        Self { span, kind }
65    }
66}
67
68/// The various types of parse errors reported by [`Parser`].
69#[derive(Debug)]
70pub enum ParseErrorKind {
71    /// The parser reached the end of the input, but expected more tokens to follow.
72    UnexpectedEof,
73    /// The parser encountered a token that doesn't belong in that place.
74    UnexpectedToken(Token),
75    /// The parser encountered input that could not be recognized as a token.
76    UnrecognizedToken,
77    /// The parser encountered more tokens after the input should have ended.
78    ExpectedEof,
79}
80
81impl ParseErrorKind {
82    /// Translate an unexpected item in the token stream (either an unexpected token or a lexer
83    /// error) into the matching [`ParseErrorKind`].
84    pub fn unexpected(res: Result<Token, ()>) -> Self {
85        match res {
86            Ok(tok) => Self::UnexpectedToken(tok),
87            Err(()) => Self::UnrecognizedToken,
88        }
89    }
90}
91
92/// A parser for terms using the Prolog-like syntax of the
93/// [TextualUniverse](super::TextualUniverse).
94pub struct Parser<T: SymbolStorage> {
95    symbols: T,
96}
97
98impl<T: SymbolStorage> Parser<T> {
99    pub fn new(symbols: T) -> Self {
100        Self { symbols }
101    }
102
103    /// The act of parsing creates a new set of symbols which correspond to the parsed queries. This extracts those symbols.
104    pub fn into_symbols(self) -> T {
105        self.symbols
106    }
107
108    // //////////////////////////////// PUBLIC PARSER ////////////////////////////////
109    pub fn parse_query_str(&mut self, query: &str) -> Result<Query, ParseError> {
110        let mut tokens = TokenStream::new(query);
111        let mut scope = VarScope::new();
112        let goals = self.parse_conjunction1(&mut tokens, &mut scope)?;
113        self.expect_eof(&mut tokens)?;
114        Ok(Query::new(goals, Some(scope)))
115    }
116
117    pub fn parse_rule_str(&mut self, rule: &str) -> Result<Rule, ParseError> {
118        let mut tokens = TokenStream::new(rule);
119        let result = self.parse_rule(&mut tokens)?;
120        self.expect_eof(&mut tokens)?;
121        Ok(result)
122    }
123
124    pub fn parse_rules_str(&mut self, rules: &str) -> Result<Vec<Rule>, ParseError> {
125        let mut tokens = TokenStream::new(rules);
126        let mut result = vec![];
127        while tokens.peek_token().is_some() {
128            result.push(self.parse_rule(&mut tokens)?);
129        }
130        Ok(result)
131    }
132
133    // //////////////////////////////// PARSER INTERNALS ////////////////////////////////
134
135    fn parse_rule(&mut self, tokens: &mut TokenStream) -> Result<Rule, ParseError> {
136        let mut scope = VarScope::new();
137        let head = self.parse_appterm(tokens, &mut scope)?;
138        let tail = match tokens.peek_token() {
139            Some(Ok(Token::ImpliedBy)) => {
140                tokens.advance();
141                self.parse_conjunction1(tokens, &mut scope)?
142            }
143            Some(Ok(Token::Period)) => {
144                tokens.advance();
145                Vec::new()
146            }
147            Some(other) => {
148                let (_, span) = tokens.next().unwrap();
149                return Err(ParseError::new(span, ParseErrorKind::unexpected(other)));
150            }
151            None => return Err(ParseError::new(tokens.eof(), ParseErrorKind::UnexpectedEof)),
152        };
153        Ok(Rule {
154            head,
155            tail,
156            scope: Some(scope),
157        })
158    }
159
160    fn parse_conjunction1(
161        &mut self,
162        tokens: &mut TokenStream,
163        scope: &mut VarScope,
164    ) -> Result<Vec<Term>, ParseError> {
165        let mut goals = vec![self.parse_term(tokens, scope)?];
166        loop {
167            match tokens.peek_token() {
168                Some(Ok(Token::Comma)) => {
169                    tokens.advance();
170                    goals.push(self.parse_term(tokens, scope)?);
171                }
172                Some(Ok(Token::Period)) => {
173                    tokens.advance();
174                    break;
175                }
176                Some(other) => {
177                    let (_, span) = tokens.next().unwrap();
178                    return Err(ParseError::new(span, ParseErrorKind::unexpected(other)));
179                }
180                None => return Err(ParseError::new(tokens.eof(), ParseErrorKind::UnexpectedEof)),
181            }
182        }
183        Ok(goals)
184    }
185
186    fn expect_eof(&mut self, tokens: &mut TokenStream) -> Result<(), ParseError> {
187        if let Some((other, span)) = tokens.next() {
188            Err(ParseError::new(span, ParseErrorKind::unexpected(other)))
189        } else {
190            Ok(())
191        }
192    }
193
194    fn expect_token(
195        &mut self,
196        tokens: &mut TokenStream,
197        expected: Token,
198    ) -> Result<Span, ParseError> {
199        if let Some((actual, span)) = tokens.next() {
200            if actual == Ok(expected) {
201                Ok(span)
202            } else {
203                Err(ParseError::new(span, ParseErrorKind::unexpected(actual)))
204            }
205        } else {
206            Err(ParseError::new(tokens.eof(), ParseErrorKind::UnexpectedEof))
207        }
208    }
209
210    fn parse_appterm(
211        &mut self,
212        tokens: &mut TokenStream,
213        scope: &mut VarScope,
214    ) -> Result<AppTerm, ParseError> {
215        let functor = self.parse_symbol(tokens)?;
216        let mut args = vec![];
217        if let Some(Ok(Token::LParen)) = tokens.peek_token() {
218            tokens.advance();
219            loop {
220                args.push(self.parse_term(tokens, scope)?);
221                match tokens.peek_token() {
222                    Some(Ok(Token::Comma)) => {
223                        tokens.advance();
224                    }
225                    Some(Ok(Token::RParen)) => {
226                        tokens.advance();
227                        break;
228                    }
229                    Some(other) => {
230                        let (_, span) = tokens.next().unwrap();
231                        return Err(ParseError::new(span, ParseErrorKind::unexpected(other)));
232                    }
233                    None => {
234                        return Err(ParseError::new(tokens.eof(), ParseErrorKind::UnexpectedEof))
235                    }
236                }
237            }
238        }
239        Ok(AppTerm::new(functor, args))
240    }
241
242    fn parse_term(
243        &mut self,
244        tokens: &mut TokenStream,
245        scope: &mut VarScope,
246    ) -> Result<Term, ParseError> {
247        match tokens.peek_token() {
248            Some(Ok(Token::Variable)) => self.parse_variable(tokens, scope).map(Term::Var),
249            Some(Ok(Token::Wildcard)) => {
250                tokens.advance();
251                Ok(Term::Var(scope.insert_wildcard()))
252            }
253            Some(Ok(Token::Int(i))) => {
254                tokens.advance();
255                Ok(Term::Int(i))
256            }
257            Some(Ok(Token::Cut)) => {
258                tokens.advance();
259                Ok(Term::Cut)
260            }
261            _ => self.parse_appterm(tokens, scope).map(Term::App),
262        }
263    }
264
265    fn parse_symbol(&mut self, tokens: &mut TokenStream) -> Result<Sym, ParseError> {
266        let span = self.expect_token(tokens, Token::Symbol)?;
267        let sym = self.symbols.get_or_insert_named(tokens.slice(span));
268        Ok(sym)
269    }
270
271    fn parse_variable(
272        &mut self,
273        tokens: &mut TokenStream,
274        scope: &mut VarScope,
275    ) -> Result<Var, ParseError> {
276        let span = self.expect_token(tokens, Token::Variable)?;
277        let var = scope.get_or_insert(tokens.slice(span));
278        Ok(var)
279    }
280}
281
282#[cfg(test)]
283mod test {
284    use super::super::pretty;
285    use super::*;
286    use crate::universe::{SymbolOverlay, SymbolStore};
287
288    fn query_roundtrip_test(input: &str) {
289        let ss = SymbolStore::new();
290        let nu = SymbolOverlay::new(&ss);
291        let mut p = Parser::new(nu);
292
293        let q = p.parse_query_str(input).unwrap();
294
295        let symbols = p.into_symbols();
296        let pretty = pretty::Prettifier::new(&symbols);
297        let qs = pretty.query_to_string(&q);
298        assert_eq!(qs, input);
299    }
300
301    #[test]
302    fn query_parsing() {
303        query_roundtrip_test("grandparent(bob, X).");
304        query_roundtrip_test("grandparent(bob, X), female(X).");
305
306        query_roundtrip_test("add(s(s(s(s(z)))), s(s(z)), X).");
307    }
308
309    fn rule_roundtrip_test(input: &str) {
310        let mut nu = SymbolStore::new();
311        let mut p = Parser::new(&mut nu);
312        let q = p.parse_rule_str(input).unwrap();
313
314        let pretty = pretty::Prettifier::new(&nu);
315        let qs = pretty.rule_to_string(&q);
316        assert_eq!(qs, input);
317    }
318
319    #[test]
320    fn rule_parsing() {
321        rule_roundtrip_test("is_natural(z).");
322        rule_roundtrip_test("is_natural(s(X)) :- is_natural(X).");
323        rule_roundtrip_test("grandparent(X, Y) :- parent(X, Z), parent(Z, Y).");
324    }
325
326    #[test]
327    fn comment_parsing() {
328        let mut nu = SymbolStore::new();
329        let mut p = Parser::new(&mut nu);
330        let with_comment = p.parse_rule_str("foo. % example comment").unwrap();
331        let no_comment = p.parse_rule_str("foo.").unwrap();
332        assert_eq!(with_comment, no_comment);
333        let with_comment = p.parse_rule_str("foo. % bar.").unwrap();
334        assert_eq!(with_comment, no_comment);
335
336        let no_comment = p
337            .parse_rules_str(
338                "foo.
339    bar.",
340            )
341            .unwrap();
342        let with_comment = p
343            .parse_rules_str(
344                "foo. % comment
345    bar.",
346            )
347            .unwrap();
348        assert_eq!(with_comment, no_comment);
349        let with_comment = p
350            .parse_rules_str(
351                "%comment
352    foo.
353    bar.",
354            )
355            .unwrap();
356        assert_eq!(with_comment, no_comment);
357    }
358}