Skip to main content

kermit_parser/
lib.rs

1//! Datalog query parser for Kermit.
2//!
3//! Parses queries in the form `Head :- Body1, Body2, ... .` into a
4//! [`JoinQuery`] AST. Built on the [winnow](https://docs.rs/winnow) parser
5//! combinator library.
6//!
7//! # Syntax
8//!
9//! - **Variables** start with an uppercase letter: `X`, `Name`
10//! - **Atoms** (constants) start with a lowercase letter: `alice`, `edge`
11//! - **Placeholders** are the anonymous wildcard `_`
12//!
13//! ```text
14//! path(X, Z) :- edge(X, Y), edge(Y, Z).
15//! ```
16
17mod join_query;
18
19pub use join_query::{JoinQuery, Predicate, Term};
20use winnow::{
21    ascii::multispace0,
22    combinator::{delimited, separated},
23    error::{ContextError, ErrMode},
24    token::take_while,
25    Parser,
26};
27
28type PResult<T> = Result<T, winnow::error::ErrMode<ContextError>>;
29
30fn ws(input: &mut &str) -> PResult<()> {
31    let _: &str = multispace0.parse_next(input)?;
32    Ok(())
33}
34
35fn ident(input: &mut &str) -> PResult<String> {
36    let start = *input;
37    take_while(1.., |c: char| c.is_ascii_alphabetic()).parse_next(input)?;
38    take_while(0.., |c: char| c.is_ascii_alphanumeric() || c == '_').parse_next(input)?;
39    let end = *input;
40    let len = start.len() - end.len();
41    Ok(start[..len].to_string())
42}
43
44fn comma(input: &mut &str) -> PResult<char> { delimited(ws, ',', ws).parse_next(input) }
45
46fn dot(input: &mut &str) -> PResult<char> { delimited(ws, '.', ws).parse_next(input) }
47
48// ---------- term / predicate ----------
49fn term(input: &mut &str) -> PResult<Term> {
50    if input.starts_with('_')
51        && (input.len() == 1
52            || !input
53                .chars()
54                .nth(1)
55                .is_some_and(|c| c.is_ascii_alphanumeric()))
56    {
57        let _ = '_'.parse_next(input)?;
58        return Ok(Term::Placeholder);
59    }
60
61    let name = ident.parse_next(input)?;
62
63    let is_var = name
64        .chars()
65        .next()
66        .map(|c| c.is_ascii_uppercase())
67        .unwrap_or(false);
68
69    Ok(if is_var {
70        Term::Var(name)
71    } else {
72        Term::Atom(name)
73    })
74}
75
76fn term_list(input: &mut &str) -> PResult<Vec<Term>> {
77    delimited(
78        delimited(ws, '(', ws),
79        separated(1.., term, comma),
80        delimited(ws, ')', ws),
81    )
82    .parse_next(input)
83}
84
85fn predicate(input: &mut &str) -> PResult<Predicate> {
86    ws.parse_next(input)?;
87    let name = ident.parse_next(input)?;
88    let terms = term_list.parse_next(input)?;
89    Ok(Predicate {
90        name,
91        terms,
92    })
93}
94
95fn query(input: &mut &str) -> PResult<JoinQuery> {
96    let head = predicate.parse_next(input)?;
97    // ":-" separates head from body
98    let _ = delimited(ws, ":-", ws).parse_next(input)?;
99    let body = separated(1.., predicate, comma).parse_next(input)?;
100    let _ = dot.parse_next(input)?;
101    Ok(JoinQuery {
102        head,
103        body,
104    })
105}
106
107impl std::str::FromStr for JoinQuery {
108    type Err = ErrMode<ContextError>;
109
110    fn from_str(s: &str) -> Result<Self, Self::Err> {
111        let mut input = s;
112        let result = query.parse_next(&mut input)?;
113        ws.parse_next(&mut input)?;
114        if !input.is_empty() {
115            return Err(ErrMode::Backtrack(ContextError::new()));
116        }
117        Ok(result)
118    }
119}
120
121// ---------- demo ----------
122
123#[cfg(test)]
124mod tests {
125    use super::*;
126
127    #[test]
128    fn test_query_parser() {
129        let mut src = "P(A, C) :- Q(A, B), R(B, C).";
130        match query.parse_next(&mut src) {
131            | Ok(ast) => {
132                println!("{ast:#?}");
133                // Remaining input (should be empty or just whitespace)
134                let _ = ws.parse_next(&mut src);
135                if !src.is_empty() {
136                    eprintln!("Unparsed tail: {src:?}");
137                }
138                assert_eq!(ast.head.name, "P");
139                assert_eq!(ast.head.terms.len(), 2);
140                assert_eq!(ast.body.len(), 2);
141            },
142            | Err(e) => panic!("Parse error: {e:?}"),
143        }
144    }
145
146    #[test]
147    fn test_query_from_str_simple() {
148        let query_str = "P(X) :- Q(X).";
149        let query: JoinQuery = query_str.parse().expect("Failed to parse query");
150
151        assert_eq!(query.head.name, "P");
152        assert_eq!(query.head.terms.len(), 1);
153        assert_eq!(query.head.terms[0], Term::Var("X".to_string()));
154
155        assert_eq!(query.body.len(), 1);
156        assert_eq!(query.body[0].name, "Q");
157        assert_eq!(query.body[0].terms.len(), 1);
158        assert_eq!(query.body[0].terms[0], Term::Var("X".to_string()));
159    }
160
161    #[test]
162    fn test_query_from_str_multiple_body_predicates() {
163        let query_str = "ancestor(X, Z) :- parent(X, Y), parent(Y, Z).";
164        let query: JoinQuery = query_str.parse().expect("Failed to parse query");
165
166        assert_eq!(query.head.name, "ancestor");
167        assert_eq!(query.head.terms.len(), 2);
168        assert_eq!(query.head.terms[0], Term::Var("X".to_string()));
169        assert_eq!(query.head.terms[1], Term::Var("Z".to_string()));
170
171        assert_eq!(query.body.len(), 2);
172        assert_eq!(query.body[0].name, "parent");
173        assert_eq!(query.body[1].name, "parent");
174    }
175
176    #[test]
177    fn test_query_from_str_with_atoms() {
178        let query_str = "likes(alice, X):- food(X), healthy(X).";
179        let query: JoinQuery = query_str.parse().expect("Failed to parse query");
180
181        assert_eq!(query.head.name, "likes");
182        assert_eq!(query.head.terms[0], Term::Atom("alice".to_string()));
183        assert_eq!(query.head.terms[1], Term::Var("X".to_string()));
184
185        assert_eq!(query.body.len(), 2);
186    }
187
188    #[test]
189    fn test_whitespace_variants() {
190        // All of these encode "P(X,Y) :- Q(X), R(Y)." with varying whitespace.
191        // The parser should handle all of them identically.
192        let cases = [
193            ("extra whitespace", "  P(X,Y)  :-  Q(X),R(Y)  .  "),
194            ("minimal whitespace", "P(X,Y):-Q(X),R(Y)."),
195            ("spaces after commas", "P( X , Y ) :- Q( X ) , R( Y ) ."),
196        ];
197        for (label, input) in cases {
198            let query: JoinQuery = input
199                .parse()
200                .unwrap_or_else(|e| panic!("Failed to parse {label}: {e:?}"));
201            assert_eq!(query.head.name, "P", "{label}");
202            assert_eq!(query.head.terms.len(), 2, "{label}");
203            assert_eq!(query.body.len(), 2, "{label}");
204        }
205    }
206
207    #[test]
208    fn test_query_from_str_complex() {
209        let query_str = "path(X, Z) :- edge(X, Y), edge(Y, Z), vertex(X), vertex(Y), vertex(Z).";
210        let query: JoinQuery = query_str.parse().expect("Failed to parse complex query");
211
212        assert_eq!(query.head.name, "path");
213        assert_eq!(query.head.terms.len(), 2);
214        assert_eq!(query.body.len(), 5);
215
216        assert_eq!(query.body[0].name, "edge");
217        assert_eq!(query.body[1].name, "edge");
218        assert_eq!(query.body[2].name, "vertex");
219        assert_eq!(query.body[3].name, "vertex");
220        assert_eq!(query.body[4].name, "vertex");
221    }
222
223    #[test]
224    fn test_invalid_syntax() {
225        let cases = [
226            ("missing dot", "P(X) :- Q(X)"),
227            ("missing :-", "P(X) Q(X)."),
228            ("empty body", "P(X) :- ."),
229            ("trailing garbage", "P(X) :- Q(X). GARBAGE"),
230            ("two queries", "P(X) :- Q(X). R(Y) :- S(Y)."),
231            ("double dot", "P(X) :- Q(X).."),
232        ];
233        for (label, input) in cases {
234            let result: Result<JoinQuery, _> = input.parse();
235            assert!(result.is_err(), "{label} should fail to parse");
236        }
237    }
238
239    #[test]
240    fn test_query_from_str_with_placeholder() {
241        let query_str = "result(X, _) :- relation(X, _).";
242        let query: JoinQuery = query_str
243            .parse()
244            .expect("Failed to parse query with placeholder");
245
246        assert_eq!(query.head.name, "result");
247        assert_eq!(query.head.terms.len(), 2);
248        assert_eq!(query.head.terms[0], Term::Var("X".to_string()));
249        assert_eq!(query.head.terms[1], Term::Placeholder);
250
251        assert_eq!(query.body.len(), 1);
252        assert_eq!(query.body[0].terms[0], Term::Var("X".to_string()));
253        assert_eq!(query.body[0].terms[1], Term::Placeholder);
254    }
255
256    #[test]
257    fn test_query_from_str_multiple_placeholders() {
258        let query_str = "query(_, _, Z) :- data(_, _, Z).";
259        let query: JoinQuery = query_str
260            .parse()
261            .expect("Failed to parse query with multiple placeholders");
262
263        assert_eq!(query.head.terms[0], Term::Placeholder);
264        assert_eq!(query.head.terms[1], Term::Placeholder);
265        assert_eq!(query.head.terms[2], Term::Var("Z".to_string()));
266
267        assert_eq!(query.body[0].terms[0], Term::Placeholder);
268        assert_eq!(query.body[0].terms[1], Term::Placeholder);
269        assert_eq!(query.body[0].terms[2], Term::Var("Z".to_string()));
270    }
271
272    #[test]
273    fn test_query_from_str_placeholder_with_atoms() {
274        let query_str = "match(alice, _) :- person(alice, _), active(_).";
275        let query: JoinQuery = query_str
276            .parse()
277            .expect("Failed to parse query with placeholders and atoms");
278
279        assert_eq!(query.head.terms[0], Term::Atom("alice".to_string()));
280        assert_eq!(query.head.terms[1], Term::Placeholder);
281
282        assert_eq!(query.body[0].name, "person");
283        assert_eq!(query.body[0].terms[0], Term::Atom("alice".to_string()));
284        assert_eq!(query.body[0].terms[1], Term::Placeholder);
285
286        assert_eq!(query.body[1].name, "active");
287        assert_eq!(query.body[1].terms[0], Term::Placeholder);
288    }
289
290    #[test]
291    fn test_query_from_str_only_placeholders() {
292        let query_str = "any(_,_) :- data(_,_).";
293        let query: JoinQuery = query_str
294            .parse()
295            .expect("Failed to parse query with only placeholders");
296
297        assert_eq!(query.head.terms.len(), 2);
298        assert_eq!(query.head.terms[0], Term::Placeholder);
299        assert_eq!(query.head.terms[1], Term::Placeholder);
300
301        assert_eq!(query.body[0].terms.len(), 2);
302        assert_eq!(query.body[0].terms[0], Term::Placeholder);
303        assert_eq!(query.body[0].terms[1], Term::Placeholder);
304    }
305
306    #[test]
307    fn test_query_from_str_placeholder_mixed() {
308        let query_str = "filter(X, _, bob, Y) :- source(X, _, bob, Y), check(X, Y).";
309        let query: JoinQuery = query_str
310            .parse()
311            .expect("Failed to parse complex query with placeholders");
312
313        assert_eq!(query.head.terms.len(), 4);
314        assert_eq!(query.head.terms[0], Term::Var("X".to_string()));
315        assert_eq!(query.head.terms[1], Term::Placeholder);
316        assert_eq!(query.head.terms[2], Term::Atom("bob".to_string()));
317        assert_eq!(query.head.terms[3], Term::Var("Y".to_string()));
318
319        assert_eq!(query.body[0].terms.len(), 4);
320        assert_eq!(query.body[0].terms[1], Term::Placeholder);
321        assert_eq!(query.body[1].terms.len(), 2);
322    }
323}
324
325#[cfg(test)]
326mod edge_case_tests {
327    use super::*;
328
329    fn parse(s: &str) -> Result<JoinQuery, String> {
330        s.parse::<JoinQuery>().map_err(|e| format!("{e:?}"))
331    }
332
333    #[test]
334    fn identifiers_with_digits() {
335        let q = parse("edge2(X1, Y2) :- node3(X1), link4(X1, Y2).").unwrap();
336        assert_eq!(q.head.name, "edge2");
337        assert_eq!(q.head.terms[0], Term::Var("X1".to_string()));
338        assert_eq!(q.body[0].name, "node3");
339    }
340
341    #[test]
342    fn multiline_query() {
343        let q = parse("P(X, Y) :-\n  Q(X),\n  R(Y).").unwrap();
344        assert_eq!(q.head.name, "P");
345        assert_eq!(q.body.len(), 2);
346    }
347
348    #[test]
349    fn duplicate_variables() {
350        let q = parse("P(X, X) :- Q(X, X).").unwrap();
351        assert_eq!(q.head.terms[0], Term::Var("X".to_string()));
352        assert_eq!(q.head.terms[1], Term::Var("X".to_string()));
353    }
354
355    #[test]
356    fn single_char_names() {
357        let q = parse("P(X) :- Q(X).").unwrap();
358        assert_eq!(q.head.name, "P");
359        assert_eq!(q.body[0].name, "Q");
360    }
361
362    #[test]
363    fn rejects_empty_input() {
364        assert!(parse("").is_err());
365    }
366
367    #[test]
368    fn rejects_whitespace_only() {
369        assert!(parse("   ").is_err());
370    }
371
372    #[test]
373    fn rejects_underscore_as_predicate_name() {
374        assert!(parse("_(X) :- Q(X).").is_err());
375    }
376
377    #[test]
378    fn rejects_underscore_alpha_as_term() {
379        assert!(parse("P(_abc) :- Q(X).").is_err());
380    }
381
382    #[test]
383    fn rejects_underscore_numeric_as_term() {
384        assert!(parse("P(_123) :- Q(X).").is_err());
385    }
386
387    #[test]
388    fn rejects_unicode_identifier() {
389        assert!(parse("\u{00d1}ame(X) :- Q(X).").is_err());
390    }
391
392    #[test]
393    fn rejects_numeric_start_identifier() {
394        assert!(parse("123abc(X) :- Q(X).").is_err());
395    }
396
397    #[test]
398    fn many_body_predicates() {
399        let body_preds: Vec<String> = (0..20).map(|i| format!("r{i}(X)")).collect();
400        let query_str = format!("P(X) :- {}.", body_preds.join(", "));
401        let q = parse(&query_str).unwrap();
402        assert_eq!(q.body.len(), 20);
403    }
404
405    #[test]
406    fn many_terms_in_predicate() {
407        let vars: Vec<String> = (0..15).map(|i| format!("X{i}")).collect();
408        let terms = vars.join(", ");
409        let query_str = format!("P({terms}) :- Q({terms}).");
410        let q = parse(&query_str).unwrap();
411        assert_eq!(q.head.terms.len(), 15);
412        assert_eq!(q.body[0].terms.len(), 15);
413    }
414}