Skip to main content

kermit_parser/
lib.rs

1//! Datalog query parser for Kermit.
2//!
3//! Parses queries in the form `Head :- Body1, Body2, ... .` into a
4//! [`JoinQuery`] AST. Built on the [winnow](https://docs.rs/winnow) parser
5//! combinator library.
6//!
7//! # Syntax
8//!
9//! - **Variables** start with an uppercase letter: `X`, `Name`
10//! - **Atoms** (constants) start with a lowercase letter: `alice`, `edge`
11//! - **Placeholders** are the anonymous wildcard `_`
12//!
13//! ```text
14//! path(X, Z) :- edge(X, Y), edge(Y, Z).
15//! ```
16//!
17//! # Example
18//!
19//! ```
20//! use kermit_parser::{JoinQuery, Term};
21//!
22//! let query: JoinQuery = "path(X, Z) :- edge(X, Y), edge(Y, Z)."
23//!     .parse()
24//!     .expect("valid Datalog");
25//!
26//! assert_eq!(query.head.name, "path");
27//! assert_eq!(query.head.terms, vec![
28//!     Term::Var("X".into()),
29//!     Term::Var("Z".into()),
30//! ]);
31//! assert_eq!(query.body.len(), 2);
32//! assert_eq!(query.body[0].name, "edge");
33//! ```
34//!
35//! Parsing returns an error if the input is not a well-formed rule:
36//!
37//! ```
38//! use kermit_parser::JoinQuery;
39//!
40//! assert!("not a valid query".parse::<JoinQuery>().is_err());
41//! ```
42
43#![deny(missing_docs)]
44
45mod join_query;
46
47pub use join_query::{JoinQuery, Predicate, Term};
48use winnow::{
49    ascii::multispace0,
50    combinator::{delimited, separated},
51    error::{ContextError, ErrMode},
52    token::take_while,
53    Parser,
54};
55
56type PResult<T> = Result<T, winnow::error::ErrMode<ContextError>>;
57
58fn ws(input: &mut &str) -> PResult<()> {
59    let _: &str = multispace0.parse_next(input)?;
60    Ok(())
61}
62
63fn ident(input: &mut &str) -> PResult<String> {
64    let start = *input;
65    take_while(1.., |c: char| c.is_ascii_alphabetic()).parse_next(input)?;
66    take_while(0.., |c: char| c.is_ascii_alphanumeric() || c == '_').parse_next(input)?;
67    let end = *input;
68    let len = start.len() - end.len();
69    Ok(start[..len].to_string())
70}
71
72fn comma(input: &mut &str) -> PResult<char> { delimited(ws, ',', ws).parse_next(input) }
73
74fn dot(input: &mut &str) -> PResult<char> { delimited(ws, '.', ws).parse_next(input) }
75
76// ---------- term / predicate ----------
77fn term(input: &mut &str) -> PResult<Term> {
78    if input.starts_with('_')
79        && (input.len() == 1
80            || !input
81                .chars()
82                .nth(1)
83                .is_some_and(|c| c.is_ascii_alphanumeric()))
84    {
85        let _ = '_'.parse_next(input)?;
86        return Ok(Term::Placeholder);
87    }
88
89    let name = ident.parse_next(input)?;
90
91    let is_var = name
92        .chars()
93        .next()
94        .map(|c| c.is_ascii_uppercase())
95        .unwrap_or(false);
96
97    Ok(if is_var {
98        Term::Var(name)
99    } else {
100        Term::Atom(name)
101    })
102}
103
104fn term_list(input: &mut &str) -> PResult<Vec<Term>> {
105    delimited(
106        delimited(ws, '(', ws),
107        separated(1.., term, comma),
108        delimited(ws, ')', ws),
109    )
110    .parse_next(input)
111}
112
113fn predicate(input: &mut &str) -> PResult<Predicate> {
114    ws.parse_next(input)?;
115    let name = ident.parse_next(input)?;
116    let terms = term_list.parse_next(input)?;
117    Ok(Predicate {
118        name,
119        terms,
120    })
121}
122
123fn query(input: &mut &str) -> PResult<JoinQuery> {
124    let head = predicate.parse_next(input)?;
125    // ":-" separates head from body
126    let _ = delimited(ws, ":-", ws).parse_next(input)?;
127    let body = separated(1.., predicate, comma).parse_next(input)?;
128    let _ = dot.parse_next(input)?;
129    Ok(JoinQuery {
130        head,
131        body,
132    })
133}
134
135impl std::str::FromStr for JoinQuery {
136    type Err = ErrMode<ContextError>;
137
138    /// Parses a Datalog rule into a [`JoinQuery`].
139    ///
140    /// # Errors
141    ///
142    /// Returns an error if `s` is not a single well-formed rule of the shape
143    /// `Head(…) :- Body1(…), …, BodyN(…).` — including missing `:-`, empty
144    /// body, missing terminating `.`, trailing content after the `.`, or
145    /// identifiers that don't match the [syntax rules](crate).
146    fn from_str(s: &str) -> Result<Self, Self::Err> {
147        let mut input = s;
148        let result = query.parse_next(&mut input)?;
149        ws.parse_next(&mut input)?;
150        if !input.is_empty() {
151            return Err(ErrMode::Backtrack(ContextError::new()));
152        }
153        Ok(result)
154    }
155}
156
157// ---------- demo ----------
158
159#[cfg(test)]
160mod tests {
161    use super::*;
162
163    #[test]
164    fn test_query_parser() {
165        let mut src = "P(A, C) :- Q(A, B), R(B, C).";
166        match query.parse_next(&mut src) {
167            | Ok(ast) => {
168                println!("{ast:#?}");
169                // Remaining input (should be empty or just whitespace)
170                let _ = ws.parse_next(&mut src);
171                if !src.is_empty() {
172                    eprintln!("Unparsed tail: {src:?}");
173                }
174                assert_eq!(ast.head.name, "P");
175                assert_eq!(ast.head.terms.len(), 2);
176                assert_eq!(ast.body.len(), 2);
177            },
178            | Err(e) => panic!("Parse error: {e:?}"),
179        }
180    }
181
182    #[test]
183    fn test_query_from_str_simple() {
184        let query_str = "P(X) :- Q(X).";
185        let query: JoinQuery = query_str.parse().expect("Failed to parse query");
186
187        assert_eq!(query.head.name, "P");
188        assert_eq!(query.head.terms.len(), 1);
189        assert_eq!(query.head.terms[0], Term::Var("X".to_string()));
190
191        assert_eq!(query.body.len(), 1);
192        assert_eq!(query.body[0].name, "Q");
193        assert_eq!(query.body[0].terms.len(), 1);
194        assert_eq!(query.body[0].terms[0], Term::Var("X".to_string()));
195    }
196
197    #[test]
198    fn test_query_from_str_multiple_body_predicates() {
199        let query_str = "ancestor(X, Z) :- parent(X, Y), parent(Y, Z).";
200        let query: JoinQuery = query_str.parse().expect("Failed to parse query");
201
202        assert_eq!(query.head.name, "ancestor");
203        assert_eq!(query.head.terms.len(), 2);
204        assert_eq!(query.head.terms[0], Term::Var("X".to_string()));
205        assert_eq!(query.head.terms[1], Term::Var("Z".to_string()));
206
207        assert_eq!(query.body.len(), 2);
208        assert_eq!(query.body[0].name, "parent");
209        assert_eq!(query.body[1].name, "parent");
210    }
211
212    #[test]
213    fn test_query_from_str_with_atoms() {
214        let query_str = "likes(alice, X):- food(X), healthy(X).";
215        let query: JoinQuery = query_str.parse().expect("Failed to parse query");
216
217        assert_eq!(query.head.name, "likes");
218        assert_eq!(query.head.terms[0], Term::Atom("alice".to_string()));
219        assert_eq!(query.head.terms[1], Term::Var("X".to_string()));
220
221        assert_eq!(query.body.len(), 2);
222    }
223
224    #[test]
225    fn test_whitespace_variants() {
226        // All of these encode "P(X,Y) :- Q(X), R(Y)." with varying whitespace.
227        // The parser should handle all of them identically.
228        let cases = [
229            ("extra whitespace", "  P(X,Y)  :-  Q(X),R(Y)  .  "),
230            ("minimal whitespace", "P(X,Y):-Q(X),R(Y)."),
231            ("spaces after commas", "P( X , Y ) :- Q( X ) , R( Y ) ."),
232        ];
233        for (label, input) in cases {
234            let query: JoinQuery = input
235                .parse()
236                .unwrap_or_else(|e| panic!("Failed to parse {label}: {e:?}"));
237            assert_eq!(query.head.name, "P", "{label}");
238            assert_eq!(query.head.terms.len(), 2, "{label}");
239            assert_eq!(query.body.len(), 2, "{label}");
240        }
241    }
242
243    #[test]
244    fn test_query_from_str_complex() {
245        let query_str = "path(X, Z) :- edge(X, Y), edge(Y, Z), vertex(X), vertex(Y), vertex(Z).";
246        let query: JoinQuery = query_str.parse().expect("Failed to parse complex query");
247
248        assert_eq!(query.head.name, "path");
249        assert_eq!(query.head.terms.len(), 2);
250        assert_eq!(query.body.len(), 5);
251
252        assert_eq!(query.body[0].name, "edge");
253        assert_eq!(query.body[1].name, "edge");
254        assert_eq!(query.body[2].name, "vertex");
255        assert_eq!(query.body[3].name, "vertex");
256        assert_eq!(query.body[4].name, "vertex");
257    }
258
259    #[test]
260    fn test_invalid_syntax() {
261        let cases = [
262            ("missing dot", "P(X) :- Q(X)"),
263            ("missing :-", "P(X) Q(X)."),
264            ("empty body", "P(X) :- ."),
265            ("trailing garbage", "P(X) :- Q(X). GARBAGE"),
266            ("two queries", "P(X) :- Q(X). R(Y) :- S(Y)."),
267            ("double dot", "P(X) :- Q(X).."),
268        ];
269        for (label, input) in cases {
270            let result: Result<JoinQuery, _> = input.parse();
271            assert!(result.is_err(), "{label} should fail to parse");
272        }
273    }
274
275    #[test]
276    fn test_query_from_str_with_placeholder() {
277        let query_str = "result(X, _) :- relation(X, _).";
278        let query: JoinQuery = query_str
279            .parse()
280            .expect("Failed to parse query with placeholder");
281
282        assert_eq!(query.head.name, "result");
283        assert_eq!(query.head.terms.len(), 2);
284        assert_eq!(query.head.terms[0], Term::Var("X".to_string()));
285        assert_eq!(query.head.terms[1], Term::Placeholder);
286
287        assert_eq!(query.body.len(), 1);
288        assert_eq!(query.body[0].terms[0], Term::Var("X".to_string()));
289        assert_eq!(query.body[0].terms[1], Term::Placeholder);
290    }
291
292    #[test]
293    fn test_query_from_str_multiple_placeholders() {
294        let query_str = "query(_, _, Z) :- data(_, _, Z).";
295        let query: JoinQuery = query_str
296            .parse()
297            .expect("Failed to parse query with multiple placeholders");
298
299        assert_eq!(query.head.terms[0], Term::Placeholder);
300        assert_eq!(query.head.terms[1], Term::Placeholder);
301        assert_eq!(query.head.terms[2], Term::Var("Z".to_string()));
302
303        assert_eq!(query.body[0].terms[0], Term::Placeholder);
304        assert_eq!(query.body[0].terms[1], Term::Placeholder);
305        assert_eq!(query.body[0].terms[2], Term::Var("Z".to_string()));
306    }
307
308    #[test]
309    fn test_query_from_str_placeholder_with_atoms() {
310        let query_str = "match(alice, _) :- person(alice, _), active(_).";
311        let query: JoinQuery = query_str
312            .parse()
313            .expect("Failed to parse query with placeholders and atoms");
314
315        assert_eq!(query.head.terms[0], Term::Atom("alice".to_string()));
316        assert_eq!(query.head.terms[1], Term::Placeholder);
317
318        assert_eq!(query.body[0].name, "person");
319        assert_eq!(query.body[0].terms[0], Term::Atom("alice".to_string()));
320        assert_eq!(query.body[0].terms[1], Term::Placeholder);
321
322        assert_eq!(query.body[1].name, "active");
323        assert_eq!(query.body[1].terms[0], Term::Placeholder);
324    }
325
326    #[test]
327    fn test_query_from_str_only_placeholders() {
328        let query_str = "any(_,_) :- data(_,_).";
329        let query: JoinQuery = query_str
330            .parse()
331            .expect("Failed to parse query with only placeholders");
332
333        assert_eq!(query.head.terms.len(), 2);
334        assert_eq!(query.head.terms[0], Term::Placeholder);
335        assert_eq!(query.head.terms[1], Term::Placeholder);
336
337        assert_eq!(query.body[0].terms.len(), 2);
338        assert_eq!(query.body[0].terms[0], Term::Placeholder);
339        assert_eq!(query.body[0].terms[1], Term::Placeholder);
340    }
341
342    #[test]
343    fn test_query_from_str_placeholder_mixed() {
344        let query_str = "filter(X, _, bob, Y) :- source(X, _, bob, Y), check(X, Y).";
345        let query: JoinQuery = query_str
346            .parse()
347            .expect("Failed to parse complex query with placeholders");
348
349        assert_eq!(query.head.terms.len(), 4);
350        assert_eq!(query.head.terms[0], Term::Var("X".to_string()));
351        assert_eq!(query.head.terms[1], Term::Placeholder);
352        assert_eq!(query.head.terms[2], Term::Atom("bob".to_string()));
353        assert_eq!(query.head.terms[3], Term::Var("Y".to_string()));
354
355        assert_eq!(query.body[0].terms.len(), 4);
356        assert_eq!(query.body[0].terms[1], Term::Placeholder);
357        assert_eq!(query.body[1].terms.len(), 2);
358    }
359}
360
361#[cfg(test)]
362mod edge_case_tests {
363    use super::*;
364
365    fn parse(s: &str) -> Result<JoinQuery, String> {
366        s.parse::<JoinQuery>().map_err(|e| format!("{e:?}"))
367    }
368
369    #[test]
370    fn identifiers_with_digits() {
371        let q = parse("edge2(X1, Y2) :- node3(X1), link4(X1, Y2).").unwrap();
372        assert_eq!(q.head.name, "edge2");
373        assert_eq!(q.head.terms[0], Term::Var("X1".to_string()));
374        assert_eq!(q.body[0].name, "node3");
375    }
376
377    #[test]
378    fn multiline_query() {
379        let q = parse("P(X, Y) :-\n  Q(X),\n  R(Y).").unwrap();
380        assert_eq!(q.head.name, "P");
381        assert_eq!(q.body.len(), 2);
382    }
383
384    #[test]
385    fn duplicate_variables() {
386        let q = parse("P(X, X) :- Q(X, X).").unwrap();
387        assert_eq!(q.head.terms[0], Term::Var("X".to_string()));
388        assert_eq!(q.head.terms[1], Term::Var("X".to_string()));
389    }
390
391    #[test]
392    fn single_char_names() {
393        let q = parse("P(X) :- Q(X).").unwrap();
394        assert_eq!(q.head.name, "P");
395        assert_eq!(q.body[0].name, "Q");
396    }
397
398    #[test]
399    fn rejects_empty_input() {
400        assert!(parse("").is_err());
401    }
402
403    #[test]
404    fn rejects_whitespace_only() {
405        assert!(parse("   ").is_err());
406    }
407
408    #[test]
409    fn rejects_underscore_as_predicate_name() {
410        assert!(parse("_(X) :- Q(X).").is_err());
411    }
412
413    #[test]
414    fn rejects_underscore_alpha_as_term() {
415        assert!(parse("P(_abc) :- Q(X).").is_err());
416    }
417
418    #[test]
419    fn rejects_underscore_numeric_as_term() {
420        assert!(parse("P(_123) :- Q(X).").is_err());
421    }
422
423    #[test]
424    fn rejects_unicode_identifier() {
425        assert!(parse("\u{00d1}ame(X) :- Q(X).").is_err());
426    }
427
428    #[test]
429    fn rejects_numeric_start_identifier() {
430        assert!(parse("123abc(X) :- Q(X).").is_err());
431    }
432
433    #[test]
434    fn many_body_predicates() {
435        let body_preds: Vec<String> = (0..20).map(|i| format!("r{i}(X)")).collect();
436        let query_str = format!("P(X) :- {}.", body_preds.join(", "));
437        let q = parse(&query_str).unwrap();
438        assert_eq!(q.body.len(), 20);
439    }
440
441    #[test]
442    fn many_terms_in_predicate() {
443        let vars: Vec<String> = (0..15).map(|i| format!("X{i}")).collect();
444        let terms = vars.join(", ");
445        let query_str = format!("P({terms}) :- Q({terms}).");
446        let q = parse(&query_str).unwrap();
447        assert_eq!(q.head.terms.len(), 15);
448        assert_eq!(q.body[0].terms.len(), 15);
449    }
450}