1#![deny(missing_docs)]
44
45mod join_query;
46
47pub use join_query::{JoinQuery, Predicate, Term};
48use winnow::{
49 ascii::multispace0,
50 combinator::{delimited, separated},
51 error::{ContextError, ErrMode},
52 token::take_while,
53 Parser,
54};
55
56type PResult<T> = Result<T, winnow::error::ErrMode<ContextError>>;
57
58fn ws(input: &mut &str) -> PResult<()> {
59 let _: &str = multispace0.parse_next(input)?;
60 Ok(())
61}
62
63fn ident(input: &mut &str) -> PResult<String> {
64 let start = *input;
65 take_while(1.., |c: char| c.is_ascii_alphabetic()).parse_next(input)?;
66 take_while(0.., |c: char| c.is_ascii_alphanumeric() || c == '_').parse_next(input)?;
67 let end = *input;
68 let len = start.len() - end.len();
69 Ok(start[..len].to_string())
70}
71
72fn comma(input: &mut &str) -> PResult<char> { delimited(ws, ',', ws).parse_next(input) }
73
74fn dot(input: &mut &str) -> PResult<char> { delimited(ws, '.', ws).parse_next(input) }
75
76fn term(input: &mut &str) -> PResult<Term> {
78 if input.starts_with('_')
79 && (input.len() == 1
80 || !input
81 .chars()
82 .nth(1)
83 .is_some_and(|c| c.is_ascii_alphanumeric()))
84 {
85 let _ = '_'.parse_next(input)?;
86 return Ok(Term::Placeholder);
87 }
88
89 let name = ident.parse_next(input)?;
90
91 let is_var = name
92 .chars()
93 .next()
94 .map(|c| c.is_ascii_uppercase())
95 .unwrap_or(false);
96
97 Ok(if is_var {
98 Term::Var(name)
99 } else {
100 Term::Atom(name)
101 })
102}
103
104fn term_list(input: &mut &str) -> PResult<Vec<Term>> {
105 delimited(
106 delimited(ws, '(', ws),
107 separated(1.., term, comma),
108 delimited(ws, ')', ws),
109 )
110 .parse_next(input)
111}
112
113fn predicate(input: &mut &str) -> PResult<Predicate> {
114 ws.parse_next(input)?;
115 let name = ident.parse_next(input)?;
116 let terms = term_list.parse_next(input)?;
117 Ok(Predicate {
118 name,
119 terms,
120 })
121}
122
123fn query(input: &mut &str) -> PResult<JoinQuery> {
124 let head = predicate.parse_next(input)?;
125 let _ = delimited(ws, ":-", ws).parse_next(input)?;
127 let body = separated(1.., predicate, comma).parse_next(input)?;
128 let _ = dot.parse_next(input)?;
129 Ok(JoinQuery {
130 head,
131 body,
132 })
133}
134
135impl std::str::FromStr for JoinQuery {
136 type Err = ErrMode<ContextError>;
137
138 fn from_str(s: &str) -> Result<Self, Self::Err> {
147 let mut input = s;
148 let result = query.parse_next(&mut input)?;
149 ws.parse_next(&mut input)?;
150 if !input.is_empty() {
151 return Err(ErrMode::Backtrack(ContextError::new()));
152 }
153 Ok(result)
154 }
155}
156
157#[cfg(test)]
160mod tests {
161 use super::*;
162
163 #[test]
164 fn test_query_parser() {
165 let mut src = "P(A, C) :- Q(A, B), R(B, C).";
166 match query.parse_next(&mut src) {
167 | Ok(ast) => {
168 println!("{ast:#?}");
169 let _ = ws.parse_next(&mut src);
171 if !src.is_empty() {
172 eprintln!("Unparsed tail: {src:?}");
173 }
174 assert_eq!(ast.head.name, "P");
175 assert_eq!(ast.head.terms.len(), 2);
176 assert_eq!(ast.body.len(), 2);
177 },
178 | Err(e) => panic!("Parse error: {e:?}"),
179 }
180 }
181
182 #[test]
183 fn test_query_from_str_simple() {
184 let query_str = "P(X) :- Q(X).";
185 let query: JoinQuery = query_str.parse().expect("Failed to parse query");
186
187 assert_eq!(query.head.name, "P");
188 assert_eq!(query.head.terms.len(), 1);
189 assert_eq!(query.head.terms[0], Term::Var("X".to_string()));
190
191 assert_eq!(query.body.len(), 1);
192 assert_eq!(query.body[0].name, "Q");
193 assert_eq!(query.body[0].terms.len(), 1);
194 assert_eq!(query.body[0].terms[0], Term::Var("X".to_string()));
195 }
196
197 #[test]
198 fn test_query_from_str_multiple_body_predicates() {
199 let query_str = "ancestor(X, Z) :- parent(X, Y), parent(Y, Z).";
200 let query: JoinQuery = query_str.parse().expect("Failed to parse query");
201
202 assert_eq!(query.head.name, "ancestor");
203 assert_eq!(query.head.terms.len(), 2);
204 assert_eq!(query.head.terms[0], Term::Var("X".to_string()));
205 assert_eq!(query.head.terms[1], Term::Var("Z".to_string()));
206
207 assert_eq!(query.body.len(), 2);
208 assert_eq!(query.body[0].name, "parent");
209 assert_eq!(query.body[1].name, "parent");
210 }
211
212 #[test]
213 fn test_query_from_str_with_atoms() {
214 let query_str = "likes(alice, X):- food(X), healthy(X).";
215 let query: JoinQuery = query_str.parse().expect("Failed to parse query");
216
217 assert_eq!(query.head.name, "likes");
218 assert_eq!(query.head.terms[0], Term::Atom("alice".to_string()));
219 assert_eq!(query.head.terms[1], Term::Var("X".to_string()));
220
221 assert_eq!(query.body.len(), 2);
222 }
223
224 #[test]
225 fn test_whitespace_variants() {
226 let cases = [
229 ("extra whitespace", " P(X,Y) :- Q(X),R(Y) . "),
230 ("minimal whitespace", "P(X,Y):-Q(X),R(Y)."),
231 ("spaces after commas", "P( X , Y ) :- Q( X ) , R( Y ) ."),
232 ];
233 for (label, input) in cases {
234 let query: JoinQuery = input
235 .parse()
236 .unwrap_or_else(|e| panic!("Failed to parse {label}: {e:?}"));
237 assert_eq!(query.head.name, "P", "{label}");
238 assert_eq!(query.head.terms.len(), 2, "{label}");
239 assert_eq!(query.body.len(), 2, "{label}");
240 }
241 }
242
243 #[test]
244 fn test_query_from_str_complex() {
245 let query_str = "path(X, Z) :- edge(X, Y), edge(Y, Z), vertex(X), vertex(Y), vertex(Z).";
246 let query: JoinQuery = query_str.parse().expect("Failed to parse complex query");
247
248 assert_eq!(query.head.name, "path");
249 assert_eq!(query.head.terms.len(), 2);
250 assert_eq!(query.body.len(), 5);
251
252 assert_eq!(query.body[0].name, "edge");
253 assert_eq!(query.body[1].name, "edge");
254 assert_eq!(query.body[2].name, "vertex");
255 assert_eq!(query.body[3].name, "vertex");
256 assert_eq!(query.body[4].name, "vertex");
257 }
258
259 #[test]
260 fn test_invalid_syntax() {
261 let cases = [
262 ("missing dot", "P(X) :- Q(X)"),
263 ("missing :-", "P(X) Q(X)."),
264 ("empty body", "P(X) :- ."),
265 ("trailing garbage", "P(X) :- Q(X). GARBAGE"),
266 ("two queries", "P(X) :- Q(X). R(Y) :- S(Y)."),
267 ("double dot", "P(X) :- Q(X).."),
268 ];
269 for (label, input) in cases {
270 let result: Result<JoinQuery, _> = input.parse();
271 assert!(result.is_err(), "{label} should fail to parse");
272 }
273 }
274
275 #[test]
276 fn test_query_from_str_with_placeholder() {
277 let query_str = "result(X, _) :- relation(X, _).";
278 let query: JoinQuery = query_str
279 .parse()
280 .expect("Failed to parse query with placeholder");
281
282 assert_eq!(query.head.name, "result");
283 assert_eq!(query.head.terms.len(), 2);
284 assert_eq!(query.head.terms[0], Term::Var("X".to_string()));
285 assert_eq!(query.head.terms[1], Term::Placeholder);
286
287 assert_eq!(query.body.len(), 1);
288 assert_eq!(query.body[0].terms[0], Term::Var("X".to_string()));
289 assert_eq!(query.body[0].terms[1], Term::Placeholder);
290 }
291
292 #[test]
293 fn test_query_from_str_multiple_placeholders() {
294 let query_str = "query(_, _, Z) :- data(_, _, Z).";
295 let query: JoinQuery = query_str
296 .parse()
297 .expect("Failed to parse query with multiple placeholders");
298
299 assert_eq!(query.head.terms[0], Term::Placeholder);
300 assert_eq!(query.head.terms[1], Term::Placeholder);
301 assert_eq!(query.head.terms[2], Term::Var("Z".to_string()));
302
303 assert_eq!(query.body[0].terms[0], Term::Placeholder);
304 assert_eq!(query.body[0].terms[1], Term::Placeholder);
305 assert_eq!(query.body[0].terms[2], Term::Var("Z".to_string()));
306 }
307
308 #[test]
309 fn test_query_from_str_placeholder_with_atoms() {
310 let query_str = "match(alice, _) :- person(alice, _), active(_).";
311 let query: JoinQuery = query_str
312 .parse()
313 .expect("Failed to parse query with placeholders and atoms");
314
315 assert_eq!(query.head.terms[0], Term::Atom("alice".to_string()));
316 assert_eq!(query.head.terms[1], Term::Placeholder);
317
318 assert_eq!(query.body[0].name, "person");
319 assert_eq!(query.body[0].terms[0], Term::Atom("alice".to_string()));
320 assert_eq!(query.body[0].terms[1], Term::Placeholder);
321
322 assert_eq!(query.body[1].name, "active");
323 assert_eq!(query.body[1].terms[0], Term::Placeholder);
324 }
325
326 #[test]
327 fn test_query_from_str_only_placeholders() {
328 let query_str = "any(_,_) :- data(_,_).";
329 let query: JoinQuery = query_str
330 .parse()
331 .expect("Failed to parse query with only placeholders");
332
333 assert_eq!(query.head.terms.len(), 2);
334 assert_eq!(query.head.terms[0], Term::Placeholder);
335 assert_eq!(query.head.terms[1], Term::Placeholder);
336
337 assert_eq!(query.body[0].terms.len(), 2);
338 assert_eq!(query.body[0].terms[0], Term::Placeholder);
339 assert_eq!(query.body[0].terms[1], Term::Placeholder);
340 }
341
342 #[test]
343 fn test_query_from_str_placeholder_mixed() {
344 let query_str = "filter(X, _, bob, Y) :- source(X, _, bob, Y), check(X, Y).";
345 let query: JoinQuery = query_str
346 .parse()
347 .expect("Failed to parse complex query with placeholders");
348
349 assert_eq!(query.head.terms.len(), 4);
350 assert_eq!(query.head.terms[0], Term::Var("X".to_string()));
351 assert_eq!(query.head.terms[1], Term::Placeholder);
352 assert_eq!(query.head.terms[2], Term::Atom("bob".to_string()));
353 assert_eq!(query.head.terms[3], Term::Var("Y".to_string()));
354
355 assert_eq!(query.body[0].terms.len(), 4);
356 assert_eq!(query.body[0].terms[1], Term::Placeholder);
357 assert_eq!(query.body[1].terms.len(), 2);
358 }
359}
360
361#[cfg(test)]
362mod edge_case_tests {
363 use super::*;
364
365 fn parse(s: &str) -> Result<JoinQuery, String> {
366 s.parse::<JoinQuery>().map_err(|e| format!("{e:?}"))
367 }
368
369 #[test]
370 fn identifiers_with_digits() {
371 let q = parse("edge2(X1, Y2) :- node3(X1), link4(X1, Y2).").unwrap();
372 assert_eq!(q.head.name, "edge2");
373 assert_eq!(q.head.terms[0], Term::Var("X1".to_string()));
374 assert_eq!(q.body[0].name, "node3");
375 }
376
377 #[test]
378 fn multiline_query() {
379 let q = parse("P(X, Y) :-\n Q(X),\n R(Y).").unwrap();
380 assert_eq!(q.head.name, "P");
381 assert_eq!(q.body.len(), 2);
382 }
383
384 #[test]
385 fn duplicate_variables() {
386 let q = parse("P(X, X) :- Q(X, X).").unwrap();
387 assert_eq!(q.head.terms[0], Term::Var("X".to_string()));
388 assert_eq!(q.head.terms[1], Term::Var("X".to_string()));
389 }
390
391 #[test]
392 fn single_char_names() {
393 let q = parse("P(X) :- Q(X).").unwrap();
394 assert_eq!(q.head.name, "P");
395 assert_eq!(q.body[0].name, "Q");
396 }
397
398 #[test]
399 fn rejects_empty_input() {
400 assert!(parse("").is_err());
401 }
402
403 #[test]
404 fn rejects_whitespace_only() {
405 assert!(parse(" ").is_err());
406 }
407
408 #[test]
409 fn rejects_underscore_as_predicate_name() {
410 assert!(parse("_(X) :- Q(X).").is_err());
411 }
412
413 #[test]
414 fn rejects_underscore_alpha_as_term() {
415 assert!(parse("P(_abc) :- Q(X).").is_err());
416 }
417
418 #[test]
419 fn rejects_underscore_numeric_as_term() {
420 assert!(parse("P(_123) :- Q(X).").is_err());
421 }
422
423 #[test]
424 fn rejects_unicode_identifier() {
425 assert!(parse("\u{00d1}ame(X) :- Q(X).").is_err());
426 }
427
428 #[test]
429 fn rejects_numeric_start_identifier() {
430 assert!(parse("123abc(X) :- Q(X).").is_err());
431 }
432
433 #[test]
434 fn many_body_predicates() {
435 let body_preds: Vec<String> = (0..20).map(|i| format!("r{i}(X)")).collect();
436 let query_str = format!("P(X) :- {}.", body_preds.join(", "));
437 let q = parse(&query_str).unwrap();
438 assert_eq!(q.body.len(), 20);
439 }
440
441 #[test]
442 fn many_terms_in_predicate() {
443 let vars: Vec<String> = (0..15).map(|i| format!("X{i}")).collect();
444 let terms = vars.join(", ");
445 let query_str = format!("P({terms}) :- Q({terms}).");
446 let q = parse(&query_str).unwrap();
447 assert_eq!(q.head.terms.len(), 15);
448 assert_eq!(q.body[0].terms.len(), 15);
449 }
450}