1use pest::Parser;
38use pest::iterators::{Pair, Pairs};
39use pest_derive::Parser as PestParser;
40
41use crate::error::{Result, VaultdbError};
42use crate::query::{CompareOp, Expr, Predicate};
43use crate::record::Value;
44
/// Pest-derived parser for the `where` expression DSL.
///
/// The grammar is loaded from `where_dsl.pest`; the derive also generates the
/// `Rule` enum that the lowering functions in this file match on.
#[derive(PestParser)]
#[grammar = "where_dsl.pest"]
struct WhereParser;
48
49pub(crate) fn parse(input: &str) -> Result<Expr> {
51 let trimmed = input.trim();
52 if trimmed.is_empty() {
53 return Err(VaultdbError::InvalidWhereExpr(
54 "where expression is empty".into(),
55 ));
56 }
57
58 let mut pairs = WhereParser::parse(Rule::expr_root, trimmed)
59 .map_err(|e| VaultdbError::InvalidWhereExpr(format!("{}", e)))?;
60
61 let expr_pair = pairs
64 .next()
65 .ok_or_else(|| VaultdbError::InvalidWhereExpr("parser returned no expression".into()))?;
66 lower_expr(expr_pair)
67}
68
69fn lower_expr(pair: Pair<Rule>) -> Result<Expr> {
70 match pair.as_rule() {
71 Rule::expr => lower_expr(only_child(pair)?),
72 Rule::or_expr => lower_or(pair.into_inner()),
73 Rule::and_expr => lower_and(pair.into_inner()),
74 Rule::not_expr => lower_not(pair.into_inner()),
75 Rule::atom => lower_expr(only_child(pair)?),
76 Rule::paren_expr => {
77 let inner = pair.into_inner().next().ok_or_else(|| {
80 VaultdbError::InvalidWhereExpr("empty parenthesised expression".into())
81 })?;
82 lower_expr(inner)
83 }
84 Rule::predicate => lower_predicate(only_child(pair)?),
85 other => Err(VaultdbError::InvalidWhereExpr(format!(
86 "unexpected grammar node: {:?}",
87 other
88 ))),
89 }
90}
91
92fn lower_or(pairs: Pairs<Rule>) -> Result<Expr> {
93 let exprs: Vec<Expr> = pairs.map(lower_expr).collect::<Result<Vec<_>>>()?;
94 Ok(if exprs.len() == 1 {
95 exprs.into_iter().next().unwrap()
96 } else {
97 Expr::Or(exprs)
98 })
99}
100
101fn lower_and(pairs: Pairs<Rule>) -> Result<Expr> {
102 let exprs: Vec<Expr> = pairs.map(lower_expr).collect::<Result<Vec<_>>>()?;
103 Ok(if exprs.len() == 1 {
104 exprs.into_iter().next().unwrap()
105 } else {
106 Expr::And(exprs)
107 })
108}
109
110fn lower_not(mut pairs: Pairs<Rule>) -> Result<Expr> {
111 let first = pairs
116 .next()
117 .ok_or_else(|| VaultdbError::InvalidWhereExpr("empty not_expr".into()))?;
118 match first.as_rule() {
119 Rule::atom => lower_expr(first),
120 Rule::not_word => {
121 let operand = pairs
123 .next()
124 .ok_or_else(|| VaultdbError::InvalidWhereExpr("NOT without operand".into()))?;
125 let inner = lower_expr(operand)?;
126 Ok(Expr::Not(Box::new(inner)))
127 }
128 other => Err(VaultdbError::InvalidWhereExpr(format!(
129 "unexpected child of not_expr: {:?}",
130 other
131 ))),
132 }
133}
134
135fn lower_predicate(pair: Pair<Rule>) -> Result<Expr> {
136 match pair.as_rule() {
137 Rule::in_predicate => lower_in(pair),
138 Rule::is_null_predicate => lower_is_null(pair),
139 Rule::regex_predicate => lower_regex(pair),
140 Rule::binary_predicate => lower_binary(pair),
141 Rule::exists_predicate => {
142 lower_is_null(pair)
146 }
147 other => Err(VaultdbError::InvalidWhereExpr(format!(
148 "unexpected predicate variant: {:?}",
149 other
150 ))),
151 }
152}
153
154fn lower_in(pair: Pair<Rule>) -> Result<Expr> {
155 let mut inner = pair.into_inner();
156 let field = read_field(next_pair(&mut inner)?)?;
157 let in_op = next_pair(&mut inner)?;
158 let negated = matches!(only_child(in_op)?.as_rule(), Rule::not_in_kw);
159 let value_list_pair = next_pair(&mut inner)?;
160 let values: Vec<Value> = value_list_pair
161 .into_inner()
162 .map(read_value)
163 .collect::<Result<Vec<_>>>()?;
164
165 if values.is_empty() {
166 return Err(VaultdbError::InvalidWhereExpr(format!(
167 "IN list for field '{}' is empty",
168 field
169 )));
170 }
171
172 let alternatives: Vec<Expr> = values
174 .into_iter()
175 .map(|v| {
176 Expr::Predicate(Predicate::Equals {
177 field: field.clone(),
178 value: v,
179 })
180 })
181 .collect();
182 let union = if alternatives.len() == 1 {
183 alternatives.into_iter().next().unwrap()
184 } else {
185 Expr::Or(alternatives)
186 };
187 Ok(if negated {
188 Expr::Not(Box::new(union))
189 } else {
190 union
191 })
192}
193
194fn lower_is_null(pair: Pair<Rule>) -> Result<Expr> {
195 let mut inner = pair.into_inner();
196 let field = read_field(next_pair(&mut inner)?)?;
197 let op_pair = next_pair(&mut inner)?;
198 let op_kind = only_child(op_pair)?;
199 let predicate = match op_kind.as_rule() {
200 Rule::is_null_kw | Rule::missing_kw => Predicate::Missing { field },
201 Rule::is_not_null_kw | Rule::exists_kw => Predicate::Exists { field },
202 Rule::not_missing_kw => Predicate::Exists { field },
203 Rule::not_exists_kw => Predicate::Missing { field },
204 other => {
205 return Err(VaultdbError::InvalidWhereExpr(format!(
206 "unexpected null/exists op: {:?}",
207 other
208 )));
209 }
210 };
211 Ok(Expr::Predicate(predicate))
212}
213
214fn lower_regex(pair: Pair<Rule>) -> Result<Expr> {
215 let mut inner = pair.into_inner();
216 let field = read_field(next_pair(&mut inner)?)?;
217 let op_pair = next_pair(&mut inner)?;
218 let negated = matches!(only_child(op_pair)?.as_rule(), Rule::not_matches_kw);
219 let regex = read_regex_value(next_pair(&mut inner)?)?;
220 if regex::Regex::new(®ex).is_err() {
223 return Err(VaultdbError::RegexError {
224 pattern: regex,
225 reason: "invalid regex syntax".into(),
226 });
227 }
228 let pred = Expr::Predicate(Predicate::Matches { field, regex });
229 Ok(if negated {
230 Expr::Not(Box::new(pred))
231 } else {
232 pred
233 })
234}
235
236fn read_regex_value(pair: Pair<Rule>) -> Result<String> {
239 if pair.as_rule() != Rule::regex_value {
240 return Err(VaultdbError::InvalidWhereExpr(format!(
241 "expected regex value, got {:?}",
242 pair.as_rule()
243 )));
244 }
245 let inner = only_child(pair)?;
246 match inner.as_rule() {
247 Rule::quoted_string => match read_value_from_quoted(inner)? {
248 Value::String(s) => Ok(s),
249 other => Ok(other.display_value()),
250 },
251 Rule::regex_unquoted => Ok(inner.as_str().to_string()),
252 other => Err(VaultdbError::InvalidWhereExpr(format!(
253 "unexpected regex_value variant: {:?}",
254 other
255 ))),
256 }
257}
258
259fn read_value_from_quoted(pair: Pair<Rule>) -> Result<Value> {
262 let qstring = only_child(pair)?;
263 let raw = match qstring.as_rule() {
264 Rule::dq_string | Rule::sq_string => {
265 let s = qstring.as_str();
266 s[1..s.len() - 1].to_string()
267 }
268 other => {
269 return Err(VaultdbError::InvalidWhereExpr(format!(
270 "unexpected quoted variant: {:?}",
271 other
272 )));
273 }
274 };
275 Ok(Value::String(unescape(&raw)))
276}
277
278fn lower_binary(pair: Pair<Rule>) -> Result<Expr> {
279 let mut inner = pair.into_inner();
280 let field = read_field(next_pair(&mut inner)?)?;
281 let op_str = next_pair(&mut inner)?.as_str().trim();
282 let value = read_value(next_pair(&mut inner)?)?;
283
284 let predicate = match op_str {
285 "=" => Predicate::Equals {
286 field,
287 value: coerce_for_equals(value),
288 },
289 "!=" => Predicate::Compare {
290 field,
291 op: CompareOp::Ne,
292 value: coerce_for_compare(value),
293 },
294 "<" => Predicate::Compare {
295 field,
296 op: CompareOp::Lt,
297 value: coerce_for_compare(value),
298 },
299 ">" => Predicate::Compare {
300 field,
301 op: CompareOp::Gt,
302 value: coerce_for_compare(value),
303 },
304 "<=" => Predicate::Compare {
305 field,
306 op: CompareOp::Le,
307 value: coerce_for_compare(value),
308 },
309 ">=" => Predicate::Compare {
310 field,
311 op: CompareOp::Ge,
312 value: coerce_for_compare(value),
313 },
314 "contains" => Predicate::Contains {
315 field,
316 value: coerce_for_equals(value),
317 },
318 "!contains" => {
319 let inner = Expr::Predicate(Predicate::Contains {
320 field,
321 value: coerce_for_equals(value),
322 });
323 return Ok(Expr::Not(Box::new(inner)));
324 }
325 "startswith" => Predicate::StartsWith {
326 field,
327 value: stringify_value(value),
328 },
329 "!startswith" => {
330 let inner = Expr::Predicate(Predicate::StartsWith {
331 field,
332 value: stringify_value(value),
333 });
334 return Ok(Expr::Not(Box::new(inner)));
335 }
336 "endswith" => Predicate::EndsWith {
337 field,
338 value: stringify_value(value),
339 },
340 "!endswith" => {
341 let inner = Expr::Predicate(Predicate::EndsWith {
342 field,
343 value: stringify_value(value),
344 });
345 return Ok(Expr::Not(Box::new(inner)));
346 }
347 other => {
348 return Err(VaultdbError::InvalidWhereExpr(format!(
349 "unrecognised binary op: {}",
350 other
351 )));
352 }
353 };
354
355 Ok(Expr::Predicate(predicate))
356}
357
358fn only_child(pair: Pair<Rule>) -> Result<Pair<Rule>> {
361 let mut iter = pair.into_inner();
362 let first = iter.next().ok_or_else(|| {
363 VaultdbError::InvalidWhereExpr("expected one child node, got none".into())
364 })?;
365 if iter.next().is_some() {
366 return Err(VaultdbError::InvalidWhereExpr(
367 "expected one child node, got multiple".into(),
368 ));
369 }
370 Ok(first)
371}
372
373fn next_pair<'a>(pairs: &mut Pairs<'a, Rule>) -> Result<Pair<'a, Rule>> {
374 pairs
375 .next()
376 .ok_or_else(|| VaultdbError::InvalidWhereExpr("missing required child node".into()))
377}
378
379fn read_field(pair: Pair<Rule>) -> Result<String> {
380 if pair.as_rule() != Rule::field {
381 return Err(VaultdbError::InvalidWhereExpr(format!(
382 "expected field name, got {:?}",
383 pair.as_rule()
384 )));
385 }
386 Ok(pair.as_str().to_string())
387}
388
389fn read_value(pair: Pair<Rule>) -> Result<Value> {
390 if pair.as_rule() != Rule::value {
391 return Err(VaultdbError::InvalidWhereExpr(format!(
392 "expected value, got {:?}",
393 pair.as_rule()
394 )));
395 }
396 let inner = only_child(pair)?;
397 match inner.as_rule() {
398 Rule::quoted_string => read_value_from_quoted(inner),
399 Rule::unquoted_value => Ok(Value::String(inner.as_str().to_string())),
400 other => Err(VaultdbError::InvalidWhereExpr(format!(
401 "unexpected value variant: {:?}",
402 other
403 ))),
404 }
405}
406
/// Resolves backslash escapes in the body of a quoted string.
///
/// Recognised escapes are `\n`, `\t`, `\"`, `\'` and `\\`. Any other escaped
/// character is kept together with its backslash, and a lone trailing
/// backslash is kept as-is.
fn unescape(s: &str) -> String {
    let mut decoded = String::with_capacity(s.len());
    // True while the previous character was an unconsumed backslash.
    let mut escape_pending = false;

    for ch in s.chars() {
        if escape_pending {
            escape_pending = false;
            match ch {
                'n' => decoded.push('\n'),
                't' => decoded.push('\t'),
                '"' | '\'' | '\\' => decoded.push(ch),
                unknown => {
                    // Unknown escape: keep the backslash and the character.
                    decoded.push('\\');
                    decoded.push(unknown);
                }
            }
        } else if ch == '\\' {
            escape_pending = true;
        } else {
            decoded.push(ch);
        }
    }

    if escape_pending {
        // Input ended right after a backslash.
        decoded.push('\\');
    }
    decoded
}
433
434fn coerce_for_equals(v: Value) -> Value {
445 if let Value::String(ref s) = v {
446 match s.as_str() {
447 "true" => return Value::Bool(true),
448 "false" => return Value::Bool(false),
449 _ => {}
450 }
451 if let Ok(i) = s.parse::<i64>() {
452 return Value::Integer(i);
453 }
454 if let Ok(f) = s.parse::<f64>() {
455 return Value::Float(f);
456 }
457 }
458 v
459}
460
/// Coerces the right-hand side of a comparison operator.
///
/// Currently a straight delegation to [`coerce_for_equals`], so comparison
/// operands follow exactly the same coercion rules as `=`.
fn coerce_for_compare(v: Value) -> Value {
    coerce_for_equals(v)
}
466
467fn stringify_value(v: Value) -> String {
470 match v {
471 Value::String(s) => s,
472 other => other.display_value(),
473 }
474}
475
// Unit tests for the where-expression parser. They exercise only `parse` and
// inspect the resulting `Expr` tree.
#[cfg(test)]
mod tests {
    use super::*;

    /// Parses `input`, panicking with the parser's error message on failure.
    fn parse_ok(input: &str) -> Expr {
        parse(input).unwrap_or_else(|e| panic!("expected parse to succeed for {:?}: {}", input, e))
    }

    /// Parses `input` and returns the value of the top-level `Equals`
    /// predicate, panicking if the expression has any other shape.
    fn equals_value(input: &str) -> Value {
        match parse_ok(input) {
            Expr::Predicate(Predicate::Equals { value, .. }) => value,
            other => panic!("expected Equals for {:?}, got {:?}", input, other),
        }
    }

    #[test]
    fn simple_equals() {
        match parse_ok("status = active") {
            Expr::Predicate(Predicate::Equals { field, value }) => {
                assert_eq!(field, "status");
                assert_eq!(value, Value::String("active".into()));
            }
            other => panic!("expected Equals, got {:?}", other),
        }
    }

    #[test]
    fn numeric_coercion_on_equals() {
        assert_eq!(equals_value("year = 2020"), Value::Integer(2020));
    }

    #[test]
    fn quoted_string_with_spaces() {
        assert_eq!(
            equals_value(r#"title = "two words""#),
            Value::String("two words".into())
        );
    }

    #[test]
    fn quoted_string_with_escaped_quote() {
        assert_eq!(
            equals_value(r#"label = "she said \"hi\"""#),
            Value::String(r#"she said "hi""#.into())
        );
    }

    #[test]
    fn single_quoted_string() {
        assert_eq!(
            equals_value("status = 'in review'"),
            Value::String("in review".into())
        );
    }

    #[test]
    fn contains_with_unquoted_path_value() {
        match parse_ok("tags contains topic/ai") {
            Expr::Predicate(Predicate::Contains { field, value }) => {
                assert_eq!(field, "tags");
                assert_eq!(value, Value::String("topic/ai".into()));
            }
            other => panic!("expected Contains, got {:?}", other),
        }
    }

    #[test]
    fn negation_via_bang_op() {
        match parse_ok("tags !contains topic/movies") {
            Expr::Not(inner) => assert!(
                matches!(*inner, Expr::Predicate(Predicate::Contains { .. })),
                "expected Not(Contains)"
            ),
            other => panic!("expected Not, got {:?}", other),
        }
    }

    #[test]
    fn negation_via_not_word() {
        let e = parse_ok("NOT status = draft");
        assert!(matches!(e, Expr::Not(_)));
    }

    #[test]
    fn exists_and_missing() {
        assert!(matches!(
            parse_ok("title exists"),
            Expr::Predicate(Predicate::Exists { .. })
        ));
        assert!(matches!(
            parse_ok("title missing"),
            Expr::Predicate(Predicate::Missing { .. })
        ));
        assert!(matches!(
            parse_ok("title !exists"),
            Expr::Predicate(Predicate::Missing { .. })
        ));
        assert!(matches!(
            parse_ok("title !missing"),
            Expr::Predicate(Predicate::Exists { .. })
        ));
    }

    #[test]
    fn is_null_and_is_not_null() {
        assert!(matches!(
            parse_ok("title IS NULL"),
            Expr::Predicate(Predicate::Missing { .. })
        ));
        assert!(matches!(
            parse_ok("title IS NOT NULL"),
            Expr::Predicate(Predicate::Exists { .. })
        ));
    }

    #[test]
    fn matches_and_not_matches() {
        let e = parse_ok("director matches ^Sam");
        assert!(matches!(e, Expr::Predicate(Predicate::Matches { .. })));
        let e = parse_ok("director !matches ^Sam");
        assert!(matches!(e, Expr::Not(_)));
    }

    #[test]
    fn invalid_regex_at_parse_time() {
        let result = parse("director matches [unclosed");
        assert!(matches!(result, Err(VaultdbError::RegexError { .. })));
    }

    #[test]
    fn comparison_ops_coerce_to_numeric() {
        match parse_ok("year > 2020") {
            Expr::Predicate(Predicate::Compare { op, value, .. }) => {
                assert_eq!(op, CompareOp::Gt);
                assert_eq!(value, Value::Integer(2020));
            }
            other => panic!("expected Compare, got {:?}", other),
        }
    }

    #[test]
    fn or_combines_two_clauses() {
        match parse_ok("status = draft || status = active") {
            Expr::Or(parts) => assert_eq!(parts.len(), 2),
            other => panic!("expected Or, got {:?}", other),
        }
    }

    #[test]
    fn and_combines_two_clauses() {
        match parse_ok("year > 2020 && status = active") {
            Expr::And(parts) => assert_eq!(parts.len(), 2),
            other => panic!("expected And, got {:?}", other),
        }
    }

    #[test]
    fn and_binds_tighter_than_or_sql_convention() {
        // Expected grouping: `a || (b && c)`.
        match parse_ok("status = draft || status = active && hsk = 1") {
            Expr::Or(parts) => {
                assert_eq!(parts.len(), 2);
                assert!(
                    matches!(parts[0], Expr::Predicate(Predicate::Equals { .. })),
                    "first arm should be a single Equals predicate, got {:?}",
                    parts[0]
                );
                assert!(
                    matches!(parts[1], Expr::And(_)),
                    "second arm should be And, got {:?}",
                    parts[1]
                );
            }
            other => panic!("expected Or at top level, got {:?}", other),
        }
    }

    #[test]
    fn parens_override_precedence() {
        match parse_ok("(status = draft || status = active) && hsk = 1") {
            Expr::And(parts) => {
                assert_eq!(parts.len(), 2);
                assert!(matches!(parts[0], Expr::Or(_)));
            }
            other => panic!("expected And, got {:?}", other),
        }
    }

    #[test]
    fn nested_parens() {
        let e = parse_ok("((status = draft))");
        assert!(matches!(e, Expr::Predicate(Predicate::Equals { .. })));
    }

    #[test]
    fn in_predicate_desugars_to_or() {
        match parse_ok("status IN (draft, active, pending)") {
            Expr::Or(parts) => {
                assert_eq!(parts.len(), 3);
                for p in &parts {
                    assert!(matches!(p, Expr::Predicate(Predicate::Equals { .. })));
                }
            }
            other => panic!("expected Or, got {:?}", other),
        }
    }

    #[test]
    fn in_predicate_with_quoted_values() {
        match parse_ok(r#"status IN ("in review", "needs follow-up")"#) {
            Expr::Or(parts) => {
                assert_eq!(parts.len(), 2);
                // Fail loudly if the first alternative is not an Equals,
                // instead of silently skipping the value check.
                match &parts[0] {
                    Expr::Predicate(Predicate::Equals { value, .. }) => {
                        assert_eq!(*value, Value::String("in review".into()));
                    }
                    other => panic!("expected Equals as first alternative, got {:?}", other),
                }
            }
            other => panic!("expected Or, got {:?}", other),
        }
    }

    #[test]
    fn in_predicate_single_value_does_not_or_wrap() {
        let e = parse_ok("status IN (draft)");
        assert!(matches!(e, Expr::Predicate(Predicate::Equals { .. })));
    }

    #[test]
    fn not_in_predicate_is_negated() {
        let e = parse_ok("status NOT IN (draft, archived)");
        assert!(matches!(e, Expr::Not(_)));
    }

    #[test]
    fn empty_input_errors() {
        assert!(parse("").is_err());
        assert!(parse(" ").is_err());
    }

    #[test]
    fn unbalanced_parens_error() {
        assert!(parse("(status = active").is_err());
        assert!(parse("status = active)").is_err());
    }

    #[test]
    fn unknown_op_errors() {
        assert!(parse("status :- active").is_err());
    }

    #[test]
    fn deeply_nested_combinator_tree() {
        match parse_ok("((a = 1 || b = 2) && (c = 3 || d = 4)) || NOT (e contains foo)") {
            Expr::Or(parts) => assert_eq!(parts.len(), 2),
            other => panic!("expected Or, got {:?}", other),
        }
    }

    #[test]
    fn equals_true_coerces_to_bool() {
        assert_eq!(equals_value("published = true"), Value::Bool(true));
    }

    #[test]
    fn equals_false_coerces_to_bool() {
        assert_eq!(equals_value("published = false"), Value::Bool(false));
    }

    #[test]
    fn mixed_case_stays_string() {
        // Only the exact lowercase spellings are treated as booleans.
        assert_eq!(equals_value("flag = True"), Value::String("True".into()));
    }
}