1use pest::Parser;
38use pest::iterators::{Pair, Pairs};
39use pest_derive::Parser as PestParser;
40
41use crate::error::{Result, VaultdbError};
42use crate::query::{CompareOp, Expr, Predicate};
43use crate::record::Value;
44
/// Pest parser for the where-expression DSL.
///
/// The derive reads its grammar from `where_dsl.pest`; the `Rule` values
/// matched throughout this file name the productions of that grammar.
#[derive(PestParser)]
#[grammar = "where_dsl.pest"]
struct WhereParser;
48
49pub(crate) fn parse(input: &str) -> Result<Expr> {
51 let trimmed = input.trim();
52 if trimmed.is_empty() {
53 return Err(VaultdbError::InvalidWhereExpr(
54 "where expression is empty".into(),
55 ));
56 }
57
58 let mut pairs = WhereParser::parse(Rule::expr_root, trimmed)
59 .map_err(|e| VaultdbError::InvalidWhereExpr(format!("{}", e)))?;
60
61 let expr_pair = pairs
64 .next()
65 .ok_or_else(|| VaultdbError::InvalidWhereExpr("parser returned no expression".into()))?;
66 lower_expr(expr_pair)
67}
68
69fn lower_expr(pair: Pair<Rule>) -> Result<Expr> {
70 match pair.as_rule() {
71 Rule::expr => lower_expr(only_child(pair)?),
72 Rule::or_expr => lower_or(pair.into_inner()),
73 Rule::and_expr => lower_and(pair.into_inner()),
74 Rule::not_expr => lower_not(pair.into_inner()),
75 Rule::atom => lower_expr(only_child(pair)?),
76 Rule::paren_expr => {
77 let inner = pair.into_inner().next().ok_or_else(|| {
80 VaultdbError::InvalidWhereExpr("empty parenthesised expression".into())
81 })?;
82 lower_expr(inner)
83 }
84 Rule::predicate => lower_predicate(only_child(pair)?),
85 other => Err(VaultdbError::InvalidWhereExpr(format!(
86 "unexpected grammar node: {:?}",
87 other
88 ))),
89 }
90}
91
92fn lower_or(pairs: Pairs<Rule>) -> Result<Expr> {
93 let exprs: Vec<Expr> = pairs.map(lower_expr).collect::<Result<Vec<_>>>()?;
94 Ok(if exprs.len() == 1 {
95 exprs.into_iter().next().unwrap()
96 } else {
97 Expr::Or(exprs)
98 })
99}
100
101fn lower_and(pairs: Pairs<Rule>) -> Result<Expr> {
102 let exprs: Vec<Expr> = pairs.map(lower_expr).collect::<Result<Vec<_>>>()?;
103 Ok(if exprs.len() == 1 {
104 exprs.into_iter().next().unwrap()
105 } else {
106 Expr::And(exprs)
107 })
108}
109
110fn lower_not(mut pairs: Pairs<Rule>) -> Result<Expr> {
111 let first = pairs
116 .next()
117 .ok_or_else(|| VaultdbError::InvalidWhereExpr("empty not_expr".into()))?;
118 match first.as_rule() {
119 Rule::atom => lower_expr(first),
120 Rule::not_word => {
121 let operand = pairs
123 .next()
124 .ok_or_else(|| VaultdbError::InvalidWhereExpr("NOT without operand".into()))?;
125 let inner = lower_expr(operand)?;
126 Ok(Expr::Not(Box::new(inner)))
127 }
128 other => Err(VaultdbError::InvalidWhereExpr(format!(
129 "unexpected child of not_expr: {:?}",
130 other
131 ))),
132 }
133}
134
135fn lower_predicate(pair: Pair<Rule>) -> Result<Expr> {
136 match pair.as_rule() {
137 Rule::in_predicate => lower_in(pair),
138 Rule::is_null_predicate => lower_is_null(pair),
139 Rule::regex_predicate => lower_regex(pair),
140 Rule::binary_predicate => lower_binary(pair),
141 Rule::exists_predicate => {
142 lower_is_null(pair)
146 }
147 other => Err(VaultdbError::InvalidWhereExpr(format!(
148 "unexpected predicate variant: {:?}",
149 other
150 ))),
151 }
152}
153
154fn lower_in(pair: Pair<Rule>) -> Result<Expr> {
155 let mut inner = pair.into_inner();
156 let field = read_field(next_pair(&mut inner)?)?;
157 let in_op = next_pair(&mut inner)?;
158 let negated = matches!(only_child(in_op)?.as_rule(), Rule::not_in_kw);
159 let value_list_pair = next_pair(&mut inner)?;
160 let values: Vec<Value> = value_list_pair
161 .into_inner()
162 .map(read_value)
163 .collect::<Result<Vec<_>>>()?;
164
165 if values.is_empty() {
166 return Err(VaultdbError::InvalidWhereExpr(format!(
167 "IN list for field '{}' is empty",
168 field
169 )));
170 }
171
172 let alternatives: Vec<Expr> = values
174 .into_iter()
175 .map(|v| {
176 Expr::Predicate(Predicate::Equals {
177 field: field.clone(),
178 value: v,
179 })
180 })
181 .collect();
182 let union = if alternatives.len() == 1 {
183 alternatives.into_iter().next().unwrap()
184 } else {
185 Expr::Or(alternatives)
186 };
187 Ok(if negated {
188 Expr::Not(Box::new(union))
189 } else {
190 union
191 })
192}
193
194fn lower_is_null(pair: Pair<Rule>) -> Result<Expr> {
195 let mut inner = pair.into_inner();
196 let field = read_field(next_pair(&mut inner)?)?;
197 let op_pair = next_pair(&mut inner)?;
198 let op_kind = only_child(op_pair)?;
199 let predicate = match op_kind.as_rule() {
200 Rule::is_null_kw | Rule::missing_kw => Predicate::Missing { field },
201 Rule::is_not_null_kw | Rule::exists_kw => Predicate::Exists { field },
202 Rule::not_missing_kw => Predicate::Exists { field },
203 Rule::not_exists_kw => Predicate::Missing { field },
204 other => {
205 return Err(VaultdbError::InvalidWhereExpr(format!(
206 "unexpected null/exists op: {:?}",
207 other
208 )));
209 }
210 };
211 Ok(Expr::Predicate(predicate))
212}
213
214fn lower_regex(pair: Pair<Rule>) -> Result<Expr> {
215 let mut inner = pair.into_inner();
216 let field = read_field(next_pair(&mut inner)?)?;
217 let op_pair = next_pair(&mut inner)?;
218 let negated = matches!(only_child(op_pair)?.as_rule(), Rule::not_matches_kw);
219 let regex = read_regex_value(next_pair(&mut inner)?)?;
220 if regex::Regex::new(®ex).is_err() {
223 return Err(VaultdbError::RegexError {
224 pattern: regex,
225 reason: "invalid regex syntax".into(),
226 });
227 }
228 let pred = Expr::Predicate(Predicate::Matches { field, regex });
229 Ok(if negated {
230 Expr::Not(Box::new(pred))
231 } else {
232 pred
233 })
234}
235
236fn read_regex_value(pair: Pair<Rule>) -> Result<String> {
239 if pair.as_rule() != Rule::regex_value {
240 return Err(VaultdbError::InvalidWhereExpr(format!(
241 "expected regex value, got {:?}",
242 pair.as_rule()
243 )));
244 }
245 let inner = only_child(pair)?;
246 match inner.as_rule() {
247 Rule::quoted_string => match read_value_from_quoted(inner)? {
248 Value::String(s) => Ok(s),
249 other => Ok(other.display_value()),
250 },
251 Rule::regex_unquoted => Ok(inner.as_str().to_string()),
252 other => Err(VaultdbError::InvalidWhereExpr(format!(
253 "unexpected regex_value variant: {:?}",
254 other
255 ))),
256 }
257}
258
259fn read_value_from_quoted(pair: Pair<Rule>) -> Result<Value> {
262 let qstring = only_child(pair)?;
263 let raw = match qstring.as_rule() {
264 Rule::dq_string | Rule::sq_string => {
265 let s = qstring.as_str();
266 s[1..s.len() - 1].to_string()
267 }
268 other => {
269 return Err(VaultdbError::InvalidWhereExpr(format!(
270 "unexpected quoted variant: {:?}",
271 other
272 )));
273 }
274 };
275 Ok(Value::String(unescape(&raw)))
276}
277
278fn lower_binary(pair: Pair<Rule>) -> Result<Expr> {
279 let mut inner = pair.into_inner();
280 let field = read_field(next_pair(&mut inner)?)?;
281 let op_str = next_pair(&mut inner)?.as_str().trim();
282 let value = read_value(next_pair(&mut inner)?)?;
283
284 let predicate = match op_str {
285 "=" => Predicate::Equals {
286 field,
287 value: coerce_for_equals(value),
288 },
289 "!=" => Predicate::Compare {
290 field,
291 op: CompareOp::Ne,
292 value: coerce_for_compare(value),
293 },
294 "<" => Predicate::Compare {
295 field,
296 op: CompareOp::Lt,
297 value: coerce_for_compare(value),
298 },
299 ">" => Predicate::Compare {
300 field,
301 op: CompareOp::Gt,
302 value: coerce_for_compare(value),
303 },
304 "<=" => Predicate::Compare {
305 field,
306 op: CompareOp::Le,
307 value: coerce_for_compare(value),
308 },
309 ">=" => Predicate::Compare {
310 field,
311 op: CompareOp::Ge,
312 value: coerce_for_compare(value),
313 },
314 "contains" => Predicate::Contains {
315 field,
316 value: coerce_for_equals(value),
317 },
318 "!contains" => {
319 let inner = Expr::Predicate(Predicate::Contains {
320 field,
321 value: coerce_for_equals(value),
322 });
323 return Ok(Expr::Not(Box::new(inner)));
324 }
325 "startswith" => Predicate::StartsWith {
326 field,
327 value: stringify_value(value),
328 },
329 "!startswith" => {
330 let inner = Expr::Predicate(Predicate::StartsWith {
331 field,
332 value: stringify_value(value),
333 });
334 return Ok(Expr::Not(Box::new(inner)));
335 }
336 "endswith" => Predicate::EndsWith {
337 field,
338 value: stringify_value(value),
339 },
340 "!endswith" => {
341 let inner = Expr::Predicate(Predicate::EndsWith {
342 field,
343 value: stringify_value(value),
344 });
345 return Ok(Expr::Not(Box::new(inner)));
346 }
347 other => {
348 return Err(VaultdbError::InvalidWhereExpr(format!(
349 "unrecognised binary op: {}",
350 other
351 )));
352 }
353 };
354
355 Ok(Expr::Predicate(predicate))
356}
357
358fn only_child(pair: Pair<Rule>) -> Result<Pair<Rule>> {
361 let mut iter = pair.into_inner();
362 let first = iter.next().ok_or_else(|| {
363 VaultdbError::InvalidWhereExpr("expected one child node, got none".into())
364 })?;
365 if iter.next().is_some() {
366 return Err(VaultdbError::InvalidWhereExpr(
367 "expected one child node, got multiple".into(),
368 ));
369 }
370 Ok(first)
371}
372
373fn next_pair<'a>(pairs: &mut Pairs<'a, Rule>) -> Result<Pair<'a, Rule>> {
374 pairs
375 .next()
376 .ok_or_else(|| VaultdbError::InvalidWhereExpr("missing required child node".into()))
377}
378
379fn read_field(pair: Pair<Rule>) -> Result<String> {
380 if pair.as_rule() != Rule::field {
381 return Err(VaultdbError::InvalidWhereExpr(format!(
382 "expected field name, got {:?}",
383 pair.as_rule()
384 )));
385 }
386 Ok(pair.as_str().to_string())
387}
388
389fn read_value(pair: Pair<Rule>) -> Result<Value> {
390 if pair.as_rule() != Rule::value {
391 return Err(VaultdbError::InvalidWhereExpr(format!(
392 "expected value, got {:?}",
393 pair.as_rule()
394 )));
395 }
396 let inner = only_child(pair)?;
397 match inner.as_rule() {
398 Rule::quoted_string => read_value_from_quoted(inner),
399 Rule::unquoted_value => Ok(Value::String(inner.as_str().to_string())),
400 other => Err(VaultdbError::InvalidWhereExpr(format!(
401 "unexpected value variant: {:?}",
402 other
403 ))),
404 }
405}
406
/// Resolves backslash escapes in the body of a quoted string.
///
/// `\n` and `\t` become newline and tab; `\"`, `\'` and `\\` become the bare
/// character. Any other escape sequence is kept as written (backslash
/// included), and a trailing lone backslash is preserved.
fn unescape(s: &str) -> String {
    let mut decoded = String::with_capacity(s.len());
    // True when the previous character was an unconsumed backslash.
    let mut after_backslash = false;

    for ch in s.chars() {
        if after_backslash {
            after_backslash = false;
            match ch {
                'n' => decoded.push('\n'),
                't' => decoded.push('\t'),
                '"' | '\'' | '\\' => decoded.push(ch),
                _ => {
                    // Unknown escape: keep it verbatim.
                    decoded.push('\\');
                    decoded.push(ch);
                }
            }
        } else if ch == '\\' {
            after_backslash = true;
        } else {
            decoded.push(ch);
        }
    }

    if after_backslash {
        // Input ended right after a backslash.
        decoded.push('\\');
    }
    decoded
}
433
434fn coerce_for_equals(v: Value) -> Value {
439 if let Value::String(ref s) = v {
440 if let Ok(i) = s.parse::<i64>() {
441 return Value::Integer(i);
442 }
443 if let Ok(f) = s.parse::<f64>() {
444 return Value::Float(f);
445 }
446 }
447 v
448}
449
/// Coerces the right-hand value of a comparison operator.
///
/// Delegates to `coerce_for_equals`, so comparison operands get exactly the
/// same numeric coercion as equality operands.
fn coerce_for_compare(v: Value) -> Value {
    coerce_for_equals(v)
}
455
456fn stringify_value(v: Value) -> String {
459 match v {
460 Value::String(s) => s,
461 other => other.display_value(),
462 }
463}
464
#[cfg(test)]
mod tests {
    use super::*;

    /// Parses `input`, panicking with the parser's error message on failure.
    fn parse_ok(input: &str) -> Expr {
        parse(input)
            .unwrap_or_else(|e| panic!("expected parse to succeed for {:?}: {}", input, e))
    }

    /// Unwraps an `Equals` predicate into its field and value, panicking with
    /// the offending expression for any other shape.
    fn expect_equals(expr: Expr) -> (String, Value) {
        match expr {
            Expr::Predicate(Predicate::Equals { field, value }) => (field, value),
            other => panic!("expected Equals, got {:?}", other),
        }
    }

    #[test]
    fn simple_equals() {
        let (field, value) = expect_equals(parse_ok("status = active"));
        assert_eq!(field, "status");
        assert_eq!(value, Value::String("active".into()));
    }

    #[test]
    fn numeric_coercion_on_equals() {
        let (_, value) = expect_equals(parse_ok("year = 2020"));
        assert_eq!(value, Value::Integer(2020));
    }

    #[test]
    fn quoted_string_with_spaces() {
        let (_, value) = expect_equals(parse_ok(r#"title = "two words""#));
        assert_eq!(value, Value::String("two words".into()));
    }

    #[test]
    fn quoted_string_with_escaped_quote() {
        let (_, value) = expect_equals(parse_ok(r#"label = "she said \"hi\"""#));
        assert_eq!(value, Value::String(r#"she said "hi""#.into()));
    }

    #[test]
    fn single_quoted_string() {
        let (_, value) = expect_equals(parse_ok("status = 'in review'"));
        assert_eq!(value, Value::String("in review".into()));
    }

    #[test]
    fn contains_with_unquoted_path_value() {
        match parse_ok("tags contains topic/ai") {
            Expr::Predicate(Predicate::Contains { field, value }) => {
                assert_eq!(field, "tags");
                assert_eq!(value, Value::String("topic/ai".into()));
            }
            other => panic!("expected Contains, got {:?}", other),
        }
    }

    #[test]
    fn negation_via_bang_op() {
        match parse_ok("tags !contains topic/movies") {
            Expr::Not(inner) => assert!(
                matches!(*inner, Expr::Predicate(Predicate::Contains { .. })),
                "expected Not(Contains), got Not({:?})",
                inner
            ),
            other => panic!("expected Not, got {:?}", other),
        }
    }

    #[test]
    fn negation_via_not_word() {
        let e = parse_ok("NOT status = draft");
        assert!(matches!(e, Expr::Not(_)));
    }

    #[test]
    fn exists_and_missing() {
        assert!(matches!(
            parse_ok("title exists"),
            Expr::Predicate(Predicate::Exists { .. })
        ));
        assert!(matches!(
            parse_ok("title missing"),
            Expr::Predicate(Predicate::Missing { .. })
        ));
        // The `!` forms flip to the opposite predicate instead of wrapping in Not.
        assert!(matches!(
            parse_ok("title !exists"),
            Expr::Predicate(Predicate::Missing { .. })
        ));
        assert!(matches!(
            parse_ok("title !missing"),
            Expr::Predicate(Predicate::Exists { .. })
        ));
    }

    #[test]
    fn is_null_and_is_not_null() {
        assert!(matches!(
            parse_ok("title IS NULL"),
            Expr::Predicate(Predicate::Missing { .. })
        ));
        assert!(matches!(
            parse_ok("title IS NOT NULL"),
            Expr::Predicate(Predicate::Exists { .. })
        ));
    }

    #[test]
    fn matches_and_not_matches() {
        let e = parse_ok("director matches ^Sam");
        assert!(matches!(e, Expr::Predicate(Predicate::Matches { .. })));
        let e = parse_ok("director !matches ^Sam");
        assert!(matches!(e, Expr::Not(_)));
    }

    #[test]
    fn invalid_regex_at_parse_time() {
        let result = parse("director matches [unclosed");
        assert!(matches!(result, Err(VaultdbError::RegexError { .. })));
    }

    #[test]
    fn comparison_ops_coerce_to_numeric() {
        match parse_ok("year > 2020") {
            Expr::Predicate(Predicate::Compare { op, value, .. }) => {
                assert_eq!(op, CompareOp::Gt);
                assert_eq!(value, Value::Integer(2020));
            }
            other => panic!("expected Compare, got {:?}", other),
        }
    }

    #[test]
    fn or_combines_two_clauses() {
        match parse_ok("status = draft || status = active") {
            Expr::Or(parts) => assert_eq!(parts.len(), 2),
            other => panic!("expected Or, got {:?}", other),
        }
    }

    #[test]
    fn and_combines_two_clauses() {
        match parse_ok("year > 2020 && status = active") {
            Expr::And(parts) => assert_eq!(parts.len(), 2),
            other => panic!("expected And, got {:?}", other),
        }
    }

    #[test]
    fn and_binds_tighter_than_or_sql_convention() {
        // Expected grouping: `a || (b && c)`.
        match parse_ok("status = draft || status = active && hsk = 1") {
            Expr::Or(parts) => {
                assert_eq!(parts.len(), 2);
                assert!(
                    matches!(parts[0], Expr::Predicate(Predicate::Equals { .. })),
                    "first arm should be a single Equals predicate, got {:?}",
                    parts[0]
                );
                assert!(
                    matches!(parts[1], Expr::And(_)),
                    "second arm should be And, got {:?}",
                    parts[1]
                );
            }
            other => panic!("expected Or at top level, got {:?}", other),
        }
    }

    #[test]
    fn parens_override_precedence() {
        // Expected grouping: `(a || b) && c`.
        match parse_ok("(status = draft || status = active) && hsk = 1") {
            Expr::And(parts) => {
                assert_eq!(parts.len(), 2);
                assert!(matches!(parts[0], Expr::Or(_)));
            }
            other => panic!("expected And, got {:?}", other),
        }
    }

    #[test]
    fn nested_parens() {
        let e = parse_ok("((status = draft))");
        assert!(matches!(e, Expr::Predicate(Predicate::Equals { .. })));
    }

    #[test]
    fn in_predicate_desugars_to_or() {
        match parse_ok("status IN (draft, active, pending)") {
            Expr::Or(parts) => {
                assert_eq!(parts.len(), 3);
                for p in &parts {
                    assert!(matches!(p, Expr::Predicate(Predicate::Equals { .. })));
                }
            }
            other => panic!("expected Or, got {:?}", other),
        }
    }

    #[test]
    fn in_predicate_with_quoted_values() {
        match parse_ok(r#"status IN ("in review", "needs follow-up")"#) {
            Expr::Or(parts) => {
                assert_eq!(parts.len(), 2);
                // Unwrap every arm so a non-Equals arm fails the test instead
                // of being skipped silently.
                let values: Vec<Value> = parts
                    .into_iter()
                    .map(|part| expect_equals(part).1)
                    .collect();
                assert_eq!(
                    values,
                    vec![
                        Value::String("in review".into()),
                        Value::String("needs follow-up".into()),
                    ]
                );
            }
            other => panic!("expected Or, got {:?}", other),
        }
    }

    #[test]
    fn in_predicate_single_value_does_not_or_wrap() {
        let e = parse_ok("status IN (draft)");
        assert!(matches!(e, Expr::Predicate(Predicate::Equals { .. })));
    }

    #[test]
    fn not_in_predicate_is_negated() {
        let e = parse_ok("status NOT IN (draft, archived)");
        assert!(matches!(e, Expr::Not(_)));
    }

    #[test]
    fn empty_input_errors() {
        assert!(parse("").is_err());
        assert!(parse("   ").is_err());
    }

    #[test]
    fn unbalanced_parens_error() {
        assert!(parse("(status = active").is_err());
        assert!(parse("status = active)").is_err());
    }

    #[test]
    fn unknown_op_errors() {
        assert!(parse("status :- active").is_err());
    }

    #[test]
    fn deeply_nested_combinator_tree() {
        let e = parse_ok("((a = 1 || b = 2) && (c = 3 || d = 4)) || NOT (e contains foo)");
        match e {
            Expr::Or(parts) => assert_eq!(parts.len(), 2),
            other => panic!("expected Or, got {:?}", other),
        }
    }
}