1use thiserror::Error;
41
42#[derive(Debug, Clone, PartialEq)]
44pub enum Expr {
45 Eq(String, ExprValue),
47 Neq(String, ExprValue),
49 Gt(String, f64),
51 Gte(String, f64),
53 Lt(String, f64),
55 Lte(String, f64),
57 Contains(String, ExprValue),
59 In(String, Vec<ExprValue>),
61 Exists(String),
63 Not(Box<Expr>),
65 And(Box<Expr>, Box<Expr>),
67 Or(Box<Expr>, Box<Expr>),
69}
70
/// A literal operand appearing on the right-hand side of a comparison.
#[derive(Debug, Clone, PartialEq)]
pub enum ExprValue {
    /// A single- or double-quoted string literal, without its quotes.
    String(String),
    /// A numeric literal; every number is stored as `f64`.
    Number(f64),
    /// The `true` / `false` keywords.
    Bool(bool),
    /// The `null` keyword.
    Null,
}
83
84#[derive(Debug, Error, PartialEq, Eq)]
86pub enum ExprError {
87 #[error("unexpected end of expression")]
89 UnexpectedEnd,
90 #[error("unexpected token: {0}")]
92 UnexpectedToken(String),
93 #[error("unterminated string literal")]
95 UnterminatedString,
96 #[error("invalid number: {0}")]
98 InvalidNumber(String),
99}
100
/// Lexical tokens produced by the lexer and consumed by the parser.
///
/// The derived `Debug` output is used verbatim in parser error messages.
#[derive(Debug, Clone, PartialEq)]
enum Token {
    // Literals and names.
    /// A field name; may contain letters, digits, `_` and `.`.
    Ident(String),
    /// The contents of a quoted string, without the quotes.
    StringLit(String),
    /// A numeric literal.
    NumberLit(f64),

    // Comparison operators: `=`, `!=`, `>`, `>=`, `<`, `<=`.
    Eq,
    Neq,
    Gt,
    Gte,
    Lt,
    Lte,

    // Keywords (recognised case-insensitively by the lexer).
    And,
    Or,
    Not,
    In,
    Exists,
    Contains,
    True,
    False,
    Null,

    // Punctuation: `(`, `)`, `,`.
    LParen,
    RParen,
    Comma,
}
125
/// Byte-oriented cursor over the expression text being tokenized.
struct Lexer<'a> {
    /// The raw bytes of the input string.
    input: &'a [u8],
    /// Index into `input` of the next byte to examine.
    pos: usize,
}
130
131impl<'a> Lexer<'a> {
132 fn new(input: &'a str) -> Self {
133 Self {
134 input: input.as_bytes(),
135 pos: 0,
136 }
137 }
138
139 fn skip_whitespace(&mut self) {
140 while self.pos < self.input.len() && self.input[self.pos].is_ascii_whitespace() {
141 self.pos += 1;
142 }
143 }
144
145 fn peek_byte(&self) -> Option<u8> {
146 self.input.get(self.pos).copied()
147 }
148
149 fn tokenize(&mut self) -> Result<Vec<Token>, ExprError> {
150 let mut tokens = Vec::new();
151 loop {
152 self.skip_whitespace();
153 if self.pos >= self.input.len() {
154 break;
155 }
156 tokens.push(self.next_token()?);
157 }
158 Ok(tokens)
159 }
160
161 fn next_token(&mut self) -> Result<Token, ExprError> {
162 self.skip_whitespace();
163
164 let byte = match self.peek_byte() {
165 Some(b) => b,
166 None => return Err(ExprError::UnexpectedEnd),
167 };
168
169 match byte {
170 b'"' | b'\'' => self.lex_string(),
171 b'!' => {
172 self.pos += 1;
173 if self.peek_byte() == Some(b'=') {
174 self.pos += 1;
175 Ok(Token::Neq)
176 } else {
177 Err(ExprError::UnexpectedToken("!".into()))
178 }
179 }
180 b'=' => {
181 self.pos += 1;
182 Ok(Token::Eq)
183 }
184 b'>' => {
185 self.pos += 1;
186 if self.peek_byte() == Some(b'=') {
187 self.pos += 1;
188 Ok(Token::Gte)
189 } else {
190 Ok(Token::Gt)
191 }
192 }
193 b'<' => {
194 self.pos += 1;
195 if self.peek_byte() == Some(b'=') {
196 self.pos += 1;
197 Ok(Token::Lte)
198 } else {
199 Ok(Token::Lt)
200 }
201 }
202 b'(' => {
203 self.pos += 1;
204 Ok(Token::LParen)
205 }
206 b')' => {
207 self.pos += 1;
208 Ok(Token::RParen)
209 }
210 b',' => {
211 self.pos += 1;
212 Ok(Token::Comma)
213 }
214 b'-' => self.lex_number(),
215 b if b.is_ascii_digit() => self.lex_number(),
216 b if b.is_ascii_alphabetic() || b == b'_' => self.lex_ident_or_keyword(),
217 _ => Err(ExprError::UnexpectedToken(
218 String::from_utf8_lossy(&self.input[self.pos..self.pos + 1]).into_owned(),
219 )),
220 }
221 }
222
223 fn lex_string(&mut self) -> Result<Token, ExprError> {
224 let quote = self.input[self.pos];
225 self.pos += 1;
226 let start = self.pos;
227
228 while self.pos < self.input.len() {
229 if self.input[self.pos] == quote {
230 let value = String::from_utf8_lossy(&self.input[start..self.pos]).into_owned();
231 self.pos += 1;
232 return Ok(Token::StringLit(value));
233 }
234 self.pos += 1;
235 }
236
237 Err(ExprError::UnterminatedString)
238 }
239
240 fn lex_number(&mut self) -> Result<Token, ExprError> {
241 let start = self.pos;
242
243 if self.peek_byte() == Some(b'-') {
244 self.pos += 1;
245 }
246
247 if self.pos >= self.input.len() || !self.input[self.pos].is_ascii_digit() {
248 let fragment = String::from_utf8_lossy(&self.input[start..self.pos]).into_owned();
249 return Err(ExprError::InvalidNumber(fragment));
250 }
251
252 while self.pos < self.input.len() && self.input[self.pos].is_ascii_digit() {
253 self.pos += 1;
254 }
255
256 if self.pos < self.input.len() && self.input[self.pos] == b'.' {
257 self.pos += 1;
258 while self.pos < self.input.len() && self.input[self.pos].is_ascii_digit() {
259 self.pos += 1;
260 }
261 }
262
263 let fragment = String::from_utf8_lossy(&self.input[start..self.pos]).into_owned();
264 let value = fragment
265 .parse::<f64>()
266 .map_err(|_| ExprError::InvalidNumber(fragment))?;
267 Ok(Token::NumberLit(value))
268 }
269
270 fn lex_ident_or_keyword(&mut self) -> Result<Token, ExprError> {
271 let start = self.pos;
272
273 while self.pos < self.input.len()
274 && (self.input[self.pos].is_ascii_alphanumeric()
275 || self.input[self.pos] == b'_'
276 || self.input[self.pos] == b'.')
277 {
278 self.pos += 1;
279 }
280
281 let word = String::from_utf8_lossy(&self.input[start..self.pos]).into_owned();
282 let upper = word.to_ascii_uppercase();
283
284 match upper.as_str() {
285 "AND" => Ok(Token::And),
286 "OR" => Ok(Token::Or),
287 "NOT" => Ok(Token::Not),
288 "IN" => Ok(Token::In),
289 "EXISTS" => Ok(Token::Exists),
290 "CONTAINS" => Ok(Token::Contains),
291 "TRUE" => Ok(Token::True),
292 "FALSE" => Ok(Token::False),
293 "NULL" => Ok(Token::Null),
294 _ => Ok(Token::Ident(word)),
295 }
296 }
297}
298
299struct Parser {
300 tokens: Vec<Token>,
301 pos: usize,
302}
303
304impl Parser {
305 fn new(tokens: Vec<Token>) -> Self {
306 Self { tokens, pos: 0 }
307 }
308
309 fn peek(&self) -> Option<&Token> {
310 self.tokens.get(self.pos)
311 }
312
313 fn advance(&mut self) -> Option<Token> {
314 if self.pos < self.tokens.len() {
315 let token = self.tokens[self.pos].clone();
316 self.pos += 1;
317 Some(token)
318 } else {
319 None
320 }
321 }
322
323 fn parse_expr(&mut self) -> Result<Expr, ExprError> {
324 self.parse_or()
325 }
326
327 fn parse_or(&mut self) -> Result<Expr, ExprError> {
328 let mut left = self.parse_and()?;
329
330 while self.peek() == Some(&Token::Or) {
331 self.advance();
332 let right = self.parse_and()?;
333 left = Expr::Or(Box::new(left), Box::new(right));
334 }
335
336 Ok(left)
337 }
338
339 fn parse_and(&mut self) -> Result<Expr, ExprError> {
340 let mut left = self.parse_primary()?;
341
342 while self.peek() == Some(&Token::And) {
343 self.advance();
344 let right = self.parse_primary()?;
345 left = Expr::And(Box::new(left), Box::new(right));
346 }
347
348 Ok(left)
349 }
350
351 fn parse_primary(&mut self) -> Result<Expr, ExprError> {
352 match self.peek() {
353 Some(Token::Not) => {
354 self.advance();
355 let inner = self.parse_primary()?;
356 Ok(Expr::Not(Box::new(inner)))
357 }
358 Some(Token::LParen) => {
359 self.advance();
360 let inner = self.parse_expr()?;
361 if self.advance() != Some(Token::RParen) {
362 return Err(ExprError::UnexpectedToken("expected ')'".into()));
363 }
364 Ok(inner)
365 }
366 _ => self.parse_comparison(),
367 }
368 }
369
370 fn parse_comparison(&mut self) -> Result<Expr, ExprError> {
371 let field = match self.advance() {
372 Some(Token::Ident(name)) => name,
373 Some(other) => return Err(ExprError::UnexpectedToken(format!("{other:?}"))),
374 None => return Err(ExprError::UnexpectedEnd),
375 };
376
377 let op = match self.advance() {
378 Some(t) => t,
379 None => return Err(ExprError::UnexpectedEnd),
380 };
381
382 match op {
383 Token::Eq => {
384 let value = self.parse_value()?;
385 Ok(Expr::Eq(field, value))
386 }
387 Token::Neq => {
388 let value = self.parse_value()?;
389 Ok(Expr::Neq(field, value))
390 }
391 Token::Gt => {
392 let n = self.parse_number_value()?;
393 Ok(Expr::Gt(field, n))
394 }
395 Token::Gte => {
396 let n = self.parse_number_value()?;
397 Ok(Expr::Gte(field, n))
398 }
399 Token::Lt => {
400 let n = self.parse_number_value()?;
401 Ok(Expr::Lt(field, n))
402 }
403 Token::Lte => {
404 let n = self.parse_number_value()?;
405 Ok(Expr::Lte(field, n))
406 }
407 Token::Contains => {
408 let value = self.parse_value()?;
409 Ok(Expr::Contains(field, value))
410 }
411 Token::In => {
412 if self.advance() != Some(Token::LParen) {
413 return Err(ExprError::UnexpectedToken("expected '(' after IN".into()));
414 }
415 if self.peek() == Some(&Token::RParen) {
416 return Err(ExprError::UnexpectedToken("IN list cannot be empty".into()));
417 }
418 let mut values = vec![self.parse_value()?];
419 while self.peek() == Some(&Token::Comma) {
420 self.advance();
421 values.push(self.parse_value()?);
422 }
423 if self.advance() != Some(Token::RParen) {
424 return Err(ExprError::UnexpectedToken(
425 "expected ')' to close IN list".into(),
426 ));
427 }
428 Ok(Expr::In(field, values))
429 }
430 Token::Exists => Ok(Expr::Exists(field)),
431 other => Err(ExprError::UnexpectedToken(format!("{other:?}"))),
432 }
433 }
434
435 fn parse_value(&mut self) -> Result<ExprValue, ExprError> {
436 match self.advance() {
437 Some(Token::StringLit(s)) => Ok(ExprValue::String(s)),
438 Some(Token::NumberLit(n)) => Ok(ExprValue::Number(n)),
439 Some(Token::True) => Ok(ExprValue::Bool(true)),
440 Some(Token::False) => Ok(ExprValue::Bool(false)),
441 Some(Token::Null) => Ok(ExprValue::Null),
442 Some(other) => Err(ExprError::UnexpectedToken(format!("{other:?}"))),
443 None => Err(ExprError::UnexpectedEnd),
444 }
445 }
446
447 fn parse_number_value(&mut self) -> Result<f64, ExprError> {
448 match self.advance() {
449 Some(Token::NumberLit(n)) => Ok(n),
450 Some(other) => Err(ExprError::UnexpectedToken(format!("{other:?}"))),
451 None => Err(ExprError::UnexpectedEnd),
452 }
453 }
454}
455
456pub fn parse_where(input: &str) -> Result<Expr, ExprError> {
458 let mut lexer = Lexer::new(input);
459 let tokens = lexer.tokenize()?;
460
461 if tokens.is_empty() {
462 return Err(ExprError::UnexpectedEnd);
463 }
464
465 let mut parser = Parser::new(tokens);
466 let expr = parser.parse_expr()?;
467
468 if parser.pos < parser.tokens.len() {
469 return Err(ExprError::UnexpectedToken(format!(
470 "{:?}",
471 parser.tokens[parser.pos]
472 )));
473 }
474
475 Ok(expr)
476}
477
#[cfg(test)]
mod tests {
    use super::*;

    // Small constructors so the expected trees below stay readable.

    /// String literal operand.
    fn text(s: &str) -> ExprValue {
        ExprValue::String(s.into())
    }

    /// Numeric literal operand.
    fn num(n: f64) -> ExprValue {
        ExprValue::Number(n)
    }

    /// `field = value`
    fn eq(field: &str, value: ExprValue) -> Expr {
        Expr::Eq(field.into(), value)
    }

    /// `field IN (values...)`
    fn one_of(field: &str, values: Vec<ExprValue>) -> Expr {
        Expr::In(field.into(), values)
    }

    /// `lhs AND rhs`
    fn both(lhs: Expr, rhs: Expr) -> Expr {
        Expr::And(Box::new(lhs), Box::new(rhs))
    }

    /// `lhs OR rhs`
    fn either(lhs: Expr, rhs: Expr) -> Expr {
        Expr::Or(Box::new(lhs), Box::new(rhs))
    }

    /// `NOT inner`
    fn negate(inner: Expr) -> Expr {
        Expr::Not(Box::new(inner))
    }

    #[test]
    fn parse_simple_equality() {
        let parsed = parse_where(r#"city = "Accra""#).unwrap();
        assert_eq!(parsed, eq("city", text("Accra")));
    }

    #[test]
    fn parse_single_quoted_string() {
        let parsed = parse_where("city = 'Accra'").unwrap();
        assert_eq!(parsed, eq("city", text("Accra")));
    }

    #[test]
    fn parse_numeric_comparison() {
        let parsed = parse_where("age >= 25").unwrap();
        assert_eq!(parsed, Expr::Gte("age".into(), 25.0));
    }

    #[test]
    fn parse_negative_number() {
        let parsed = parse_where("temp > -10").unwrap();
        assert_eq!(parsed, Expr::Gt("temp".into(), -10.0));
    }

    #[test]
    fn parse_float_number() {
        let parsed = parse_where("score >= 3.15").unwrap();
        assert_eq!(parsed, Expr::Gte("score".into(), 3.15));
    }

    #[test]
    fn parse_boolean_value() {
        let parsed = parse_where("active = true").unwrap();
        assert_eq!(parsed, eq("active", ExprValue::Bool(true)));
    }

    #[test]
    fn parse_null_value() {
        let parsed = parse_where("deleted = null").unwrap();
        assert_eq!(parsed, eq("deleted", ExprValue::Null));
    }

    #[test]
    fn parse_not_equal() {
        let parsed = parse_where(r#"status != "inactive""#).unwrap();
        assert_eq!(parsed, Expr::Neq("status".into(), text("inactive")));
    }

    #[test]
    fn parse_contains() {
        let parsed = parse_where(r#"tags CONTAINS "rust""#).unwrap();
        assert_eq!(parsed, Expr::Contains("tags".into(), text("rust")));
    }

    #[test]
    fn parse_dotted_path() {
        let parsed = parse_where(r#"address.city = "Accra""#).unwrap();
        assert_eq!(parsed, eq("address.city", text("Accra")));
    }

    #[test]
    fn parse_and() {
        let parsed = parse_where(r#"city = "Accra" AND age >= 25"#).unwrap();
        let expected = both(eq("city", text("Accra")), Expr::Gte("age".into(), 25.0));
        assert_eq!(parsed, expected);
    }

    #[test]
    fn parse_or() {
        let parsed = parse_where(r#"city = "Accra" OR city = "Lagos""#).unwrap();
        let expected = either(eq("city", text("Accra")), eq("city", text("Lagos")));
        assert_eq!(parsed, expected);
    }

    #[test]
    fn parse_and_or_precedence() {
        let parsed = parse_where(r#"a = "x" OR b = "y" AND c = "z""#).unwrap();
        let expected = either(
            eq("a", text("x")),
            both(eq("b", text("y")), eq("c", text("z"))),
        );
        assert_eq!(parsed, expected);
    }

    #[test]
    fn parse_multiple_and() {
        let parsed = parse_where(r#"a = "x" AND b = "y" AND c = "z""#).unwrap();
        let expected = both(
            both(eq("a", text("x")), eq("b", text("y"))),
            eq("c", text("z")),
        );
        assert_eq!(parsed, expected);
    }

    #[test]
    fn parse_case_insensitive_keywords() {
        let parsed = parse_where(r#"city = "Accra" and age >= 25"#).unwrap();
        let expected = both(eq("city", text("Accra")), Expr::Gte("age".into(), 25.0));
        assert_eq!(parsed, expected);
    }

    #[test]
    fn parse_contains_case_insensitive() {
        let parsed = parse_where(r#"tags contains "rust""#).unwrap();
        assert_eq!(parsed, Expr::Contains("tags".into(), text("rust")));
    }

    #[test]
    fn error_unexpected_end() {
        let failure = parse_where("city =").unwrap_err();
        assert_eq!(failure, ExprError::UnexpectedEnd);
    }

    #[test]
    fn error_unterminated_string() {
        let failure = parse_where(r#"city = "Accra"#).unwrap_err();
        assert_eq!(failure, ExprError::UnterminatedString);
    }

    #[test]
    fn error_empty_input() {
        let failure = parse_where("").unwrap_err();
        assert_eq!(failure, ExprError::UnexpectedEnd);
    }

    #[test]
    fn error_malformed_expression() {
        let failure = parse_where(r#"= "Accra""#).unwrap_err();
        assert!(matches!(failure, ExprError::UnexpectedToken(_)));
    }

    #[test]
    fn parse_in_strings() {
        let parsed = parse_where(r#"status IN ("active", "pending")"#).unwrap();
        let expected = one_of("status", vec![text("active"), text("pending")]);
        assert_eq!(parsed, expected);
    }

    #[test]
    fn parse_in_numbers() {
        let parsed = parse_where("age IN (25, 30, 35)").unwrap();
        let expected = one_of("age", vec![num(25.0), num(30.0), num(35.0)]);
        assert_eq!(parsed, expected);
    }

    #[test]
    fn parse_in_single_value() {
        let parsed = parse_where(r#"status IN ("active")"#).unwrap();
        assert_eq!(parsed, one_of("status", vec![text("active")]));
    }

    #[test]
    fn parse_in_mixed_with_and() {
        let parsed = parse_where(r#"status IN ("active", "pending") AND age > 25"#).unwrap();
        let expected = both(
            one_of("status", vec![text("active"), text("pending")]),
            Expr::Gt("age".into(), 25.0),
        );
        assert_eq!(parsed, expected);
    }

    #[test]
    fn parse_in_case_insensitive() {
        let parsed = parse_where(r#"status in ("active")"#).unwrap();
        assert_eq!(parsed, one_of("status", vec![text("active")]));
    }

    #[test]
    fn parse_exists() {
        let parsed = parse_where("address.city EXISTS").unwrap();
        assert_eq!(parsed, Expr::Exists("address.city".into()));
    }

    #[test]
    fn parse_exists_case_insensitive() {
        let parsed = parse_where("name exists").unwrap();
        assert_eq!(parsed, Expr::Exists("name".into()));
    }

    #[test]
    fn parse_exists_with_and() {
        let parsed = parse_where(r#"email EXISTS AND status = "active""#).unwrap();
        let expected = both(Expr::Exists("email".into()), eq("status", text("active")));
        assert_eq!(parsed, expected);
    }

    #[test]
    fn parse_parenthesized_grouping() {
        let parsed = parse_where(r#"(city = "Accra" OR city = "Lagos") AND age > 25"#).unwrap();
        let expected = both(
            either(eq("city", text("Accra")), eq("city", text("Lagos"))),
            Expr::Gt("age".into(), 25.0),
        );
        assert_eq!(parsed, expected);
    }

    #[test]
    fn parse_nested_parentheses() {
        let parsed = parse_where(r#"(a = "x" AND (b = "y" OR c = "z"))"#).unwrap();
        let expected = both(
            eq("a", text("x")),
            either(eq("b", text("y")), eq("c", text("z"))),
        );
        assert_eq!(parsed, expected);
    }

    #[test]
    fn parse_not_simple() {
        let parsed = parse_where(r#"NOT status = "deleted""#).unwrap();
        assert_eq!(parsed, negate(eq("status", text("deleted"))));
    }

    #[test]
    fn parse_not_parenthesized() {
        let parsed = parse_where(r#"NOT (a = "x" AND b = "y")"#).unwrap();
        let expected = negate(both(eq("a", text("x")), eq("b", text("y"))));
        assert_eq!(parsed, expected);
    }

    #[test]
    fn parse_not_case_insensitive() {
        let parsed = parse_where("not active = true").unwrap();
        assert_eq!(parsed, negate(eq("active", ExprValue::Bool(true))));
    }

    #[test]
    fn parse_not_with_and() {
        let parsed = parse_where(r#"NOT status = "deleted" AND age > 18"#).unwrap();
        let expected = both(
            negate(eq("status", text("deleted"))),
            Expr::Gt("age".into(), 18.0),
        );
        assert_eq!(parsed, expected);
    }

    #[test]
    fn error_empty_in_list() {
        let failure = parse_where("status IN ()").unwrap_err();
        assert!(matches!(failure, ExprError::UnexpectedToken(_)));
    }

    #[test]
    fn parse_double_not() {
        let parsed = parse_where(r#"NOT NOT active = true"#).unwrap();
        let expected = negate(negate(eq("active", ExprValue::Bool(true))));
        assert_eq!(parsed, expected);
    }
}