Skip to main content

zerodds_sql_filter/
parser.rs

1// SPDX-License-Identifier: Apache-2.0
2// Copyright 2026 ZeroDDS Contributors
3
4//! Recursive-descent parser with precedence climbing.
5//!
6//! Grammar (EBNF, not all concrete literals):
7//!
8//! ```text
9//! expr    = or_expr
10//! or_expr = and_expr { OR and_expr }
11//! and_expr= not_expr { AND not_expr }
12//! not_expr= [ NOT ] atom
13//! atom    = '(' expr ')' | cmp
14//! cmp     = operand cmp_op operand
15//! operand = literal | field | '%' DIGITS
16//! ```
17
18use alloc::boxed::Box;
19use alloc::string::String;
20use alloc::vec::Vec;
21
22use crate::ast::{CmpOp, Expr, Operand, Value};
23use crate::lexer::{LexError, Token, tokenize};
24
25/// Parse error with a human-readable message.
26#[derive(Debug, Clone, PartialEq, Eq)]
27pub struct ParseError(pub String);
28
29impl core::fmt::Display for ParseError {
30    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
31        write!(f, "parse error: {}", self.0)
32    }
33}
34
35#[cfg(feature = "std")]
36impl std::error::Error for ParseError {}
37
38impl From<LexError> for ParseError {
39    fn from(e: LexError) -> Self {
40        Self(e.0)
41    }
42}
43
44/// Parse entry point. The entire input must be part of exactly one
45/// expression — leftover strings are treated as errors.
46pub fn parse(input: &str) -> Result<Expr, ParseError> {
47    let tokens = tokenize(input)?;
48    let mut p = Parser { tokens, pos: 0 };
49    let expr = p.parse_or()?;
50    if p.pos != p.tokens.len() {
51        return Err(ParseError(alloc::format!(
52            "unexpected trailing token at position {}",
53            p.pos
54        )));
55    }
56    Ok(expr)
57}
58
59struct Parser {
60    tokens: Vec<Token>,
61    pos: usize,
62}
63
64impl Parser {
65    fn peek(&self) -> Option<&Token> {
66        self.tokens.get(self.pos)
67    }
68
69    fn peek_n(&self, offset: usize) -> Option<&Token> {
70        self.tokens.get(self.pos + offset)
71    }
72
73    fn consume(&mut self) -> Option<Token> {
74        let tok = self.tokens.get(self.pos).cloned();
75        if tok.is_some() {
76            self.pos += 1;
77        }
78        tok
79    }
80
81    fn parse_or(&mut self) -> Result<Expr, ParseError> {
82        let mut left = self.parse_and()?;
83        while matches!(self.peek(), Some(Token::Or)) {
84            self.pos += 1;
85            let right = self.parse_and()?;
86            left = Expr::Or(Box::new(left), Box::new(right));
87        }
88        Ok(left)
89    }
90
91    fn parse_and(&mut self) -> Result<Expr, ParseError> {
92        let mut left = self.parse_not()?;
93        while matches!(self.peek(), Some(Token::And)) {
94            self.pos += 1;
95            let right = self.parse_not()?;
96            left = Expr::And(Box::new(left), Box::new(right));
97        }
98        Ok(left)
99    }
100
101    fn parse_not(&mut self) -> Result<Expr, ParseError> {
102        if matches!(self.peek(), Some(Token::Not)) {
103            self.pos += 1;
104            let inner = self.parse_not()?;
105            return Ok(Expr::Not(Box::new(inner)));
106        }
107        self.parse_atom()
108    }
109
110    fn parse_atom(&mut self) -> Result<Expr, ParseError> {
111        if matches!(self.peek(), Some(Token::LParen)) {
112            self.pos += 1;
113            let inner = self.parse_or()?;
114            if !matches!(self.peek(), Some(Token::RParen)) {
115                return Err(ParseError("expected ')'".into()));
116            }
117            self.pos += 1;
118            return Ok(inner);
119        }
120        self.parse_cmp()
121    }
122
123    fn parse_cmp(&mut self) -> Result<Expr, ParseError> {
124        let lhs = self.parse_operand()?;
125        // BETWEEN-Predicate: `field [NOT] BETWEEN low AND high`
126        // (Spec §B.2.1 BetweenPredicate).
127        let mut negated = false;
128        if matches!(self.peek(), Some(Token::Not)) && matches!(self.peek_n(1), Some(Token::Between))
129        {
130            self.pos += 1;
131            negated = true;
132        }
133        if matches!(self.peek(), Some(Token::Between)) {
134            self.pos += 1;
135            let low = self.parse_operand()?;
136            if !matches!(self.consume(), Some(Token::And)) {
137                return Err(ParseError("expected AND after BETWEEN lower bound".into()));
138            }
139            let high = self.parse_operand()?;
140            return Ok(Expr::Between {
141                field: lhs,
142                low,
143                high,
144                negated,
145            });
146        }
147        if negated {
148            return Err(ParseError("NOT must be followed by BETWEEN".into()));
149        }
150        let op = match self.consume() {
151            Some(Token::Eq) => CmpOp::Eq,
152            Some(Token::Neq) => CmpOp::Neq,
153            Some(Token::Lt) => CmpOp::Lt,
154            Some(Token::Le) => CmpOp::Le,
155            Some(Token::Gt) => CmpOp::Gt,
156            Some(Token::Ge) => CmpOp::Ge,
157            Some(Token::Like) => CmpOp::Like,
158            other => {
159                return Err(ParseError(alloc::format!(
160                    "expected comparison op, got {other:?}"
161                )));
162            }
163        };
164        let rhs = self.parse_operand()?;
165        Ok(Expr::Cmp { lhs, op, rhs })
166    }
167
168    fn parse_operand(&mut self) -> Result<Operand, ParseError> {
169        let tok = self
170            .consume()
171            .ok_or_else(|| ParseError("unexpected end".into()))?;
172        match tok {
173            Token::StrLit(s) => Ok(Operand::Literal(Value::String(s))),
174            Token::IntLit(n) => Ok(Operand::Literal(Value::Int(n))),
175            Token::FloatLit(f) => Ok(Operand::Literal(Value::Float(f))),
176            Token::BoolLit(b) => Ok(Operand::Literal(Value::Bool(b))),
177            Token::Ident(name) => Ok(Operand::Field(name)),
178            Token::Param(i) => Ok(Operand::Param(i)),
179            other => Err(ParseError(alloc::format!(
180                "expected operand, got {other:?}"
181            ))),
182        }
183    }
184}
185
186#[cfg(test)]
187#[allow(clippy::expect_used, clippy::unwrap_used, clippy::panic)]
188mod tests {
189    use super::*;
190
191    #[test]
192    fn parse_simple_eq() {
193        let e = parse("color = 'RED'").expect("parse");
194        match e {
195            Expr::Cmp { lhs, op, rhs } => {
196                assert_eq!(lhs, Operand::Field("color".into()));
197                assert_eq!(op, CmpOp::Eq);
198                assert_eq!(rhs, Operand::Literal(Value::String("RED".into())));
199            }
200            _ => panic!("expected Cmp"),
201        }
202    }
203
204    #[test]
205    fn parse_and_or_precedence() {
206        // AND binds more tightly than OR.
207        let e = parse("a = 1 OR b = 2 AND c = 3").expect("parse");
208        match e {
209            Expr::Or(_, rhs) => {
210                assert!(matches!(*rhs, Expr::And(_, _)));
211            }
212            _ => panic!("expected Or at top level"),
213        }
214    }
215
216    #[test]
217    fn parse_parenthesized() {
218        let e = parse("(a = 1 OR b = 2) AND c = 3").expect("parse");
219        match e {
220            Expr::And(lhs, _) => {
221                assert!(matches!(*lhs, Expr::Or(_, _)));
222            }
223            _ => panic!("expected And at top level"),
224        }
225    }
226
227    #[test]
228    fn parse_not() {
229        let e = parse("NOT a = 1").expect("parse");
230        assert!(matches!(e, Expr::Not(_)));
231    }
232
233    #[test]
234    fn parse_param() {
235        let e = parse("x > %3").expect("parse");
236        match e {
237            Expr::Cmp {
238                rhs: Operand::Param(3),
239                ..
240            } => {}
241            _ => panic!("expected param operand"),
242        }
243    }
244
245    #[test]
246    fn parse_rejects_trailing_garbage() {
247        assert!(parse("a = 1 garbage").is_err());
248    }
249
250    #[test]
251    fn parse_between_predicate() {
252        let e = parse("x BETWEEN 1 AND 10").expect("parse");
253        match e {
254            Expr::Between {
255                field,
256                low,
257                high,
258                negated,
259            } => {
260                assert_eq!(field, Operand::Field("x".into()));
261                assert_eq!(low, Operand::Literal(Value::Int(1)));
262                assert_eq!(high, Operand::Literal(Value::Int(10)));
263                assert!(!negated);
264            }
265            _ => panic!("expected Between"),
266        }
267    }
268
269    #[test]
270    fn parse_not_between_predicate() {
271        let e = parse("x NOT BETWEEN 1 AND 10").expect("parse");
272        match e {
273            Expr::Between { negated, .. } => assert!(negated),
274            _ => panic!("expected negated Between"),
275        }
276    }
277
278    #[test]
279    fn parse_between_with_params() {
280        let e = parse("ts BETWEEN %0 AND %1").expect("parse");
281        let params = e.collect_param_indices();
282        assert_eq!(params, alloc::vec![0, 1]);
283    }
284
285    #[test]
286    fn parse_rejects_unbalanced_parens() {
287        assert!(parse("(a = 1").is_err());
288    }
289
290    #[test]
291    fn parse_node_count() {
292        let e = parse("a = 1 AND b = 2 OR c = 3").expect("parse");
293        // 3 comparisons + 1 AND + 1 OR = 5.
294        assert_eq!(e.node_count(), 5);
295    }
296
297    #[test]
298    fn parse_collects_param_indices() {
299        let e = parse("a = %0 AND b = %2 OR c = %1").expect("parse");
300        let mut idx = e.collect_param_indices();
301        idx.sort_unstable();
302        assert_eq!(idx, vec![0, 1, 2]);
303    }
304}