Skip to main content

zerodds_sql_filter/
parser.rs

1// SPDX-License-Identifier: Apache-2.0
2// Copyright 2026 ZeroDDS Contributors
3
4//! Recursive-Descent-Parser mit Precedence-Klettern.
5//!
6//! Grammar (EBNF, nicht alle konkreten Literale):
7//!
8//! ```text
9//! expr    = or_expr
10//! or_expr = and_expr { OR and_expr }
11//! and_expr= not_expr { AND not_expr }
12//! not_expr= [ NOT ] atom
13//! atom    = '(' expr ')' | cmp
14//! cmp     = operand cmp_op operand
15//! operand = literal | field | '%' DIGITS
16//! ```
17
18use alloc::boxed::Box;
19use alloc::string::String;
20use alloc::vec::Vec;
21
22use crate::ast::{CmpOp, Expr, Operand, Value};
23use crate::lexer::{LexError, Token, tokenize};
24
25/// Parse-Fehler mit menschlich lesbarer Message.
26#[derive(Debug, Clone, PartialEq, Eq)]
27pub struct ParseError(pub String);
28
29impl core::fmt::Display for ParseError {
30    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
31        write!(f, "parse error: {}", self.0)
32    }
33}
34
35#[cfg(feature = "std")]
36impl std::error::Error for ParseError {}
37
38impl From<LexError> for ParseError {
39    fn from(e: LexError) -> Self {
40        Self(e.0)
41    }
42}
43
44/// Parse-Einstieg. Das gesamte Input muss Teil genau eines Ausdrucks
45/// sein — Reststrings werden als Fehler gewertet.
46pub fn parse(input: &str) -> Result<Expr, ParseError> {
47    let tokens = tokenize(input)?;
48    let mut p = Parser { tokens, pos: 0 };
49    let expr = p.parse_or()?;
50    if p.pos != p.tokens.len() {
51        return Err(ParseError(alloc::format!(
52            "unerwartetes Trailing-Token an Position {}",
53            p.pos
54        )));
55    }
56    Ok(expr)
57}
58
59struct Parser {
60    tokens: Vec<Token>,
61    pos: usize,
62}
63
64impl Parser {
65    fn peek(&self) -> Option<&Token> {
66        self.tokens.get(self.pos)
67    }
68
69    fn peek_n(&self, offset: usize) -> Option<&Token> {
70        self.tokens.get(self.pos + offset)
71    }
72
73    fn consume(&mut self) -> Option<Token> {
74        let tok = self.tokens.get(self.pos).cloned();
75        if tok.is_some() {
76            self.pos += 1;
77        }
78        tok
79    }
80
81    fn parse_or(&mut self) -> Result<Expr, ParseError> {
82        let mut left = self.parse_and()?;
83        while matches!(self.peek(), Some(Token::Or)) {
84            self.pos += 1;
85            let right = self.parse_and()?;
86            left = Expr::Or(Box::new(left), Box::new(right));
87        }
88        Ok(left)
89    }
90
91    fn parse_and(&mut self) -> Result<Expr, ParseError> {
92        let mut left = self.parse_not()?;
93        while matches!(self.peek(), Some(Token::And)) {
94            self.pos += 1;
95            let right = self.parse_not()?;
96            left = Expr::And(Box::new(left), Box::new(right));
97        }
98        Ok(left)
99    }
100
101    fn parse_not(&mut self) -> Result<Expr, ParseError> {
102        if matches!(self.peek(), Some(Token::Not)) {
103            self.pos += 1;
104            let inner = self.parse_not()?;
105            return Ok(Expr::Not(Box::new(inner)));
106        }
107        self.parse_atom()
108    }
109
110    fn parse_atom(&mut self) -> Result<Expr, ParseError> {
111        if matches!(self.peek(), Some(Token::LParen)) {
112            self.pos += 1;
113            let inner = self.parse_or()?;
114            if !matches!(self.peek(), Some(Token::RParen)) {
115                return Err(ParseError("erwarte ')'".into()));
116            }
117            self.pos += 1;
118            return Ok(inner);
119        }
120        self.parse_cmp()
121    }
122
123    fn parse_cmp(&mut self) -> Result<Expr, ParseError> {
124        let lhs = self.parse_operand()?;
125        // BETWEEN-Predicate: `field [NOT] BETWEEN low AND high`
126        // (Spec §B.2.1 BetweenPredicate).
127        let mut negated = false;
128        if matches!(self.peek(), Some(Token::Not)) && matches!(self.peek_n(1), Some(Token::Between))
129        {
130            self.pos += 1;
131            negated = true;
132        }
133        if matches!(self.peek(), Some(Token::Between)) {
134            self.pos += 1;
135            let low = self.parse_operand()?;
136            if !matches!(self.consume(), Some(Token::And)) {
137                return Err(ParseError("erwarte AND nach BETWEEN-Untergrenze".into()));
138            }
139            let high = self.parse_operand()?;
140            return Ok(Expr::Between {
141                field: lhs,
142                low,
143                high,
144                negated,
145            });
146        }
147        if negated {
148            return Err(ParseError("NOT muss von BETWEEN gefolgt sein".into()));
149        }
150        let op = match self.consume() {
151            Some(Token::Eq) => CmpOp::Eq,
152            Some(Token::Neq) => CmpOp::Neq,
153            Some(Token::Lt) => CmpOp::Lt,
154            Some(Token::Le) => CmpOp::Le,
155            Some(Token::Gt) => CmpOp::Gt,
156            Some(Token::Ge) => CmpOp::Ge,
157            Some(Token::Like) => CmpOp::Like,
158            other => {
159                return Err(ParseError(alloc::format!(
160                    "erwarte Vergleichs-Op, got {other:?}"
161                )));
162            }
163        };
164        let rhs = self.parse_operand()?;
165        Ok(Expr::Cmp { lhs, op, rhs })
166    }
167
168    fn parse_operand(&mut self) -> Result<Operand, ParseError> {
169        let tok = self
170            .consume()
171            .ok_or_else(|| ParseError("unerwartetes Ende".into()))?;
172        match tok {
173            Token::StrLit(s) => Ok(Operand::Literal(Value::String(s))),
174            Token::IntLit(n) => Ok(Operand::Literal(Value::Int(n))),
175            Token::FloatLit(f) => Ok(Operand::Literal(Value::Float(f))),
176            Token::BoolLit(b) => Ok(Operand::Literal(Value::Bool(b))),
177            Token::Ident(name) => Ok(Operand::Field(name)),
178            Token::Param(i) => Ok(Operand::Param(i)),
179            other => Err(ParseError(alloc::format!("erwarte Operand, got {other:?}"))),
180        }
181    }
182}
183
184#[cfg(test)]
185#[allow(clippy::expect_used, clippy::unwrap_used, clippy::panic)]
186mod tests {
187    use super::*;
188
189    #[test]
190    fn parse_simple_eq() {
191        let e = parse("color = 'RED'").expect("parse");
192        match e {
193            Expr::Cmp { lhs, op, rhs } => {
194                assert_eq!(lhs, Operand::Field("color".into()));
195                assert_eq!(op, CmpOp::Eq);
196                assert_eq!(rhs, Operand::Literal(Value::String("RED".into())));
197            }
198            _ => panic!("erwarte Cmp"),
199        }
200    }
201
202    #[test]
203    fn parse_and_or_precedence() {
204        // AND bindet staerker als OR.
205        let e = parse("a = 1 OR b = 2 AND c = 3").expect("parse");
206        match e {
207            Expr::Or(_, rhs) => {
208                assert!(matches!(*rhs, Expr::And(_, _)));
209            }
210            _ => panic!("erwarte Or auf top-level"),
211        }
212    }
213
214    #[test]
215    fn parse_parenthesized() {
216        let e = parse("(a = 1 OR b = 2) AND c = 3").expect("parse");
217        match e {
218            Expr::And(lhs, _) => {
219                assert!(matches!(*lhs, Expr::Or(_, _)));
220            }
221            _ => panic!("erwarte And auf top-level"),
222        }
223    }
224
225    #[test]
226    fn parse_not() {
227        let e = parse("NOT a = 1").expect("parse");
228        assert!(matches!(e, Expr::Not(_)));
229    }
230
231    #[test]
232    fn parse_param() {
233        let e = parse("x > %3").expect("parse");
234        match e {
235            Expr::Cmp {
236                rhs: Operand::Param(3),
237                ..
238            } => {}
239            _ => panic!("erwarte Param-Operand"),
240        }
241    }
242
243    #[test]
244    fn parse_rejects_trailing_garbage() {
245        assert!(parse("a = 1 garbage").is_err());
246    }
247
248    #[test]
249    fn parse_between_predicate() {
250        let e = parse("x BETWEEN 1 AND 10").expect("parse");
251        match e {
252            Expr::Between {
253                field,
254                low,
255                high,
256                negated,
257            } => {
258                assert_eq!(field, Operand::Field("x".into()));
259                assert_eq!(low, Operand::Literal(Value::Int(1)));
260                assert_eq!(high, Operand::Literal(Value::Int(10)));
261                assert!(!negated);
262            }
263            _ => panic!("erwarte Between"),
264        }
265    }
266
267    #[test]
268    fn parse_not_between_predicate() {
269        let e = parse("x NOT BETWEEN 1 AND 10").expect("parse");
270        match e {
271            Expr::Between { negated, .. } => assert!(negated),
272            _ => panic!("erwarte negiertes Between"),
273        }
274    }
275
276    #[test]
277    fn parse_between_with_params() {
278        let e = parse("ts BETWEEN %0 AND %1").expect("parse");
279        let params = e.collect_param_indices();
280        assert_eq!(params, alloc::vec![0, 1]);
281    }
282
283    #[test]
284    fn parse_rejects_unbalanced_parens() {
285        assert!(parse("(a = 1").is_err());
286    }
287
288    #[test]
289    fn parse_node_count() {
290        let e = parse("a = 1 AND b = 2 OR c = 3").expect("parse");
291        // 3 Vergleiche + 1 AND + 1 OR = 5.
292        assert_eq!(e.node_count(), 5);
293    }
294
295    #[test]
296    fn parse_collects_param_indices() {
297        let e = parse("a = %0 AND b = %2 OR c = %1").expect("parse");
298        let mut idx = e.collect_param_indices();
299        idx.sort_unstable();
300        assert_eq!(idx, vec![0, 1, 2]);
301    }
302}