Skip to main content

reddb_server/storage/query/parser/
expr.rs

1//! Pratt-style parser for the Fase 2 `Expr` AST.
2//!
3//! This module is the Week 2 deliverable of the parser v2 refactor
4//! tracked in `/home/cyber/.claude/plans/squishy-mixing-honey.md`.
5//! It produces `ast::Expr` trees with proper operator precedence,
6//! `Span` tracking from the lexer, and support for the full set of
7//! unary / binary / postfix operators the existing hand-rolled
8//! projection climb covers in Fase 1.3 — plus the missing pieces
9//! (CASE, CAST, parenthesised subexprs, IS NULL, IN, BETWEEN).
10//!
11//! # Design notes
12//!
13//! The parser is now the canonical entry point for SQL expression
14//! parsing in the table-query flow:
15//! - `SELECT` projections parse through `Parser::parse_expr`
16//! - `WHERE` / `HAVING` operands parse through `Parser::parse_expr`
17//! - `ORDER BY` expressions parse through `Parser::parse_expr`
18//!
19//! Some legacy AST slots are still adapter-based (`Projection`,
20//! `Filter`, `GROUP BY` strings), so statement parsing still lowers
21//! `Expr` trees into those older shapes at the boundary.
22//!
23//! # Precedence table (matches PG gram.y modulo features we don't have)
24//!
25//! ```text
26//! prec  operators
27//! ----  ----------------------------------
28//!  10   OR
29//!  20   AND
30//!  25   NOT                      (prefix)
31//!  30   = <> < <= > >=           (comparison)
32//!  32   IS NULL / IS NOT NULL    (postfix)
33//!  33   BETWEEN … AND …          (postfix)
34//!  34   IN (…)                   (postfix)
35//!  40   ||                       (string concat)
36//!  50   + -                      (additive)
37//!  60   * / %                    (multiplicative)
38//!  70   -                        (unary negation)
39//!  80   ::type  CAST(…AS type)   (explicit type coercion)
40//! ```
41//!
42//! Higher precedence binds tighter. The climb uses the classic
43//! "min-precedence" algorithm — `parse_expr_prec(min)` loops consuming
44//! any infix operator whose precedence is ≥ `min`, recursing with
45//! `prec + 1` on the right-hand side for left-associativity.
46
47use super::super::ast::{BinOp, Expr, FieldRef, Span, UnaryOp};
48use super::super::lexer::Token;
49use super::error::ParseError;
50use super::Parser;
51use crate::storage::schema::{DataType, Value};
52
/// Report whether `unit` is one of the duration suffixes accepted after an
/// integer literal (milliseconds, seconds, minutes, hours or days in their
/// short and long spellings). The comparison ignores ASCII case.
fn is_duration_unit(unit: &str) -> bool {
    // Every entry is lowercase ASCII, so an ASCII-case-insensitive compare
    // against each one accepts exactly the same inputs as lowercasing first.
    const UNITS: &[&str] = &[
        "ms",
        "msec",
        "millisecond",
        "milliseconds",
        "s",
        "sec",
        "secs",
        "second",
        "seconds",
        "m",
        "min",
        "mins",
        "minute",
        "minutes",
        "h",
        "hr",
        "hrs",
        "hour",
        "hours",
        "d",
        "day",
        "days",
    ];
    UNITS
        .iter()
        .any(|candidate| unit.eq_ignore_ascii_case(candidate))
}
79
80fn keyword_function_name(token: &Token) -> Option<&'static str> {
81    match token {
82        Token::Count => Some("COUNT"),
83        Token::Sum => Some("SUM"),
84        Token::Avg => Some("AVG"),
85        Token::Min => Some("MIN"),
86        Token::Max => Some("MAX"),
87        Token::First => Some("FIRST"),
88        Token::Last => Some("LAST"),
89        Token::Left => Some("LEFT"),
90        Token::Right => Some("RIGHT"),
91        Token::Kv => Some("KV"),
92        _ => None,
93    }
94}
95
96impl<'a> Parser<'a> {
97    /// Parse a complete expression at the lowest precedence level.
98    /// Entry point for every caller that wants an `Expr` tree.
99    pub fn parse_expr(&mut self) -> Result<Expr, ParseError> {
100        self.parse_expr_prec(0)
101    }
102
103    pub(crate) fn parse_expr_with_min_precedence(
104        &mut self,
105        min_prec: u8,
106    ) -> Result<Expr, ParseError> {
107        self.parse_expr_prec(min_prec)
108    }
109
110    /// Continue parsing an expression after the caller has already
111    /// materialized the left-hand side atom.
112    pub(crate) fn continue_expr(&mut self, left: Expr, min_prec: u8) -> Result<Expr, ParseError> {
113        self.parse_expr_suffix(left, min_prec)
114    }
115
116    /// Pratt climb: parse a unary atom then consume any infix operators
117    /// whose precedence meets or exceeds `min_prec`.
118    fn parse_expr_prec(&mut self, min_prec: u8) -> Result<Expr, ParseError> {
119        // Depth guard: every recursive descent point in the expr
120        // grammar bottoms out here, so checking once is enough to
121        // catch deeply nested literals like `((((((1))))))` and
122        // boolean chains like `NOT NOT NOT NOT … x`.
123        self.enter_depth()?;
124        let result = (|| {
125            let left = self.parse_expr_unary()?;
126            self.parse_expr_suffix(left, min_prec)
127        })();
128        self.exit_depth();
129        result
130    }
131
132    fn parse_expr_suffix(&mut self, mut left: Expr, min_prec: u8) -> Result<Expr, ParseError> {
133        loop {
134            let Some((op, prec)) = self.peek_binop() else {
135                // Not a standard infix op — check for postfix forms.
136                if min_prec <= 32 {
137                    if let Some(node) = self.try_parse_postfix(&left)? {
138                        left = node;
139                        continue;
140                    }
141                }
142                break;
143            };
144            if prec < min_prec {
145                break;
146            }
147            self.advance()?; // consume the operator token
148            let start_span = self.span_start_of(&left);
149            let rhs = self.parse_expr_prec(prec + 1)?;
150            let end_span = self.span_end_of(&rhs);
151            left = Expr::BinaryOp {
152                op,
153                lhs: Box::new(left),
154                rhs: Box::new(rhs),
155                span: Span::new(start_span, end_span),
156            };
157        }
158        Ok(left)
159    }
160
161    /// Parse a unary-prefix expression or drop through to the atomic
162    /// factor. Handles `NOT`, unary `-`, and `+` (no-op sign).
163    fn parse_expr_unary(&mut self) -> Result<Expr, ParseError> {
164        match self.peek() {
165            Token::Not => {
166                let start = self.position();
167                self.advance()?;
168                let operand = self.parse_expr_prec(25)?;
169                let end = self.span_end_of(&operand);
170                Ok(Expr::UnaryOp {
171                    op: UnaryOp::Not,
172                    operand: Box::new(operand),
173                    span: Span::new(start, end),
174                })
175            }
176            Token::Dash => {
177                let start = self.position();
178                self.advance()?;
179                let operand = self.parse_expr_prec(70)?;
180                let end = self.span_end_of(&operand);
181                Ok(Expr::UnaryOp {
182                    op: UnaryOp::Neg,
183                    operand: Box::new(operand),
184                    span: Span::new(start, end),
185                })
186            }
187            Token::Plus => {
188                // Unary plus is a no-op. Consume and recurse.
189                self.advance()?;
190                self.parse_expr_prec(70)
191            }
192            _ => self.parse_expr_factor(),
193        }
194    }
195
196    /// Parse a single atomic expression factor: literal, column ref,
197    /// parenthesised subexpression, CAST, CASE, or function call.
198    fn parse_expr_factor(&mut self) -> Result<Expr, ParseError> {
199        let start = self.position();
200
201        // Parenthesised subexpression: `( expr )`
202        if self.consume(&Token::LParen)? {
203            let inner = self.parse_expr_prec(0)?;
204            self.expect(Token::RParen)?;
205            return Ok(inner);
206        }
207
208        // Literal: true / false / null
209        if self.consume(&Token::True)? {
210            return Ok(Expr::Literal {
211                value: Value::Boolean(true),
212                span: Span::new(start, self.position()),
213            });
214        }
215        if self.consume(&Token::False)? {
216            return Ok(Expr::Literal {
217                value: Value::Boolean(false),
218                span: Span::new(start, self.position()),
219            });
220        }
221        if self.consume(&Token::Null)? {
222            return Ok(Expr::Literal {
223                value: Value::Null,
224                span: Span::new(start, self.position()),
225            });
226        }
227
228        // Numeric literals — with optional duration-unit suffix (e.g. `5m`, `10s`, `2h`).
229        // Duration literals are emitted as Value::Text so downstream code sees "5m" verbatim
230        // (matching the legacy Projection::Column("LIT:5m") path used by time_bucket).
231        if let Token::Integer(n) = *self.peek() {
232            self.advance()?;
233            if let Token::Ident(ref unit) = *self.peek() {
234                if is_duration_unit(unit) {
235                    let duration = format!("{n}{}", unit.to_ascii_lowercase());
236                    self.advance()?;
237                    return Ok(Expr::Literal {
238                        value: Value::text(duration),
239                        span: Span::new(start, self.position()),
240                    });
241                }
242            }
243            return Ok(Expr::Literal {
244                value: Value::Integer(n),
245                span: Span::new(start, self.position()),
246            });
247        }
248        if let Token::Float(n) = *self.peek() {
249            self.advance()?;
250            return Ok(Expr::Literal {
251                value: Value::Float(n),
252                span: Span::new(start, self.position()),
253            });
254        }
255        if let Token::String(ref s) = *self.peek() {
256            let text = s.clone();
257            self.advance()?;
258            return Ok(Expr::Literal {
259                value: Value::text(text),
260                span: Span::new(start, self.position()),
261            });
262        }
263
264        // JSON object `{…}` and array `[…]` literals — delegate to the DML literal parser
265        // which already handles the full JSON value grammar including nested objects.
266        // `JsonLiteral` is the strict-JSON variant emitted by the lexer's sub-mode
267        // when `{` is followed by `"`; both shapes route through `parse_literal_value`.
268        if matches!(
269            self.peek(),
270            Token::LBrace | Token::LBracket | Token::JsonLiteral(_)
271        ) {
272            let value = self
273                .parse_literal_value()
274                .map_err(|e| ParseError::new(e.message, self.position()))?;
275            return Ok(Expr::Literal {
276                value,
277                span: Span::new(start, self.position()),
278            });
279        }
280
281        if self.consume(&Token::Dollar)? {
282            let path = self.parse_dollar_ref_path()?;
283            let path_lc = path.to_ascii_lowercase();
284            let (name, key) = if let Some(rest) = path_lc.strip_prefix("secret.") {
285                ("__SECRET_REF", format!("red.vault/{rest}"))
286            } else if path_lc.starts_with("red.secret.") {
287                let rest = path_lc.trim_start_matches("red.secret.");
288                ("__SECRET_REF", format!("red.vault/{rest}"))
289            } else if let Some(rest) = path_lc.strip_prefix("config.") {
290                ("CONFIG", format!("red.config/{rest}"))
291            } else if path_lc.starts_with("red.config.") {
292                let rest = path_lc.trim_start_matches("red.config.");
293                ("CONFIG", format!("red.config/{rest}"))
294            } else {
295                return Err(ParseError::new(
296                    format!(
297                        "unknown $ reference `${path}`; expected $secret.*, $red.secret.*, $config.*, or $red.config.*"
298                    ),
299                    self.position(),
300                ));
301            };
302            return Ok(Expr::FunctionCall {
303                name: name.to_string(),
304                args: vec![Expr::Literal {
305                    value: Value::text(key),
306                    span: Span::new(start, self.position()),
307                }],
308                span: Span::new(start, self.position()),
309            });
310        }
311
312        if let Some(name) = keyword_function_name(self.peek()) {
313            if matches!(self.peek_next()?, Token::LParen) {
314                self.advance()?; // consume the keyword token
315                return self.parse_function_call_expr_with_name(start, name.to_string());
316            }
317        }
318
319        // Identifier-led constructs: function call, CAST, CASE, column.
320        //
321        // We commit to consuming the identifier immediately and then
322        // inspect the NEXT token to decide shape. This avoids needing
323        // two-token lookahead on the parser. If the next token is `(`
324        // it's a function call; if `.` it's a qualified column ref;
325        // otherwise it's a bare column ref.
326        if let Token::Ident(ref name) = *self.peek() {
327            let name_upper = name.to_uppercase();
328
329            // CAST(expr AS type) — must test before consuming because
330            // CAST is not a reserved keyword; users could legitimately
331            // have a column literally named `cast`. Distinguish by
332            // looking at whether the identifier equals CAST AND is
333            // immediately followed by `(`. Since we can't two-step
334            // lookahead, handle CAST by parsing the ident, then if the
335            // uppercased name is CAST and the next token is `(`,
336            // switch to the CAST form; otherwise the saved name
337            // becomes the first segment of a column ref.
338            if name_upper == "CASE" {
339                return self.parse_case_expr(start);
340            }
341
342            let saved_name = name.clone();
343            self.advance()?; // consume the identifier unconditionally
344
345            // Function call / CAST: IDENT (
346            if matches!(self.peek(), Token::LParen) {
347                return self.parse_function_call_expr_with_name(start, saved_name);
348            }
349
350            // Qualified column: IDENT.IDENT[.IDENT …]
351            if matches!(self.peek(), Token::Dot) {
352                let mut segments = vec![saved_name];
353                while self.consume(&Token::Dot)? {
354                    segments.push(self.expect_ident_or_keyword()?);
355                }
356                let field = FieldRef::TableColumn {
357                    table: segments.remove(0),
358                    column: segments.join("."),
359                };
360                let end = self.position();
361                return Ok(Expr::Column {
362                    field,
363                    span: Span::new(start, end),
364                });
365            }
366
367            // Bare column reference with empty table name.
368            let field = FieldRef::TableColumn {
369                table: String::new(),
370                column: saved_name,
371            };
372            let end = self.position();
373            return Ok(Expr::Column {
374                field,
375                span: Span::new(start, end),
376            });
377        }
378
379        // Default: column reference (optionally qualified: table.column).
380        // Reached only when the leading token is not an Ident. Falls
381        // through to parse_field_ref which handles keyword-shaped
382        // column names.
383        let field = self.parse_field_ref()?;
384        let end = self.position();
385        Ok(Expr::Column {
386            field,
387            span: Span::new(start, end),
388        })
389    }
390
391    fn parse_dollar_ref_path(&mut self) -> Result<String, ParseError> {
392        let mut path = self.expect_ident_or_keyword()?;
393        while self.consume(&Token::Dot)? {
394            let next = self.expect_ident_or_keyword()?;
395            path = format!("{path}.{next}");
396        }
397        Ok(path)
398    }
399
400    fn parse_function_call_expr_with_name(
401        &mut self,
402        start: crate::storage::query::lexer::Position,
403        function_name: String,
404    ) -> Result<Expr, ParseError> {
405        self.expect(Token::LParen)?;
406
407        if function_name.eq_ignore_ascii_case("CAST") {
408            let inner = self.parse_expr_prec(0)?;
409            self.expect(Token::As)?;
410            let type_name = self.expect_ident_or_keyword()?;
411            self.expect(Token::RParen)?;
412            let end = self.position();
413            let Some(target) = DataType::from_sql_name(&type_name) else {
414                return Err(ParseError::new(
415                    // F-05: `type_name` is caller-controlled identifier text.
416                    // Render via `{:?}` so embedded CR/LF/NUL/quotes are
417                    // escaped before reaching downstream serialization sinks.
418                    format!("unknown type name {type_name:?} in CAST"),
419                    self.position(),
420                ));
421            };
422            return Ok(Expr::Cast {
423                inner: Box::new(inner),
424                target,
425                span: Span::new(start, end),
426            });
427        }
428
429        if function_name.eq_ignore_ascii_case("TRIM") {
430            let (name, args) = self.parse_trim_expr_args()?;
431            self.expect(Token::RParen)?;
432            let end = self.position();
433            return Ok(Expr::FunctionCall {
434                name,
435                args,
436                span: Span::new(start, end),
437            });
438        }
439
440        if function_name.eq_ignore_ascii_case("POSITION") {
441            let args = self.parse_position_expr_args()?;
442            self.expect(Token::RParen)?;
443            let end = self.position();
444            return Ok(Expr::FunctionCall {
445                name: function_name,
446                args,
447                span: Span::new(start, end),
448            });
449        }
450
451        if function_name.eq_ignore_ascii_case("SUBSTRING") {
452            let args = self.parse_substring_expr_args()?;
453            self.expect(Token::RParen)?;
454            let end = self.position();
455            return Ok(Expr::FunctionCall {
456                name: function_name,
457                args,
458                span: Span::new(start, end),
459            });
460        }
461
462        if function_name.eq_ignore_ascii_case("COUNT") {
463            if self.consume(&Token::Distinct)? {
464                let arg = self.parse_expr_prec(0)?;
465                self.expect(Token::RParen)?;
466                let end = self.position();
467                return Ok(Expr::FunctionCall {
468                    name: "COUNT_DISTINCT".to_string(),
469                    args: vec![arg],
470                    span: Span::new(start, end),
471                });
472            }
473
474            if self.consume(&Token::Star)? {
475                self.expect(Token::RParen)?;
476                let end = self.position();
477                return Ok(Expr::FunctionCall {
478                    name: function_name,
479                    args: vec![Expr::Column {
480                        field: FieldRef::TableColumn {
481                            table: String::new(),
482                            column: "*".to_string(),
483                        },
484                        span: Span::synthetic(),
485                    }],
486                    span: Span::new(start, end),
487                });
488            }
489        }
490
491        let mut args = Vec::new();
492        if !self.check(&Token::RParen) {
493            loop {
494                args.push(self.parse_expr_prec(0)?);
495                if !self.consume(&Token::Comma)? {
496                    break;
497                }
498            }
499        }
500        self.expect(Token::RParen)?;
501        let end = self.position();
502        Ok(Expr::FunctionCall {
503            name: function_name,
504            args,
505            span: Span::new(start, end),
506        })
507    }
508
509    /// Parse `CASE WHEN cond THEN val [WHEN …] [ELSE val] END`.
510    /// Assumes the caller has already peeked `CASE`.
511    fn parse_case_expr(
512        &mut self,
513        start: crate::storage::query::lexer::Position,
514    ) -> Result<Expr, ParseError> {
515        self.advance()?; // consume CASE
516        let mut branches: Vec<(Expr, Expr)> = Vec::new();
517        loop {
518            if !self.consume_ident_ci("WHEN")? {
519                break;
520            }
521            let cond = self.parse_expr_prec(0)?;
522            if !self.consume_ident_ci("THEN")? {
523                return Err(ParseError::new(
524                    "expected THEN after CASE WHEN condition".to_string(),
525                    self.position(),
526                ));
527            }
528            let then_val = self.parse_expr_prec(0)?;
529            branches.push((cond, then_val));
530        }
531        if branches.is_empty() {
532            return Err(ParseError::new(
533                "CASE must have at least one WHEN branch".to_string(),
534                self.position(),
535            ));
536        }
537        let else_ = if self.consume_ident_ci("ELSE")? {
538            Some(Box::new(self.parse_expr_prec(0)?))
539        } else {
540            None
541        };
542        if !self.consume_ident_ci("END")? {
543            return Err(ParseError::new(
544                "expected END to close CASE expression".to_string(),
545                self.position(),
546            ));
547        }
548        let end = self.position();
549        Ok(Expr::Case {
550            branches,
551            else_,
552            span: Span::new(start, end),
553        })
554    }
555
556    fn parse_trim_expr_args(&mut self) -> Result<(String, Vec<Expr>), ParseError> {
557        let mut function_name = "TRIM".to_string();
558
559        if self.consume_ident_ci("LEADING")? {
560            function_name = "LTRIM".to_string();
561        } else if self.consume_ident_ci("TRAILING")? {
562            function_name = "RTRIM".to_string();
563        } else if self.consume_ident_ci("BOTH")? {
564            function_name = "TRIM".to_string();
565        }
566
567        if self.consume(&Token::From)? {
568            let source = self.parse_expr_prec(0)?;
569            return Ok((function_name, vec![source]));
570        }
571
572        let first = self.parse_expr_prec(0)?;
573
574        if self.consume(&Token::Comma)? {
575            let second = self.parse_expr_prec(0)?;
576            return Ok((function_name, vec![first, second]));
577        }
578
579        if self.consume(&Token::From)? {
580            let source = self.parse_expr_prec(0)?;
581            return Ok((function_name, vec![source, first]));
582        }
583
584        Ok((function_name, vec![first]))
585    }
586
587    /// PostgreSQL-style `POSITION(substr IN string)` or plain
588    /// `POSITION(substr, string)` lowered to the ordinary two-argument
589    /// function form.
590    fn parse_position_expr_args(&mut self) -> Result<Vec<Expr>, ParseError> {
591        // `IN` is also a postfix operator in the main expression grammar, so
592        // parse the first operand above postfix-IN precedence and then consume
593        // the function's `IN` keyword explicitly.
594        let needle = self.parse_expr_prec(35)?;
595        if !self.consume(&Token::Comma)? {
596            self.expect(Token::In)?;
597        }
598        let haystack = self.parse_expr_prec(0)?;
599        Ok(vec![needle, haystack])
600    }
601
602    /// PostgreSQL-style `SUBSTRING` syntax:
603    /// - `SUBSTRING(expr FROM start [FOR count])`
604    /// - `SUBSTRING(expr FOR count [FROM start])`
605    /// - plain function-call form `SUBSTRING(expr, start[, count])`
606    ///
607    /// The SQL-syntax variants are desugared to the comma-arg form so the
608    /// rest of the stack sees the same `Expr::FunctionCall` shape.
609    fn parse_substring_expr_args(&mut self) -> Result<Vec<Expr>, ParseError> {
610        let source = self.parse_expr_prec(0)?;
611
612        if self.consume(&Token::Comma)? {
613            let mut args = vec![source];
614            loop {
615                args.push(self.parse_expr_prec(0)?);
616                if !self.consume(&Token::Comma)? {
617                    break;
618                }
619            }
620            return Ok(args);
621        }
622
623        if self.consume(&Token::From)? {
624            let start = self.parse_expr_prec(0)?;
625            if self.consume(&Token::For)? {
626                let count = self.parse_expr_prec(0)?;
627                return Ok(vec![source, start, count]);
628            }
629            return Ok(vec![source, start]);
630        }
631
632        if self.consume(&Token::For)? {
633            let count = self.parse_expr_prec(0)?;
634            if self.consume(&Token::From)? {
635                let start = self.parse_expr_prec(0)?;
636                return Ok(vec![source, start, count]);
637            }
638            return Ok(vec![source, Expr::lit(Value::Integer(1)), count]);
639        }
640
641        Ok(vec![source])
642    }
643
644    /// Try to consume a postfix operator on top of the already-parsed
645    /// `left` expression: `IS [NOT] NULL`, `[NOT] BETWEEN … AND …`,
646    /// `[NOT] IN (…)`. Returns `Ok(None)` if no postfix follows.
647    ///
648    /// NOT at this position is unambiguous — prefix `NOT` is always
649    /// consumed at `parse_expr_unary` level before reaching postfix.
650    /// So seeing `NOT` here means the user wrote `x NOT BETWEEN …`
651    /// or `x NOT IN …`; we consume it eagerly and require BETWEEN
652    /// or IN to follow.
653    fn try_parse_postfix(&mut self, left: &Expr) -> Result<Option<Expr>, ParseError> {
654        let start = self.span_start_of(left);
655
656        // IS [NOT] NULL
657        if self.consume(&Token::Is)? {
658            let negated = self.consume(&Token::Not)?;
659            self.expect(Token::Null)?;
660            let end = self.position();
661            return Ok(Some(Expr::IsNull {
662                operand: Box::new(left.clone()),
663                negated,
664                span: Span::new(start, end),
665            }));
666        }
667
668        // Detect NOT BETWEEN / NOT IN. NOT is consumed eagerly — we
669        // don't have two-token lookahead and the grammar guarantees
670        // no other valid postfix starts with NOT.
671        let negated = if matches!(self.peek(), Token::Not) {
672            self.advance()?;
673            if !matches!(self.peek(), Token::Between | Token::In) {
674                return Err(ParseError::new(
675                    "expected BETWEEN or IN after postfix NOT".to_string(),
676                    self.position(),
677                ));
678            }
679            true
680        } else {
681            false
682        };
683
684        // BETWEEN low AND high
685        if self.consume(&Token::Between)? {
686            let low = self.parse_expr_prec(34)?;
687            self.expect(Token::And)?;
688            let high = self.parse_expr_prec(34)?;
689            let end = self.position();
690            return Ok(Some(Expr::Between {
691                target: Box::new(left.clone()),
692                low: Box::new(low),
693                high: Box::new(high),
694                negated,
695                span: Span::new(start, end),
696            }));
697        }
698
699        // IN (v1, v2, …)
700        if self.consume(&Token::In)? {
701            self.expect(Token::LParen)?;
702            let mut values = Vec::new();
703            if !self.check(&Token::RParen) {
704                loop {
705                    values.push(self.parse_expr_prec(0)?);
706                    if !self.consume(&Token::Comma)? {
707                        break;
708                    }
709                }
710            }
711            self.expect(Token::RParen)?;
712            let end = self.position();
713            return Ok(Some(Expr::InList {
714                target: Box::new(left.clone()),
715                values,
716                negated,
717                span: Span::new(start, end),
718            }));
719        }
720
721        if negated {
722            // Unreachable because the early-return above already
723            // validated NOT is followed by BETWEEN or IN. Guarded
724            // to keep callers loud if the grammar grows later.
725            return Err(ParseError::new(
726                "internal: NOT consumed without BETWEEN/IN follow".to_string(),
727                self.position(),
728            ));
729        }
730        Ok(None)
731    }
732
733    /// Peek the current token and translate it into a `BinOp` plus
734    /// its precedence. Returns `None` if the token is not a recognised
735    /// infix operator — the caller then tries postfix handling.
736    fn peek_binop(&self) -> Option<(BinOp, u8)> {
737        let op = match self.peek() {
738            Token::Or => BinOp::Or,
739            Token::And => BinOp::And,
740            Token::Eq => BinOp::Eq,
741            Token::Ne => BinOp::Ne,
742            Token::Lt => BinOp::Lt,
743            Token::Le => BinOp::Le,
744            Token::Gt => BinOp::Gt,
745            Token::Ge => BinOp::Ge,
746            Token::DoublePipe => BinOp::Concat,
747            Token::Plus => BinOp::Add,
748            Token::Dash => BinOp::Sub,
749            Token::Star => BinOp::Mul,
750            Token::Slash => BinOp::Div,
751            Token::Percent => BinOp::Mod,
752            _ => return None,
753        };
754        Some((op, op.precedence()))
755    }
756
757    /// Return the start position of an expression's span. Handles the
758    /// synthetic case by falling back to the current parser cursor,
759    /// which is good enough for the Pratt climb since the caller just
760    /// parsed the atom.
761    fn span_start_of(&self, expr: &Expr) -> crate::storage::query::lexer::Position {
762        let s = expr.span();
763        if s.is_synthetic() {
764            self.position()
765        } else {
766            s.start
767        }
768    }
769
770    /// Return the end position of an expression's span — same
771    /// synthetic fallback as `span_start_of`.
772    fn span_end_of(&self, expr: &Expr) -> crate::storage::query::lexer::Position {
773        let s = expr.span();
774        if s.is_synthetic() {
775            self.position()
776        } else {
777            s.end
778        }
779    }
780}
781
782// Avoid `unused` lints in partial-migration builds where the analyzer
783// still does not consume every expression shape directly.
784#[allow(dead_code)]
785fn _expr_module_used(_: Expr) {}
786
#[cfg(test)]
mod tests {
    use super::*;
    use crate::storage::query::ast::FieldRef;

    /// Parse `input` as a single expression.
    ///
    /// Panics with the offending input and the underlying error when
    /// either lexer initialisation or `parse_expr` fails, so a failing
    /// test shows exactly which SQL fragment was rejected.
    fn parse(input: &str) -> Expr {
        let mut parser = Parser::new(input)
            .unwrap_or_else(|err| panic!("lexer init failed for {input:?}: {err:?}"));
        parser
            .parse_expr()
            .unwrap_or_else(|err| panic!("parse_expr failed for {input:?}: {err:?}"))
    }

    /// An integer literal parses to `Value::Integer`.
    #[test]
    fn literal_integer() {
        let e = parse("42");
        match e {
            Expr::Literal {
                value: Value::Integer(42),
                ..
            } => {}
            other => panic!("expected Integer(42), got {other:?}"),
        }
    }

    /// A decimal literal parses to `Value::Float`.
    #[test]
    fn literal_float() {
        // 1.25 is exactly representable and, unlike 3.14, does not trip
        // clippy's deny-by-default `approx_constant` lint.
        let e = parse("1.25");
        match e {
            Expr::Literal {
                value: Value::Float(f),
                ..
            } => assert!((f - 1.25).abs() < 1e-9),
            other => panic!("expected float literal, got {other:?}"),
        }
    }

    /// A single-quoted string parses to `Value::Text` without the quotes.
    #[test]
    fn literal_string() {
        let e = parse("'hello'");
        match e {
            Expr::Literal {
                value: Value::Text(ref s),
                ..
            } if s.as_ref() == "hello" => {}
            other => panic!("expected Text(hello), got {other:?}"),
        }
    }

    /// `TRUE`, `FALSE` and `NULL` each parse to their literal value.
    #[test]
    fn literal_booleans_and_null() {
        assert!(matches!(
            parse("TRUE"),
            Expr::Literal {
                value: Value::Boolean(true),
                ..
            }
        ));
        assert!(matches!(
            parse("FALSE"),
            Expr::Literal {
                value: Value::Boolean(false),
                ..
            }
        ));
        assert!(matches!(
            parse("NULL"),
            Expr::Literal {
                value: Value::Null,
                ..
            }
        ));
    }

    /// A bare identifier parses to a `TableColumn` field reference.
    #[test]
    fn bare_column() {
        let e = parse("user_id");
        match e {
            Expr::Column {
                field: FieldRef::TableColumn { column, .. },
                ..
            } => {
                assert_eq!(column, "user_id");
            }
            other => panic!("expected column, got {other:?}"),
        }
    }

    #[test]
    fn arithmetic_precedence_mul_over_add() {
        // a + b * c  →  Add(a, Mul(b, c))
        let e = parse("a + b * c");
        let Expr::BinaryOp {
            op: BinOp::Add,
            rhs,
            ..
        } = e
        else {
            panic!("root must be Add, got {e:?}");
        };
        assert!(
            matches!(*rhs, Expr::BinaryOp { op: BinOp::Mul, .. }),
            "rhs must be Mul, got {rhs:?}"
        );
    }

    #[test]
    fn arithmetic_left_associativity() {
        // a - b - c  →  Sub(Sub(a, b), c)
        let e = parse("a - b - c");
        let Expr::BinaryOp {
            op: BinOp::Sub,
            lhs,
            ..
        } = e
        else {
            panic!("root must be Sub, got {e:?}");
        };
        assert!(
            matches!(*lhs, Expr::BinaryOp { op: BinOp::Sub, .. }),
            "lhs must be Sub (left-assoc), got {lhs:?}"
        );
    }

    #[test]
    fn parenthesised_override() {
        // (a + b) * c  →  Mul(Add(a, b), c)
        let e = parse("(a + b) * c");
        let Expr::BinaryOp {
            op: BinOp::Mul,
            lhs,
            ..
        } = e
        else {
            panic!("root must be Mul, got {e:?}");
        };
        assert!(
            matches!(*lhs, Expr::BinaryOp { op: BinOp::Add, .. }),
            "lhs must be Add, got {lhs:?}"
        );
    }

    #[test]
    fn comparison_binds_weaker_than_arith() {
        // a + 1 = b - 2
        //   →  Eq(Add(a, 1), Sub(b, 2))
        let e = parse("a + 1 = b - 2");
        let Expr::BinaryOp {
            op: BinOp::Eq,
            lhs,
            rhs,
            ..
        } = e
        else {
            panic!("root must be Eq, got {e:?}");
        };
        assert!(
            matches!(*lhs, Expr::BinaryOp { op: BinOp::Add, .. }),
            "lhs must be Add, got {lhs:?}"
        );
        assert!(
            matches!(*rhs, Expr::BinaryOp { op: BinOp::Sub, .. }),
            "rhs must be Sub, got {rhs:?}"
        );
    }

    #[test]
    fn and_binds_tighter_than_or() {
        // a OR b AND c  →  Or(a, And(b, c))
        let e = parse("a OR b AND c");
        let Expr::BinaryOp {
            op: BinOp::Or, rhs, ..
        } = e
        else {
            panic!("root must be Or, got {e:?}");
        };
        assert!(
            matches!(*rhs, Expr::BinaryOp { op: BinOp::And, .. }),
            "rhs must be And, got {rhs:?}"
        );
    }

    /// A leading `-` produces a `UnaryOp::Neg` node.
    #[test]
    fn unary_negation() {
        let e = parse("-a");
        assert!(
            matches!(
                e,
                Expr::UnaryOp {
                    op: UnaryOp::Neg,
                    ..
                }
            ),
            "expected unary Neg, got {e:?}"
        );
    }

    /// A leading `NOT` produces a `UnaryOp::Not` node.
    #[test]
    fn unary_not() {
        let e = parse("NOT a");
        assert!(
            matches!(
                e,
                Expr::UnaryOp {
                    op: UnaryOp::Not,
                    ..
                }
            ),
            "expected unary Not, got {e:?}"
        );
    }

    /// `||` produces a `BinOp::Concat` node.
    #[test]
    fn concat_operator() {
        let e = parse("'hello' || name");
        assert!(
            matches!(
                e,
                Expr::BinaryOp {
                    op: BinOp::Concat,
                    ..
                }
            ),
            "expected Concat, got {e:?}"
        );
    }

    /// `CAST(expr AS type)` records the target data type.
    #[test]
    fn cast_expr() {
        let e = parse("CAST(age AS TEXT)");
        let Expr::Cast { target, .. } = e else {
            panic!("expected Cast, got {e:?}");
        };
        assert_eq!(target, DataType::Text);
    }

    /// A searched `CASE` keeps one branch per `WHEN` plus the `ELSE` arm.
    #[test]
    fn case_expr() {
        let e = parse("CASE WHEN a = 1 THEN 'one' WHEN a = 2 THEN 'two' ELSE 'other' END");
        let Expr::Case {
            branches, else_, ..
        } = e
        else {
            panic!("expected Case, got {e:?}");
        };
        assert_eq!(branches.len(), 2);
        assert!(else_.is_some());
    }

    #[test]
    fn is_null_postfix() {
        let e = parse("name IS NULL");
        assert!(
            matches!(e, Expr::IsNull { negated: false, .. }),
            "expected non-negated IsNull, got {e:?}"
        );
    }

    #[test]
    fn is_not_null_postfix() {
        let e = parse("name IS NOT NULL");
        assert!(
            matches!(e, Expr::IsNull { negated: true, .. }),
            "expected negated IsNull, got {e:?}"
        );
    }

    /// `BETWEEN` accepts column operands for the target and both bounds.
    #[test]
    fn between_with_columns() {
        let e = parse("temp BETWEEN min_t AND max_t");
        let Expr::Between {
            target,
            low,
            high,
            negated,
            ..
        } = e
        else {
            panic!("expected Between, got {e:?}");
        };
        assert!(!negated);
        assert!(matches!(*target, Expr::Column { .. }));
        assert!(matches!(*low, Expr::Column { .. }));
        assert!(matches!(*high, Expr::Column { .. }));
    }

    #[test]
    fn not_between_negates() {
        let e = parse("temp NOT BETWEEN 0 AND 100");
        assert!(
            matches!(e, Expr::Between { negated: true, .. }),
            "expected negated Between, got {e:?}"
        );
    }

    /// `IN (…)` collects every listed value and is not negated.
    #[test]
    fn in_list_literal() {
        let e = parse("status IN (1, 2, 3)");
        let Expr::InList {
            values, negated, ..
        } = e
        else {
            panic!("expected InList, got {e:?}");
        };
        assert!(!negated);
        assert_eq!(values.len(), 3);
    }

    #[test]
    fn not_in_list() {
        let e = parse("status NOT IN (1, 2)");
        assert!(
            matches!(e, Expr::InList { negated: true, .. }),
            "expected negated InList, got {e:?}"
        );
    }

    /// A call keeps the function name and its argument list.
    #[test]
    fn function_call_with_args() {
        let e = parse("UPPER(name)");
        let Expr::FunctionCall { name, args, .. } = e else {
            panic!("expected FunctionCall, got {e:?}");
        };
        assert_eq!(name, "UPPER");
        assert_eq!(args.len(), 1);
    }

    /// A call may appear as an argument of another call.
    #[test]
    fn nested_function_call() {
        let e = parse("COALESCE(a, UPPER(b))");
        let Expr::FunctionCall { name, args, .. } = e else {
            panic!("expected FunctionCall, got {e:?}");
        };
        assert_eq!(name, "COALESCE");
        assert_eq!(args.len(), 2);
        assert!(matches!(&args[1], Expr::FunctionCall { .. }));
    }

    /// A duration token such as `5m` reaches the AST as a text literal.
    #[test]
    fn duration_literal_parses_as_text() {
        let e = parse("time_bucket(5m)");
        let Expr::FunctionCall { name, args, .. } = e else {
            panic!("expected FunctionCall, got {e:?}");
        };
        assert_eq!(name.to_uppercase(), "TIME_BUCKET");
        assert_eq!(args.len(), 1);
        assert!(
            matches!(&args[0], Expr::Literal { value: Value::Text(s), .. } if s.as_ref() == "5m"),
            "expected Text(\"5m\"), got {:?}",
            args[0]
        );
    }

    #[test]
    fn span_tracks_token_range() {
        // The root span of a parsed binary expression must be a real
        // (non-synthetic) span that covers a non-empty source range.
        let e = parse("123 + 456");
        let span = e.span();
        assert!(!span.is_synthetic(), "root span must be real");
        assert!(span.start.offset < span.end.offset);
    }
}