Skip to main content

reddb_server/storage/query/parser/
expr.rs

1//! Pratt-style parser for the Fase 2 `Expr` AST.
2//!
3//! This module is the Week 2 deliverable of the parser v2 refactor
4//! tracked in `/home/cyber/.claude/plans/squishy-mixing-honey.md`.
5//! It produces `ast::Expr` trees with proper operator precedence,
6//! `Span` tracking from the lexer, and support for the full set of
7//! unary / binary / postfix operators the existing hand-rolled
8//! projection climb covers in Fase 1.3 — plus the missing pieces
9//! (CASE, CAST, parenthesised subexprs, IS NULL, IN, BETWEEN).
10//!
11//! # Design notes
12//!
13//! The parser is now the canonical entry point for SQL expression
14//! parsing in the table-query flow:
15//! - `SELECT` projections parse through `Parser::parse_expr`
16//! - `WHERE` / `HAVING` operands parse through `Parser::parse_expr`
17//! - `ORDER BY` expressions parse through `Parser::parse_expr`
18//!
19//! Some legacy AST slots are still adapter-based (`Projection`,
20//! `Filter`, `GROUP BY` strings), so statement parsing still lowers
21//! `Expr` trees into those older shapes at the boundary.
22//!
23//! # Precedence table (matches PG gram.y modulo features we don't have)
24//!
25//! ```text
26//! prec  operators
27//! ----  ----------------------------------
28//!  10   OR
29//!  20   AND
30//!  25   NOT                      (prefix)
31//!  30   = <> < <= > >=           (comparison)
32//!  32   IS NULL / IS NOT NULL    (postfix)
33//!  33   BETWEEN … AND …          (postfix)
34//!  34   IN (…)                   (postfix)
35//!  40   ||                       (string concat)
36//!  50   + -                      (additive)
37//!  60   * / %                    (multiplicative)
38//!  70   -                        (unary negation)
39//!  80   ::type  CAST(…AS type)   (explicit type coercion)
40//! ```
41//!
42//! Higher precedence binds tighter. The climb uses the classic
43//! "min-precedence" algorithm — `parse_expr_prec(min)` loops consuming
44//! any infix operator whose precedence is ≥ `min`, recursing with
45//! `prec + 1` on the right-hand side for left-associativity.
46
47use super::super::ast::{BinOp, Expr, FieldRef, Span, UnaryOp};
48use super::super::lexer::Token;
49use super::error::ParseError;
50use super::Parser;
51use super::PlaceholderMode;
52use crate::storage::schema::{DataType, Value};
53
/// Returns `true` when `unit` spells one of the recognised duration-unit
/// suffixes (milliseconds, seconds, minutes, hours, days — short and long
/// forms).
///
/// Matching is ASCII case-insensitive and does not allocate: each known
/// unit is compared in place instead of lower-casing `unit` into a
/// temporary `String` first.
fn is_duration_unit(unit: &str) -> bool {
    // Every entry is lower-case ASCII, so `eq_ignore_ascii_case` accepts
    // exactly the inputs whose ASCII-lowercased form equals an entry.
    const DURATION_UNITS: &[&str] = &[
        "ms",
        "msec",
        "millisecond",
        "milliseconds",
        "s",
        "sec",
        "secs",
        "second",
        "seconds",
        "m",
        "min",
        "mins",
        "minute",
        "minutes",
        "h",
        "hr",
        "hrs",
        "hour",
        "hours",
        "d",
        "day",
        "days",
    ];

    DURATION_UNITS
        .iter()
        .any(|known| unit.eq_ignore_ascii_case(known))
}
80
81fn keyword_function_name(token: &Token) -> Option<&'static str> {
82    match token {
83        Token::Count => Some("COUNT"),
84        Token::Sum => Some("SUM"),
85        Token::Avg => Some("AVG"),
86        Token::Min => Some("MIN"),
87        Token::Max => Some("MAX"),
88        Token::First => Some("FIRST"),
89        Token::Last => Some("LAST"),
90        Token::Left => Some("LEFT"),
91        Token::Right => Some("RIGHT"),
92        Token::Kv => Some("KV"),
93        _ => None,
94    }
95}
96
97impl<'a> Parser<'a> {
98    /// Parse a complete expression at the lowest precedence level.
99    /// Entry point for every caller that wants an `Expr` tree.
100    pub fn parse_expr(&mut self) -> Result<Expr, ParseError> {
101        self.parse_expr_prec(0)
102    }
103
104    pub(crate) fn parse_expr_with_min_precedence(
105        &mut self,
106        min_prec: u8,
107    ) -> Result<Expr, ParseError> {
108        self.parse_expr_prec(min_prec)
109    }
110
111    /// Continue parsing an expression after the caller has already
112    /// materialized the left-hand side atom.
113    pub(crate) fn continue_expr(&mut self, left: Expr, min_prec: u8) -> Result<Expr, ParseError> {
114        self.parse_expr_suffix(left, min_prec)
115    }
116
117    /// Pratt climb: parse a unary atom then consume any infix operators
118    /// whose precedence meets or exceeds `min_prec`.
119    fn parse_expr_prec(&mut self, min_prec: u8) -> Result<Expr, ParseError> {
120        // Depth guard: every recursive descent point in the expr
121        // grammar bottoms out here, so checking once is enough to
122        // catch deeply nested literals like `((((((1))))))` and
123        // boolean chains like `NOT NOT NOT NOT … x`.
124        self.enter_depth()?;
125        let result = (|| {
126            let left = self.parse_expr_unary()?;
127            self.parse_expr_suffix(left, min_prec)
128        })();
129        self.exit_depth();
130        result
131    }
132
133    fn parse_expr_suffix(&mut self, mut left: Expr, min_prec: u8) -> Result<Expr, ParseError> {
134        loop {
135            let Some((op, prec)) = self.peek_binop() else {
136                // Not a standard infix op — check for postfix forms.
137                if min_prec <= 32 {
138                    if let Some(node) = self.try_parse_postfix(&left)? {
139                        left = node;
140                        continue;
141                    }
142                }
143                break;
144            };
145            if prec < min_prec {
146                break;
147            }
148            self.advance()?; // consume the operator token
149            let start_span = self.span_start_of(&left);
150            let rhs = self.parse_expr_prec(prec + 1)?;
151            let end_span = self.span_end_of(&rhs);
152            left = Expr::BinaryOp {
153                op,
154                lhs: Box::new(left),
155                rhs: Box::new(rhs),
156                span: Span::new(start_span, end_span),
157            };
158        }
159        Ok(left)
160    }
161
162    /// Parse a unary-prefix expression or drop through to the atomic
163    /// factor. Handles `NOT`, unary `-`, and `+` (no-op sign).
164    fn parse_expr_unary(&mut self) -> Result<Expr, ParseError> {
165        match self.peek() {
166            Token::Not => {
167                let start = self.position();
168                self.advance()?;
169                let operand = self.parse_expr_prec(25)?;
170                let end = self.span_end_of(&operand);
171                Ok(Expr::UnaryOp {
172                    op: UnaryOp::Not,
173                    operand: Box::new(operand),
174                    span: Span::new(start, end),
175                })
176            }
177            Token::Dash => {
178                let start = self.position();
179                self.advance()?;
180                let operand = self.parse_expr_prec(70)?;
181                let end = self.span_end_of(&operand);
182                Ok(Expr::UnaryOp {
183                    op: UnaryOp::Neg,
184                    operand: Box::new(operand),
185                    span: Span::new(start, end),
186                })
187            }
188            Token::Plus => {
189                // Unary plus is a no-op. Consume and recurse.
190                self.advance()?;
191                self.parse_expr_prec(70)
192            }
193            _ => self.parse_expr_factor(),
194        }
195    }
196
197    /// Parse a single atomic expression factor: literal, column ref,
198    /// parenthesised subexpression, CAST, CASE, or function call.
199    fn parse_expr_factor(&mut self) -> Result<Expr, ParseError> {
200        let start = self.position();
201
202        // Parenthesised subexpression: `( expr )`
203        if self.consume(&Token::LParen)? {
204            let inner = self.parse_expr_prec(0)?;
205            self.expect(Token::RParen)?;
206            return Ok(inner);
207        }
208
209        // Literal: true / false / null
210        if self.consume(&Token::True)? {
211            return Ok(Expr::Literal {
212                value: Value::Boolean(true),
213                span: Span::new(start, self.position()),
214            });
215        }
216        if self.consume(&Token::False)? {
217            return Ok(Expr::Literal {
218                value: Value::Boolean(false),
219                span: Span::new(start, self.position()),
220            });
221        }
222        if self.consume(&Token::Null)? {
223            return Ok(Expr::Literal {
224                value: Value::Null,
225                span: Span::new(start, self.position()),
226            });
227        }
228
229        // Numeric literals — with optional duration-unit suffix (e.g. `5m`, `10s`, `2h`).
230        // Duration literals are emitted as Value::Text so downstream code sees "5m" verbatim
231        // (matching the legacy Projection::Column("LIT:5m") path used by time_bucket).
232        if let Token::Integer(n) = *self.peek() {
233            self.advance()?;
234            if let Token::Ident(ref unit) = *self.peek() {
235                if is_duration_unit(unit) {
236                    let duration = format!("{n}{}", unit.to_ascii_lowercase());
237                    self.advance()?;
238                    return Ok(Expr::Literal {
239                        value: Value::text(duration),
240                        span: Span::new(start, self.position()),
241                    });
242                }
243            }
244            return Ok(Expr::Literal {
245                value: Value::Integer(n),
246                span: Span::new(start, self.position()),
247            });
248        }
249        if let Token::Float(n) = *self.peek() {
250            self.advance()?;
251            return Ok(Expr::Literal {
252                value: Value::Float(n),
253                span: Span::new(start, self.position()),
254            });
255        }
256        if let Token::String(ref s) = *self.peek() {
257            let text = s.clone();
258            self.advance()?;
259            return Ok(Expr::Literal {
260                value: Value::text(text),
261                span: Span::new(start, self.position()),
262            });
263        }
264
265        // JSON object `{…}` and array `[…]` literals — delegate to the DML literal parser
266        // which already handles the full JSON value grammar including nested objects.
267        // `JsonLiteral` is the strict-JSON variant emitted by the lexer's sub-mode
268        // when `{` is followed by `"`; both shapes route through `parse_literal_value`.
269        if matches!(
270            self.peek(),
271            Token::LBrace | Token::LBracket | Token::JsonLiteral(_)
272        ) {
273            let value = self
274                .parse_literal_value()
275                .map_err(|e| ParseError::new(e.message, self.position()))?;
276            return Ok(Expr::Literal {
277                value,
278                span: Span::new(start, self.position()),
279            });
280        }
281
282        // `?` positional placeholder — auto-numbered left-to-right
283        // starting at 1. Mixing with `$N` in one statement is rejected.
284        if self.consume(&Token::Question)? {
285            match self.placeholder_mode {
286                PlaceholderMode::Dollar => {
287                    return Err(ParseError::new(
288                        "cannot mix `?` and `$N` placeholders in one statement".to_string(),
289                        self.position(),
290                    ));
291                }
292                _ => self.placeholder_mode = PlaceholderMode::Question,
293            }
294            self.question_count += 1;
295            let index = self.question_count - 1;
296            return Ok(Expr::Parameter {
297                index,
298                span: Span::new(start, self.position()),
299            });
300        }
301
302        if self.consume(&Token::Dollar)? {
303            // `$N` positional parameter placeholder (1-based in source,
304            // 0-based in the AST so it matches `Vec<Value>` indexing).
305            // Rejected at parse time when N < 1; gaps and arity are
306            // validated by the binder once the full statement is parsed.
307            if let Token::Integer(n) = *self.peek() {
308                if n < 1 {
309                    return Err(ParseError::new(
310                        "placeholder index must be >= 1".to_string(),
311                        self.position(),
312                    ));
313                }
314                if self.placeholder_mode == PlaceholderMode::Question {
315                    return Err(ParseError::new(
316                        "cannot mix `?` and `$N` placeholders in one statement".to_string(),
317                        self.position(),
318                    ));
319                }
320                self.placeholder_mode = PlaceholderMode::Dollar;
321                self.advance()?;
322                return Ok(Expr::Parameter {
323                    index: (n - 1) as usize,
324                    span: Span::new(start, self.position()),
325                });
326            }
327            let path = self.parse_dollar_ref_path()?;
328            let path_lc = path.to_ascii_lowercase();
329            let (name, key) = if let Some(rest) = path_lc.strip_prefix("secret.") {
330                ("__SECRET_REF", format!("red.vault/{rest}"))
331            } else if path_lc.starts_with("red.secret.") {
332                let rest = path_lc.trim_start_matches("red.secret.");
333                ("__SECRET_REF", format!("red.vault/{rest}"))
334            } else if let Some(rest) = path_lc.strip_prefix("config.") {
335                ("CONFIG", format!("red.config/{rest}"))
336            } else if path_lc.starts_with("red.config.") {
337                let rest = path_lc.trim_start_matches("red.config.");
338                ("CONFIG", format!("red.config/{rest}"))
339            } else {
340                return Err(ParseError::new(
341                    format!(
342                        "unknown $ reference `${path}`; expected $secret.*, $red.secret.*, $config.*, or $red.config.*"
343                    ),
344                    self.position(),
345                ));
346            };
347            return Ok(Expr::FunctionCall {
348                name: name.to_string(),
349                args: vec![Expr::Literal {
350                    value: Value::text(key),
351                    span: Span::new(start, self.position()),
352                }],
353                span: Span::new(start, self.position()),
354            });
355        }
356
357        if let Some(name) = keyword_function_name(self.peek()) {
358            if matches!(self.peek_next()?, Token::LParen) {
359                self.advance()?; // consume the keyword token
360                return self.parse_function_call_expr_with_name(start, name.to_string());
361            }
362        }
363
364        // Identifier-led constructs: function call, CAST, CASE, column.
365        //
366        // We commit to consuming the identifier immediately and then
367        // inspect the NEXT token to decide shape. This avoids needing
368        // two-token lookahead on the parser. If the next token is `(`
369        // it's a function call; if `.` it's a qualified column ref;
370        // otherwise it's a bare column ref.
371        if let Token::Ident(ref name) = *self.peek() {
372            let name_upper = name.to_uppercase();
373
374            // CAST(expr AS type) — must test before consuming because
375            // CAST is not a reserved keyword; users could legitimately
376            // have a column literally named `cast`. Distinguish by
377            // looking at whether the identifier equals CAST AND is
378            // immediately followed by `(`. Since we can't two-step
379            // lookahead, handle CAST by parsing the ident, then if the
380            // uppercased name is CAST and the next token is `(`,
381            // switch to the CAST form; otherwise the saved name
382            // becomes the first segment of a column ref.
383            if name_upper == "CASE" {
384                return self.parse_case_expr(start);
385            }
386
387            let saved_name = name.clone();
388            self.advance()?; // consume the identifier unconditionally
389
390            // Function call / CAST: IDENT (
391            if matches!(self.peek(), Token::LParen) {
392                return self.parse_function_call_expr_with_name(start, saved_name);
393            }
394
395            // Qualified column: IDENT.IDENT[.IDENT …]
396            if matches!(self.peek(), Token::Dot) {
397                let mut segments = vec![saved_name];
398                while self.consume(&Token::Dot)? {
399                    segments.push(self.expect_ident_or_keyword()?);
400                }
401                let field = FieldRef::TableColumn {
402                    table: segments.remove(0),
403                    column: segments.join("."),
404                };
405                let end = self.position();
406                return Ok(Expr::Column {
407                    field,
408                    span: Span::new(start, end),
409                });
410            }
411
412            // Bare column reference with empty table name.
413            let field = FieldRef::TableColumn {
414                table: String::new(),
415                column: saved_name,
416            };
417            let end = self.position();
418            return Ok(Expr::Column {
419                field,
420                span: Span::new(start, end),
421            });
422        }
423
424        // Default: column reference (optionally qualified: table.column).
425        // Reached only when the leading token is not an Ident. Falls
426        // through to parse_field_ref which handles keyword-shaped
427        // column names.
428        let field = self.parse_field_ref()?;
429        let end = self.position();
430        Ok(Expr::Column {
431            field,
432            span: Span::new(start, end),
433        })
434    }
435
436    fn parse_dollar_ref_path(&mut self) -> Result<String, ParseError> {
437        let mut path = self.expect_ident_or_keyword()?;
438        while self.consume(&Token::Dot)? {
439            let next = self.expect_ident_or_keyword()?;
440            path = format!("{path}.{next}");
441        }
442        Ok(path)
443    }
444
445    fn parse_function_call_expr_with_name(
446        &mut self,
447        start: crate::storage::query::lexer::Position,
448        function_name: String,
449    ) -> Result<Expr, ParseError> {
450        self.expect(Token::LParen)?;
451
452        if function_name.eq_ignore_ascii_case("CAST") {
453            let inner = self.parse_expr_prec(0)?;
454            self.expect(Token::As)?;
455            let type_name = self.expect_ident_or_keyword()?;
456            self.expect(Token::RParen)?;
457            let end = self.position();
458            let Some(target) = DataType::from_sql_name(&type_name) else {
459                return Err(ParseError::new(
460                    // F-05: `type_name` is caller-controlled identifier text.
461                    // Render via `{:?}` so embedded CR/LF/NUL/quotes are
462                    // escaped before reaching downstream serialization sinks.
463                    format!("unknown type name {type_name:?} in CAST"),
464                    self.position(),
465                ));
466            };
467            return Ok(Expr::Cast {
468                inner: Box::new(inner),
469                target,
470                span: Span::new(start, end),
471            });
472        }
473
474        if function_name.eq_ignore_ascii_case("TRIM") {
475            let (name, args) = self.parse_trim_expr_args()?;
476            self.expect(Token::RParen)?;
477            let end = self.position();
478            return Ok(Expr::FunctionCall {
479                name,
480                args,
481                span: Span::new(start, end),
482            });
483        }
484
485        if function_name.eq_ignore_ascii_case("POSITION") {
486            let args = self.parse_position_expr_args()?;
487            self.expect(Token::RParen)?;
488            let end = self.position();
489            return Ok(Expr::FunctionCall {
490                name: function_name,
491                args,
492                span: Span::new(start, end),
493            });
494        }
495
496        if function_name.eq_ignore_ascii_case("SUBSTRING") {
497            let args = self.parse_substring_expr_args()?;
498            self.expect(Token::RParen)?;
499            let end = self.position();
500            return Ok(Expr::FunctionCall {
501                name: function_name,
502                args,
503                span: Span::new(start, end),
504            });
505        }
506
507        if function_name.eq_ignore_ascii_case("COUNT") {
508            if self.consume(&Token::Distinct)? {
509                let arg = self.parse_expr_prec(0)?;
510                self.expect(Token::RParen)?;
511                let end = self.position();
512                return Ok(Expr::FunctionCall {
513                    name: "COUNT_DISTINCT".to_string(),
514                    args: vec![arg],
515                    span: Span::new(start, end),
516                });
517            }
518
519            if self.consume(&Token::Star)? {
520                self.expect(Token::RParen)?;
521                let end = self.position();
522                return Ok(Expr::FunctionCall {
523                    name: function_name,
524                    args: vec![Expr::Column {
525                        field: FieldRef::TableColumn {
526                            table: String::new(),
527                            column: "*".to_string(),
528                        },
529                        span: Span::synthetic(),
530                    }],
531                    span: Span::new(start, end),
532                });
533            }
534        }
535
536        let mut args = Vec::new();
537        if !self.check(&Token::RParen) {
538            loop {
539                args.push(self.parse_expr_prec(0)?);
540                if !self.consume(&Token::Comma)? {
541                    break;
542                }
543            }
544        }
545        self.expect(Token::RParen)?;
546        let end = self.position();
547        Ok(Expr::FunctionCall {
548            name: function_name,
549            args,
550            span: Span::new(start, end),
551        })
552    }
553
554    /// Parse `CASE WHEN cond THEN val [WHEN …] [ELSE val] END`.
555    /// Assumes the caller has already peeked `CASE`.
556    fn parse_case_expr(
557        &mut self,
558        start: crate::storage::query::lexer::Position,
559    ) -> Result<Expr, ParseError> {
560        self.advance()?; // consume CASE
561        let mut branches: Vec<(Expr, Expr)> = Vec::new();
562        loop {
563            if !self.consume_ident_ci("WHEN")? {
564                break;
565            }
566            let cond = self.parse_expr_prec(0)?;
567            if !self.consume_ident_ci("THEN")? {
568                return Err(ParseError::new(
569                    "expected THEN after CASE WHEN condition".to_string(),
570                    self.position(),
571                ));
572            }
573            let then_val = self.parse_expr_prec(0)?;
574            branches.push((cond, then_val));
575        }
576        if branches.is_empty() {
577            return Err(ParseError::new(
578                "CASE must have at least one WHEN branch".to_string(),
579                self.position(),
580            ));
581        }
582        let else_ = if self.consume_ident_ci("ELSE")? {
583            Some(Box::new(self.parse_expr_prec(0)?))
584        } else {
585            None
586        };
587        if !self.consume_ident_ci("END")? {
588            return Err(ParseError::new(
589                "expected END to close CASE expression".to_string(),
590                self.position(),
591            ));
592        }
593        let end = self.position();
594        Ok(Expr::Case {
595            branches,
596            else_,
597            span: Span::new(start, end),
598        })
599    }
600
601    fn parse_trim_expr_args(&mut self) -> Result<(String, Vec<Expr>), ParseError> {
602        let mut function_name = "TRIM".to_string();
603
604        if self.consume_ident_ci("LEADING")? {
605            function_name = "LTRIM".to_string();
606        } else if self.consume_ident_ci("TRAILING")? {
607            function_name = "RTRIM".to_string();
608        } else if self.consume_ident_ci("BOTH")? {
609            function_name = "TRIM".to_string();
610        }
611
612        if self.consume(&Token::From)? {
613            let source = self.parse_expr_prec(0)?;
614            return Ok((function_name, vec![source]));
615        }
616
617        let first = self.parse_expr_prec(0)?;
618
619        if self.consume(&Token::Comma)? {
620            let second = self.parse_expr_prec(0)?;
621            return Ok((function_name, vec![first, second]));
622        }
623
624        if self.consume(&Token::From)? {
625            let source = self.parse_expr_prec(0)?;
626            return Ok((function_name, vec![source, first]));
627        }
628
629        Ok((function_name, vec![first]))
630    }
631
632    /// PostgreSQL-style `POSITION(substr IN string)` or plain
633    /// `POSITION(substr, string)` lowered to the ordinary two-argument
634    /// function form.
635    fn parse_position_expr_args(&mut self) -> Result<Vec<Expr>, ParseError> {
636        // `IN` is also a postfix operator in the main expression grammar, so
637        // parse the first operand above postfix-IN precedence and then consume
638        // the function's `IN` keyword explicitly.
639        let needle = self.parse_expr_prec(35)?;
640        if !self.consume(&Token::Comma)? {
641            self.expect(Token::In)?;
642        }
643        let haystack = self.parse_expr_prec(0)?;
644        Ok(vec![needle, haystack])
645    }
646
647    /// PostgreSQL-style `SUBSTRING` syntax:
648    /// - `SUBSTRING(expr FROM start [FOR count])`
649    /// - `SUBSTRING(expr FOR count [FROM start])`
650    /// - plain function-call form `SUBSTRING(expr, start[, count])`
651    ///
652    /// The SQL-syntax variants are desugared to the comma-arg form so the
653    /// rest of the stack sees the same `Expr::FunctionCall` shape.
654    fn parse_substring_expr_args(&mut self) -> Result<Vec<Expr>, ParseError> {
655        let source = self.parse_expr_prec(0)?;
656
657        if self.consume(&Token::Comma)? {
658            let mut args = vec![source];
659            loop {
660                args.push(self.parse_expr_prec(0)?);
661                if !self.consume(&Token::Comma)? {
662                    break;
663                }
664            }
665            return Ok(args);
666        }
667
668        if self.consume(&Token::From)? {
669            let start = self.parse_expr_prec(0)?;
670            if self.consume(&Token::For)? {
671                let count = self.parse_expr_prec(0)?;
672                return Ok(vec![source, start, count]);
673            }
674            return Ok(vec![source, start]);
675        }
676
677        if self.consume(&Token::For)? {
678            let count = self.parse_expr_prec(0)?;
679            if self.consume(&Token::From)? {
680                let start = self.parse_expr_prec(0)?;
681                return Ok(vec![source, start, count]);
682            }
683            return Ok(vec![source, Expr::lit(Value::Integer(1)), count]);
684        }
685
686        Ok(vec![source])
687    }
688
689    /// Try to consume a postfix operator on top of the already-parsed
690    /// `left` expression: `IS [NOT] NULL`, `[NOT] BETWEEN … AND …`,
691    /// `[NOT] IN (…)`. Returns `Ok(None)` if no postfix follows.
692    ///
693    /// NOT at this position is unambiguous — prefix `NOT` is always
694    /// consumed at `parse_expr_unary` level before reaching postfix.
695    /// So seeing `NOT` here means the user wrote `x NOT BETWEEN …`
696    /// or `x NOT IN …`; we consume it eagerly and require BETWEEN
697    /// or IN to follow.
698    fn try_parse_postfix(&mut self, left: &Expr) -> Result<Option<Expr>, ParseError> {
699        let start = self.span_start_of(left);
700
701        // IS [NOT] NULL
702        if self.consume(&Token::Is)? {
703            let negated = self.consume(&Token::Not)?;
704            self.expect(Token::Null)?;
705            let end = self.position();
706            return Ok(Some(Expr::IsNull {
707                operand: Box::new(left.clone()),
708                negated,
709                span: Span::new(start, end),
710            }));
711        }
712
713        // Detect NOT BETWEEN / NOT IN. NOT is consumed eagerly — we
714        // don't have two-token lookahead and the grammar guarantees
715        // no other valid postfix starts with NOT.
716        let negated = if matches!(self.peek(), Token::Not) {
717            self.advance()?;
718            if !matches!(self.peek(), Token::Between | Token::In) {
719                return Err(ParseError::new(
720                    "expected BETWEEN or IN after postfix NOT".to_string(),
721                    self.position(),
722                ));
723            }
724            true
725        } else {
726            false
727        };
728
729        // BETWEEN low AND high
730        if self.consume(&Token::Between)? {
731            let low = self.parse_expr_prec(34)?;
732            self.expect(Token::And)?;
733            let high = self.parse_expr_prec(34)?;
734            let end = self.position();
735            return Ok(Some(Expr::Between {
736                target: Box::new(left.clone()),
737                low: Box::new(low),
738                high: Box::new(high),
739                negated,
740                span: Span::new(start, end),
741            }));
742        }
743
744        // IN (v1, v2, …)
745        if self.consume(&Token::In)? {
746            self.expect(Token::LParen)?;
747            let mut values = Vec::new();
748            if !self.check(&Token::RParen) {
749                loop {
750                    values.push(self.parse_expr_prec(0)?);
751                    if !self.consume(&Token::Comma)? {
752                        break;
753                    }
754                }
755            }
756            self.expect(Token::RParen)?;
757            let end = self.position();
758            return Ok(Some(Expr::InList {
759                target: Box::new(left.clone()),
760                values,
761                negated,
762                span: Span::new(start, end),
763            }));
764        }
765
766        if negated {
767            // Unreachable because the early-return above already
768            // validated NOT is followed by BETWEEN or IN. Guarded
769            // to keep callers loud if the grammar grows later.
770            return Err(ParseError::new(
771                "internal: NOT consumed without BETWEEN/IN follow".to_string(),
772                self.position(),
773            ));
774        }
775        Ok(None)
776    }
777
778    /// Peek the current token and translate it into a `BinOp` plus
779    /// its precedence. Returns `None` if the token is not a recognised
780    /// infix operator — the caller then tries postfix handling.
781    fn peek_binop(&self) -> Option<(BinOp, u8)> {
782        let op = match self.peek() {
783            Token::Or => BinOp::Or,
784            Token::And => BinOp::And,
785            Token::Eq => BinOp::Eq,
786            Token::Ne => BinOp::Ne,
787            Token::Lt => BinOp::Lt,
788            Token::Le => BinOp::Le,
789            Token::Gt => BinOp::Gt,
790            Token::Ge => BinOp::Ge,
791            Token::DoublePipe => BinOp::Concat,
792            Token::Plus => BinOp::Add,
793            Token::Dash => BinOp::Sub,
794            Token::Star => BinOp::Mul,
795            Token::Slash => BinOp::Div,
796            Token::Percent => BinOp::Mod,
797            _ => return None,
798        };
799        Some((op, op.precedence()))
800    }
801
802    /// Return the start position of an expression's span. Handles the
803    /// synthetic case by falling back to the current parser cursor,
804    /// which is good enough for the Pratt climb since the caller just
805    /// parsed the atom.
806    fn span_start_of(&self, expr: &Expr) -> crate::storage::query::lexer::Position {
807        let s = expr.span();
808        if s.is_synthetic() {
809            self.position()
810        } else {
811            s.start
812        }
813    }
814
815    /// Return the end position of an expression's span — same
816    /// synthetic fallback as `span_start_of`.
817    fn span_end_of(&self, expr: &Expr) -> crate::storage::query::lexer::Position {
818        let s = expr.span();
819        if s.is_synthetic() {
820            self.position()
821        } else {
822            s.end
823        }
824    }
825}
826
827// Avoid `unused` lints in partial-migration builds where the analyzer
828// still does not consume every expression shape directly.
829#[allow(dead_code)]
830fn _expr_module_used(_: Expr) {}
831
#[cfg(test)]
mod tests {
    //! Unit tests for the Pratt expression parser: literals, operator
    //! precedence / associativity, postfix forms (IS NULL, BETWEEN, IN),
    //! function calls, placeholders (`$N` and `?`), and span tracking.

    use super::*;
    use crate::storage::query::ast::FieldRef;

    /// Parse `input` as a single expression, panicking with a short
    /// label if either lexer initialisation or parsing fails.
    fn parse(input: &str) -> Expr {
        let mut parser = Parser::new(input).expect("lexer init");
        parser.parse_expr().expect("parse_expr")
    }

    // ---- literals and bare columns -------------------------------

    #[test]
    fn literal_integer() {
        let e = parse("42");
        match e {
            Expr::Literal {
                value: Value::Integer(42),
                ..
            } => {}
            other => panic!("expected Integer(42), got {other:?}"),
        }
    }

    #[test]
    fn literal_float() {
        // 2.5 rather than 3.14: the latter trips clippy's
        // deny-by-default `approx_constant` lint (looks like PI).
        let e = parse("2.5");
        match e {
            Expr::Literal {
                value: Value::Float(f),
                ..
            } => assert!((f - 2.5).abs() < 1e-9),
            other => panic!("expected float literal, got {other:?}"),
        }
    }

    #[test]
    fn literal_string() {
        let e = parse("'hello'");
        match e {
            Expr::Literal {
                value: Value::Text(s),
                ..
            } if s.as_ref() == "hello" => {}
            other => panic!("expected Text(hello), got {other:?}"),
        }
    }

    #[test]
    fn literal_booleans_and_null() {
        assert!(matches!(
            parse("TRUE"),
            Expr::Literal {
                value: Value::Boolean(true),
                ..
            }
        ));
        assert!(matches!(
            parse("FALSE"),
            Expr::Literal {
                value: Value::Boolean(false),
                ..
            }
        ));
        assert!(matches!(
            parse("NULL"),
            Expr::Literal {
                value: Value::Null,
                ..
            }
        ));
    }

    #[test]
    fn bare_column() {
        let e = parse("user_id");
        match e {
            Expr::Column {
                field: FieldRef::TableColumn { column, .. },
                ..
            } => {
                assert_eq!(column, "user_id");
            }
            other => panic!("expected column, got {other:?}"),
        }
    }

    // ---- binary operator precedence and associativity ------------

    #[test]
    fn arithmetic_precedence_mul_over_add() {
        // a + b * c  →  Add(a, Mul(b, c))
        let e = parse("a + b * c");
        let Expr::BinaryOp {
            op: BinOp::Add,
            rhs,
            ..
        } = e
        else {
            panic!("root must be Add");
        };
        let Expr::BinaryOp { op: BinOp::Mul, .. } = *rhs else {
            panic!("rhs must be Mul");
        };
    }

    #[test]
    fn arithmetic_left_associativity() {
        // a - b - c  →  Sub(Sub(a, b), c)
        let e = parse("a - b - c");
        let Expr::BinaryOp {
            op: BinOp::Sub,
            lhs,
            ..
        } = e
        else {
            panic!("root must be Sub");
        };
        let Expr::BinaryOp { op: BinOp::Sub, .. } = *lhs else {
            panic!("lhs must be Sub (left-assoc)");
        };
    }

    #[test]
    fn parenthesised_override() {
        // (a + b) * c  →  Mul(Add(a, b), c)
        let e = parse("(a + b) * c");
        let Expr::BinaryOp {
            op: BinOp::Mul,
            lhs,
            ..
        } = e
        else {
            panic!("root must be Mul");
        };
        let Expr::BinaryOp { op: BinOp::Add, .. } = *lhs else {
            panic!("lhs must be Add");
        };
    }

    #[test]
    fn comparison_binds_weaker_than_arith() {
        // a + 1 = b - 2
        //   →  Eq(Add(a, 1), Sub(b, 2))
        let e = parse("a + 1 = b - 2");
        let Expr::BinaryOp {
            op: BinOp::Eq,
            lhs,
            rhs,
            ..
        } = e
        else {
            panic!("root must be Eq");
        };
        assert!(matches!(*lhs, Expr::BinaryOp { op: BinOp::Add, .. }));
        assert!(matches!(*rhs, Expr::BinaryOp { op: BinOp::Sub, .. }));
    }

    #[test]
    fn and_binds_tighter_than_or() {
        // a OR b AND c  →  Or(a, And(b, c))
        let e = parse("a OR b AND c");
        let Expr::BinaryOp {
            op: BinOp::Or, rhs, ..
        } = e
        else {
            panic!("root must be Or");
        };
        assert!(matches!(*rhs, Expr::BinaryOp { op: BinOp::And, .. }));
    }

    // ---- unary operators, concat, CAST, CASE ---------------------

    #[test]
    fn unary_negation() {
        let e = parse("-a");
        let Expr::UnaryOp {
            op: UnaryOp::Neg, ..
        } = e
        else {
            panic!("expected unary Neg");
        };
    }

    #[test]
    fn unary_not() {
        let e = parse("NOT a");
        let Expr::UnaryOp {
            op: UnaryOp::Not, ..
        } = e
        else {
            panic!("expected unary Not");
        };
    }

    #[test]
    fn concat_operator() {
        let e = parse("'hello' || name");
        let Expr::BinaryOp {
            op: BinOp::Concat, ..
        } = e
        else {
            panic!("expected Concat");
        };
    }

    #[test]
    fn cast_expr() {
        let e = parse("CAST(age AS TEXT)");
        let Expr::Cast { target, .. } = e else {
            panic!("expected Cast");
        };
        assert_eq!(target, DataType::Text);
    }

    #[test]
    fn case_expr() {
        let e = parse("CASE WHEN a = 1 THEN 'one' WHEN a = 2 THEN 'two' ELSE 'other' END");
        let Expr::Case {
            branches, else_, ..
        } = e
        else {
            panic!("expected Case");
        };
        assert_eq!(branches.len(), 2);
        assert!(else_.is_some());
    }

    // ---- postfix forms: IS NULL, BETWEEN, IN ---------------------

    #[test]
    fn is_null_postfix() {
        let e = parse("name IS NULL");
        assert!(matches!(e, Expr::IsNull { negated: false, .. }));
    }

    #[test]
    fn is_not_null_postfix() {
        let e = parse("name IS NOT NULL");
        assert!(matches!(e, Expr::IsNull { negated: true, .. }));
    }

    #[test]
    fn between_with_columns() {
        let e = parse("temp BETWEEN min_t AND max_t");
        let Expr::Between {
            target,
            low,
            high,
            negated,
            ..
        } = e
        else {
            panic!("expected Between");
        };
        assert!(!negated);
        assert!(matches!(*target, Expr::Column { .. }));
        assert!(matches!(*low, Expr::Column { .. }));
        assert!(matches!(*high, Expr::Column { .. }));
    }

    #[test]
    fn not_between_negates() {
        let e = parse("temp NOT BETWEEN 0 AND 100");
        let Expr::Between { negated: true, .. } = e else {
            panic!("expected negated Between");
        };
    }

    #[test]
    fn in_list_literal() {
        let e = parse("status IN (1, 2, 3)");
        let Expr::InList {
            values, negated, ..
        } = e
        else {
            panic!("expected InList");
        };
        assert!(!negated);
        assert_eq!(values.len(), 3);
    }

    #[test]
    fn not_in_list() {
        let e = parse("status NOT IN (1, 2)");
        let Expr::InList { negated: true, .. } = e else {
            panic!("expected negated InList");
        };
    }

    // ---- function calls ------------------------------------------

    #[test]
    fn function_call_with_args() {
        let e = parse("UPPER(name)");
        let Expr::FunctionCall { name, args, .. } = e else {
            panic!("expected FunctionCall");
        };
        assert_eq!(name, "UPPER");
        assert_eq!(args.len(), 1);
    }

    #[test]
    fn nested_function_call() {
        let e = parse("COALESCE(a, UPPER(b))");
        let Expr::FunctionCall { name, args, .. } = e else {
            panic!("expected FunctionCall");
        };
        assert_eq!(name, "COALESCE");
        assert_eq!(args.len(), 2);
        assert!(matches!(&args[1], Expr::FunctionCall { .. }));
    }

    #[test]
    fn duration_literal_parses_as_text() {
        let e = parse("time_bucket(5m)");
        let Expr::FunctionCall { name, args, .. } = e else {
            panic!("expected FunctionCall, got {e:?}");
        };
        assert_eq!(name.to_uppercase(), "TIME_BUCKET");
        assert_eq!(args.len(), 1);
        assert!(
            matches!(&args[0], Expr::Literal { value: Value::Text(s), .. } if s.as_ref() == "5m"),
            "expected Text(\"5m\"), got {:?}",
            args[0]
        );
    }

    // ---- `$N` placeholders ---------------------------------------

    #[test]
    fn placeholder_dollar_one() {
        let e = parse("$1");
        match e {
            Expr::Parameter { index: 0, .. } => {}
            other => panic!("expected Parameter(0), got {other:?}"),
        }
    }

    #[test]
    fn placeholder_dollar_n() {
        let e = parse("$7");
        match e {
            Expr::Parameter { index: 6, .. } => {}
            other => panic!("expected Parameter(6), got {other:?}"),
        }
    }

    #[test]
    fn placeholder_in_string_literal_is_text() {
        // `$1` inside a string literal must NOT parse as a placeholder.
        let e = parse("'$1'");
        match e {
            Expr::Literal {
                value: Value::Text(s),
                ..
            } if s.as_ref() == "$1" => {}
            other => panic!("expected text literal '$1', got {other:?}"),
        }
    }

    #[test]
    fn placeholder_in_comparison() {
        // SELECT-WHERE shape: `id = $1`
        let e = parse("id = $1");
        let Expr::BinaryOp {
            op: BinOp::Eq, rhs, ..
        } = e
        else {
            panic!("root must be Eq");
        };
        assert!(matches!(*rhs, Expr::Parameter { index: 0, .. }));
    }

    #[test]
    fn placeholder_zero_rejected() {
        // `$0` is not a valid placeholder; numbering starts at `$1`.
        let mut parser = Parser::new("$0").expect("lexer");
        let err = parser.parse_expr().err().expect("should fail");
        assert!(
            err.to_string().contains("placeholder"),
            "expected placeholder error, got: {err}"
        );
    }

    // ---- `?` placeholders ----------------------------------------

    #[test]
    fn placeholder_question_single() {
        // Lone `?` numbered as parameter 1 (index 0).
        let e = parse("?");
        match e {
            Expr::Parameter { index: 0, .. } => {}
            other => panic!("expected Parameter(0), got {other:?}"),
        }
    }

    #[test]
    fn placeholder_question_left_to_right() {
        // `id = ? AND name = ?` → params 0 and 1
        let e = parse("id = ? AND name = ?");
        let Expr::BinaryOp {
            op: BinOp::And,
            lhs,
            rhs,
            ..
        } = e
        else {
            panic!("root must be And");
        };
        let Expr::BinaryOp {
            op: BinOp::Eq,
            rhs: r1,
            ..
        } = *lhs
        else {
            panic!("lhs must be Eq");
        };
        assert!(matches!(*r1, Expr::Parameter { index: 0, .. }));
        let Expr::BinaryOp {
            op: BinOp::Eq,
            rhs: r2,
            ..
        } = *rhs
        else {
            panic!("rhs must be Eq");
        };
        assert!(matches!(*r2, Expr::Parameter { index: 1, .. }));
    }

    #[test]
    fn placeholder_question_in_string_literal_is_text() {
        let e = parse("'?'");
        match e {
            Expr::Literal {
                value: Value::Text(s),
                ..
            } if s.as_ref() == "?" => {}
            other => panic!("expected text literal '?', got {other:?}"),
        }
    }

    // ---- mixing placeholder styles is an error -------------------

    #[test]
    fn placeholder_mixing_question_then_dollar_rejected() {
        let mut parser = Parser::new("id = ? AND x = $2").expect("lexer");
        let err = parser.parse_expr().err().expect("should fail");
        assert!(
            err.to_string().contains("mix"),
            "expected mixing error, got: {err}"
        );
    }

    #[test]
    fn placeholder_mixing_dollar_then_question_rejected() {
        let mut parser = Parser::new("id = $1 AND x = ?").expect("lexer");
        let err = parser.parse_expr().err().expect("should fail");
        assert!(
            err.to_string().contains("mix"),
            "expected mixing error, got: {err}"
        );
    }

    #[test]
    fn placeholder_question_in_comment_ignored() {
        // `?` inside an SQL line comment must not bump the counter.
        // The expression after the comment is the only param.
        let mut parser = Parser::new("-- ? ignored\n  ?").expect("lexer");
        let e = parser.parse_expr().expect("parse_expr");
        match e {
            Expr::Parameter { index: 0, .. } => {}
            other => panic!("expected Parameter(0), got {other:?}"),
        }
    }

    // ---- span tracking -------------------------------------------

    #[test]
    fn span_tracks_token_range() {
        // The root expression's span must come from the lexer (not be
        // synthetic) and must cover a non-empty range of the input.
        let mut parser = Parser::new("123 + 456").expect("lexer");
        let e = parser.parse_expr().expect("parse_expr");
        let span = e.span();
        assert!(!span.is_synthetic(), "root span must be real");
        assert!(span.start.offset < span.end.offset);
    }
}