Skip to main content

reddb_server/storage/query/parser/
expr.rs

1//! Pratt-style parser for the Fase 2 `Expr` AST.
2//!
3//! This module is the Week 2 deliverable of the parser v2 refactor
4//! tracked in `/home/cyber/.claude/plans/squishy-mixing-honey.md`.
5//! It produces `ast::Expr` trees with proper operator precedence,
6//! `Span` tracking from the lexer, and support for the full set of
7//! unary / binary / postfix operators the existing hand-rolled
8//! projection climb covers in Fase 1.3 — plus the missing pieces
9//! (CASE, CAST, parenthesised subexprs, IS NULL, IN, BETWEEN).
10//!
11//! # Design notes
12//!
13//! The parser is now the canonical entry point for SQL expression
14//! parsing in the table-query flow:
15//! - `SELECT` projections parse through `Parser::parse_expr`
16//! - `WHERE` / `HAVING` operands parse through `Parser::parse_expr`
17//! - `ORDER BY` expressions parse through `Parser::parse_expr`
18//!
19//! Some legacy AST slots are still adapter-based (`Projection`,
20//! `Filter`, `GROUP BY` strings), so statement parsing still lowers
21//! `Expr` trees into those older shapes at the boundary.
22//!
23//! # Precedence table (matches PG gram.y modulo features we don't have)
24//!
25//! ```text
26//! prec  operators
27//! ----  ----------------------------------
28//!  10   OR
29//!  20   AND
30//!  25   NOT                      (prefix)
31//!  30   = <> < <= > >=           (comparison)
32//!  32   IS NULL / IS NOT NULL    (postfix)
33//!  33   BETWEEN … AND …          (postfix)
34//!  34   IN (…)                   (postfix)
35//!  40   ||                       (string concat)
36//!  50   + -                      (additive)
37//!  60   * / %                    (multiplicative)
38//!  70   -                        (unary negation)
39//!  80   ::type  CAST(…AS type)   (explicit type coercion)
40//! ```
41//!
42//! Higher precedence binds tighter. The climb uses the classic
43//! "min-precedence" algorithm — `parse_expr_prec(min)` loops consuming
44//! any infix operator whose precedence is ≥ `min`, recursing with
45//! `prec + 1` on the right-hand side for left-associativity.
46
47use super::super::ast::{BinOp, Expr, ExprSubquery, FieldRef, Span, UnaryOp};
48use super::super::lexer::Token;
49use super::error::ParseError;
50use super::Parser;
51use super::PlaceholderMode;
52use crate::storage::schema::{DataType, Value};
53
/// Reports whether `unit` spells one of the duration-unit suffixes accepted
/// after an integer literal (milliseconds, seconds, minutes, hours, days).
///
/// The comparison is ASCII case-insensitive, so `"MS"`, `"Min"` and
/// `"hours"` all match.
fn is_duration_unit(unit: &str) -> bool {
    // Every accepted spelling, grouped by the unit it denotes.
    const DURATION_UNITS: &[&str] = &[
        // milliseconds
        "ms",
        "msec",
        "millisecond",
        "milliseconds",
        // seconds
        "s",
        "sec",
        "secs",
        "second",
        "seconds",
        // minutes
        "m",
        "min",
        "mins",
        "minute",
        "minutes",
        // hours
        "h",
        "hr",
        "hrs",
        "hour",
        "hours",
        // days
        "d",
        "day",
        "days",
    ];

    DURATION_UNITS
        .iter()
        .any(|known| unit.eq_ignore_ascii_case(known))
}
80
81fn keyword_function_name(token: &Token) -> Option<&'static str> {
82    match token {
83        Token::Count => Some("COUNT"),
84        Token::Sum => Some("SUM"),
85        Token::Avg => Some("AVG"),
86        Token::Min => Some("MIN"),
87        Token::Max => Some("MAX"),
88        Token::First => Some("FIRST"),
89        Token::Last => Some("LAST"),
90        Token::Left => Some("LEFT"),
91        Token::Right => Some("RIGHT"),
92        Token::Contains => Some("CONTAINS"),
93        Token::Kv => Some("KV"),
94        _ => None,
95    }
96}
97
/// Resolves an identifier to the canonical name of a function that may be
/// written without parentheses (`CURRENT_TIMESTAMP`, `CURRENT_DATE`,
/// `CURRENT_TIME`).
///
/// Matching is ASCII case-insensitive; any other identifier yields `None`.
fn bare_zero_arg_function_name(name: &str) -> Option<&'static str> {
    const BARE_FUNCTIONS: [&str; 3] = ["CURRENT_TIMESTAMP", "CURRENT_DATE", "CURRENT_TIME"];

    BARE_FUNCTIONS
        .iter()
        .copied()
        .find(|canonical| name.eq_ignore_ascii_case(canonical))
}
106
107impl<'a> Parser<'a> {
108    /// Parse a complete expression at the lowest precedence level.
109    /// Entry point for every caller that wants an `Expr` tree.
110    pub fn parse_expr(&mut self) -> Result<Expr, ParseError> {
111        self.parse_expr_prec(0)
112    }
113
114    pub(crate) fn parse_expr_with_min_precedence(
115        &mut self,
116        min_prec: u8,
117    ) -> Result<Expr, ParseError> {
118        self.parse_expr_prec(min_prec)
119    }
120
121    /// Continue parsing an expression after the caller has already
122    /// materialized the left-hand side atom.
123    pub(crate) fn continue_expr(&mut self, left: Expr, min_prec: u8) -> Result<Expr, ParseError> {
124        self.parse_expr_suffix(left, min_prec)
125    }
126
127    /// Pratt climb: parse a unary atom then consume any infix operators
128    /// whose precedence meets or exceeds `min_prec`.
129    fn parse_expr_prec(&mut self, min_prec: u8) -> Result<Expr, ParseError> {
130        // Depth guard: every recursive descent point in the expr
131        // grammar bottoms out here, so checking once is enough to
132        // catch deeply nested literals like `((((((1))))))` and
133        // boolean chains like `NOT NOT NOT NOT … x`.
134        self.enter_depth()?;
135        let result = (|| {
136            let left = self.parse_expr_unary()?;
137            self.parse_expr_suffix(left, min_prec)
138        })();
139        self.exit_depth();
140        result
141    }
142
143    fn parse_expr_suffix(&mut self, mut left: Expr, min_prec: u8) -> Result<Expr, ParseError> {
144        loop {
145            let Some((op, prec)) = self.peek_binop() else {
146                // Not a standard infix op — check for postfix forms.
147                if min_prec <= 32 {
148                    if let Some(node) = self.try_parse_postfix(&left)? {
149                        left = node;
150                        continue;
151                    }
152                }
153                break;
154            };
155            if prec < min_prec {
156                break;
157            }
158            self.advance()?; // consume the operator token
159            let start_span = self.span_start_of(&left);
160            let rhs = self.parse_expr_prec(prec + 1)?;
161            let end_span = self.span_end_of(&rhs);
162            left = Expr::BinaryOp {
163                op,
164                lhs: Box::new(left),
165                rhs: Box::new(rhs),
166                span: Span::new(start_span, end_span),
167            };
168        }
169        Ok(left)
170    }
171
172    /// Parse a unary-prefix expression or drop through to the atomic
173    /// factor. Handles `NOT`, unary `-`, and `+` (no-op sign).
174    fn parse_expr_unary(&mut self) -> Result<Expr, ParseError> {
175        match self.peek() {
176            Token::Not => {
177                let start = self.position();
178                self.advance()?;
179                let operand = self.parse_expr_prec(25)?;
180                let end = self.span_end_of(&operand);
181                Ok(Expr::UnaryOp {
182                    op: UnaryOp::Not,
183                    operand: Box::new(operand),
184                    span: Span::new(start, end),
185                })
186            }
187            Token::Dash => {
188                let start = self.position();
189                self.advance()?;
190                let operand = self.parse_expr_prec(70)?;
191                let end = self.span_end_of(&operand);
192                Ok(Expr::UnaryOp {
193                    op: UnaryOp::Neg,
194                    operand: Box::new(operand),
195                    span: Span::new(start, end),
196                })
197            }
198            Token::Plus => {
199                // Unary plus is a no-op. Consume and recurse.
200                self.advance()?;
201                self.parse_expr_prec(70)
202            }
203            _ => self.parse_expr_factor(),
204        }
205    }
206
207    /// Parse a single atomic expression factor: literal, column ref,
208    /// parenthesised subexpression, CAST, CASE, or function call.
209    fn parse_expr_factor(&mut self) -> Result<Expr, ParseError> {
210        let start = self.position();
211
212        // Parenthesised subexpression: `( expr )`
213        if self.consume(&Token::LParen)? {
214            if self.check(&Token::Select) {
215                let query = self.parse_select_query()?;
216                self.expect(Token::RParen)?;
217                return Ok(Expr::Subquery {
218                    query: ExprSubquery {
219                        query: Box::new(query),
220                    },
221                    span: Span::new(start, self.position()),
222                });
223            }
224            let inner = self.parse_expr_prec(0)?;
225            self.expect(Token::RParen)?;
226            return Ok(inner);
227        }
228
229        // Literal: true / false / null
230        if self.consume(&Token::True)? {
231            return Ok(Expr::Literal {
232                value: Value::Boolean(true),
233                span: Span::new(start, self.position()),
234            });
235        }
236        if self.consume(&Token::False)? {
237            return Ok(Expr::Literal {
238                value: Value::Boolean(false),
239                span: Span::new(start, self.position()),
240            });
241        }
242        if self.consume(&Token::Null)? {
243            return Ok(Expr::Literal {
244                value: Value::Null,
245                span: Span::new(start, self.position()),
246            });
247        }
248
249        // Numeric literals — with optional duration-unit suffix (e.g. `5m`, `10s`, `2h`).
250        // Duration literals are emitted as Value::Text so downstream code sees "5m" verbatim
251        // (matching the legacy Projection::Column("LIT:5m") path used by time_bucket).
252        if let Token::Integer(n) = *self.peek() {
253            self.advance()?;
254            if let Token::Ident(ref unit) = *self.peek() {
255                if is_duration_unit(unit) {
256                    let duration = format!("{n}{}", unit.to_ascii_lowercase());
257                    self.advance()?;
258                    return Ok(Expr::Literal {
259                        value: Value::text(duration),
260                        span: Span::new(start, self.position()),
261                    });
262                }
263            }
264            return Ok(Expr::Literal {
265                value: Value::Integer(n),
266                span: Span::new(start, self.position()),
267            });
268        }
269        if let Token::Float(n) = *self.peek() {
270            self.advance()?;
271            return Ok(Expr::Literal {
272                value: Value::Float(n),
273                span: Span::new(start, self.position()),
274            });
275        }
276        if let Token::String(ref s) = *self.peek() {
277            let text = s.clone();
278            self.advance()?;
279            return Ok(Expr::Literal {
280                value: Value::text(text),
281                span: Span::new(start, self.position()),
282            });
283        }
284
285        // JSON object `{…}` and array `[…]` literals — delegate to the DML literal parser
286        // which already handles the full JSON value grammar including nested objects.
287        // `JsonLiteral` is the strict-JSON variant emitted by the lexer's sub-mode
288        // when `{` is followed by `"`; both shapes route through `parse_literal_value`.
289        if matches!(
290            self.peek(),
291            Token::LBrace | Token::LBracket | Token::JsonLiteral(_)
292        ) {
293            let value = self
294                .parse_literal_value()
295                .map_err(|e| ParseError::new(e.message, self.position()))?;
296            return Ok(Expr::Literal {
297                value,
298                span: Span::new(start, self.position()),
299            });
300        }
301
302        // `?` positional placeholder — auto-numbered left-to-right.
303        // Immediate `?N` uses an explicit 1-based index. Mixing with
304        // `$N` in one statement is rejected.
305        if self.check(&Token::Question) {
306            let (index, span) = self.parse_question_param_index()?;
307            return Ok(Expr::Parameter { index, span });
308        }
309
310        if self.consume(&Token::Dollar)? {
311            // `$N` positional parameter placeholder (1-based in source,
312            // 0-based in the AST so it matches `Vec<Value>` indexing).
313            // Rejected at parse time when N < 1; gaps and arity are
314            // validated by the binder once the full statement is parsed.
315            if let Token::Integer(n) = *self.peek() {
316                if n < 1 {
317                    return Err(ParseError::new(
318                        "placeholder index must be >= 1".to_string(),
319                        self.position(),
320                    ));
321                }
322                if self.placeholder_mode == PlaceholderMode::Question {
323                    return Err(ParseError::new(
324                        "cannot mix `?` and `$N` placeholders in one statement".to_string(),
325                        self.position(),
326                    ));
327                }
328                self.placeholder_mode = PlaceholderMode::Dollar;
329                self.advance()?;
330                return Ok(Expr::Parameter {
331                    index: (n - 1) as usize,
332                    span: Span::new(start, self.position()),
333                });
334            }
335            let path = self.parse_dollar_ref_path()?;
336            let path_lc = path.to_ascii_lowercase();
337            let (name, key) = if let Some(rest) = path_lc.strip_prefix("secret.") {
338                ("__SECRET_REF", format!("red.vault/{rest}"))
339            } else if path_lc.starts_with("red.secret.") {
340                let rest = path_lc.trim_start_matches("red.secret.");
341                ("__SECRET_REF", format!("red.vault/{rest}"))
342            } else if let Some(rest) = path_lc.strip_prefix("config.") {
343                ("CONFIG", format!("red.config/{rest}"))
344            } else if path_lc.starts_with("red.config.") {
345                let rest = path_lc.trim_start_matches("red.config.");
346                ("CONFIG", format!("red.config/{rest}"))
347            } else {
348                return Err(ParseError::new(
349                    format!(
350                        "unknown $ reference `${path}`; expected $secret.*, $red.secret.*, $config.*, or $red.config.*"
351                    ),
352                    self.position(),
353                ));
354            };
355            return Ok(Expr::FunctionCall {
356                name: name.to_string(),
357                args: vec![Expr::Literal {
358                    value: Value::text(key),
359                    span: Span::new(start, self.position()),
360                }],
361                span: Span::new(start, self.position()),
362            });
363        }
364
365        if let Some(name) = keyword_function_name(self.peek()) {
366            if matches!(self.peek_next()?, Token::LParen) {
367                self.advance()?; // consume the keyword token
368                return self.parse_function_call_expr_with_name(start, name.to_string());
369            }
370        }
371
372        // Identifier-led constructs: function call, CAST, CASE, column.
373        //
374        // We commit to consuming the identifier immediately and then
375        // inspect the NEXT token to decide shape. This avoids needing
376        // two-token lookahead on the parser. If the next token is `(`
377        // it's a function call; if `.` it's a qualified column ref;
378        // otherwise it's a bare column ref.
379        if let Token::Ident(ref name) = *self.peek() {
380            let name_upper = name.to_uppercase();
381
382            // CAST(expr AS type) — must test before consuming because
383            // CAST is not a reserved keyword; users could legitimately
384            // have a column literally named `cast`. Distinguish by
385            // looking at whether the identifier equals CAST AND is
386            // immediately followed by `(`. Since we can't two-step
387            // lookahead, handle CAST by parsing the ident, then if the
388            // uppercased name is CAST and the next token is `(`,
389            // switch to the CAST form; otherwise the saved name
390            // becomes the first segment of a column ref.
391            if name_upper == "CASE" {
392                return self.parse_case_expr(start);
393            }
394
395            let saved_name = name.clone();
396            self.advance()?; // consume the identifier unconditionally
397
398            // Function call / CAST: IDENT (
399            if matches!(self.peek(), Token::LParen) {
400                return self.parse_function_call_expr_with_name(start, saved_name);
401            }
402
403            if let Some(function_name) = bare_zero_arg_function_name(&saved_name) {
404                let end = self.position();
405                return Ok(Expr::FunctionCall {
406                    name: function_name.to_string(),
407                    args: Vec::new(),
408                    span: Span::new(start, end),
409                });
410            }
411
412            // Qualified column: IDENT.IDENT[.IDENT …]
413            if matches!(self.peek(), Token::Dot) {
414                let mut segments = vec![saved_name];
415                while self.consume(&Token::Dot)? {
416                    segments.push(self.expect_ident_or_keyword()?);
417                }
418                let field = FieldRef::TableColumn {
419                    table: segments.remove(0),
420                    column: segments.join("."),
421                };
422                let end = self.position();
423                return Ok(Expr::Column {
424                    field,
425                    span: Span::new(start, end),
426                });
427            }
428
429            // Bare column reference with empty table name.
430            let field = FieldRef::TableColumn {
431                table: String::new(),
432                column: saved_name,
433            };
434            let end = self.position();
435            return Ok(Expr::Column {
436                field,
437                span: Span::new(start, end),
438            });
439        }
440
441        // Default: column reference (optionally qualified: table.column).
442        // Reached only when the leading token is not an Ident. Falls
443        // through to parse_field_ref which handles keyword-shaped
444        // column names.
445        let field = self.parse_field_ref()?;
446        let end = self.position();
447        Ok(Expr::Column {
448            field,
449            span: Span::new(start, end),
450        })
451    }
452
453    fn parse_dollar_ref_path(&mut self) -> Result<String, ParseError> {
454        let mut path = self.expect_ident_or_keyword()?;
455        while self.consume(&Token::Dot)? {
456            let next = self.expect_ident_or_keyword()?;
457            path = format!("{path}.{next}");
458        }
459        Ok(path)
460    }
461
462    fn parse_function_call_expr_with_name(
463        &mut self,
464        start: crate::storage::query::lexer::Position,
465        function_name: String,
466    ) -> Result<Expr, ParseError> {
467        self.expect(Token::LParen)?;
468
469        if function_name.eq_ignore_ascii_case("CAST") {
470            let inner = self.parse_expr_prec(0)?;
471            self.expect(Token::As)?;
472            let type_name = self.expect_ident_or_keyword()?;
473            self.expect(Token::RParen)?;
474            let end = self.position();
475            let Some(target) = DataType::from_sql_name(&type_name) else {
476                return Err(ParseError::new(
477                    // F-05: `type_name` is caller-controlled identifier text.
478                    // Render via `{:?}` so embedded CR/LF/NUL/quotes are
479                    // escaped before reaching downstream serialization sinks.
480                    format!("unknown type name {type_name:?} in CAST"),
481                    self.position(),
482                ));
483            };
484            return Ok(Expr::Cast {
485                inner: Box::new(inner),
486                target,
487                span: Span::new(start, end),
488            });
489        }
490
491        if function_name.eq_ignore_ascii_case("TRIM") {
492            let (name, args) = self.parse_trim_expr_args()?;
493            self.expect(Token::RParen)?;
494            let end = self.position();
495            return Ok(Expr::FunctionCall {
496                name,
497                args,
498                span: Span::new(start, end),
499            });
500        }
501
502        if function_name.eq_ignore_ascii_case("POSITION") {
503            let args = self.parse_position_expr_args()?;
504            self.expect(Token::RParen)?;
505            let end = self.position();
506            return Ok(Expr::FunctionCall {
507                name: function_name,
508                args,
509                span: Span::new(start, end),
510            });
511        }
512
513        if function_name.eq_ignore_ascii_case("SUBSTRING") {
514            let args = self.parse_substring_expr_args()?;
515            self.expect(Token::RParen)?;
516            let end = self.position();
517            return Ok(Expr::FunctionCall {
518                name: function_name,
519                args,
520                span: Span::new(start, end),
521            });
522        }
523
524        if function_name.eq_ignore_ascii_case("COUNT") {
525            if self.consume(&Token::Distinct)? {
526                let arg = self.parse_expr_prec(0)?;
527                self.expect(Token::RParen)?;
528                let end = self.position();
529                return Ok(Expr::FunctionCall {
530                    name: "COUNT_DISTINCT".to_string(),
531                    args: vec![arg],
532                    span: Span::new(start, end),
533                });
534            }
535
536            if self.consume(&Token::Star)? {
537                self.expect(Token::RParen)?;
538                let end = self.position();
539                return Ok(Expr::FunctionCall {
540                    name: function_name,
541                    args: vec![Expr::Column {
542                        field: FieldRef::TableColumn {
543                            table: String::new(),
544                            column: "*".to_string(),
545                        },
546                        span: Span::synthetic(),
547                    }],
548                    span: Span::new(start, end),
549                });
550            }
551        }
552
553        let mut args = Vec::new();
554        if !self.check(&Token::RParen) {
555            loop {
556                args.push(self.parse_expr_prec(0)?);
557                if !self.consume(&Token::Comma)? {
558                    break;
559                }
560            }
561        }
562        self.expect(Token::RParen)?;
563        let end = self.position();
564        Ok(Expr::FunctionCall {
565            name: function_name,
566            args,
567            span: Span::new(start, end),
568        })
569    }
570
571    /// Parse `CASE WHEN cond THEN val [WHEN …] [ELSE val] END`.
572    /// Assumes the caller has already peeked `CASE`.
573    fn parse_case_expr(
574        &mut self,
575        start: crate::storage::query::lexer::Position,
576    ) -> Result<Expr, ParseError> {
577        self.advance()?; // consume CASE
578        let mut branches: Vec<(Expr, Expr)> = Vec::new();
579        loop {
580            if !self.consume_ident_ci("WHEN")? {
581                break;
582            }
583            let cond = self.parse_expr_prec(0)?;
584            if !self.consume_ident_ci("THEN")? {
585                return Err(ParseError::new(
586                    "expected THEN after CASE WHEN condition".to_string(),
587                    self.position(),
588                ));
589            }
590            let then_val = self.parse_expr_prec(0)?;
591            branches.push((cond, then_val));
592        }
593        if branches.is_empty() {
594            return Err(ParseError::new(
595                "CASE must have at least one WHEN branch".to_string(),
596                self.position(),
597            ));
598        }
599        let else_ = if self.consume_ident_ci("ELSE")? {
600            Some(Box::new(self.parse_expr_prec(0)?))
601        } else {
602            None
603        };
604        if !self.consume_ident_ci("END")? {
605            return Err(ParseError::new(
606                "expected END to close CASE expression".to_string(),
607                self.position(),
608            ));
609        }
610        let end = self.position();
611        Ok(Expr::Case {
612            branches,
613            else_,
614            span: Span::new(start, end),
615        })
616    }
617
618    fn parse_trim_expr_args(&mut self) -> Result<(String, Vec<Expr>), ParseError> {
619        let mut function_name = "TRIM".to_string();
620
621        if self.consume_ident_ci("LEADING")? {
622            function_name = "LTRIM".to_string();
623        } else if self.consume_ident_ci("TRAILING")? {
624            function_name = "RTRIM".to_string();
625        } else if self.consume_ident_ci("BOTH")? {
626            function_name = "TRIM".to_string();
627        }
628
629        if self.consume(&Token::From)? {
630            let source = self.parse_expr_prec(0)?;
631            return Ok((function_name, vec![source]));
632        }
633
634        let first = self.parse_expr_prec(0)?;
635
636        if self.consume(&Token::Comma)? {
637            let second = self.parse_expr_prec(0)?;
638            return Ok((function_name, vec![first, second]));
639        }
640
641        if self.consume(&Token::From)? {
642            let source = self.parse_expr_prec(0)?;
643            return Ok((function_name, vec![source, first]));
644        }
645
646        Ok((function_name, vec![first]))
647    }
648
649    /// PostgreSQL-style `POSITION(substr IN string)` or plain
650    /// `POSITION(substr, string)` lowered to the ordinary two-argument
651    /// function form.
652    fn parse_position_expr_args(&mut self) -> Result<Vec<Expr>, ParseError> {
653        // `IN` is also a postfix operator in the main expression grammar, so
654        // parse the first operand above postfix-IN precedence and then consume
655        // the function's `IN` keyword explicitly.
656        let needle = self.parse_expr_prec(35)?;
657        if !self.consume(&Token::Comma)? {
658            self.expect(Token::In)?;
659        }
660        let haystack = self.parse_expr_prec(0)?;
661        Ok(vec![needle, haystack])
662    }
663
664    /// PostgreSQL-style `SUBSTRING` syntax:
665    /// - `SUBSTRING(expr FROM start [FOR count])`
666    /// - `SUBSTRING(expr FOR count [FROM start])`
667    /// - plain function-call form `SUBSTRING(expr, start[, count])`
668    ///
669    /// The SQL-syntax variants are desugared to the comma-arg form so the
670    /// rest of the stack sees the same `Expr::FunctionCall` shape.
671    fn parse_substring_expr_args(&mut self) -> Result<Vec<Expr>, ParseError> {
672        let source = self.parse_expr_prec(0)?;
673
674        if self.consume(&Token::Comma)? {
675            let mut args = vec![source];
676            loop {
677                args.push(self.parse_expr_prec(0)?);
678                if !self.consume(&Token::Comma)? {
679                    break;
680                }
681            }
682            return Ok(args);
683        }
684
685        if self.consume(&Token::From)? {
686            let start = self.parse_expr_prec(0)?;
687            if self.consume(&Token::For)? {
688                let count = self.parse_expr_prec(0)?;
689                return Ok(vec![source, start, count]);
690            }
691            return Ok(vec![source, start]);
692        }
693
694        if self.consume(&Token::For)? {
695            let count = self.parse_expr_prec(0)?;
696            if self.consume(&Token::From)? {
697                let start = self.parse_expr_prec(0)?;
698                return Ok(vec![source, start, count]);
699            }
700            return Ok(vec![source, Expr::lit(Value::Integer(1)), count]);
701        }
702
703        Ok(vec![source])
704    }
705
706    /// Try to consume a postfix operator on top of the already-parsed
707    /// `left` expression: `IS [NOT] NULL`, `[NOT] BETWEEN … AND …`,
708    /// `[NOT] IN (…)`. Returns `Ok(None)` if no postfix follows.
709    ///
710    /// NOT at this position is unambiguous — prefix `NOT` is always
711    /// consumed at `parse_expr_unary` level before reaching postfix.
712    /// So seeing `NOT` here means the user wrote `x NOT BETWEEN …`
713    /// or `x NOT IN …`; we consume it eagerly and require BETWEEN
714    /// or IN to follow.
715    fn try_parse_postfix(&mut self, left: &Expr) -> Result<Option<Expr>, ParseError> {
716        let start = self.span_start_of(left);
717
718        // IS [NOT] NULL
719        if self.consume(&Token::Is)? {
720            let negated = self.consume(&Token::Not)?;
721            self.expect(Token::Null)?;
722            let end = self.position();
723            return Ok(Some(Expr::IsNull {
724                operand: Box::new(left.clone()),
725                negated,
726                span: Span::new(start, end),
727            }));
728        }
729
730        // Detect NOT BETWEEN / NOT IN. NOT is consumed eagerly — we
731        // don't have two-token lookahead and the grammar guarantees
732        // no other valid postfix starts with NOT.
733        let negated = if matches!(self.peek(), Token::Not) {
734            self.advance()?;
735            if !matches!(self.peek(), Token::Between | Token::In) {
736                return Err(ParseError::new(
737                    "expected BETWEEN or IN after postfix NOT".to_string(),
738                    self.position(),
739                ));
740            }
741            true
742        } else {
743            false
744        };
745
746        // BETWEEN low AND high
747        if self.consume(&Token::Between)? {
748            let low = self.parse_expr_prec(34)?;
749            self.expect(Token::And)?;
750            let high = self.parse_expr_prec(34)?;
751            let end = self.position();
752            return Ok(Some(Expr::Between {
753                target: Box::new(left.clone()),
754                low: Box::new(low),
755                high: Box::new(high),
756                negated,
757                span: Span::new(start, end),
758            }));
759        }
760
761        // IN (v1, v2, …)
762        if self.consume(&Token::In)? {
763            self.expect(Token::LParen)?;
764            let mut values = Vec::new();
765            if self.check(&Token::Select) {
766                let query = self.parse_select_query()?;
767                values.push(Expr::Subquery {
768                    query: ExprSubquery {
769                        query: Box::new(query),
770                    },
771                    span: Span::new(self.span_start_of(left), self.position()),
772                });
773            } else if !self.check(&Token::RParen) {
774                loop {
775                    values.push(self.parse_expr_prec(0)?);
776                    if !self.consume(&Token::Comma)? {
777                        break;
778                    }
779                }
780            }
781            self.expect(Token::RParen)?;
782            let end = self.position();
783            return Ok(Some(Expr::InList {
784                target: Box::new(left.clone()),
785                values,
786                negated,
787                span: Span::new(start, end),
788            }));
789        }
790
791        if negated {
792            // Unreachable because the early-return above already
793            // validated NOT is followed by BETWEEN or IN. Guarded
794            // to keep callers loud if the grammar grows later.
795            return Err(ParseError::new(
796                "internal: NOT consumed without BETWEEN/IN follow".to_string(),
797                self.position(),
798            ));
799        }
800        Ok(None)
801    }
802
803    /// Peek the current token and translate it into a `BinOp` plus
804    /// its precedence. Returns `None` if the token is not a recognised
805    /// infix operator — the caller then tries postfix handling.
806    fn peek_binop(&self) -> Option<(BinOp, u8)> {
807        let op = match self.peek() {
808            Token::Or => BinOp::Or,
809            Token::And => BinOp::And,
810            Token::Eq => BinOp::Eq,
811            Token::Ne => BinOp::Ne,
812            Token::Lt => BinOp::Lt,
813            Token::Le => BinOp::Le,
814            Token::Gt => BinOp::Gt,
815            Token::Ge => BinOp::Ge,
816            Token::DoublePipe => BinOp::Concat,
817            Token::Plus => BinOp::Add,
818            Token::Dash => BinOp::Sub,
819            Token::Star => BinOp::Mul,
820            Token::Slash => BinOp::Div,
821            Token::Percent => BinOp::Mod,
822            _ => return None,
823        };
824        Some((op, op.precedence()))
825    }
826
827    /// Return the start position of an expression's span. Handles the
828    /// synthetic case by falling back to the current parser cursor,
829    /// which is good enough for the Pratt climb since the caller just
830    /// parsed the atom.
831    fn span_start_of(&self, expr: &Expr) -> crate::storage::query::lexer::Position {
832        let s = expr.span();
833        if s.is_synthetic() {
834            self.position()
835        } else {
836            s.start
837        }
838    }
839
840    /// Return the end position of an expression's span — same
841    /// synthetic fallback as `span_start_of`.
842    fn span_end_of(&self, expr: &Expr) -> crate::storage::query::lexer::Position {
843        let s = expr.span();
844        if s.is_synthetic() {
845            self.position()
846        } else {
847            s.end
848        }
849    }
850}
851
852// Avoid `unused` lints in partial-migration builds where the analyzer
853// still does not consume every expression shape directly.
854#[allow(dead_code)]
855fn _expr_module_used(_: Expr) {}
856
857#[cfg(test)]
858mod tests {
859    use super::*;
860    use crate::storage::query::ast::FieldRef;
861
862    fn parse(input: &str) -> Expr {
863        let mut parser = Parser::new(input).expect("lexer init");
864        let expr = parser.parse_expr().expect("parse_expr");
865        expr
866    }
867
#[test]
fn literal_integer() {
    // A bare integer must come back as an integer literal node.
    let e = parse("42");
    assert!(
        matches!(
            e,
            Expr::Literal {
                value: Value::Integer(42),
                ..
            }
        ),
        "expected Integer(42), got {e:?}"
    );
}
879
#[test]
// `3.14` is just an arbitrary decimal fed to the lexer, not a stand-in
// for π, so clippy's deny-by-default `approx_constant` lint is a false
// positive in this test.
#[allow(clippy::approx_constant)]
fn literal_float() {
    // A decimal literal must come back as a float literal node whose
    // value matches the source text (compared with a small tolerance).
    let e = parse("3.14");
    match e {
        Expr::Literal {
            value: Value::Float(f),
            ..
        } => assert!((f - 3.14).abs() < 1e-9),
        other => panic!("expected float literal, got {other:?}"),
    }
}
891
#[test]
fn literal_string() {
    // A quoted string must come back as a text literal without quotes.
    let e = parse("'hello'");
    assert!(
        matches!(&e, Expr::Literal { value: Value::Text(s), .. } if s.as_ref() == "hello"),
        "expected Text(hello), got {e:?}"
    );
}
903
#[test]
fn literal_booleans_and_null() {
    // Each keyword literal is parsed and checked in turn.
    let truthy = parse("TRUE");
    assert!(matches!(
        truthy,
        Expr::Literal {
            value: Value::Boolean(true),
            ..
        }
    ));

    let falsy = parse("FALSE");
    assert!(matches!(
        falsy,
        Expr::Literal {
            value: Value::Boolean(false),
            ..
        }
    ));

    let null = parse("NULL");
    assert!(matches!(
        null,
        Expr::Literal {
            value: Value::Null,
            ..
        }
    ));
}
928
#[test]
fn bare_column() {
    // An unqualified identifier must resolve to a table-column reference.
    let e = parse("user_id");
    let Expr::Column {
        field: FieldRef::TableColumn { column, .. },
        ..
    } = e
    else {
        panic!("expected column, got {e:?}");
    };
    assert_eq!(column, "user_id");
}
942
#[test]
fn arithmetic_precedence_mul_over_add() {
    // `a + b * c` must parse as Add(a, Mul(b, c)).
    let rhs = match parse("a + b * c") {
        Expr::BinaryOp {
            op: BinOp::Add,
            rhs,
            ..
        } => rhs,
        _ => panic!("root must be Add"),
    };
    assert!(
        matches!(*rhs, Expr::BinaryOp { op: BinOp::Mul, .. }),
        "rhs must be Mul"
    );
}
959
#[test]
fn arithmetic_left_associativity() {
    // `a - b - c` must parse as Sub(Sub(a, b), c).
    let lhs = match parse("a - b - c") {
        Expr::BinaryOp {
            op: BinOp::Sub,
            lhs,
            ..
        } => lhs,
        _ => panic!("root must be Sub"),
    };
    assert!(
        matches!(*lhs, Expr::BinaryOp { op: BinOp::Sub, .. }),
        "lhs must be Sub (left-assoc)"
    );
}
976
#[test]
fn parenthesised_override() {
    // `(a + b) * c` must parse as Mul(Add(a, b), c).
    let lhs = match parse("(a + b) * c") {
        Expr::BinaryOp {
            op: BinOp::Mul,
            lhs,
            ..
        } => lhs,
        _ => panic!("root must be Mul"),
    };
    assert!(
        matches!(*lhs, Expr::BinaryOp { op: BinOp::Add, .. }),
        "lhs must be Add"
    );
}
993
#[test]
fn comparison_binds_weaker_than_arith() {
    // `a + 1 = b - 2` must parse as Eq(Add(a, 1), Sub(b, 2)).
    let (lhs, rhs) = match parse("a + 1 = b - 2") {
        Expr::BinaryOp {
            op: BinOp::Eq,
            lhs,
            rhs,
            ..
        } => (lhs, rhs),
        _ => panic!("root must be Eq"),
    };
    assert!(matches!(*lhs, Expr::BinaryOp { op: BinOp::Add, .. }));
    assert!(matches!(*rhs, Expr::BinaryOp { op: BinOp::Sub, .. }));
}
1011
#[test]
fn and_binds_tighter_than_or() {
    // `a OR b AND c` must parse as Or(a, And(b, c)).
    let rhs = match parse("a OR b AND c") {
        Expr::BinaryOp {
            op: BinOp::Or, rhs, ..
        } => rhs,
        _ => panic!("root must be Or"),
    };
    assert!(matches!(*rhs, Expr::BinaryOp { op: BinOp::And, .. }));
}
1024
#[test]
fn unary_negation() {
    // A leading dash must produce a unary negation node.
    assert!(
        matches!(
            parse("-a"),
            Expr::UnaryOp {
                op: UnaryOp::Neg,
                ..
            }
        ),
        "expected unary Neg"
    );
}
1035
#[test]
fn unary_not() {
    // A leading NOT must produce a unary logical-not node.
    assert!(
        matches!(
            parse("NOT a"),
            Expr::UnaryOp {
                op: UnaryOp::Not,
                ..
            }
        ),
        "expected unary Not"
    );
}
1046
#[test]
fn concat_operator() {
    // `||` must parse as the string-concatenation binary operator.
    assert!(
        matches!(
            parse("'hello' || name"),
            Expr::BinaryOp {
                op: BinOp::Concat,
                ..
            }
        ),
        "expected Concat"
    );
}
1057
#[test]
fn cast_expr() {
    // `CAST(… AS TEXT)` must yield a Cast node targeting the text type.
    let target = match parse("CAST(age AS TEXT)") {
        Expr::Cast { target, .. } => target,
        _ => panic!("expected Cast"),
    };
    assert_eq!(target, DataType::Text);
}
1066
#[test]
fn case_expr() {
    // Two WHEN arms plus an ELSE must give two branches and a fallback.
    let source = "CASE WHEN a = 1 THEN 'one' WHEN a = 2 THEN 'two' ELSE 'other' END";
    let (branches, else_) = match parse(source) {
        Expr::Case {
            branches, else_, ..
        } => (branches, else_),
        _ => panic!("expected Case"),
    };
    assert_eq!(branches.len(), 2);
    assert!(else_.is_some());
}
1079
#[test]
fn is_null_postfix() {
    // `IS NULL` must produce a non-negated IsNull node.
    assert!(matches!(
        parse("name IS NULL"),
        Expr::IsNull { negated: false, .. }
    ));
}
1085
#[test]
fn is_not_null_postfix() {
    // `IS NOT NULL` must produce a negated IsNull node.
    assert!(matches!(
        parse("name IS NOT NULL"),
        Expr::IsNull { negated: true, .. }
    ));
}
1091
#[test]
fn between_with_columns() {
    // Target and both bounds are plain columns; the node is not negated.
    let (target, low, high, negated) = match parse("temp BETWEEN min_t AND max_t") {
        Expr::Between {
            target,
            low,
            high,
            negated,
            ..
        } => (target, low, high, negated),
        _ => panic!("expected Between"),
    };
    assert!(!negated);
    assert!(matches!(*target, Expr::Column { .. }));
    assert!(matches!(*low, Expr::Column { .. }));
    assert!(matches!(*high, Expr::Column { .. }));
}
1110
#[test]
fn not_between_negates() {
    // Postfix `NOT BETWEEN` must set the negated flag.
    assert!(
        matches!(
            parse("temp NOT BETWEEN 0 AND 100"),
            Expr::Between { negated: true, .. }
        ),
        "expected negated Between"
    );
}
1118
#[test]
fn in_list_literal() {
    // Three comma-separated literals must give a three-element list.
    let (values, negated) = match parse("status IN (1, 2, 3)") {
        Expr::InList {
            values, negated, ..
        } => (values, negated),
        _ => panic!("expected InList"),
    };
    assert!(!negated);
    assert_eq!(values.len(), 3);
}
1131
#[test]
fn not_in_list() {
    // Postfix `NOT IN` must set the negated flag.
    assert!(
        matches!(
            parse("status NOT IN (1, 2)"),
            Expr::InList { negated: true, .. }
        ),
        "expected negated InList"
    );
}
1139
#[test]
fn function_call_with_args() {
    // A single-argument call must keep its name and argument count.
    let (name, args) = match parse("UPPER(name)") {
        Expr::FunctionCall { name, args, .. } => (name, args),
        _ => panic!("expected FunctionCall"),
    };
    assert_eq!(name, "UPPER");
    assert_eq!(args.len(), 1);
}
1149
#[test]
fn nested_function_call() {
    // The second argument of the outer call is itself a function call.
    let (name, args) = match parse("COALESCE(a, UPPER(b))") {
        Expr::FunctionCall { name, args, .. } => (name, args),
        _ => panic!("expected FunctionCall"),
    };
    assert_eq!(name, "COALESCE");
    assert_eq!(args.len(), 2);
    assert!(matches!(&args[1], Expr::FunctionCall { .. }));
}
1160
#[test]
fn duration_literal_parses_as_text() {
    // A duration token such as `5m` must reach the AST as a text literal.
    let (name, args) = match parse("time_bucket(5m)") {
        Expr::FunctionCall { name, args, .. } => (name, args),
        other => panic!("expected FunctionCall, got {other:?}"),
    };
    assert_eq!(name.to_uppercase(), "TIME_BUCKET");
    assert_eq!(args.len(), 1);
    let arg = &args[0];
    assert!(
        matches!(arg, Expr::Literal { value: Value::Text(s), .. } if s.as_ref() == "5m"),
        "expected Text(\"5m\"), got {arg:?}"
    );
}
1175
#[test]
fn placeholder_dollar_one() {
    // `$1` is the first parameter, stored as zero-based index 0.
    let e = parse("$1");
    assert!(
        matches!(e, Expr::Parameter { index: 0, .. }),
        "expected Parameter(0), got {e:?}"
    );
}
1184
#[test]
fn placeholder_dollar_n() {
    // `$7` is stored as zero-based index 6.
    let e = parse("$7");
    assert!(
        matches!(e, Expr::Parameter { index: 6, .. }),
        "expected Parameter(6), got {e:?}"
    );
}
1193
#[test]
fn placeholder_in_string_literal_is_text() {
    // `$1` inside a string literal must NOT parse as a placeholder.
    let e = parse("'$1'");
    assert!(
        matches!(&e, Expr::Literal { value: Value::Text(s), .. } if s.as_ref() == "$1"),
        "expected text literal '$1', got {e:?}"
    );
}
1206
#[test]
fn placeholder_in_comparison() {
    // SELECT-WHERE shape: `id = $1` puts the parameter on the right.
    let rhs = match parse("id = $1") {
        Expr::BinaryOp {
            op: BinOp::Eq, rhs, ..
        } => rhs,
        _ => panic!("root must be Eq"),
    };
    assert!(matches!(*rhs, Expr::Parameter { index: 0, .. }));
}
1219
#[test]
fn placeholder_zero_rejected() {
    // `$0` must fail with an error that mentions placeholders.
    let message = Parser::new("$0")
        .expect("lexer")
        .parse_expr()
        .unwrap_err()
        .to_string();
    assert!(message.contains("placeholder"));
}
1226
#[test]
fn placeholder_question_single() {
    // Lone `?` numbered as parameter 1 (index 0).
    let e = parse("?");
    assert!(
        matches!(e, Expr::Parameter { index: 0, .. }),
        "expected Parameter(0), got {e:?}"
    );
}
1236
#[test]
fn placeholder_question_numbered() {
    // `?7` is stored as zero-based index 6.
    let e = parse("?7");
    assert!(
        matches!(e, Expr::Parameter { index: 6, .. }),
        "expected Parameter(6), got {e:?}"
    );
}
1245
#[test]
fn placeholder_question_numbered_zero_rejected() {
    // `?0` must fail with an error that mentions placeholders.
    let message = Parser::new("?0")
        .expect("lexer")
        .parse_expr()
        .unwrap_err()
        .to_string();
    assert!(message.contains("placeholder"));
}
1252
#[test]
fn placeholder_question_left_to_right() {
    // `id = ? AND name = ?` → params 0 and 1, in source order.
    let (lhs, rhs) = match parse("id = ? AND name = ?") {
        Expr::BinaryOp {
            op: BinOp::And,
            lhs,
            rhs,
            ..
        } => (lhs, rhs),
        _ => panic!("root must be And"),
    };

    // Left comparison carries the first `?`.
    let r1 = match *lhs {
        Expr::BinaryOp {
            op: BinOp::Eq,
            rhs: operand,
            ..
        } => operand,
        _ => panic!("lhs must be Eq"),
    };
    assert!(matches!(*r1, Expr::Parameter { index: 0, .. }));

    // Right comparison carries the second `?`.
    let r2 = match *rhs {
        Expr::BinaryOp {
            op: BinOp::Eq,
            rhs: operand,
            ..
        } => operand,
        _ => panic!("rhs must be Eq"),
    };
    assert!(matches!(*r2, Expr::Parameter { index: 1, .. }));
}
1285
#[test]
fn placeholder_question_in_string_literal_is_text() {
    // `?` inside a string literal stays plain text.
    let e = parse("'?'");
    assert!(
        matches!(&e, Expr::Literal { value: Value::Text(s), .. } if s.as_ref() == "?"),
        "expected text literal '?', got {e:?}"
    );
}
1297
#[test]
fn placeholder_mixing_question_then_dollar_rejected() {
    // Using `?` and then `$n` in one expression must be rejected.
    let mut parser = Parser::new("id = ? AND x = $2").expect("lexer");
    let Err(err) = parser.parse_expr() else {
        panic!("should fail");
    };
    let message = err.to_string();
    assert!(
        message.contains("mix"),
        "expected mixing error, got: {err}"
    );
}
1307
#[test]
fn placeholder_mixing_dollar_then_question_rejected() {
    // Using `$n` and then `?` in one expression must be rejected.
    let mut parser = Parser::new("id = $1 AND x = ?").expect("lexer");
    let Err(err) = parser.parse_expr() else {
        panic!("should fail");
    };
    let message = err.to_string();
    assert!(
        message.contains("mix"),
        "expected mixing error, got: {err}"
    );
}
1317
#[test]
fn placeholder_question_in_comment_ignored() {
    // `?` inside an SQL line comment must not bump the counter, so
    // the `?` after the comment is still the first parameter.
    let e = Parser::new("-- ? ignored\n  ?")
        .expect("lexer")
        .parse_expr()
        .expect("parse_expr");
    assert!(
        matches!(e, Expr::Parameter { index: 0, .. }),
        "expected Parameter(0), got {e:?}"
    );
}
1329
#[test]
fn span_tracks_token_range() {
    // The root span of a parsed expression must be a real
    // (non-synthetic) span whose start precedes its end.
    let span = Parser::new("123 + 456")
        .expect("lexer")
        .parse_expr()
        .expect("parse_expr")
        .span();
    assert!(!span.is_synthetic(), "root span must be real");
    assert!(span.start.offset < span.end.offset);
}
1339}