Skip to main content

reddb_rql/parser/
expr.rs

1//! Pratt-style parser for the Fase 2 `Expr` AST.
2//!
3//! This module is the Week 2 deliverable of the parser v2 refactor
4//! tracked in `/home/cyber/.claude/plans/squishy-mixing-honey.md`.
5//! It produces `ast::Expr` trees with proper operator precedence,
6//! `Span` tracking from the lexer, and support for the full set of
7//! unary / binary / postfix operators the existing hand-rolled
8//! projection climb covers in Fase 1.3 — plus the missing pieces
9//! (CASE, CAST, parenthesised subexprs, IS NULL, IN, BETWEEN).
10//!
11//! # Design notes
12//!
13//! The parser is now the canonical entry point for SQL expression
14//! parsing in the table-query flow:
15//! - `SELECT` projections parse through `Parser::parse_expr`
16//! - `WHERE` / `HAVING` operands parse through `Parser::parse_expr`
17//! - `ORDER BY` expressions parse through `Parser::parse_expr`
18//!
19//! Some legacy AST slots are still adapter-based (`Projection`,
20//! `Filter`, `GROUP BY` strings), so statement parsing still lowers
21//! `Expr` trees into those older shapes at the boundary.
22//!
23//! # Precedence table (matches PG gram.y modulo features we don't have)
24//!
25//! ```text
26//! prec  operators
27//! ----  ----------------------------------
28//!  10   OR
29//!  20   AND
30//!  25   NOT                      (prefix)
31//!  30   = <> < <= > >=           (comparison)
32//!  32   IS NULL / IS NOT NULL    (postfix)
33//!  33   BETWEEN … AND …          (postfix)
34//!  34   IN (…)                   (postfix)
35//!  40   ||                       (string concat)
36//!  50   + -                      (additive)
37//!  60   * / %                    (multiplicative)
38//!  70   -                        (unary negation)
39//!  80   ::type  CAST(…AS type)   (explicit type coercion)
40//! ```
41//!
42//! Higher precedence binds tighter. The climb uses the classic
43//! "min-precedence" algorithm — `parse_expr_prec(min)` loops consuming
44//! any infix operator whose precedence is ≥ `min`, recursing with
45//! `prec + 1` on the right-hand side for left-associativity.
46
47use super::error::ParseError;
48use super::Parser;
49use super::PlaceholderMode;
50use crate::ast::{BinOp, Expr, ExprSubquery, FieldRef, Span, UnaryOp};
51use crate::lexer::Token;
52use reddb_types::types::{DataType, Value};
53
/// Reports whether `unit` is one of the duration suffixes accepted after an
/// integer literal (milliseconds through days, in short and long spellings).
///
/// The comparison ignores ASCII case, so `"MS"`, `"Min"` and `"hours"` all
/// match. Anything outside the table below — including the empty string —
/// yields `false`.
fn is_duration_unit(unit: &str) -> bool {
    // Lowercase spellings, grouped by magnitude.
    const DURATION_UNITS: &[&str] = &[
        // milliseconds
        "ms",
        "msec",
        "millisecond",
        "milliseconds",
        // seconds
        "s",
        "sec",
        "secs",
        "second",
        "seconds",
        // minutes
        "m",
        "min",
        "mins",
        "minute",
        "minutes",
        // hours
        "h",
        "hr",
        "hrs",
        "hour",
        "hours",
        // days
        "d",
        "day",
        "days",
    ];

    DURATION_UNITS
        .iter()
        .any(|&known| unit.eq_ignore_ascii_case(known))
}
80
81fn keyword_function_name(token: &Token) -> Option<&'static str> {
82    match token {
83        Token::Count => Some("COUNT"),
84        Token::Sum => Some("SUM"),
85        Token::Avg => Some("AVG"),
86        Token::Min => Some("MIN"),
87        Token::Max => Some("MAX"),
88        Token::First => Some("FIRST"),
89        Token::Last => Some("LAST"),
90        Token::Left => Some("LEFT"),
91        Token::Right => Some("RIGHT"),
92        Token::Contains => Some("CONTAINS"),
93        Token::Kv => Some("KV"),
94        _ => None,
95    }
96}
97
/// Whether `name` may appear as the function in `fn(...) OVER (...)`.
///
/// Accepts the window-only functions plus the standard aggregates (which
/// behave as window aggregates when an OVER clause is attached). The name is
/// compared without regard to ASCII case. Mirrored loosely from PG's pg_proc
/// catalog — slice 7a only validates lexical eligibility, runtime support
/// arrives with the analytics executor.
fn is_window_eligible_function(name: &str) -> bool {
    // Functions that only make sense with an OVER clause.
    const WINDOW_ONLY: &[&str] = &[
        "LAG",
        "LEAD",
        "ROW_NUMBER",
        "RANK",
        "DENSE_RANK",
        "PERCENT_RANK",
        "CUME_DIST",
        "NTILE",
        "FIRST_VALUE",
        "LAST_VALUE",
        "NTH_VALUE",
    ];
    // Aggregates valid in window position.
    const WINDOW_AGGREGATES: &[&str] = &[
        "COUNT",
        "SUM",
        "AVG",
        "MIN",
        "MAX",
        "STDDEV",
        "VARIANCE",
        "MEDIAN",
        "PERCENTILE",
        "GROUP_CONCAT",
        "STRING_AGG",
        "FIRST",
        "LAST",
        "ARRAY_AGG",
        "COUNT_DISTINCT",
    ];

    WINDOW_ONLY
        .iter()
        .chain(WINDOW_AGGREGATES)
        .any(|&eligible| name.eq_ignore_ascii_case(eligible))
}
136
/// Returns the canonical upper-case name when `name` is one of the
/// zero-argument functions that may be written without parentheses
/// (`CURRENT_TIMESTAMP`, `CURRENT_DATE`, `CURRENT_TIME`), matched without
/// regard to ASCII case. Any other name yields `None`.
fn bare_zero_arg_function_name(name: &str) -> Option<&'static str> {
    const PAREN_FREE_FUNCTIONS: &[&str] = &["CURRENT_TIMESTAMP", "CURRENT_DATE", "CURRENT_TIME"];

    PAREN_FREE_FUNCTIONS
        .iter()
        .copied()
        .find(|canonical| name.eq_ignore_ascii_case(canonical))
}
145
146impl<'a> Parser<'a> {
147    /// Parse a complete expression at the lowest precedence level.
148    /// Entry point for every caller that wants an `Expr` tree.
149    pub fn parse_expr(&mut self) -> Result<Expr, ParseError> {
150        self.parse_expr_prec(0)
151    }
152
153    pub(crate) fn parse_expr_with_min_precedence(
154        &mut self,
155        min_prec: u8,
156    ) -> Result<Expr, ParseError> {
157        self.parse_expr_prec(min_prec)
158    }
159
160    /// Continue parsing an expression after the caller has already
161    /// materialized the left-hand side atom.
162    pub(crate) fn continue_expr(&mut self, left: Expr, min_prec: u8) -> Result<Expr, ParseError> {
163        self.parse_expr_suffix(left, min_prec)
164    }
165
166    /// Pratt climb: parse a unary atom then consume any infix operators
167    /// whose precedence meets or exceeds `min_prec`.
168    fn parse_expr_prec(&mut self, min_prec: u8) -> Result<Expr, ParseError> {
169        // Depth guard: every recursive descent point in the expr
170        // grammar bottoms out here, so checking once is enough to
171        // catch deeply nested literals like `((((((1))))))` and
172        // boolean chains like `NOT NOT NOT NOT … x`.
173        self.enter_depth()?;
174        let result = (|| {
175            let left = self.parse_expr_unary()?;
176            self.parse_expr_suffix(left, min_prec)
177        })();
178        self.exit_depth();
179        result
180    }
181
182    fn parse_expr_suffix(&mut self, mut left: Expr, min_prec: u8) -> Result<Expr, ParseError> {
183        loop {
184            let Some((op, prec)) = self.peek_binop() else {
185                // Not a standard infix op — check for postfix forms.
186                if min_prec <= 32 {
187                    if let Some(node) = self.try_parse_postfix(&left)? {
188                        left = node;
189                        continue;
190                    }
191                }
192                break;
193            };
194            if prec < min_prec {
195                break;
196            }
197            self.advance()?; // consume the operator token
198            let start_span = self.span_start_of(&left);
199            let rhs = self.parse_expr_prec(prec + 1)?;
200            let end_span = self.span_end_of(&rhs);
201            left = Expr::BinaryOp {
202                op,
203                lhs: Box::new(left),
204                rhs: Box::new(rhs),
205                span: Span::new(start_span, end_span),
206            };
207        }
208        Ok(left)
209    }
210
211    /// Parse a unary-prefix expression or drop through to the atomic
212    /// factor. Handles `NOT`, unary `-`, and `+` (no-op sign).
213    fn parse_expr_unary(&mut self) -> Result<Expr, ParseError> {
214        match self.peek() {
215            Token::Not => {
216                let start = self.position();
217                self.advance()?;
218                let operand = self.parse_expr_prec(25)?;
219                let end = self.span_end_of(&operand);
220                Ok(Expr::UnaryOp {
221                    op: UnaryOp::Not,
222                    operand: Box::new(operand),
223                    span: Span::new(start, end),
224                })
225            }
226            Token::Dash => {
227                let start = self.position();
228                self.advance()?;
229                let operand = self.parse_expr_prec(70)?;
230                let end = self.span_end_of(&operand);
231                Ok(Expr::UnaryOp {
232                    op: UnaryOp::Neg,
233                    operand: Box::new(operand),
234                    span: Span::new(start, end),
235                })
236            }
237            Token::Plus => {
238                // Unary plus is a no-op. Consume and recurse.
239                self.advance()?;
240                self.parse_expr_prec(70)
241            }
242            _ => self.parse_expr_factor(),
243        }
244    }
245
246    /// Parse a single atomic expression factor: literal, column ref,
247    /// parenthesised subexpression, CAST, CASE, or function call.
248    fn parse_expr_factor(&mut self) -> Result<Expr, ParseError> {
249        let start = self.position();
250
251        // Parenthesised subexpression: `( expr )`
252        if self.consume(&Token::LParen)? {
253            if self.check(&Token::Select) {
254                let query = self.parse_select_query()?;
255                self.expect(Token::RParen)?;
256                return Ok(Expr::Subquery {
257                    query: ExprSubquery {
258                        query: Box::new(query),
259                    },
260                    span: Span::new(start, self.position()),
261                });
262            }
263            let inner = self.parse_expr_prec(0)?;
264            self.expect(Token::RParen)?;
265            return Ok(inner);
266        }
267
268        // Literal: true / false / null
269        if self.consume(&Token::True)? {
270            return Ok(Expr::Literal {
271                value: Value::Boolean(true),
272                span: Span::new(start, self.position()),
273            });
274        }
275        if self.consume(&Token::False)? {
276            return Ok(Expr::Literal {
277                value: Value::Boolean(false),
278                span: Span::new(start, self.position()),
279            });
280        }
281        if self.consume(&Token::Null)? {
282            return Ok(Expr::Literal {
283                value: Value::Null,
284                span: Span::new(start, self.position()),
285            });
286        }
287
288        // Numeric literals — with optional duration-unit suffix (e.g. `5m`, `10s`, `2h`).
289        // Duration literals are emitted as Value::Text so downstream code sees "5m" verbatim
290        // (matching the legacy Projection::Column("LIT:5m") path used by time_bucket).
291        if let Token::Integer(n) = *self.peek() {
292            self.advance()?;
293            if let Token::Ident(ref unit) = *self.peek() {
294                if is_duration_unit(unit) {
295                    let duration = format!("{n}{}", unit.to_ascii_lowercase());
296                    self.advance()?;
297                    return Ok(Expr::Literal {
298                        value: Value::text(duration),
299                        span: Span::new(start, self.position()),
300                    });
301                }
302            }
303            return Ok(Expr::Literal {
304                value: Value::Integer(n),
305                span: Span::new(start, self.position()),
306            });
307        }
308        if let Token::Float(n) = *self.peek() {
309            self.advance()?;
310            return Ok(Expr::Literal {
311                value: Value::Float(n),
312                span: Span::new(start, self.position()),
313            });
314        }
315        if let Token::String(ref s) = *self.peek() {
316            let text = s.clone();
317            self.advance()?;
318            return Ok(Expr::Literal {
319                value: Value::text(text),
320                span: Span::new(start, self.position()),
321            });
322        }
323
324        // JSON object `{…}` and array `[…]` literals — delegate to the DML literal parser
325        // which already handles the full JSON value grammar including nested objects.
326        // `JsonLiteral` is the strict-JSON variant emitted by the lexer's sub-mode
327        // when `{` is followed by `"`; both shapes route through `parse_literal_value`.
328        if matches!(
329            self.peek(),
330            Token::LBrace | Token::LBracket | Token::JsonLiteral(_)
331        ) {
332            let value = self
333                .parse_literal_value()
334                .map_err(|e| ParseError::new(e.message, self.position()))?;
335            return Ok(Expr::Literal {
336                value,
337                span: Span::new(start, self.position()),
338            });
339        }
340
341        // `?` positional placeholder — auto-numbered left-to-right.
342        // Immediate `?N` uses an explicit 1-based index. Mixing with
343        // `$N` in one statement is rejected.
344        if self.check(&Token::Question) {
345            let (index, span) = self.parse_question_param_index()?;
346            return Ok(Expr::Parameter { index, span });
347        }
348
349        if self.consume(&Token::Dollar)? {
350            // `$N` positional parameter placeholder (1-based in source,
351            // 0-based in the AST so it matches `Vec<Value>` indexing).
352            // Rejected at parse time when N < 1; gaps and arity are
353            // validated by the binder once the full statement is parsed.
354            if let Token::Integer(n) = *self.peek() {
355                if n < 1 {
356                    return Err(ParseError::new(
357                        "placeholder index must be >= 1".to_string(),
358                        self.position(),
359                    ));
360                }
361                if self.placeholder_mode == PlaceholderMode::Question {
362                    return Err(ParseError::new(
363                        "cannot mix `?` and `$N` placeholders in one statement".to_string(),
364                        self.position(),
365                    ));
366                }
367                self.placeholder_mode = PlaceholderMode::Dollar;
368                self.advance()?;
369                return Ok(Expr::Parameter {
370                    index: (n - 1) as usize,
371                    span: Span::new(start, self.position()),
372                });
373            }
374            let path = self.parse_dollar_ref_path()?;
375            let path_lc = path.to_ascii_lowercase();
376            let (name, key) = if let Some(rest) = path_lc.strip_prefix("secret.") {
377                ("__SECRET_REF", format!("red.vault/{rest}"))
378            } else if let Some(rest) = path_lc
379                .strip_prefix("red.secret.")
380                .or_else(|| path_lc.strip_prefix("red.secrets."))
381            {
382                ("__SECRET_REF", format!("red.vault/{rest}"))
383            } else if let Some(rest) = path_lc.strip_prefix("config.") {
384                ("CONFIG", format!("red.config/{rest}"))
385            } else if path_lc.starts_with("red.config.") {
386                let rest = path_lc.trim_start_matches("red.config.");
387                ("CONFIG", format!("red.config/{rest}"))
388            } else {
389                return Err(ParseError::new(
390                    format!(
391                        "unknown $ reference `${path}`; expected $secret.*, $red.secret.*, $red.secrets.*, $config.*, or $red.config.*"
392                    ),
393                    self.position(),
394                ));
395            };
396            return Ok(Expr::FunctionCall {
397                name: name.to_string(),
398                args: vec![Expr::Literal {
399                    value: Value::text(key),
400                    span: Span::new(start, self.position()),
401                }],
402                span: Span::new(start, self.position()),
403            });
404        }
405
406        if let Some(name) = keyword_function_name(self.peek()) {
407            if matches!(self.peek_next()?, Token::LParen) {
408                self.advance()?; // consume the keyword token
409                return self.parse_function_call_expr_with_name(start, name.to_string());
410            }
411        }
412
413        // Identifier-led constructs: function call, CAST, CASE, column.
414        //
415        // We commit to consuming the identifier immediately and then
416        // inspect the NEXT token to decide shape. This avoids needing
417        // two-token lookahead on the parser. If the next token is `(`
418        // it's a function call; if `.` it's a qualified column ref;
419        // otherwise it's a bare column ref.
420        if let Token::Ident(ref name) = *self.peek() {
421            let name_upper = name.to_uppercase();
422
423            // CAST(expr AS type) — must test before consuming because
424            // CAST is not a reserved keyword; users could legitimately
425            // have a column literally named `cast`. Distinguish by
426            // looking at whether the identifier equals CAST AND is
427            // immediately followed by `(`. Since we can't two-step
428            // lookahead, handle CAST by parsing the ident, then if the
429            // uppercased name is CAST and the next token is `(`,
430            // switch to the CAST form; otherwise the saved name
431            // becomes the first segment of a column ref.
432            if name_upper == "CASE" {
433                return self.parse_case_expr(start);
434            }
435
436            let saved_name = name.clone();
437            self.advance()?; // consume the identifier unconditionally
438
439            // Function call / CAST: IDENT (
440            if matches!(self.peek(), Token::LParen) {
441                return self.parse_function_call_expr_with_name(start, saved_name);
442            }
443
444            if let Some(function_name) = bare_zero_arg_function_name(&saved_name) {
445                let end = self.position();
446                return Ok(Expr::FunctionCall {
447                    name: function_name.to_string(),
448                    args: Vec::new(),
449                    span: Span::new(start, end),
450                });
451            }
452
453            // Qualified column: IDENT.IDENT[.IDENT …]
454            if matches!(self.peek(), Token::Dot) {
455                let mut segments = vec![saved_name];
456                while self.consume(&Token::Dot)? {
457                    segments.push(self.expect_ident_or_keyword()?);
458                }
459                let field = FieldRef::TableColumn {
460                    table: segments.remove(0),
461                    column: segments.join("."),
462                };
463                let end = self.position();
464                return Ok(Expr::Column {
465                    field,
466                    span: Span::new(start, end),
467                });
468            }
469
470            // Bare column reference with empty table name.
471            let field = FieldRef::TableColumn {
472                table: String::new(),
473                column: saved_name,
474            };
475            let end = self.position();
476            return Ok(Expr::Column {
477                field,
478                span: Span::new(start, end),
479            });
480        }
481
482        // Default: column reference (optionally qualified: table.column).
483        // Reached only when the leading token is not an Ident. Falls
484        // through to parse_field_ref which handles keyword-shaped
485        // column names.
486        let field = self.parse_field_ref()?;
487        let end = self.position();
488        Ok(Expr::Column {
489            field,
490            span: Span::new(start, end),
491        })
492    }
493
494    fn parse_dollar_ref_path(&mut self) -> Result<String, ParseError> {
495        let mut path = self.expect_ident_or_keyword()?;
496        while self.consume(&Token::Dot)? {
497            let next = self.expect_ident_or_keyword()?;
498            path = format!("{path}.{next}");
499        }
500        Ok(path)
501    }
502
503    fn parse_function_call_expr_with_name(
504        &mut self,
505        start: crate::lexer::Position,
506        function_name: String,
507    ) -> Result<Expr, ParseError> {
508        let call = self.parse_function_call_expr_with_name_inner(start, function_name)?;
509        // Issue #589 slice 7a: `fn(args) OVER (...)` lifts a plain
510        // FunctionCall into a WindowFunctionCall carrying the OVER
511        // clause. CAST and other shapes that don't return a
512        // FunctionCall are rejected by `parse_over_clause_for` so the
513        // user gets a clear error rather than silent acceptance.
514        if matches!(self.peek(), Token::Over) {
515            return self.lift_to_window_call(start, call);
516        }
517        Ok(call)
518    }
519
520    fn parse_function_call_expr_with_name_inner(
521        &mut self,
522        start: crate::lexer::Position,
523        function_name: String,
524    ) -> Result<Expr, ParseError> {
525        self.expect(Token::LParen)?;
526
527        if function_name.eq_ignore_ascii_case("CAST") {
528            let inner = self.parse_expr_prec(0)?;
529            self.expect(Token::As)?;
530            let type_name = self.expect_ident_or_keyword()?;
531            self.expect(Token::RParen)?;
532            let end = self.position();
533            let Some(target) = DataType::from_sql_name(&type_name) else {
534                return Err(ParseError::new(
535                    // F-05: `type_name` is caller-controlled identifier text.
536                    // Render via `{:?}` so embedded CR/LF/NUL/quotes are
537                    // escaped before reaching downstream serialization sinks.
538                    format!("unknown type name {type_name:?} in CAST"),
539                    self.position(),
540                ));
541            };
542            return Ok(Expr::Cast {
543                inner: Box::new(inner),
544                target,
545                span: Span::new(start, end),
546            });
547        }
548
549        if function_name.eq_ignore_ascii_case("TRIM") {
550            let (name, args) = self.parse_trim_expr_args()?;
551            self.expect(Token::RParen)?;
552            let end = self.position();
553            return Ok(Expr::FunctionCall {
554                name,
555                args,
556                span: Span::new(start, end),
557            });
558        }
559
560        if function_name.eq_ignore_ascii_case("POSITION") {
561            let args = self.parse_position_expr_args()?;
562            self.expect(Token::RParen)?;
563            let end = self.position();
564            return Ok(Expr::FunctionCall {
565                name: function_name,
566                args,
567                span: Span::new(start, end),
568            });
569        }
570
571        if function_name.eq_ignore_ascii_case("SUBSTRING") {
572            let args = self.parse_substring_expr_args()?;
573            self.expect(Token::RParen)?;
574            let end = self.position();
575            return Ok(Expr::FunctionCall {
576                name: function_name,
577                args,
578                span: Span::new(start, end),
579            });
580        }
581
582        if function_name.eq_ignore_ascii_case("COUNT") {
583            if self.consume(&Token::Distinct)? {
584                let arg = self.parse_expr_prec(0)?;
585                self.expect(Token::RParen)?;
586                let end = self.position();
587                return Ok(Expr::FunctionCall {
588                    name: "COUNT_DISTINCT".to_string(),
589                    args: vec![arg],
590                    span: Span::new(start, end),
591                });
592            }
593
594            if self.consume(&Token::Star)? {
595                self.expect(Token::RParen)?;
596                let end = self.position();
597                return Ok(Expr::FunctionCall {
598                    name: function_name,
599                    args: vec![Expr::Column {
600                        field: FieldRef::TableColumn {
601                            table: String::new(),
602                            column: "*".to_string(),
603                        },
604                        span: Span::synthetic(),
605                    }],
606                    span: Span::new(start, end),
607                });
608            }
609        }
610
611        // CONFIG()/KV() take bare dotted config paths as arguments
612        // (e.g. `CONFIG(red.ai.default.provider, openai)`,
613        // `KV(cfg, default.role, guest)`). Parsed through the generic
614        // expression grammar these become column references — and a
615        // keyword segment like `default` would be folded to `DEFAULT`,
616        // breaking the case-sensitive config-key lookup, while a
617        // source-free `SELECT CONFIG(...)` would fail with "unknown
618        // column". Capture each path-shaped argument as a lowercased
619        // string literal instead so it matches stored keys (which
620        // `SET CONFIG` also lowercases) and never resolves as a column.
621        if function_name.eq_ignore_ascii_case("CONFIG") || function_name.eq_ignore_ascii_case("KV")
622        {
623            let mut args = Vec::new();
624            if !self.check(&Token::RParen) {
625                loop {
626                    args.push(self.parse_config_kv_arg(start)?);
627                    if !self.consume(&Token::Comma)? {
628                        break;
629                    }
630                }
631            }
632            self.expect(Token::RParen)?;
633            let end = self.position();
634            return Ok(Expr::FunctionCall {
635                name: function_name,
636                args,
637                span: Span::new(start, end),
638            });
639        }
640
641        let mut args = Vec::new();
642        if !self.check(&Token::RParen) {
643            loop {
644                args.push(self.parse_expr_prec(0)?);
645                if !self.consume(&Token::Comma)? {
646                    break;
647                }
648            }
649        }
650        self.expect(Token::RParen)?;
651        let end = self.position();
652        Ok(Expr::FunctionCall {
653            name: function_name,
654            args,
655            span: Span::new(start, end),
656        })
657    }
658
659    /// Parse a single CONFIG()/KV() argument. A bare identifier or
660    /// dotted path (including keyword-shaped segments) becomes a
661    /// lowercased string literal — the config-key form. Anything else
662    /// (quoted string, number, `?`/`$N` placeholder, parenthesised
663    /// expression) falls through to the normal expression grammar so
664    /// dynamic defaults still work.
665    fn parse_config_kv_arg(&mut self, start: crate::lexer::Position) -> Result<Expr, ParseError> {
666        // Literals, placeholders and parenthesised sub-expressions are
667        // real expressions (dynamic defaults); everything else that can
668        // open an argument here is an identifier or keyword that forms a
669        // bare config path.
670        let mut is_expression_start = matches!(
671            self.peek(),
672            Token::String(_)
673                | Token::Integer(_)
674                | Token::Float(_)
675                | Token::Dollar
676                | Token::Question
677                | Token::LParen
678        );
679        // A bare identifier immediately followed by `(` is a nested
680        // function call (e.g. a dynamic default), not a config path.
681        if matches!(self.peek(), Token::Ident(_)) && matches!(self.peek_next()?, Token::LParen) {
682            is_expression_start = true;
683        }
684        if !is_expression_start && !self.check(&Token::RParen) {
685            let mut path = self.expect_ident_or_keyword()?;
686            while self.consume(&Token::Dot)? {
687                let next = self.expect_ident_or_keyword()?;
688                path = format!("{path}.{next}");
689            }
690            let end = self.position();
691            return Ok(Expr::Literal {
692                value: Value::text(path.to_ascii_lowercase()),
693                span: Span::new(start, end),
694            });
695        }
696        self.parse_expr_prec(0)
697    }
698
699    /// Wrap a freshly-parsed `Expr::FunctionCall` in
700    /// `Expr::WindowFunctionCall` by consuming the trailing `OVER (...)`
701    /// clause. The caller has already confirmed the next token is
702    /// `OVER`. Rejects:
703    /// - CAST(...) OVER (...) and other non-FunctionCall shapes.
704    /// - Function names that are neither window-only nor aggregates.
705    fn lift_to_window_call(
706        &mut self,
707        start: crate::lexer::Position,
708        call: Expr,
709    ) -> Result<Expr, ParseError> {
710        let (name, args) = match call {
711            Expr::FunctionCall { name, args, .. } => (name, args),
712            other => {
713                return Err(ParseError::new(
714                    format!(
715                        "OVER may only follow a function call, got {:?}",
716                        std::mem::discriminant(&other)
717                    ),
718                    self.position(),
719                ));
720            }
721        };
722        if !is_window_eligible_function(&name) {
723            return Err(ParseError::new(
724                format!(
725                    "function `{}` cannot be used with an OVER clause; \
726                     expected a window function (LAG, LEAD, ROW_NUMBER, \
727                     RANK, DENSE_RANK) or an aggregate",
728                    name.to_uppercase()
729                ),
730                self.position(),
731            ));
732        }
733        let window = self.parse_over_clause()?;
734        let end = self.position();
735        Ok(Expr::WindowFunctionCall {
736            name,
737            args,
738            window,
739            span: Span::new(start, end),
740        })
741    }
742
743    /// Parse the `OVER ( [PARTITION BY ...] [ORDER BY ...] [frame] )`
744    /// clause. The leading `OVER` keyword is consumed here.
745    fn parse_over_clause(&mut self) -> Result<crate::ast::WindowSpec, ParseError> {
746        self.expect(Token::Over)?;
747        self.expect(Token::LParen)?;
748
749        let mut spec = crate::ast::WindowSpec::default();
750
751        if self.consume(&Token::Partition)? {
752            self.expect(Token::By)?;
753            loop {
754                spec.partition_by.push(self.parse_expr_prec(0)?);
755                if !self.consume(&Token::Comma)? {
756                    break;
757                }
758            }
759        }
760
761        if self.consume(&Token::Order)? {
762            self.expect(Token::By)?;
763            loop {
764                let expr = self.parse_expr_prec(0)?;
765                let ascending = if self.consume(&Token::Desc)? {
766                    false
767                } else {
768                    self.consume(&Token::Asc)?;
769                    true
770                };
771                // NULLS FIRST / LAST defaults mirror PG: nulls last for
772                // ASC, nulls first for DESC. Explicit clause overrides.
773                let mut nulls_first = !ascending;
774                if self.consume(&Token::Nulls)? {
775                    if self.consume(&Token::First)? {
776                        nulls_first = true;
777                    } else if self.consume(&Token::Last)? {
778                        nulls_first = false;
779                    } else {
780                        return Err(ParseError::new(
781                            "expected FIRST or LAST after NULLS".to_string(),
782                            self.position(),
783                        ));
784                    }
785                }
786                spec.order_by.push(crate::ast::WindowOrderItem {
787                    expr,
788                    ascending,
789                    nulls_first,
790                });
791                if !self.consume(&Token::Comma)? {
792                    break;
793                }
794            }
795        }
796
797        if matches!(self.peek(), Token::Rows | Token::Range) {
798            spec.frame = Some(self.parse_window_frame()?);
799        }
800
801        self.expect(Token::RParen)?;
802        Ok(spec)
803    }
804
805    fn parse_window_frame(&mut self) -> Result<crate::ast::WindowFrame, ParseError> {
806        let unit = if self.consume(&Token::Rows)? {
807            crate::ast::WindowFrameUnit::Rows
808        } else if self.consume(&Token::Range)? {
809            crate::ast::WindowFrameUnit::Range
810        } else {
811            return Err(ParseError::new(
812                "expected ROWS or RANGE in window frame".to_string(),
813                self.position(),
814            ));
815        };
816
817        if self.consume(&Token::Between)? {
818            let start = self.parse_window_frame_bound()?;
819            self.expect(Token::And)?;
820            let end = self.parse_window_frame_bound()?;
821            Ok(crate::ast::WindowFrame {
822                unit,
823                start,
824                end: Some(end),
825            })
826        } else {
827            let start = self.parse_window_frame_bound()?;
828            Ok(crate::ast::WindowFrame {
829                unit,
830                start,
831                end: None,
832            })
833        }
834    }
835
836    fn parse_window_frame_bound(&mut self) -> Result<crate::ast::WindowFrameBound, ParseError> {
837        use crate::ast::WindowFrameBound;
838        if self.consume(&Token::Unbounded)? {
839            if self.consume(&Token::Preceding)? {
840                return Ok(WindowFrameBound::UnboundedPreceding);
841            }
842            if self.consume(&Token::Following)? {
843                return Ok(WindowFrameBound::UnboundedFollowing);
844            }
845            return Err(ParseError::new(
846                "expected PRECEDING or FOLLOWING after UNBOUNDED".to_string(),
847                self.position(),
848            ));
849        }
850        if self.consume(&Token::Current)? {
851            self.expect(Token::Row)?;
852            return Ok(WindowFrameBound::CurrentRow);
853        }
854        // Numeric / expression offset: `N PRECEDING` / `N FOLLOWING`.
855        let offset = self.parse_expr_prec(0)?;
856        if self.consume(&Token::Preceding)? {
857            return Ok(WindowFrameBound::Preceding(Box::new(offset)));
858        }
859        if self.consume(&Token::Following)? {
860            return Ok(WindowFrameBound::Following(Box::new(offset)));
861        }
862        Err(ParseError::new(
863            "expected PRECEDING or FOLLOWING after frame offset".to_string(),
864            self.position(),
865        ))
866    }
867
868    /// Parse both CASE forms:
869    /// - searched: `CASE WHEN cond THEN val [WHEN …] [ELSE val] END`
870    /// - simple:   `CASE expr WHEN val THEN val [WHEN …] [ELSE val] END`
871    ///
872    /// The simple form is desugared into the searched form: each
873    /// `WHEN <value>` becomes the equality condition `<selector> = <value>`,
874    /// which preserves SQL's three-valued comparison semantics (a NULL
875    /// selector never matches a WHEN value) without growing the `Expr::Case`
876    /// AST or the executor.
877    ///
878    /// Assumes the caller has already peeked `CASE`.
879    fn parse_case_expr(&mut self, start: crate::lexer::Position) -> Result<Expr, ParseError> {
880        self.advance()?; // consume CASE
881                         // Simple CASE: a selector expression precedes the first WHEN.
882        let selector = if matches!(self.peek(), Token::Ident(id) if id.eq_ignore_ascii_case("WHEN"))
883        {
884            None
885        } else {
886            Some(self.parse_expr_prec(0)?)
887        };
888        let mut branches: Vec<(Expr, Expr)> = Vec::new();
889        loop {
890            if !self.consume_ident_ci("WHEN")? {
891                break;
892            }
893            let when_val = self.parse_expr_prec(0)?;
894            // Searched form keeps the WHEN expression as the condition;
895            // simple form rewrites it to `selector = when_val`.
896            let cond = match &selector {
897                None => when_val,
898                Some(sel) => {
899                    let span = Span::new(sel.span().start, when_val.span().end);
900                    Expr::BinaryOp {
901                        op: BinOp::Eq,
902                        lhs: Box::new(sel.clone()),
903                        rhs: Box::new(when_val),
904                        span,
905                    }
906                }
907            };
908            if !self.consume_ident_ci("THEN")? {
909                return Err(ParseError::new(
910                    "expected THEN after CASE WHEN condition".to_string(),
911                    self.position(),
912                ));
913            }
914            let then_val = self.parse_expr_prec(0)?;
915            branches.push((cond, then_val));
916        }
917        if branches.is_empty() {
918            return Err(ParseError::new(
919                "CASE must have at least one WHEN branch".to_string(),
920                self.position(),
921            ));
922        }
923        let else_ = if self.consume_ident_ci("ELSE")? {
924            Some(Box::new(self.parse_expr_prec(0)?))
925        } else {
926            None
927        };
928        if !self.consume_ident_ci("END")? {
929            return Err(ParseError::new(
930                "expected END to close CASE expression".to_string(),
931                self.position(),
932            ));
933        }
934        let end = self.position();
935        Ok(Expr::Case {
936            branches,
937            else_,
938            span: Span::new(start, end),
939        })
940    }
941
942    fn parse_trim_expr_args(&mut self) -> Result<(String, Vec<Expr>), ParseError> {
943        let mut function_name = "TRIM".to_string();
944
945        if self.consume_ident_ci("LEADING")? {
946            function_name = "LTRIM".to_string();
947        } else if self.consume_ident_ci("TRAILING")? {
948            function_name = "RTRIM".to_string();
949        } else if self.consume_ident_ci("BOTH")? {
950            function_name = "TRIM".to_string();
951        }
952
953        if self.consume(&Token::From)? {
954            let source = self.parse_expr_prec(0)?;
955            return Ok((function_name, vec![source]));
956        }
957
958        let first = self.parse_expr_prec(0)?;
959
960        if self.consume(&Token::Comma)? {
961            let second = self.parse_expr_prec(0)?;
962            return Ok((function_name, vec![first, second]));
963        }
964
965        if self.consume(&Token::From)? {
966            let source = self.parse_expr_prec(0)?;
967            return Ok((function_name, vec![source, first]));
968        }
969
970        Ok((function_name, vec![first]))
971    }
972
973    /// PostgreSQL-style `POSITION(substr IN string)` or plain
974    /// `POSITION(substr, string)` lowered to the ordinary two-argument
975    /// function form.
976    fn parse_position_expr_args(&mut self) -> Result<Vec<Expr>, ParseError> {
977        // `IN` is also a postfix operator in the main expression grammar, so
978        // parse the first operand above postfix-IN precedence and then consume
979        // the function's `IN` keyword explicitly.
980        let needle = self.parse_expr_prec(35)?;
981        if !self.consume(&Token::Comma)? {
982            self.expect(Token::In)?;
983        }
984        let haystack = self.parse_expr_prec(0)?;
985        Ok(vec![needle, haystack])
986    }
987
988    /// PostgreSQL-style `SUBSTRING` syntax:
989    /// - `SUBSTRING(expr FROM start [FOR count])`
990    /// - `SUBSTRING(expr FOR count [FROM start])`
991    /// - plain function-call form `SUBSTRING(expr, start[, count])`
992    ///
993    /// The SQL-syntax variants are desugared to the comma-arg form so the
994    /// rest of the stack sees the same `Expr::FunctionCall` shape.
995    fn parse_substring_expr_args(&mut self) -> Result<Vec<Expr>, ParseError> {
996        let source = self.parse_expr_prec(0)?;
997
998        if self.consume(&Token::Comma)? {
999            let mut args = vec![source];
1000            loop {
1001                args.push(self.parse_expr_prec(0)?);
1002                if !self.consume(&Token::Comma)? {
1003                    break;
1004                }
1005            }
1006            return Ok(args);
1007        }
1008
1009        if self.consume(&Token::From)? {
1010            let start = self.parse_expr_prec(0)?;
1011            if self.consume(&Token::For)? {
1012                let count = self.parse_expr_prec(0)?;
1013                return Ok(vec![source, start, count]);
1014            }
1015            return Ok(vec![source, start]);
1016        }
1017
1018        if self.consume(&Token::For)? {
1019            let count = self.parse_expr_prec(0)?;
1020            if self.consume(&Token::From)? {
1021                let start = self.parse_expr_prec(0)?;
1022                return Ok(vec![source, start, count]);
1023            }
1024            return Ok(vec![source, Expr::lit(Value::Integer(1)), count]);
1025        }
1026
1027        Ok(vec![source])
1028    }
1029
1030    /// Try to consume a postfix operator on top of the already-parsed
1031    /// `left` expression: `IS [NOT] NULL`, `[NOT] BETWEEN … AND …`,
1032    /// `[NOT] IN (…)`. Returns `Ok(None)` if no postfix follows.
1033    ///
1034    /// NOT at this position is unambiguous — prefix `NOT` is always
1035    /// consumed at `parse_expr_unary` level before reaching postfix.
1036    /// So seeing `NOT` here means the user wrote `x NOT BETWEEN …`
1037    /// or `x NOT IN …`; we consume it eagerly and require BETWEEN
1038    /// or IN to follow.
1039    fn try_parse_postfix(&mut self, left: &Expr) -> Result<Option<Expr>, ParseError> {
1040        let start = self.span_start_of(left);
1041
1042        // IS [NOT] NULL
1043        if self.consume(&Token::Is)? {
1044            let negated = self.consume(&Token::Not)?;
1045            self.expect(Token::Null)?;
1046            let end = self.position();
1047            return Ok(Some(Expr::IsNull {
1048                operand: Box::new(left.clone()),
1049                negated,
1050                span: Span::new(start, end),
1051            }));
1052        }
1053
1054        // Detect NOT BETWEEN / NOT IN. NOT is consumed eagerly — we
1055        // don't have two-token lookahead and the grammar guarantees
1056        // no other valid postfix starts with NOT.
1057        let negated = if matches!(self.peek(), Token::Not) {
1058            self.advance()?;
1059            if !matches!(self.peek(), Token::Between | Token::In) {
1060                return Err(ParseError::new(
1061                    "expected BETWEEN or IN after postfix NOT".to_string(),
1062                    self.position(),
1063                ));
1064            }
1065            true
1066        } else {
1067            false
1068        };
1069
1070        // BETWEEN low AND high
1071        if self.consume(&Token::Between)? {
1072            let low = self.parse_expr_prec(34)?;
1073            self.expect(Token::And)?;
1074            let high = self.parse_expr_prec(34)?;
1075            let end = self.position();
1076            return Ok(Some(Expr::Between {
1077                target: Box::new(left.clone()),
1078                low: Box::new(low),
1079                high: Box::new(high),
1080                negated,
1081                span: Span::new(start, end),
1082            }));
1083        }
1084
1085        // IN (v1, v2, …)
1086        if self.consume(&Token::In)? {
1087            self.expect(Token::LParen)?;
1088            let mut values = Vec::new();
1089            if self.check(&Token::Select) {
1090                let query = self.parse_select_query()?;
1091                values.push(Expr::Subquery {
1092                    query: ExprSubquery {
1093                        query: Box::new(query),
1094                    },
1095                    span: Span::new(self.span_start_of(left), self.position()),
1096                });
1097            } else if !self.check(&Token::RParen) {
1098                loop {
1099                    values.push(self.parse_expr_prec(0)?);
1100                    if !self.consume(&Token::Comma)? {
1101                        break;
1102                    }
1103                }
1104            }
1105            self.expect(Token::RParen)?;
1106            let end = self.position();
1107            return Ok(Some(Expr::InList {
1108                target: Box::new(left.clone()),
1109                values,
1110                negated,
1111                span: Span::new(start, end),
1112            }));
1113        }
1114
1115        if negated {
1116            // Unreachable because the early-return above already
1117            // validated NOT is followed by BETWEEN or IN. Guarded
1118            // to keep callers loud if the grammar grows later.
1119            return Err(ParseError::new(
1120                "internal: NOT consumed without BETWEEN/IN follow".to_string(),
1121                self.position(),
1122            ));
1123        }
1124        Ok(None)
1125    }
1126
1127    /// Peek the current token and translate it into a `BinOp` plus
1128    /// its precedence. Returns `None` if the token is not a recognised
1129    /// infix operator — the caller then tries postfix handling.
1130    fn peek_binop(&self) -> Option<(BinOp, u8)> {
1131        let op = match self.peek() {
1132            Token::Or => BinOp::Or,
1133            Token::And => BinOp::And,
1134            Token::Eq => BinOp::Eq,
1135            Token::Ne => BinOp::Ne,
1136            Token::Lt => BinOp::Lt,
1137            Token::Le => BinOp::Le,
1138            Token::Gt => BinOp::Gt,
1139            Token::Ge => BinOp::Ge,
1140            Token::DoublePipe => BinOp::Concat,
1141            Token::Plus => BinOp::Add,
1142            Token::Dash => BinOp::Sub,
1143            Token::Star => BinOp::Mul,
1144            Token::Slash => BinOp::Div,
1145            Token::Percent => BinOp::Mod,
1146            _ => return None,
1147        };
1148        Some((op, op.precedence()))
1149    }
1150
1151    /// Return the start position of an expression's span. Handles the
1152    /// synthetic case by falling back to the current parser cursor,
1153    /// which is good enough for the Pratt climb since the caller just
1154    /// parsed the atom.
1155    fn span_start_of(&self, expr: &Expr) -> crate::lexer::Position {
1156        let s = expr.span();
1157        if s.is_synthetic() {
1158            self.position()
1159        } else {
1160            s.start
1161        }
1162    }
1163
1164    /// Return the end position of an expression's span — same
1165    /// synthetic fallback as `span_start_of`.
1166    fn span_end_of(&self, expr: &Expr) -> crate::lexer::Position {
1167        let s = expr.span();
1168        if s.is_synthetic() {
1169            self.position()
1170        } else {
1171            s.end
1172        }
1173    }
1174}
1175
1176// Avoid `unused` lints in partial-migration builds where the analyzer
1177// still does not consume every expression shape directly.
1178#[allow(dead_code)]
1179fn _expr_module_used(_: Expr) {}
1180
1181#[cfg(test)]
1182mod tests {
1183    use super::*;
1184    use crate::ast::FieldRef;
1185
1186    fn parse(input: &str) -> Expr {
1187        let mut parser = Parser::new(input).expect("lexer init");
1188        let expr = parser.parse_expr().expect("parse_expr");
1189        expr
1190    }
1191
1192    #[test]
1193    fn literal_integer() {
1194        let e = parse("42");
1195        match e {
1196            Expr::Literal {
1197                value: Value::Integer(42),
1198                ..
1199            } => {}
1200            other => panic!("expected Integer(42), got {other:?}"),
1201        }
1202    }
1203
1204    #[test]
1205    fn literal_float() {
1206        let e = parse("3.14");
1207        match e {
1208            Expr::Literal {
1209                value: Value::Float(f),
1210                ..
1211            } => assert!((f - 3.14).abs() < 1e-9),
1212            other => panic!("expected float literal, got {other:?}"),
1213        }
1214    }
1215
1216    #[test]
1217    fn literal_string() {
1218        let e = parse("'hello'");
1219        match e {
1220            Expr::Literal {
1221                value: Value::Text(ref s),
1222                ..
1223            } if s.as_ref() == "hello" => {}
1224            other => panic!("expected Text(hello), got {other:?}"),
1225        }
1226    }
1227
1228    #[test]
1229    fn literal_booleans_and_null() {
1230        assert!(matches!(
1231            parse("TRUE"),
1232            Expr::Literal {
1233                value: Value::Boolean(true),
1234                ..
1235            }
1236        ));
1237        assert!(matches!(
1238            parse("FALSE"),
1239            Expr::Literal {
1240                value: Value::Boolean(false),
1241                ..
1242            }
1243        ));
1244        assert!(matches!(
1245            parse("NULL"),
1246            Expr::Literal {
1247                value: Value::Null,
1248                ..
1249            }
1250        ));
1251    }
1252
1253    #[test]
1254    fn bare_column() {
1255        let e = parse("user_id");
1256        match e {
1257            Expr::Column {
1258                field: FieldRef::TableColumn { column, .. },
1259                ..
1260            } => {
1261                assert_eq!(column, "user_id");
1262            }
1263            other => panic!("expected column, got {other:?}"),
1264        }
1265    }
1266
1267    #[test]
1268    fn arithmetic_precedence_mul_over_add() {
1269        // a + b * c  →  Add(a, Mul(b, c))
1270        let e = parse("a + b * c");
1271        let Expr::BinaryOp {
1272            op: BinOp::Add,
1273            rhs,
1274            ..
1275        } = e
1276        else {
1277            panic!("root must be Add");
1278        };
1279        let Expr::BinaryOp { op: BinOp::Mul, .. } = *rhs else {
1280            panic!("rhs must be Mul");
1281        };
1282    }
1283
1284    #[test]
1285    fn arithmetic_left_associativity() {
1286        // a - b - c  →  Sub(Sub(a, b), c)
1287        let e = parse("a - b - c");
1288        let Expr::BinaryOp {
1289            op: BinOp::Sub,
1290            lhs,
1291            ..
1292        } = e
1293        else {
1294            panic!("root must be Sub");
1295        };
1296        let Expr::BinaryOp { op: BinOp::Sub, .. } = *lhs else {
1297            panic!("lhs must be Sub (left-assoc)");
1298        };
1299    }
1300
1301    #[test]
1302    fn parenthesised_override() {
1303        // (a + b) * c  →  Mul(Add(a, b), c)
1304        let e = parse("(a + b) * c");
1305        let Expr::BinaryOp {
1306            op: BinOp::Mul,
1307            lhs,
1308            ..
1309        } = e
1310        else {
1311            panic!("root must be Mul");
1312        };
1313        let Expr::BinaryOp { op: BinOp::Add, .. } = *lhs else {
1314            panic!("lhs must be Add");
1315        };
1316    }
1317
1318    #[test]
1319    fn comparison_binds_weaker_than_arith() {
1320        // a + 1 = b - 2
1321        //   →  Eq(Add(a, 1), Sub(b, 2))
1322        let e = parse("a + 1 = b - 2");
1323        let Expr::BinaryOp {
1324            op: BinOp::Eq,
1325            lhs,
1326            rhs,
1327            ..
1328        } = e
1329        else {
1330            panic!("root must be Eq");
1331        };
1332        assert!(matches!(*lhs, Expr::BinaryOp { op: BinOp::Add, .. }));
1333        assert!(matches!(*rhs, Expr::BinaryOp { op: BinOp::Sub, .. }));
1334    }
1335
1336    #[test]
1337    fn and_binds_tighter_than_or() {
1338        // a OR b AND c  →  Or(a, And(b, c))
1339        let e = parse("a OR b AND c");
1340        let Expr::BinaryOp {
1341            op: BinOp::Or, rhs, ..
1342        } = e
1343        else {
1344            panic!("root must be Or");
1345        };
1346        assert!(matches!(*rhs, Expr::BinaryOp { op: BinOp::And, .. }));
1347    }
1348
1349    #[test]
1350    fn unary_negation() {
1351        let e = parse("-a");
1352        let Expr::UnaryOp {
1353            op: UnaryOp::Neg, ..
1354        } = e
1355        else {
1356            panic!("expected unary Neg");
1357        };
1358    }
1359
1360    #[test]
1361    fn unary_not() {
1362        let e = parse("NOT a");
1363        let Expr::UnaryOp {
1364            op: UnaryOp::Not, ..
1365        } = e
1366        else {
1367            panic!("expected unary Not");
1368        };
1369    }
1370
1371    #[test]
1372    fn concat_operator() {
1373        let e = parse("'hello' || name");
1374        let Expr::BinaryOp {
1375            op: BinOp::Concat, ..
1376        } = e
1377        else {
1378            panic!("expected Concat");
1379        };
1380    }
1381
1382    #[test]
1383    fn cast_expr() {
1384        let e = parse("CAST(age AS TEXT)");
1385        let Expr::Cast { target, .. } = e else {
1386            panic!("expected Cast");
1387        };
1388        assert_eq!(target, DataType::Text);
1389    }
1390
1391    #[test]
1392    fn case_expr() {
1393        let e = parse("CASE WHEN a = 1 THEN 'one' WHEN a = 2 THEN 'two' ELSE 'other' END");
1394        let Expr::Case {
1395            branches, else_, ..
1396        } = e
1397        else {
1398            panic!("expected Case");
1399        };
1400        assert_eq!(branches.len(), 2);
1401        assert!(else_.is_some());
1402    }
1403
1404    #[test]
1405    fn simple_case_desugars_to_equality() {
1406        let e = parse("CASE id WHEN 1 THEN 'one' WHEN 2 THEN 'two' ELSE 'many' END");
1407        let Expr::Case {
1408            branches, else_, ..
1409        } = e
1410        else {
1411            panic!("expected Case");
1412        };
1413        assert_eq!(branches.len(), 2);
1414        assert!(else_.is_some());
1415        // Each WHEN value is rewritten to `selector = value`.
1416        for (cond, _) in &branches {
1417            let Expr::BinaryOp { op, lhs, .. } = cond else {
1418                panic!("expected desugared equality condition");
1419            };
1420            assert_eq!(*op, BinOp::Eq);
1421            assert!(matches!(**lhs, Expr::Column { .. }));
1422        }
1423    }
1424
1425    #[test]
1426    fn is_null_postfix() {
1427        let e = parse("name IS NULL");
1428        assert!(matches!(e, Expr::IsNull { negated: false, .. }));
1429    }
1430
1431    #[test]
1432    fn is_not_null_postfix() {
1433        let e = parse("name IS NOT NULL");
1434        assert!(matches!(e, Expr::IsNull { negated: true, .. }));
1435    }
1436
1437    #[test]
1438    fn between_with_columns() {
1439        let e = parse("temp BETWEEN min_t AND max_t");
1440        let Expr::Between {
1441            target,
1442            low,
1443            high,
1444            negated,
1445            ..
1446        } = e
1447        else {
1448            panic!("expected Between");
1449        };
1450        assert!(!negated);
1451        assert!(matches!(*target, Expr::Column { .. }));
1452        assert!(matches!(*low, Expr::Column { .. }));
1453        assert!(matches!(*high, Expr::Column { .. }));
1454    }
1455
1456    #[test]
1457    fn not_between_negates() {
1458        let e = parse("temp NOT BETWEEN 0 AND 100");
1459        let Expr::Between { negated: true, .. } = e else {
1460            panic!("expected negated Between");
1461        };
1462    }
1463
1464    #[test]
1465    fn in_list_literal() {
1466        let e = parse("status IN (1, 2, 3)");
1467        let Expr::InList {
1468            values, negated, ..
1469        } = e
1470        else {
1471            panic!("expected InList");
1472        };
1473        assert!(!negated);
1474        assert_eq!(values.len(), 3);
1475    }
1476
1477    #[test]
1478    fn not_in_list() {
1479        let e = parse("status NOT IN (1, 2)");
1480        let Expr::InList { negated: true, .. } = e else {
1481            panic!("expected negated InList");
1482        };
1483    }
1484
1485    #[test]
1486    fn function_call_with_args() {
1487        let e = parse("UPPER(name)");
1488        let Expr::FunctionCall { name, args, .. } = e else {
1489            panic!("expected FunctionCall");
1490        };
1491        assert_eq!(name, "UPPER");
1492        assert_eq!(args.len(), 1);
1493    }
1494
1495    #[test]
1496    fn nested_function_call() {
1497        let e = parse("COALESCE(a, UPPER(b))");
1498        let Expr::FunctionCall { name, args, .. } = e else {
1499            panic!("expected FunctionCall");
1500        };
1501        assert_eq!(name, "COALESCE");
1502        assert_eq!(args.len(), 2);
1503        assert!(matches!(&args[1], Expr::FunctionCall { .. }));
1504    }
1505
1506    #[test]
1507    fn duration_literal_parses_as_text() {
1508        let e = parse("time_bucket(5m)");
1509        let Expr::FunctionCall { name, args, .. } = e else {
1510            panic!("expected FunctionCall, got {e:?}");
1511        };
1512        assert_eq!(name.to_uppercase(), "TIME_BUCKET");
1513        assert_eq!(args.len(), 1);
1514        assert!(
1515            matches!(&args[0], Expr::Literal { value: Value::Text(s), .. } if s.as_ref() == "5m"),
1516            "expected Text(\"5m\"), got {:?}",
1517            args[0]
1518        );
1519    }
1520
1521    #[test]
1522    fn placeholder_dollar_one() {
1523        let e = parse("$1");
1524        match e {
1525            Expr::Parameter { index: 0, .. } => {}
1526            other => panic!("expected Parameter(0), got {other:?}"),
1527        }
1528    }
1529
1530    #[test]
1531    fn placeholder_dollar_n() {
1532        let e = parse("$7");
1533        match e {
1534            Expr::Parameter { index: 6, .. } => {}
1535            other => panic!("expected Parameter(6), got {other:?}"),
1536        }
1537    }
1538
1539    #[test]
1540    fn placeholder_in_string_literal_is_text() {
1541        // `$1` inside a string literal must NOT parse as a placeholder.
1542        let e = parse("'$1'");
1543        match e {
1544            Expr::Literal {
1545                value: Value::Text(s),
1546                ..
1547            } if s.as_ref() == "$1" => {}
1548            other => panic!("expected text literal '$1', got {other:?}"),
1549        }
1550    }
1551
1552    #[test]
1553    fn placeholder_in_comparison() {
1554        // SELECT-WHERE shape: `id = $1`
1555        let e = parse("id = $1");
1556        let Expr::BinaryOp {
1557            op: BinOp::Eq, rhs, ..
1558        } = e
1559        else {
1560            panic!("root must be Eq");
1561        };
1562        assert!(matches!(*rhs, Expr::Parameter { index: 0, .. }));
1563    }
1564
1565    #[test]
1566    fn placeholder_zero_rejected() {
1567        let mut parser = Parser::new("$0").expect("lexer");
1568        let err = parser.parse_expr().unwrap_err();
1569        assert!(err.to_string().contains("placeholder"));
1570    }
1571
1572    #[test]
1573    fn placeholder_question_single() {
1574        // Lone `?` numbered as parameter 1 (index 0).
1575        let e = parse("?");
1576        match e {
1577            Expr::Parameter { index: 0, .. } => {}
1578            other => panic!("expected Parameter(0), got {other:?}"),
1579        }
1580    }
1581
1582    #[test]
1583    fn placeholder_question_numbered() {
1584        let e = parse("?7");
1585        match e {
1586            Expr::Parameter { index: 6, .. } => {}
1587            other => panic!("expected Parameter(6), got {other:?}"),
1588        }
1589    }
1590
1591    #[test]
1592    fn placeholder_question_numbered_zero_rejected() {
1593        let mut parser = Parser::new("?0").expect("lexer");
1594        let err = parser.parse_expr().unwrap_err();
1595        assert!(err.to_string().contains("placeholder"));
1596    }
1597
1598    #[test]
1599    fn placeholder_question_left_to_right() {
1600        // `id = ? AND name = ?` → params 0 and 1
1601        let e = parse("id = ? AND name = ?");
1602        let Expr::BinaryOp {
1603            op: BinOp::And,
1604            lhs,
1605            rhs,
1606            ..
1607        } = e
1608        else {
1609            panic!("root must be And");
1610        };
1611        let Expr::BinaryOp {
1612            op: BinOp::Eq,
1613            rhs: r1,
1614            ..
1615        } = *lhs
1616        else {
1617            panic!("lhs must be Eq");
1618        };
1619        assert!(matches!(*r1, Expr::Parameter { index: 0, .. }));
1620        let Expr::BinaryOp {
1621            op: BinOp::Eq,
1622            rhs: r2,
1623            ..
1624        } = *rhs
1625        else {
1626            panic!("rhs must be Eq");
1627        };
1628        assert!(matches!(*r2, Expr::Parameter { index: 1, .. }));
1629    }
1630
1631    #[test]
1632    fn placeholder_question_in_string_literal_is_text() {
1633        let e = parse("'?'");
1634        match e {
1635            Expr::Literal {
1636                value: Value::Text(s),
1637                ..
1638            } if s.as_ref() == "?" => {}
1639            other => panic!("expected text literal '?', got {other:?}"),
1640        }
1641    }
1642
1643    #[test]
1644    fn placeholder_mixing_question_then_dollar_rejected() {
1645        let mut parser = Parser::new("id = ? AND x = $2").expect("lexer");
1646        let err = parser.parse_expr().err().expect("should fail");
1647        assert!(
1648            err.to_string().contains("mix"),
1649            "expected mixing error, got: {err}"
1650        );
1651    }
1652
1653    #[test]
1654    fn placeholder_mixing_dollar_then_question_rejected() {
1655        let mut parser = Parser::new("id = $1 AND x = ?").expect("lexer");
1656        let err = parser.parse_expr().err().expect("should fail");
1657        assert!(
1658            err.to_string().contains("mix"),
1659            "expected mixing error, got: {err}"
1660        );
1661    }
1662
1663    #[test]
1664    fn placeholder_question_in_comment_ignored() {
1665        // `?` inside an SQL line comment must not bump the counter.
1666        // The expression after the comment is the only param.
1667        let mut parser = Parser::new("-- ? ignored\n  ?").expect("lexer");
1668        let e = parser.parse_expr().expect("parse_expr");
1669        match e {
1670            Expr::Parameter { index: 0, .. } => {}
1671            other => panic!("expected Parameter(0), got {other:?}"),
1672        }
1673    }
1674
1675    #[test]
1676    fn unary_plus_is_noop() {
1677        let e = parse("+42");
1678        assert!(matches!(
1679            e,
1680            Expr::Literal {
1681                value: Value::Integer(42),
1682                ..
1683            }
1684        ));
1685    }
1686
1687    #[test]
1688    fn parenthesised_select_becomes_subquery_expr() {
1689        let e = parse("(SELECT 1)");
1690        assert!(matches!(e, Expr::Subquery { .. }));
1691    }
1692
1693    #[test]
1694    fn bare_zero_arg_current_functions_parse_as_calls() {
1695        for (input, expected) in [
1696            ("CURRENT_TIMESTAMP", "CURRENT_TIMESTAMP"),
1697            ("CURRENT_DATE", "CURRENT_DATE"),
1698            ("CURRENT_TIME", "CURRENT_TIME"),
1699        ] {
1700            let e = parse(input);
1701            let Expr::FunctionCall { name, args, .. } = e else {
1702                panic!("expected FunctionCall for {input}");
1703            };
1704            assert_eq!(name, expected);
1705            assert!(args.is_empty());
1706        }
1707    }
1708
1709    #[test]
1710    fn keyword_function_names_parse_as_calls() {
1711        for (input, expected_len) in [
1712            ("COUNT(*)", 1),
1713            ("SUM(amount)", 1),
1714            ("LEFT(name, 2)", 2),
1715            ("RIGHT(name, 2)", 2),
1716            ("CONTAINS(body, 'red')", 2),
1717            ("KV(cfg, path)", 2),
1718        ] {
1719            let e = parse(input);
1720            let Expr::FunctionCall { args, .. } = e else {
1721                panic!("expected FunctionCall for {input}");
1722            };
1723            assert_eq!(args.len(), expected_len, "{input}");
1724        }
1725    }
1726
1727    #[test]
1728    fn count_distinct_lowers_to_count_distinct_function() {
1729        let e = parse("COUNT(DISTINCT user_id)");
1730        let Expr::FunctionCall { name, args, .. } = e else {
1731            panic!("expected FunctionCall");
1732        };
1733        assert_eq!(name, "COUNT_DISTINCT");
1734        assert_eq!(args.len(), 1);
1735    }
1736
1737    #[test]
1738    fn dollar_secret_and_config_refs_become_function_calls() {
1739        for (input, expected_name, expected_key) in [
1740            ("$secret.api_key", "__SECRET_REF", "red.vault/api_key"),
1741            ("$red.secret.api_key", "__SECRET_REF", "red.vault/api_key"),
1742            ("$red.secrets.api_key", "__SECRET_REF", "red.vault/api_key"),
1743            ("$config.ai.provider", "CONFIG", "red.config/ai.provider"),
1744            (
1745                "$red.config.ai.provider",
1746                "CONFIG",
1747                "red.config/ai.provider",
1748            ),
1749        ] {
1750            let e = parse(input);
1751            let Expr::FunctionCall { name, args, .. } = e else {
1752                panic!("expected FunctionCall for {input}");
1753            };
1754            assert_eq!(name, expected_name);
1755            assert!(matches!(
1756                &args[..],
1757                [Expr::Literal { value: Value::Text(key), .. }] if key.as_ref() == expected_key
1758            ));
1759        }
1760    }
1761
1762    #[test]
1763    fn dollar_ref_rejects_unknown_namespace() {
1764        let mut parser = Parser::new("$tenant.id").expect("lexer");
1765        let err = parser
1766            .parse_expr()
1767            .expect_err("unknown namespace should fail");
1768        assert!(err.to_string().contains("unknown $ reference"));
1769    }
1770
1771    #[test]
1772    fn config_and_kv_bare_path_args_lowercase_to_text() {
1773        let e = parse("CONFIG(Red.AI.Default.Provider, 'openai')");
1774        let Expr::FunctionCall { name, args, .. } = e else {
1775            panic!("expected FunctionCall");
1776        };
1777        assert_eq!(name, "CONFIG");
1778        assert_eq!(args.len(), 2);
1779        assert!(matches!(
1780            &args[0],
1781            Expr::Literal { value: Value::Text(path), .. }
1782                if path.as_ref() == "red.ai.default.provider"
1783        ));
1784        assert!(matches!(
1785            &args[1],
1786            Expr::Literal { value: Value::Text(provider), .. } if provider.as_ref() == "openai"
1787        ));
1788
1789        let e = parse("KV(cfg, default.role, LOWER(name))");
1790        let Expr::FunctionCall { name, args, .. } = e else {
1791            panic!("expected FunctionCall");
1792        };
1793        assert_eq!(name, "KV");
1794        assert!(matches!(
1795            &args[0],
1796            Expr::Literal { value: Value::Text(path), .. } if path.as_ref() == "cfg"
1797        ));
1798        assert!(matches!(
1799            &args[1],
1800            Expr::Literal { value: Value::Text(path), .. } if path.as_ref() == "default.role"
1801        ));
1802        assert!(matches!(&args[2], Expr::FunctionCall { name, .. } if name == "LOWER"));
1803    }
1804
1805    #[test]
1806    fn cast_rejects_unknown_type_name() {
1807        let mut parser = Parser::new("CAST(age AS BOGUS_TYPE)").expect("lexer");
1808        let err = parser
1809            .parse_expr()
1810            .expect_err("unknown cast target should fail");
1811        assert!(err.to_string().contains("unknown type name"));
1812    }
1813
1814    #[test]
1815    fn trim_position_and_substring_sql_forms_lower_to_function_args() {
1816        let e = parse("TRIM(LEADING 'x' FROM name)");
1817        let Expr::FunctionCall { name, args, .. } = e else {
1818            panic!("expected trim function");
1819        };
1820        assert_eq!(name, "LTRIM");
1821        assert_eq!(args.len(), 2);
1822
1823        let e = parse("TRIM(TRAILING FROM name)");
1824        let Expr::FunctionCall { name, args, .. } = e else {
1825            panic!("expected trim function");
1826        };
1827        assert_eq!(name, "RTRIM");
1828        assert_eq!(args.len(), 1);
1829
1830        let e = parse("POSITION('x' IN name)");
1831        let Expr::FunctionCall { name, args, .. } = e else {
1832            panic!("expected position function");
1833        };
1834        assert_eq!(name, "POSITION");
1835        assert_eq!(args.len(), 2);
1836
1837        let e = parse("POSITION('x', name)");
1838        let Expr::FunctionCall { args, .. } = e else {
1839            panic!("expected position function");
1840        };
1841        assert_eq!(args.len(), 2);
1842
1843        let e = parse("SUBSTRING(name FROM 2 FOR 3)");
1844        let Expr::FunctionCall { name, args, .. } = e else {
1845            panic!("expected substring function");
1846        };
1847        assert_eq!(name, "SUBSTRING");
1848        assert_eq!(args.len(), 3);
1849
1850        let e = parse("SUBSTRING(name FOR 3)");
1851        let Expr::FunctionCall { args, .. } = e else {
1852            panic!("expected substring function");
1853        };
1854        assert_eq!(args.len(), 3);
1855        assert!(matches!(
1856            args[1],
1857            Expr::Literal {
1858                value: Value::Integer(1),
1859                ..
1860            }
1861        ));
1862    }
1863
1864    #[test]
1865    fn postfix_in_accepts_subquery_and_empty_list() {
1866        let e = parse("id IN (SELECT user_id FROM users)");
1867        let Expr::InList { values, .. } = e else {
1868            panic!("expected InList");
1869        };
1870        assert!(matches!(&values[..], [Expr::Subquery { .. }]));
1871
1872        let e = parse("id IN ()");
1873        let Expr::InList { values, .. } = e else {
1874            panic!("expected InList");
1875        };
1876        assert!(values.is_empty());
1877    }
1878
1879    #[test]
1880    fn postfix_not_requires_between_or_in() {
1881        let mut parser = Parser::new("status NOT NULL").expect("lexer");
1882        let err = parser.parse_expr().expect_err("postfix NOT should fail");
1883        assert!(err.to_string().contains("BETWEEN or IN"));
1884    }
1885
1886    #[test]
1887    fn case_reports_missing_then_end_and_empty_branch() {
1888        for input in [
1889            "CASE END",
1890            "CASE WHEN a = 1 'one' END",
1891            "CASE WHEN a = 1 THEN 'one'",
1892        ] {
1893            let mut parser = Parser::new(input).expect("lexer");
1894            assert!(
1895                parser.parse_expr().is_err(),
1896                "expected CASE parse failure for {input}"
1897            );
1898        }
1899    }
1900
1901    #[test]
1902    fn span_tracks_token_range() {
1903        // A literal's span must cover the exact tokens consumed.
1904        let mut parser = Parser::new("123 + 456").expect("lexer");
1905        let e = parser.parse_expr().expect("parse_expr");
1906        let span = e.span();
1907        assert!(!span.is_synthetic(), "root span must be real");
1908        assert!(span.start.offset < span.end.offset);
1909    }
1910
1911    // ====================================================================
1912    // Window OVER clause — issue #589 slice 7a
1913    // ====================================================================
1914
1915    fn try_parse(input: &str) -> Result<Expr, ParseError> {
1916        let mut parser = Parser::new(input).expect("lexer init");
1917        parser.parse_expr()
1918    }
1919
1920    #[test]
1921    fn window_lag_partition_and_order() {
1922        let e = parse("LAG(ts) OVER (PARTITION BY user_id ORDER BY ts)");
1923        let Expr::WindowFunctionCall {
1924            name, args, window, ..
1925        } = e
1926        else {
1927            panic!("expected WindowFunctionCall");
1928        };
1929        assert_eq!(name.to_uppercase(), "LAG");
1930        assert_eq!(args.len(), 1);
1931        assert_eq!(window.partition_by.len(), 1);
1932        assert_eq!(window.order_by.len(), 1);
1933        assert!(window.order_by[0].ascending);
1934        assert!(window.frame.is_none());
1935    }
1936
1937    #[test]
1938    fn window_row_number_empty_over() {
1939        let e = parse("ROW_NUMBER() OVER ()");
1940        let Expr::WindowFunctionCall {
1941            name, args, window, ..
1942        } = e
1943        else {
1944            panic!("expected WindowFunctionCall");
1945        };
1946        assert_eq!(name.to_uppercase(), "ROW_NUMBER");
1947        assert!(args.is_empty());
1948        assert!(window.partition_by.is_empty());
1949        assert!(window.order_by.is_empty());
1950        assert!(window.frame.is_none());
1951    }
1952
1953    #[test]
1954    fn window_sum_with_frame_rows_between() {
1955        let e = parse(
1956            "SUM(amount) OVER (PARTITION BY user_id ORDER BY ts \
1957             ROWS BETWEEN 2 PRECEDING AND CURRENT ROW)",
1958        );
1959        let Expr::WindowFunctionCall { name, window, .. } = e else {
1960            panic!("expected WindowFunctionCall");
1961        };
1962        assert_eq!(name.to_uppercase(), "SUM");
1963        let frame = window.frame.expect("frame present");
1964        assert!(matches!(frame.unit, crate::ast::WindowFrameUnit::Rows));
1965        assert!(matches!(
1966            frame.start,
1967            crate::ast::WindowFrameBound::Preceding(_)
1968        ));
1969        assert!(matches!(
1970            frame.end,
1971            Some(crate::ast::WindowFrameBound::CurrentRow)
1972        ));
1973    }
1974
1975    #[test]
1976    fn window_rank_order_desc_multiple_keys() {
1977        let e = parse("RANK() OVER (ORDER BY score DESC, ts)");
1978        let Expr::WindowFunctionCall { window, .. } = e else {
1979            panic!("expected WindowFunctionCall");
1980        };
1981        assert_eq!(window.order_by.len(), 2);
1982        assert!(!window.order_by[0].ascending);
1983        assert!(window.order_by[1].ascending);
1984    }
1985
1986    #[test]
1987    fn window_unbounded_preceding_following_frame() {
1988        let e = parse(
1989            "AVG(x) OVER (ORDER BY t \
1990             RANGE BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING)",
1991        );
1992        let Expr::WindowFunctionCall { window, .. } = e else {
1993            panic!("expected WindowFunctionCall");
1994        };
1995        let frame = window.frame.expect("frame present");
1996        assert!(matches!(frame.unit, crate::ast::WindowFrameUnit::Range));
1997        assert!(matches!(
1998            frame.start,
1999            crate::ast::WindowFrameBound::UnboundedPreceding
2000        ));
2001        assert!(matches!(
2002            frame.end,
2003            Some(crate::ast::WindowFrameBound::UnboundedFollowing)
2004        ));
2005    }
2006
2007    #[test]
2008    fn window_rejects_non_window_function() {
2009        // UPPER is a scalar function, not eligible for OVER.
2010        let err = try_parse("UPPER(name) OVER (PARTITION BY id)")
2011            .err()
2012            .expect("should reject scalar OVER");
2013        let msg = err.to_string();
2014        assert!(
2015            msg.contains("UPPER") || msg.contains("upper"),
2016            "error should mention function name, got: {msg}"
2017        );
2018        assert!(msg.to_ascii_uppercase().contains("OVER") || msg.contains("window"));
2019    }
2020
2021    #[test]
2022    fn window_rejects_missing_open_paren() {
2023        let err = try_parse("LAG(ts) OVER PARTITION BY user_id")
2024            .err()
2025            .expect("should reject");
2026        let msg = err.to_string();
2027        assert!(
2028            msg.contains("(") || msg.to_ascii_uppercase().contains("EXPECTED"),
2029            "got: {msg}"
2030        );
2031    }
2032
2033    #[test]
2034    fn window_rejects_invalid_frame_syntax() {
2035        // CURRENT without ROW is malformed.
2036        let err = try_parse("LAG(ts) OVER (ORDER BY ts ROWS CURRENT)")
2037            .err()
2038            .expect("should reject");
2039        let msg = err.to_string();
2040        assert!(
2041            !msg.is_empty(),
2042            "expected non-empty error for malformed frame"
2043        );
2044    }
2045
2046    #[test]
2047    fn window_first_value_with_partition_only() {
2048        let e = parse("FIRST_VALUE(price) OVER (PARTITION BY symbol)");
2049        let Expr::WindowFunctionCall {
2050            name, window, args, ..
2051        } = e
2052        else {
2053            panic!("expected WindowFunctionCall");
2054        };
2055        assert_eq!(name.to_uppercase(), "FIRST_VALUE");
2056        assert_eq!(args.len(), 1);
2057        assert_eq!(window.partition_by.len(), 1);
2058        assert!(window.order_by.is_empty());
2059    }
2060
2061    #[test]
2062    fn window_order_nulls_first_and_last() {
2063        let e = parse("SUM(x) OVER (ORDER BY score ASC NULLS FIRST, ts DESC NULLS LAST)");
2064        let Expr::WindowFunctionCall { window, .. } = e else {
2065            panic!("expected WindowFunctionCall");
2066        };
2067        assert_eq!(window.order_by.len(), 2);
2068        assert!(window.order_by[0].ascending);
2069        assert!(window.order_by[0].nulls_first);
2070        assert!(!window.order_by[1].ascending);
2071        assert!(!window.order_by[1].nulls_first);
2072    }
2073
2074    #[test]
2075    fn window_single_bound_frames() {
2076        let e = parse("SUM(x) OVER (ORDER BY ts ROWS 3 PRECEDING)");
2077        let Expr::WindowFunctionCall { window, .. } = e else {
2078            panic!("expected WindowFunctionCall");
2079        };
2080        let frame = window.frame.expect("frame");
2081        assert!(matches!(
2082            frame.start,
2083            crate::ast::WindowFrameBound::Preceding(_)
2084        ));
2085        assert!(frame.end.is_none());
2086
2087        let e = parse("SUM(x) OVER (ORDER BY ts RANGE 1 FOLLOWING)");
2088        let Expr::WindowFunctionCall { window, .. } = e else {
2089            panic!("expected WindowFunctionCall");
2090        };
2091        let frame = window.frame.expect("frame");
2092        assert!(matches!(
2093            frame.start,
2094            crate::ast::WindowFrameBound::Following(_)
2095        ));
2096        assert!(frame.end.is_none());
2097    }
2098
2099    #[test]
2100    fn window_reports_nulls_and_frame_bound_errors() {
2101        for input in [
2102            "SUM(x) OVER (ORDER BY score NULLS MIDDLE)",
2103            "SUM(x) OVER (ORDER BY score ROWS UNBOUNDED)",
2104            "SUM(x) OVER (ORDER BY score ROWS 3)",
2105        ] {
2106            let err = try_parse(input).expect_err("window syntax should fail");
2107            assert!(!err.to_string().is_empty(), "{input}");
2108        }
2109    }
2110}