reddb_server/storage/query/parser/
expr.rs

1//! Pratt-style parser for the Fase 2 `Expr` AST.
2//!
3//! This module is the Week 2 deliverable of the parser v2 refactor
4//! tracked in `/home/cyber/.claude/plans/squishy-mixing-honey.md`.
5//! It produces `ast::Expr` trees with proper operator precedence,
6//! `Span` tracking from the lexer, and support for the full set of
7//! unary / binary / postfix operators the existing hand-rolled
8//! projection climb covers in Fase 1.3 — plus the missing pieces
9//! (CASE, CAST, parenthesised subexprs, IS NULL, IN, BETWEEN).
10//!
11//! # Design notes
12//!
13//! The parser is now the canonical entry point for SQL expression
14//! parsing in the table-query flow:
15//! - `SELECT` projections parse through `Parser::parse_expr`
16//! - `WHERE` / `HAVING` operands parse through `Parser::parse_expr`
17//! - `ORDER BY` expressions parse through `Parser::parse_expr`
18//!
19//! Some legacy AST slots are still adapter-based (`Projection`,
20//! `Filter`, `GROUP BY` strings), so statement parsing still lowers
21//! `Expr` trees into those older shapes at the boundary.
22//!
23//! # Precedence table (matches PG gram.y modulo features we don't have)
24//!
25//! ```text
26//! prec  operators
27//! ----  ----------------------------------
28//!  10   OR
29//!  20   AND
30//!  25   NOT                      (prefix)
31//!  30   = <> < <= > >=           (comparison)
32//!  32   IS NULL / IS NOT NULL    (postfix)
33//!  33   BETWEEN … AND …          (postfix)
34//!  34   IN (…)                   (postfix)
35//!  40   ||                       (string concat)
36//!  50   + -                      (additive)
37//!  60   * / %                    (multiplicative)
38//!  70   -                        (unary negation)
39//!  80   ::type  CAST(…AS type)   (explicit type coercion)
40//! ```
41//!
42//! Higher precedence binds tighter. The climb uses the classic
43//! "min-precedence" algorithm — `parse_expr_prec(min)` loops consuming
44//! any infix operator whose precedence is ≥ `min`, recursing with
45//! `prec + 1` on the right-hand side for left-associativity.
46
47use super::super::ast::{BinOp, Expr, ExprSubquery, FieldRef, Span, UnaryOp};
48use super::super::lexer::Token;
49use super::error::ParseError;
50use super::Parser;
51use super::PlaceholderMode;
52use crate::storage::schema::{DataType, Value};
53
/// Reports whether `unit` names a duration suffix accepted after an
/// integer literal (e.g. the `m` in `5m`).
///
/// Matching is ASCII case-insensitive; the accepted spellings cover
/// milliseconds, seconds, minutes, hours and days.
fn is_duration_unit(unit: &str) -> bool {
    const KNOWN_UNITS: &[&str] = &[
        // milliseconds
        "ms",
        "msec",
        "millisecond",
        "milliseconds",
        // seconds
        "s",
        "sec",
        "secs",
        "second",
        "seconds",
        // minutes
        "m",
        "min",
        "mins",
        "minute",
        "minutes",
        // hours
        "h",
        "hr",
        "hrs",
        "hour",
        "hours",
        // days
        "d",
        "day",
        "days",
    ];
    KNOWN_UNITS
        .iter()
        .any(|known| unit.eq_ignore_ascii_case(known))
}
80
81fn keyword_function_name(token: &Token) -> Option<&'static str> {
82    match token {
83        Token::Count => Some("COUNT"),
84        Token::Sum => Some("SUM"),
85        Token::Avg => Some("AVG"),
86        Token::Min => Some("MIN"),
87        Token::Max => Some("MAX"),
88        Token::First => Some("FIRST"),
89        Token::Last => Some("LAST"),
90        Token::Left => Some("LEFT"),
91        Token::Right => Some("RIGHT"),
92        Token::Contains => Some("CONTAINS"),
93        Token::Kv => Some("KV"),
94        _ => None,
95    }
96}
97
98impl<'a> Parser<'a> {
99    /// Parse a complete expression at the lowest precedence level.
100    /// Entry point for every caller that wants an `Expr` tree.
101    pub fn parse_expr(&mut self) -> Result<Expr, ParseError> {
102        self.parse_expr_prec(0)
103    }
104
105    pub(crate) fn parse_expr_with_min_precedence(
106        &mut self,
107        min_prec: u8,
108    ) -> Result<Expr, ParseError> {
109        self.parse_expr_prec(min_prec)
110    }
111
112    /// Continue parsing an expression after the caller has already
113    /// materialized the left-hand side atom.
114    pub(crate) fn continue_expr(&mut self, left: Expr, min_prec: u8) -> Result<Expr, ParseError> {
115        self.parse_expr_suffix(left, min_prec)
116    }
117
118    /// Pratt climb: parse a unary atom then consume any infix operators
119    /// whose precedence meets or exceeds `min_prec`.
120    fn parse_expr_prec(&mut self, min_prec: u8) -> Result<Expr, ParseError> {
121        // Depth guard: every recursive descent point in the expr
122        // grammar bottoms out here, so checking once is enough to
123        // catch deeply nested literals like `((((((1))))))` and
124        // boolean chains like `NOT NOT NOT NOT … x`.
125        self.enter_depth()?;
126        let result = (|| {
127            let left = self.parse_expr_unary()?;
128            self.parse_expr_suffix(left, min_prec)
129        })();
130        self.exit_depth();
131        result
132    }
133
134    fn parse_expr_suffix(&mut self, mut left: Expr, min_prec: u8) -> Result<Expr, ParseError> {
135        loop {
136            let Some((op, prec)) = self.peek_binop() else {
137                // Not a standard infix op — check for postfix forms.
138                if min_prec <= 32 {
139                    if let Some(node) = self.try_parse_postfix(&left)? {
140                        left = node;
141                        continue;
142                    }
143                }
144                break;
145            };
146            if prec < min_prec {
147                break;
148            }
149            self.advance()?; // consume the operator token
150            let start_span = self.span_start_of(&left);
151            let rhs = self.parse_expr_prec(prec + 1)?;
152            let end_span = self.span_end_of(&rhs);
153            left = Expr::BinaryOp {
154                op,
155                lhs: Box::new(left),
156                rhs: Box::new(rhs),
157                span: Span::new(start_span, end_span),
158            };
159        }
160        Ok(left)
161    }
162
163    /// Parse a unary-prefix expression or drop through to the atomic
164    /// factor. Handles `NOT`, unary `-`, and `+` (no-op sign).
165    fn parse_expr_unary(&mut self) -> Result<Expr, ParseError> {
166        match self.peek() {
167            Token::Not => {
168                let start = self.position();
169                self.advance()?;
170                let operand = self.parse_expr_prec(25)?;
171                let end = self.span_end_of(&operand);
172                Ok(Expr::UnaryOp {
173                    op: UnaryOp::Not,
174                    operand: Box::new(operand),
175                    span: Span::new(start, end),
176                })
177            }
178            Token::Dash => {
179                let start = self.position();
180                self.advance()?;
181                let operand = self.parse_expr_prec(70)?;
182                let end = self.span_end_of(&operand);
183                Ok(Expr::UnaryOp {
184                    op: UnaryOp::Neg,
185                    operand: Box::new(operand),
186                    span: Span::new(start, end),
187                })
188            }
189            Token::Plus => {
190                // Unary plus is a no-op. Consume and recurse.
191                self.advance()?;
192                self.parse_expr_prec(70)
193            }
194            _ => self.parse_expr_factor(),
195        }
196    }
197
198    /// Parse a single atomic expression factor: literal, column ref,
199    /// parenthesised subexpression, CAST, CASE, or function call.
200    fn parse_expr_factor(&mut self) -> Result<Expr, ParseError> {
201        let start = self.position();
202
203        // Parenthesised subexpression: `( expr )`
204        if self.consume(&Token::LParen)? {
205            if self.check(&Token::Select) {
206                let query = self.parse_select_query()?;
207                self.expect(Token::RParen)?;
208                return Ok(Expr::Subquery {
209                    query: ExprSubquery {
210                        query: Box::new(query),
211                    },
212                    span: Span::new(start, self.position()),
213                });
214            }
215            let inner = self.parse_expr_prec(0)?;
216            self.expect(Token::RParen)?;
217            return Ok(inner);
218        }
219
220        // Literal: true / false / null
221        if self.consume(&Token::True)? {
222            return Ok(Expr::Literal {
223                value: Value::Boolean(true),
224                span: Span::new(start, self.position()),
225            });
226        }
227        if self.consume(&Token::False)? {
228            return Ok(Expr::Literal {
229                value: Value::Boolean(false),
230                span: Span::new(start, self.position()),
231            });
232        }
233        if self.consume(&Token::Null)? {
234            return Ok(Expr::Literal {
235                value: Value::Null,
236                span: Span::new(start, self.position()),
237            });
238        }
239
240        // Numeric literals — with optional duration-unit suffix (e.g. `5m`, `10s`, `2h`).
241        // Duration literals are emitted as Value::Text so downstream code sees "5m" verbatim
242        // (matching the legacy Projection::Column("LIT:5m") path used by time_bucket).
243        if let Token::Integer(n) = *self.peek() {
244            self.advance()?;
245            if let Token::Ident(ref unit) = *self.peek() {
246                if is_duration_unit(unit) {
247                    let duration = format!("{n}{}", unit.to_ascii_lowercase());
248                    self.advance()?;
249                    return Ok(Expr::Literal {
250                        value: Value::text(duration),
251                        span: Span::new(start, self.position()),
252                    });
253                }
254            }
255            return Ok(Expr::Literal {
256                value: Value::Integer(n),
257                span: Span::new(start, self.position()),
258            });
259        }
260        if let Token::Float(n) = *self.peek() {
261            self.advance()?;
262            return Ok(Expr::Literal {
263                value: Value::Float(n),
264                span: Span::new(start, self.position()),
265            });
266        }
267        if let Token::String(ref s) = *self.peek() {
268            let text = s.clone();
269            self.advance()?;
270            return Ok(Expr::Literal {
271                value: Value::text(text),
272                span: Span::new(start, self.position()),
273            });
274        }
275
276        // JSON object `{…}` and array `[…]` literals — delegate to the DML literal parser
277        // which already handles the full JSON value grammar including nested objects.
278        // `JsonLiteral` is the strict-JSON variant emitted by the lexer's sub-mode
279        // when `{` is followed by `"`; both shapes route through `parse_literal_value`.
280        if matches!(
281            self.peek(),
282            Token::LBrace | Token::LBracket | Token::JsonLiteral(_)
283        ) {
284            let value = self
285                .parse_literal_value()
286                .map_err(|e| ParseError::new(e.message, self.position()))?;
287            return Ok(Expr::Literal {
288                value,
289                span: Span::new(start, self.position()),
290            });
291        }
292
293        // `?` positional placeholder — auto-numbered left-to-right.
294        // Immediate `?N` uses an explicit 1-based index. Mixing with
295        // `$N` in one statement is rejected.
296        if self.check(&Token::Question) {
297            let (index, span) = self.parse_question_param_index()?;
298            return Ok(Expr::Parameter { index, span });
299        }
300
301        if self.consume(&Token::Dollar)? {
302            // `$N` positional parameter placeholder (1-based in source,
303            // 0-based in the AST so it matches `Vec<Value>` indexing).
304            // Rejected at parse time when N < 1; gaps and arity are
305            // validated by the binder once the full statement is parsed.
306            if let Token::Integer(n) = *self.peek() {
307                if n < 1 {
308                    return Err(ParseError::new(
309                        "placeholder index must be >= 1".to_string(),
310                        self.position(),
311                    ));
312                }
313                if self.placeholder_mode == PlaceholderMode::Question {
314                    return Err(ParseError::new(
315                        "cannot mix `?` and `$N` placeholders in one statement".to_string(),
316                        self.position(),
317                    ));
318                }
319                self.placeholder_mode = PlaceholderMode::Dollar;
320                self.advance()?;
321                return Ok(Expr::Parameter {
322                    index: (n - 1) as usize,
323                    span: Span::new(start, self.position()),
324                });
325            }
326            let path = self.parse_dollar_ref_path()?;
327            let path_lc = path.to_ascii_lowercase();
328            let (name, key) = if let Some(rest) = path_lc.strip_prefix("secret.") {
329                ("__SECRET_REF", format!("red.vault/{rest}"))
330            } else if path_lc.starts_with("red.secret.") {
331                let rest = path_lc.trim_start_matches("red.secret.");
332                ("__SECRET_REF", format!("red.vault/{rest}"))
333            } else if let Some(rest) = path_lc.strip_prefix("config.") {
334                ("CONFIG", format!("red.config/{rest}"))
335            } else if path_lc.starts_with("red.config.") {
336                let rest = path_lc.trim_start_matches("red.config.");
337                ("CONFIG", format!("red.config/{rest}"))
338            } else {
339                return Err(ParseError::new(
340                    format!(
341                        "unknown $ reference `${path}`; expected $secret.*, $red.secret.*, $config.*, or $red.config.*"
342                    ),
343                    self.position(),
344                ));
345            };
346            return Ok(Expr::FunctionCall {
347                name: name.to_string(),
348                args: vec![Expr::Literal {
349                    value: Value::text(key),
350                    span: Span::new(start, self.position()),
351                }],
352                span: Span::new(start, self.position()),
353            });
354        }
355
356        if let Some(name) = keyword_function_name(self.peek()) {
357            if matches!(self.peek_next()?, Token::LParen) {
358                self.advance()?; // consume the keyword token
359                return self.parse_function_call_expr_with_name(start, name.to_string());
360            }
361        }
362
363        // Identifier-led constructs: function call, CAST, CASE, column.
364        //
365        // We commit to consuming the identifier immediately and then
366        // inspect the NEXT token to decide shape. This avoids needing
367        // two-token lookahead on the parser. If the next token is `(`
368        // it's a function call; if `.` it's a qualified column ref;
369        // otherwise it's a bare column ref.
370        if let Token::Ident(ref name) = *self.peek() {
371            let name_upper = name.to_uppercase();
372
373            // CAST(expr AS type) — must test before consuming because
374            // CAST is not a reserved keyword; users could legitimately
375            // have a column literally named `cast`. Distinguish by
376            // looking at whether the identifier equals CAST AND is
377            // immediately followed by `(`. Since we can't two-step
378            // lookahead, handle CAST by parsing the ident, then if the
379            // uppercased name is CAST and the next token is `(`,
380            // switch to the CAST form; otherwise the saved name
381            // becomes the first segment of a column ref.
382            if name_upper == "CASE" {
383                return self.parse_case_expr(start);
384            }
385
386            let saved_name = name.clone();
387            self.advance()?; // consume the identifier unconditionally
388
389            // Function call / CAST: IDENT (
390            if matches!(self.peek(), Token::LParen) {
391                return self.parse_function_call_expr_with_name(start, saved_name);
392            }
393
394            // Qualified column: IDENT.IDENT[.IDENT …]
395            if matches!(self.peek(), Token::Dot) {
396                let mut segments = vec![saved_name];
397                while self.consume(&Token::Dot)? {
398                    segments.push(self.expect_ident_or_keyword()?);
399                }
400                let field = FieldRef::TableColumn {
401                    table: segments.remove(0),
402                    column: segments.join("."),
403                };
404                let end = self.position();
405                return Ok(Expr::Column {
406                    field,
407                    span: Span::new(start, end),
408                });
409            }
410
411            // Bare column reference with empty table name.
412            let field = FieldRef::TableColumn {
413                table: String::new(),
414                column: saved_name,
415            };
416            let end = self.position();
417            return Ok(Expr::Column {
418                field,
419                span: Span::new(start, end),
420            });
421        }
422
423        // Default: column reference (optionally qualified: table.column).
424        // Reached only when the leading token is not an Ident. Falls
425        // through to parse_field_ref which handles keyword-shaped
426        // column names.
427        let field = self.parse_field_ref()?;
428        let end = self.position();
429        Ok(Expr::Column {
430            field,
431            span: Span::new(start, end),
432        })
433    }
434
435    fn parse_dollar_ref_path(&mut self) -> Result<String, ParseError> {
436        let mut path = self.expect_ident_or_keyword()?;
437        while self.consume(&Token::Dot)? {
438            let next = self.expect_ident_or_keyword()?;
439            path = format!("{path}.{next}");
440        }
441        Ok(path)
442    }
443
444    fn parse_function_call_expr_with_name(
445        &mut self,
446        start: crate::storage::query::lexer::Position,
447        function_name: String,
448    ) -> Result<Expr, ParseError> {
449        self.expect(Token::LParen)?;
450
451        if function_name.eq_ignore_ascii_case("CAST") {
452            let inner = self.parse_expr_prec(0)?;
453            self.expect(Token::As)?;
454            let type_name = self.expect_ident_or_keyword()?;
455            self.expect(Token::RParen)?;
456            let end = self.position();
457            let Some(target) = DataType::from_sql_name(&type_name) else {
458                return Err(ParseError::new(
459                    // F-05: `type_name` is caller-controlled identifier text.
460                    // Render via `{:?}` so embedded CR/LF/NUL/quotes are
461                    // escaped before reaching downstream serialization sinks.
462                    format!("unknown type name {type_name:?} in CAST"),
463                    self.position(),
464                ));
465            };
466            return Ok(Expr::Cast {
467                inner: Box::new(inner),
468                target,
469                span: Span::new(start, end),
470            });
471        }
472
473        if function_name.eq_ignore_ascii_case("TRIM") {
474            let (name, args) = self.parse_trim_expr_args()?;
475            self.expect(Token::RParen)?;
476            let end = self.position();
477            return Ok(Expr::FunctionCall {
478                name,
479                args,
480                span: Span::new(start, end),
481            });
482        }
483
484        if function_name.eq_ignore_ascii_case("POSITION") {
485            let args = self.parse_position_expr_args()?;
486            self.expect(Token::RParen)?;
487            let end = self.position();
488            return Ok(Expr::FunctionCall {
489                name: function_name,
490                args,
491                span: Span::new(start, end),
492            });
493        }
494
495        if function_name.eq_ignore_ascii_case("SUBSTRING") {
496            let args = self.parse_substring_expr_args()?;
497            self.expect(Token::RParen)?;
498            let end = self.position();
499            return Ok(Expr::FunctionCall {
500                name: function_name,
501                args,
502                span: Span::new(start, end),
503            });
504        }
505
506        if function_name.eq_ignore_ascii_case("COUNT") {
507            if self.consume(&Token::Distinct)? {
508                let arg = self.parse_expr_prec(0)?;
509                self.expect(Token::RParen)?;
510                let end = self.position();
511                return Ok(Expr::FunctionCall {
512                    name: "COUNT_DISTINCT".to_string(),
513                    args: vec![arg],
514                    span: Span::new(start, end),
515                });
516            }
517
518            if self.consume(&Token::Star)? {
519                self.expect(Token::RParen)?;
520                let end = self.position();
521                return Ok(Expr::FunctionCall {
522                    name: function_name,
523                    args: vec![Expr::Column {
524                        field: FieldRef::TableColumn {
525                            table: String::new(),
526                            column: "*".to_string(),
527                        },
528                        span: Span::synthetic(),
529                    }],
530                    span: Span::new(start, end),
531                });
532            }
533        }
534
535        let mut args = Vec::new();
536        if !self.check(&Token::RParen) {
537            loop {
538                args.push(self.parse_expr_prec(0)?);
539                if !self.consume(&Token::Comma)? {
540                    break;
541                }
542            }
543        }
544        self.expect(Token::RParen)?;
545        let end = self.position();
546        Ok(Expr::FunctionCall {
547            name: function_name,
548            args,
549            span: Span::new(start, end),
550        })
551    }
552
553    /// Parse `CASE WHEN cond THEN val [WHEN …] [ELSE val] END`.
554    /// Assumes the caller has already peeked `CASE`.
555    fn parse_case_expr(
556        &mut self,
557        start: crate::storage::query::lexer::Position,
558    ) -> Result<Expr, ParseError> {
559        self.advance()?; // consume CASE
560        let mut branches: Vec<(Expr, Expr)> = Vec::new();
561        loop {
562            if !self.consume_ident_ci("WHEN")? {
563                break;
564            }
565            let cond = self.parse_expr_prec(0)?;
566            if !self.consume_ident_ci("THEN")? {
567                return Err(ParseError::new(
568                    "expected THEN after CASE WHEN condition".to_string(),
569                    self.position(),
570                ));
571            }
572            let then_val = self.parse_expr_prec(0)?;
573            branches.push((cond, then_val));
574        }
575        if branches.is_empty() {
576            return Err(ParseError::new(
577                "CASE must have at least one WHEN branch".to_string(),
578                self.position(),
579            ));
580        }
581        let else_ = if self.consume_ident_ci("ELSE")? {
582            Some(Box::new(self.parse_expr_prec(0)?))
583        } else {
584            None
585        };
586        if !self.consume_ident_ci("END")? {
587            return Err(ParseError::new(
588                "expected END to close CASE expression".to_string(),
589                self.position(),
590            ));
591        }
592        let end = self.position();
593        Ok(Expr::Case {
594            branches,
595            else_,
596            span: Span::new(start, end),
597        })
598    }
599
600    fn parse_trim_expr_args(&mut self) -> Result<(String, Vec<Expr>), ParseError> {
601        let mut function_name = "TRIM".to_string();
602
603        if self.consume_ident_ci("LEADING")? {
604            function_name = "LTRIM".to_string();
605        } else if self.consume_ident_ci("TRAILING")? {
606            function_name = "RTRIM".to_string();
607        } else if self.consume_ident_ci("BOTH")? {
608            function_name = "TRIM".to_string();
609        }
610
611        if self.consume(&Token::From)? {
612            let source = self.parse_expr_prec(0)?;
613            return Ok((function_name, vec![source]));
614        }
615
616        let first = self.parse_expr_prec(0)?;
617
618        if self.consume(&Token::Comma)? {
619            let second = self.parse_expr_prec(0)?;
620            return Ok((function_name, vec![first, second]));
621        }
622
623        if self.consume(&Token::From)? {
624            let source = self.parse_expr_prec(0)?;
625            return Ok((function_name, vec![source, first]));
626        }
627
628        Ok((function_name, vec![first]))
629    }
630
631    /// PostgreSQL-style `POSITION(substr IN string)` or plain
632    /// `POSITION(substr, string)` lowered to the ordinary two-argument
633    /// function form.
634    fn parse_position_expr_args(&mut self) -> Result<Vec<Expr>, ParseError> {
635        // `IN` is also a postfix operator in the main expression grammar, so
636        // parse the first operand above postfix-IN precedence and then consume
637        // the function's `IN` keyword explicitly.
638        let needle = self.parse_expr_prec(35)?;
639        if !self.consume(&Token::Comma)? {
640            self.expect(Token::In)?;
641        }
642        let haystack = self.parse_expr_prec(0)?;
643        Ok(vec![needle, haystack])
644    }
645
646    /// PostgreSQL-style `SUBSTRING` syntax:
647    /// - `SUBSTRING(expr FROM start [FOR count])`
648    /// - `SUBSTRING(expr FOR count [FROM start])`
649    /// - plain function-call form `SUBSTRING(expr, start[, count])`
650    ///
651    /// The SQL-syntax variants are desugared to the comma-arg form so the
652    /// rest of the stack sees the same `Expr::FunctionCall` shape.
653    fn parse_substring_expr_args(&mut self) -> Result<Vec<Expr>, ParseError> {
654        let source = self.parse_expr_prec(0)?;
655
656        if self.consume(&Token::Comma)? {
657            let mut args = vec![source];
658            loop {
659                args.push(self.parse_expr_prec(0)?);
660                if !self.consume(&Token::Comma)? {
661                    break;
662                }
663            }
664            return Ok(args);
665        }
666
667        if self.consume(&Token::From)? {
668            let start = self.parse_expr_prec(0)?;
669            if self.consume(&Token::For)? {
670                let count = self.parse_expr_prec(0)?;
671                return Ok(vec![source, start, count]);
672            }
673            return Ok(vec![source, start]);
674        }
675
676        if self.consume(&Token::For)? {
677            let count = self.parse_expr_prec(0)?;
678            if self.consume(&Token::From)? {
679                let start = self.parse_expr_prec(0)?;
680                return Ok(vec![source, start, count]);
681            }
682            return Ok(vec![source, Expr::lit(Value::Integer(1)), count]);
683        }
684
685        Ok(vec![source])
686    }
687
688    /// Try to consume a postfix operator on top of the already-parsed
689    /// `left` expression: `IS [NOT] NULL`, `[NOT] BETWEEN … AND …`,
690    /// `[NOT] IN (…)`. Returns `Ok(None)` if no postfix follows.
691    ///
692    /// NOT at this position is unambiguous — prefix `NOT` is always
693    /// consumed at `parse_expr_unary` level before reaching postfix.
694    /// So seeing `NOT` here means the user wrote `x NOT BETWEEN …`
695    /// or `x NOT IN …`; we consume it eagerly and require BETWEEN
696    /// or IN to follow.
697    fn try_parse_postfix(&mut self, left: &Expr) -> Result<Option<Expr>, ParseError> {
698        let start = self.span_start_of(left);
699
700        // IS [NOT] NULL
701        if self.consume(&Token::Is)? {
702            let negated = self.consume(&Token::Not)?;
703            self.expect(Token::Null)?;
704            let end = self.position();
705            return Ok(Some(Expr::IsNull {
706                operand: Box::new(left.clone()),
707                negated,
708                span: Span::new(start, end),
709            }));
710        }
711
712        // Detect NOT BETWEEN / NOT IN. NOT is consumed eagerly — we
713        // don't have two-token lookahead and the grammar guarantees
714        // no other valid postfix starts with NOT.
715        let negated = if matches!(self.peek(), Token::Not) {
716            self.advance()?;
717            if !matches!(self.peek(), Token::Between | Token::In) {
718                return Err(ParseError::new(
719                    "expected BETWEEN or IN after postfix NOT".to_string(),
720                    self.position(),
721                ));
722            }
723            true
724        } else {
725            false
726        };
727
728        // BETWEEN low AND high
729        if self.consume(&Token::Between)? {
730            let low = self.parse_expr_prec(34)?;
731            self.expect(Token::And)?;
732            let high = self.parse_expr_prec(34)?;
733            let end = self.position();
734            return Ok(Some(Expr::Between {
735                target: Box::new(left.clone()),
736                low: Box::new(low),
737                high: Box::new(high),
738                negated,
739                span: Span::new(start, end),
740            }));
741        }
742
743        // IN (v1, v2, …)
744        if self.consume(&Token::In)? {
745            self.expect(Token::LParen)?;
746            let mut values = Vec::new();
747            if self.check(&Token::Select) {
748                let query = self.parse_select_query()?;
749                values.push(Expr::Subquery {
750                    query: ExprSubquery {
751                        query: Box::new(query),
752                    },
753                    span: Span::new(self.span_start_of(left), self.position()),
754                });
755            } else if !self.check(&Token::RParen) {
756                loop {
757                    values.push(self.parse_expr_prec(0)?);
758                    if !self.consume(&Token::Comma)? {
759                        break;
760                    }
761                }
762            }
763            self.expect(Token::RParen)?;
764            let end = self.position();
765            return Ok(Some(Expr::InList {
766                target: Box::new(left.clone()),
767                values,
768                negated,
769                span: Span::new(start, end),
770            }));
771        }
772
773        if negated {
774            // Unreachable because the early-return above already
775            // validated NOT is followed by BETWEEN or IN. Guarded
776            // to keep callers loud if the grammar grows later.
777            return Err(ParseError::new(
778                "internal: NOT consumed without BETWEEN/IN follow".to_string(),
779                self.position(),
780            ));
781        }
782        Ok(None)
783    }
784
785    /// Peek the current token and translate it into a `BinOp` plus
786    /// its precedence. Returns `None` if the token is not a recognised
787    /// infix operator — the caller then tries postfix handling.
788    fn peek_binop(&self) -> Option<(BinOp, u8)> {
789        let op = match self.peek() {
790            Token::Or => BinOp::Or,
791            Token::And => BinOp::And,
792            Token::Eq => BinOp::Eq,
793            Token::Ne => BinOp::Ne,
794            Token::Lt => BinOp::Lt,
795            Token::Le => BinOp::Le,
796            Token::Gt => BinOp::Gt,
797            Token::Ge => BinOp::Ge,
798            Token::DoublePipe => BinOp::Concat,
799            Token::Plus => BinOp::Add,
800            Token::Dash => BinOp::Sub,
801            Token::Star => BinOp::Mul,
802            Token::Slash => BinOp::Div,
803            Token::Percent => BinOp::Mod,
804            _ => return None,
805        };
806        Some((op, op.precedence()))
807    }
808
809    /// Return the start position of an expression's span. Handles the
810    /// synthetic case by falling back to the current parser cursor,
811    /// which is good enough for the Pratt climb since the caller just
812    /// parsed the atom.
813    fn span_start_of(&self, expr: &Expr) -> crate::storage::query::lexer::Position {
814        let s = expr.span();
815        if s.is_synthetic() {
816            self.position()
817        } else {
818            s.start
819        }
820    }
821
822    /// Return the end position of an expression's span — same
823    /// synthetic fallback as `span_start_of`.
824    fn span_end_of(&self, expr: &Expr) -> crate::storage::query::lexer::Position {
825        let s = expr.span();
826        if s.is_synthetic() {
827            self.position()
828        } else {
829            s.end
830        }
831    }
832}
833
834// Avoid `unused` lints in partial-migration builds where the analyzer
835// still does not consume every expression shape directly.
836#[allow(dead_code)]
837fn _expr_module_used(_: Expr) {}
838
#[cfg(test)]
mod tests {
    // Unit tests for the Pratt expression parser. Coverage is grouped
    // as: literals and columns, operator precedence / associativity,
    // unary and postfix forms, function calls, `$N` / `?` placeholders,
    // and span tracking.
    use super::*;
    use crate::storage::query::ast::FieldRef;

    /// Parse `input` as a single expression, panicking if either the
    /// lexer setup or the parse itself fails.
    fn parse(input: &str) -> Expr {
        let mut parser = Parser::new(input).expect("lexer init");
        parser.parse_expr().expect("parse_expr")
    }

    /// Parse `input` expecting a failure and return the rendered error
    /// message. Panics (showing the parsed tree) if parsing succeeds.
    fn parse_err(input: &str) -> String {
        let mut parser = Parser::new(input).expect("lexer init");
        match parser.parse_expr() {
            Ok(expr) => panic!("expected parse error for {input:?}, got {expr:?}"),
            Err(err) => err.to_string(),
        }
    }

    // ---- literals & columns -------------------------------------------

    #[test]
    fn literal_integer() {
        let e = parse("42");
        match e {
            Expr::Literal {
                value: Value::Integer(42),
                ..
            } => {}
            other => panic!("expected Integer(42), got {other:?}"),
        }
    }

    #[test]
    fn literal_float() {
        // 2.75 is exactly representable in binary and, unlike 3.14, is
        // not flagged by clippy's `approx_constant` lint.
        let e = parse("2.75");
        match e {
            Expr::Literal {
                value: Value::Float(f),
                ..
            } => assert!((f - 2.75).abs() < 1e-9),
            other => panic!("expected float literal, got {other:?}"),
        }
    }

    #[test]
    fn literal_string() {
        let e = parse("'hello'");
        match e {
            Expr::Literal {
                value: Value::Text(ref s),
                ..
            } if s.as_ref() == "hello" => {}
            other => panic!("expected Text(hello), got {other:?}"),
        }
    }

    #[test]
    fn literal_booleans_and_null() {
        assert!(matches!(
            parse("TRUE"),
            Expr::Literal {
                value: Value::Boolean(true),
                ..
            }
        ));
        assert!(matches!(
            parse("FALSE"),
            Expr::Literal {
                value: Value::Boolean(false),
                ..
            }
        ));
        assert!(matches!(
            parse("NULL"),
            Expr::Literal {
                value: Value::Null,
                ..
            }
        ));
    }

    #[test]
    fn bare_column() {
        let e = parse("user_id");
        match e {
            Expr::Column {
                field: FieldRef::TableColumn { column, .. },
                ..
            } => {
                assert_eq!(column, "user_id");
            }
            other => panic!("expected column, got {other:?}"),
        }
    }

    // ---- precedence & associativity -----------------------------------

    #[test]
    fn arithmetic_precedence_mul_over_add() {
        // a + b * c  →  Add(a, Mul(b, c))
        let e = parse("a + b * c");
        let Expr::BinaryOp {
            op: BinOp::Add,
            rhs,
            ..
        } = e
        else {
            panic!("root must be Add");
        };
        let Expr::BinaryOp { op: BinOp::Mul, .. } = *rhs else {
            panic!("rhs must be Mul");
        };
    }

    #[test]
    fn arithmetic_left_associativity() {
        // a - b - c  →  Sub(Sub(a, b), c)
        let e = parse("a - b - c");
        let Expr::BinaryOp {
            op: BinOp::Sub,
            lhs,
            ..
        } = e
        else {
            panic!("root must be Sub");
        };
        let Expr::BinaryOp { op: BinOp::Sub, .. } = *lhs else {
            panic!("lhs must be Sub (left-assoc)");
        };
    }

    #[test]
    fn parenthesised_override() {
        // (a + b) * c  →  Mul(Add(a, b), c)
        let e = parse("(a + b) * c");
        let Expr::BinaryOp {
            op: BinOp::Mul,
            lhs,
            ..
        } = e
        else {
            panic!("root must be Mul");
        };
        let Expr::BinaryOp { op: BinOp::Add, .. } = *lhs else {
            panic!("lhs must be Add");
        };
    }

    #[test]
    fn comparison_binds_weaker_than_arith() {
        // a + 1 = b - 2
        //   →  Eq(Add(a, 1), Sub(b, 2))
        let e = parse("a + 1 = b - 2");
        let Expr::BinaryOp {
            op: BinOp::Eq,
            lhs,
            rhs,
            ..
        } = e
        else {
            panic!("root must be Eq");
        };
        assert!(matches!(*lhs, Expr::BinaryOp { op: BinOp::Add, .. }));
        assert!(matches!(*rhs, Expr::BinaryOp { op: BinOp::Sub, .. }));
    }

    #[test]
    fn and_binds_tighter_than_or() {
        // a OR b AND c  →  Or(a, And(b, c))
        let e = parse("a OR b AND c");
        let Expr::BinaryOp {
            op: BinOp::Or, rhs, ..
        } = e
        else {
            panic!("root must be Or");
        };
        assert!(matches!(*rhs, Expr::BinaryOp { op: BinOp::And, .. }));
    }

    // ---- unary, concat, CAST, CASE ------------------------------------

    #[test]
    fn unary_negation() {
        let e = parse("-a");
        let Expr::UnaryOp {
            op: UnaryOp::Neg, ..
        } = e
        else {
            panic!("expected unary Neg");
        };
    }

    #[test]
    fn unary_not() {
        let e = parse("NOT a");
        let Expr::UnaryOp {
            op: UnaryOp::Not, ..
        } = e
        else {
            panic!("expected unary Not");
        };
    }

    #[test]
    fn concat_operator() {
        let e = parse("'hello' || name");
        let Expr::BinaryOp {
            op: BinOp::Concat, ..
        } = e
        else {
            panic!("expected Concat");
        };
    }

    #[test]
    fn cast_expr() {
        let e = parse("CAST(age AS TEXT)");
        let Expr::Cast { target, .. } = e else {
            panic!("expected Cast");
        };
        assert_eq!(target, DataType::Text);
    }

    #[test]
    fn case_expr() {
        let e = parse("CASE WHEN a = 1 THEN 'one' WHEN a = 2 THEN 'two' ELSE 'other' END");
        let Expr::Case {
            branches, else_, ..
        } = e
        else {
            panic!("expected Case");
        };
        assert_eq!(branches.len(), 2);
        assert!(else_.is_some());
    }

    // ---- postfix forms: IS NULL, BETWEEN, IN --------------------------

    #[test]
    fn is_null_postfix() {
        let e = parse("name IS NULL");
        assert!(matches!(e, Expr::IsNull { negated: false, .. }));
    }

    #[test]
    fn is_not_null_postfix() {
        let e = parse("name IS NOT NULL");
        assert!(matches!(e, Expr::IsNull { negated: true, .. }));
    }

    #[test]
    fn between_with_columns() {
        let e = parse("temp BETWEEN min_t AND max_t");
        let Expr::Between {
            target,
            low,
            high,
            negated,
            ..
        } = e
        else {
            panic!("expected Between");
        };
        assert!(!negated);
        assert!(matches!(*target, Expr::Column { .. }));
        assert!(matches!(*low, Expr::Column { .. }));
        assert!(matches!(*high, Expr::Column { .. }));
    }

    #[test]
    fn not_between_negates() {
        let e = parse("temp NOT BETWEEN 0 AND 100");
        let Expr::Between { negated: true, .. } = e else {
            panic!("expected negated Between");
        };
    }

    #[test]
    fn in_list_literal() {
        let e = parse("status IN (1, 2, 3)");
        let Expr::InList {
            values, negated, ..
        } = e
        else {
            panic!("expected InList");
        };
        assert!(!negated);
        assert_eq!(values.len(), 3);
    }

    #[test]
    fn not_in_list() {
        let e = parse("status NOT IN (1, 2)");
        let Expr::InList { negated: true, .. } = e else {
            panic!("expected negated InList");
        };
    }

    // ---- function calls -----------------------------------------------

    #[test]
    fn function_call_with_args() {
        let e = parse("UPPER(name)");
        let Expr::FunctionCall { name, args, .. } = e else {
            panic!("expected FunctionCall");
        };
        assert_eq!(name, "UPPER");
        assert_eq!(args.len(), 1);
    }

    #[test]
    fn nested_function_call() {
        let e = parse("COALESCE(a, UPPER(b))");
        let Expr::FunctionCall { name, args, .. } = e else {
            panic!("expected FunctionCall");
        };
        assert_eq!(name, "COALESCE");
        assert_eq!(args.len(), 2);
        assert!(matches!(&args[1], Expr::FunctionCall { .. }));
    }

    #[test]
    fn duration_literal_parses_as_text() {
        let e = parse("time_bucket(5m)");
        let Expr::FunctionCall { name, args, .. } = e else {
            panic!("expected FunctionCall, got {e:?}");
        };
        assert_eq!(name.to_uppercase(), "TIME_BUCKET");
        assert_eq!(args.len(), 1);
        assert!(
            matches!(&args[0], Expr::Literal { value: Value::Text(s), .. } if s.as_ref() == "5m"),
            "expected Text(\"5m\"), got {:?}",
            args[0]
        );
    }

    // ---- placeholders: `$N` -------------------------------------------

    #[test]
    fn placeholder_dollar_one() {
        let e = parse("$1");
        match e {
            Expr::Parameter { index: 0, .. } => {}
            other => panic!("expected Parameter(0), got {other:?}"),
        }
    }

    #[test]
    fn placeholder_dollar_n() {
        let e = parse("$7");
        match e {
            Expr::Parameter { index: 6, .. } => {}
            other => panic!("expected Parameter(6), got {other:?}"),
        }
    }

    #[test]
    fn placeholder_in_string_literal_is_text() {
        // `$1` inside a string literal must NOT parse as a placeholder.
        let e = parse("'$1'");
        match e {
            Expr::Literal {
                value: Value::Text(s),
                ..
            } if s.as_ref() == "$1" => {}
            other => panic!("expected text literal '$1', got {other:?}"),
        }
    }

    #[test]
    fn placeholder_in_comparison() {
        // SELECT-WHERE shape: `id = $1`
        let e = parse("id = $1");
        let Expr::BinaryOp {
            op: BinOp::Eq, rhs, ..
        } = e
        else {
            panic!("root must be Eq");
        };
        assert!(matches!(*rhs, Expr::Parameter { index: 0, .. }));
    }

    #[test]
    fn placeholder_zero_rejected() {
        let err = parse_err("$0");
        assert!(err.contains("placeholder"));
    }

    // ---- placeholders: `?` / `?N` -------------------------------------

    #[test]
    fn placeholder_question_single() {
        // Lone `?` numbered as parameter 1 (index 0).
        let e = parse("?");
        match e {
            Expr::Parameter { index: 0, .. } => {}
            other => panic!("expected Parameter(0), got {other:?}"),
        }
    }

    #[test]
    fn placeholder_question_numbered() {
        let e = parse("?7");
        match e {
            Expr::Parameter { index: 6, .. } => {}
            other => panic!("expected Parameter(6), got {other:?}"),
        }
    }

    #[test]
    fn placeholder_question_numbered_zero_rejected() {
        let err = parse_err("?0");
        assert!(err.contains("placeholder"));
    }

    #[test]
    fn placeholder_question_left_to_right() {
        // `id = ? AND name = ?` → params 0 and 1
        let e = parse("id = ? AND name = ?");
        let Expr::BinaryOp {
            op: BinOp::And,
            lhs,
            rhs,
            ..
        } = e
        else {
            panic!("root must be And");
        };
        let Expr::BinaryOp {
            op: BinOp::Eq,
            rhs: r1,
            ..
        } = *lhs
        else {
            panic!("lhs must be Eq");
        };
        assert!(matches!(*r1, Expr::Parameter { index: 0, .. }));
        let Expr::BinaryOp {
            op: BinOp::Eq,
            rhs: r2,
            ..
        } = *rhs
        else {
            panic!("rhs must be Eq");
        };
        assert!(matches!(*r2, Expr::Parameter { index: 1, .. }));
    }

    #[test]
    fn placeholder_question_in_string_literal_is_text() {
        let e = parse("'?'");
        match e {
            Expr::Literal {
                value: Value::Text(s),
                ..
            } if s.as_ref() == "?" => {}
            other => panic!("expected text literal '?', got {other:?}"),
        }
    }

    #[test]
    fn placeholder_mixing_question_then_dollar_rejected() {
        let err = parse_err("id = ? AND x = $2");
        assert!(
            err.contains("mix"),
            "expected mixing error, got: {err}"
        );
    }

    #[test]
    fn placeholder_mixing_dollar_then_question_rejected() {
        let err = parse_err("id = $1 AND x = ?");
        assert!(
            err.contains("mix"),
            "expected mixing error, got: {err}"
        );
    }

    #[test]
    fn placeholder_question_in_comment_ignored() {
        // `?` inside an SQL line comment must not bump the counter.
        // The expression after the comment is the only param.
        let e = parse("-- ? ignored\n  ?");
        match e {
            Expr::Parameter { index: 0, .. } => {}
            other => panic!("expected Parameter(0), got {other:?}"),
        }
    }

    // ---- spans --------------------------------------------------------

    #[test]
    fn span_tracks_token_range() {
        // The root of a parsed binary expression must carry a real,
        // non-empty span.
        let e = parse("123 + 456");
        let span = e.span();
        assert!(!span.is_synthetic(), "root span must be real");
        assert!(span.start.offset < span.end.offset);
    }
}
reddb_server/storage/query/parser/expr.rs

reddb_server/storage/query/parser/
expr.rs