Skip to main content

spg_sql/
parser.rs

1//! Recursive-descent parser with a Pratt (precedence-climbing) sub-parser for
2//! expressions.
3//!
4//! Precedence (lowest → highest binding):
5//! `OR` (1) `<` `AND` (2) `<` `NOT` unary (3) `<`
6//! comparisons `=` `<>` `<` `<=` `>` `>=` (4) `<`
7//! `+` `-` (5) `<` `*` `/` (6) `<` unary `-` (7) `<` parens / atom.
8//!
9//! This matches PG's behaviour for the operators we support — e.g. `NOT a = b`
10//! parses as `NOT (a = b)` and `-a * b` as `(-a) * b`.
11
12use alloc::boxed::Box;
13use alloc::format;
14use alloc::string::{String, ToString};
15use alloc::vec;
16use alloc::vec::Vec;
17use core::fmt;
18use core::mem;
19
20use crate::ast::{
21    BinOp, CastTarget, ColumnDef, ColumnName, ColumnTypeName, CreateIndexStatement,
22    CreatePublicationStatement, CreateSubscriptionStatement, CreateTableStatement, Expr,
23    ExtractField, FkAction, ForeignKeyConstraint, FrameBound, FrameKind, FromClause, FromJoin,
24    IndexMethod, InsertStatement, JoinKind, Literal, NullTreatment, OrderBy, PublicationScope,
25    SelectItem, SelectStatement, Statement, TableRef, UnOp, UnionKind, VecEncoding, WindowFrame,
26};
27use crate::lexer::{self, LexError, Token};
28
/// Error returned when a SQL statement cannot be parsed.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct ParseError {
    /// Human-readable description of what went wrong.
    pub message: String,
    /// Position in the token stream at which the parser gave up. This is a
    /// token index, not a byte offset into the input.
    pub token_pos: usize,
}
35
36impl fmt::Display for ParseError {
37    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
38        write!(
39            f,
40            "parse error at token #{}: {}",
41            self.token_pos, self.message
42        )
43    }
44}
45
46impl From<LexError> for ParseError {
47    fn from(e: LexError) -> Self {
48        Self {
49            message: format!("lex: {e}"),
50            token_pos: 0,
51        }
52    }
53}
54
55/// Parse exactly one statement, swallow an optional trailing `;`, and require
56/// the token stream to end there.
57pub fn parse_statement(input: &str) -> Result<Statement, ParseError> {
58    let tokens = lexer::tokenize(input)?;
59    let mut p = Parser::new(tokens);
60    let stmt = p.parse_one_statement()?;
61    if matches!(p.peek(), Token::Semicolon) {
62        p.advance();
63    }
64    p.expect_eof()?;
65    Ok(stmt)
66}
67
/// Cursor over a lexed token stream; the statement and expression parsers
/// are implemented as methods on this type.
struct Parser {
    /// Tokens being parsed. `peek()` relies on the stream ending with
    /// `Token::Eof`; `advance()` moves consumed tokens out and leaves
    /// `Token::Eof` in their slot.
    tokens: Vec<Token>,
    /// Index into `tokens` of the token currently under the cursor.
    pos: usize,
}
72
73impl Parser {
74    fn new(tokens: Vec<Token>) -> Self {
75        Self { tokens, pos: 0 }
76    }
77
78    fn peek(&self) -> &Token {
79        // tokens always ends with Eof; pos is clamped in advance().
80        &self.tokens[self.pos]
81    }
82
83    fn advance(&mut self) -> Token {
84        let t = mem::replace(&mut self.tokens[self.pos], Token::Eof);
85        if self.pos + 1 < self.tokens.len() {
86            self.pos += 1;
87        }
88        t
89    }
90
91    fn err(&self, message: String) -> ParseError {
92        ParseError {
93            message,
94            token_pos: self.pos,
95        }
96    }
97
98    fn expect_eof(&self) -> Result<(), ParseError> {
99        if matches!(self.peek(), Token::Eof) {
100            Ok(())
101        } else {
102            Err(self.err(format!("expected end of input, got {:?}", self.peek())))
103        }
104    }
105
106    fn expect_ident_like(&mut self) -> Result<String, ParseError> {
107        match self.advance() {
108            Token::Ident(s) | Token::QuotedIdent(s) => Ok(s),
109            other => Err(ParseError {
110                message: format!("expected identifier, got {other:?}"),
111                token_pos: self.pos.saturating_sub(1),
112            }),
113        }
114    }
115
116    #[allow(clippy::too_many_lines)]
117    fn parse_one_statement(&mut self) -> Result<Statement, ParseError> {
118        match self.peek() {
119            Token::Select => self.parse_select_stmt(),
120            // v4.11: `WITH name AS (SELECT ...) [, ...] SELECT ...`.
121            // WITH isn't a reserved token in our lexer — comes through
122            // as `Token::Ident("with")` (case-insensitive).
123            Token::Ident(s) | Token::QuotedIdent(s) if s.eq_ignore_ascii_case("with") => {
124                self.advance();
125                self.parse_with_cte_then_select()
126            }
127            // v4.26: `EXPLAIN [ANALYZE] <select>`. Comes through as
128            // an identifier — not a reserved keyword.
129            Token::Ident(s) | Token::QuotedIdent(s) if s.eq_ignore_ascii_case("explain") => {
130                self.advance();
131                let mut analyze = false;
132                let mut suggest = false;
133                // v6.8.3 — `EXPLAIN (SUGGEST)` opt-in.
134                if matches!(self.peek(), Token::LParen) {
135                    self.advance();
136                    let opt = match self.peek().clone() {
137                        Token::Ident(s) | Token::QuotedIdent(s) => s,
138                        other => {
139                            return Err(self.err(format!(
140                                "expected option keyword inside EXPLAIN (…), got {other:?}"
141                            )));
142                        }
143                    };
144                    if !opt.eq_ignore_ascii_case("suggest") {
145                        return Err(self.err(format!(
146                            "unknown EXPLAIN option {opt:?}; v6.8.3 supports SUGGEST"
147                        )));
148                    }
149                    self.advance();
150                    if !matches!(self.peek(), Token::RParen) {
151                        return Err(self.err(format!(
152                            "expected ')' after EXPLAIN option, got {:?}",
153                            self.peek()
154                        )));
155                    }
156                    self.advance();
157                    suggest = true;
158                } else if let Token::Ident(s) | Token::QuotedIdent(s) = self.peek()
159                    && (s.eq_ignore_ascii_case("analyze") || s.eq_ignore_ascii_case("analyse"))
160                {
161                    self.advance();
162                    analyze = true;
163                }
164                let inner = self.parse_select_stmt()?;
165                let Statement::Select(s) = inner else {
166                    return Err(self.err(format!("EXPLAIN body must be a SELECT, got {inner:?}")));
167                };
168                Ok(Statement::Explain(crate::ast::ExplainStatement {
169                    analyze,
170                    inner: Box::new(s),
171                    suggest,
172                }))
173            }
174            Token::Create => self.parse_create_stmt(),
175            Token::Insert => self.parse_insert_stmt(),
176            Token::Begin => {
177                self.advance();
178                Ok(Statement::Begin)
179            }
180            Token::Commit => {
181                self.advance();
182                Ok(Statement::Commit)
183            }
184            Token::Rollback => {
185                self.advance();
186                // `ROLLBACK TO [SAVEPOINT] <name>` returns to that
187                // savepoint without ending the transaction. Bare
188                // `ROLLBACK` drops the whole TX.
189                if matches!(self.peek(), Token::To) {
190                    self.advance();
191                    if matches!(self.peek(), Token::Savepoint) {
192                        self.advance();
193                    }
194                    let name = self.expect_ident_like()?;
195                    Ok(Statement::RollbackToSavepoint(name))
196                } else {
197                    Ok(Statement::Rollback)
198                }
199            }
200            Token::Savepoint => {
201                self.advance();
202                let name = self.expect_ident_like()?;
203                Ok(Statement::Savepoint(name))
204            }
205            Token::Release => {
206                self.advance();
207                // `RELEASE [SAVEPOINT] <name>` — the `SAVEPOINT` keyword
208                // is optional in standard SQL.
209                if matches!(self.peek(), Token::Savepoint) {
210                    self.advance();
211                }
212                let name = self.expect_ident_like()?;
213                Ok(Statement::ReleaseSavepoint(name))
214            }
215            Token::Show => {
216                self.advance();
217                // `SHOW TABLES` / `SHOW USERS` / `SHOW COLUMNS FROM <table>`.
218                // v6.1.2 promoted TABLES to a reserved keyword (for
219                // `CREATE PUBLICATION … FOR ALL TABLES`), so it now
220                // arrives as `Token::Tables` rather than a bare ident.
221                // USERS / COLUMNS remain bare idents.
222                let target = match self.advance() {
223                    Token::Tables => "tables".to_string(),
224                    Token::Ident(s) | Token::QuotedIdent(s) => s.to_ascii_lowercase(),
225                    other => {
226                        return Err(self.err(format!(
227                            "expected SHOW target, got {other:?}"
228                        )));
229                    }
230                };
231                match target.as_str() {
232                    "tables" => Ok(Statement::ShowTables),
233                    "users" => Ok(Statement::ShowUsers),
234                    // v6.1.3 — PUBLICATIONS plural is NOT a reserved
235                    // keyword on its own; it lands here as a bare
236                    // ident. Returning all publications + their
237                    // scope summary.
238                    "publications" => Ok(Statement::ShowPublications),
239                    // v6.1.4 — same shape for SUBSCRIPTIONS plural.
240                    "subscriptions" => Ok(Statement::ShowSubscriptions),
241                    "columns" => {
242                        if !matches!(self.peek(), Token::From) {
243                            return Err(self.err(format!(
244                                "expected FROM after SHOW COLUMNS, got {:?}",
245                                self.peek()
246                            )));
247                        }
248                        self.advance();
249                        let table = self.expect_ident_like()?;
250                        Ok(Statement::ShowColumns(table))
251                    }
252                    other => Err(self.err(format!(
253                        "unknown SHOW target {other:?}; supported: TABLES, COLUMNS, USERS, PUBLICATIONS"
254                    ))),
255                }
256            }
257            // v6.1.2: `DROP` is now a reserved keyword (it dispatches
258            // to DROP USER and DROP PUBLICATION today; DROP TABLE /
259            // DROP INDEX are still SHOW-shaped admin ops). Pre-6.1.2
260            // arrived as a bare ident; tokenising it dedicatedly
261            // keeps the dispatch tree small.
262            Token::Drop => {
263                self.advance();
264                match self.peek() {
265                    Token::Publication => {
266                        self.advance();
267                        let name = self.expect_ident_or_string()?;
268                        Ok(Statement::DropPublication(name))
269                    }
270                    Token::Subscription => {
271                        self.advance();
272                        let name = self.expect_ident_or_string()?;
273                        Ok(Statement::DropSubscription(name))
274                    }
275                    Token::Ident(s) | Token::QuotedIdent(s) if s.eq_ignore_ascii_case("user") => {
276                        self.advance();
277                        let name = self.expect_ident_or_string()?;
278                        Ok(Statement::DropUser(name))
279                    }
280                    other => Err(self.err(format!(
281                        "expected USER / PUBLICATION / SUBSCRIPTION after DROP, got {other:?}"
282                    ))),
283                }
284            }
285            Token::Ident(s) | Token::QuotedIdent(s) if s.eq_ignore_ascii_case("update") => {
286                self.advance();
287                self.parse_update_after_keyword()
288            }
289            Token::Ident(s) | Token::QuotedIdent(s) if s.eq_ignore_ascii_case("delete") => {
290                self.advance();
291                self.parse_delete_after_keyword()
292            }
293            // v6.0.4: ALTER INDEX <name> REBUILD [WITH (encoding = ...)].
294            // ALTER is not a reserved keyword in the lexer — handled
295            // as a bare ident here.
296            Token::Ident(s) | Token::QuotedIdent(s) if s.eq_ignore_ascii_case("alter") => {
297                self.advance();
298                self.parse_alter_after_keyword()
299            }
300            // v6.1.7: WAIT FOR WAL POSITION <pos> [WITH TIMEOUT <ms>].
301            // WAIT / POSITION / TIMEOUT are bare idents — no lexer
302            // additions needed.
303            Token::Ident(s) | Token::QuotedIdent(s) if s.eq_ignore_ascii_case("wait") => {
304                self.advance();
305                self.parse_wait_after_keyword()
306            }
307            // v6.2.0: ANALYZE [<table>]. ANALYZE is a bare ident.
308            // Bare ANALYZE → analyse every user table; ANALYZE
309            // <name> → re-stats one. The argument is an optional
310            // ident (or quoted ident); anything else is a parse
311            // error.
312            // v6.7.3 — `COMPACT COLD SEGMENTS`. No arguments, no
313            // `WHERE` filter (carved out per V6_7_DESIGN.md
314            // STABILITY). Lex order: identifier "compact" → "cold"
315            // → "segments". Anything else after `COMPACT` is a
316            // parse error.
317            Token::Ident(s) | Token::QuotedIdent(s) if s.eq_ignore_ascii_case("compact") => {
318                self.advance();
319                let next = self.peek().clone();
320                let cold = match next {
321                    Token::Ident(s) | Token::QuotedIdent(s) => s,
322                    _ => {
323                        return Err(
324                            self.err(format!("expected COLD after COMPACT, got {:?}", self.peek()))
325                        );
326                    }
327                };
328                if !cold.eq_ignore_ascii_case("cold") {
329                    return Err(self.err(format!("expected COLD after COMPACT, got {cold:?}")));
330                }
331                self.advance();
332                let next = self.peek().clone();
333                let segments = match next {
334                    Token::Ident(s) | Token::QuotedIdent(s) => s,
335                    _ => {
336                        return Err(self.err(format!(
337                            "expected SEGMENTS after COMPACT COLD, got {:?}",
338                            self.peek()
339                        )));
340                    }
341                };
342                if !segments.eq_ignore_ascii_case("segments") {
343                    return Err(self.err(format!(
344                        "expected SEGMENTS after COMPACT COLD, got {segments:?}"
345                    )));
346                }
347                self.advance();
348                Ok(Statement::CompactColdSegments)
349            }
350            Token::Ident(s) | Token::QuotedIdent(s) if s.eq_ignore_ascii_case("analyze") => {
351                self.advance();
352                let target = match self.peek() {
353                    Token::Eof | Token::Semicolon => None,
354                    Token::Ident(_) | Token::QuotedIdent(_) => {
355                        Some(self.expect_ident_like()?)
356                    }
357                    other => {
358                        return Err(self.err(format!(
359                            "expected table name or end of statement after ANALYZE, got {other:?}"
360                        )));
361                    }
362                };
363                Ok(Statement::Analyze(target))
364            }
365            other => Err(self.err(format!(
366                "expected SELECT / CREATE / DROP / INSERT / UPDATE / DELETE / ALTER / BEGIN / COMMIT / \
367                 ROLLBACK / SAVEPOINT / RELEASE / SHOW at start of statement, got {other:?}"
368            ))),
369        }
370    }
371
372    fn parse_create_stmt(&mut self) -> Result<Statement, ParseError> {
373        debug_assert!(matches!(self.peek(), Token::Create));
374        self.advance();
375        match self.peek() {
376            Token::Table => self.parse_create_table_stmt_after_create(),
377            Token::Index => self.parse_create_index_stmt_after_create(),
378            Token::Publication => {
379                self.advance();
380                self.parse_create_publication_after_keyword()
381            }
382            Token::Subscription => {
383                self.advance();
384                self.parse_create_subscription_after_keyword()
385            }
386            // v4.1: CREATE USER 'name' WITH PASSWORD 'pw' [ROLE 'role'].
387            // USER isn't a reserved keyword — we look for the bare
388            // identifier so the lexer doesn't have to grow a token.
389            Token::Ident(s) | Token::QuotedIdent(s) if s.eq_ignore_ascii_case("user") => {
390                self.advance();
391                self.parse_create_user_after_keyword()
392            }
393            other => Err(self.err(format!(
394                "expected TABLE / INDEX / USER / PUBLICATION / SUBSCRIPTION after CREATE, got {other:?}"
395            ))),
396        }
397    }
398
399    /// v6.1.2 → v6.1.3 — `CREATE PUBLICATION <name>` body. Accepts:
400    ///   - (no clause) → implicit `FOR ALL TABLES`
401    ///   - `FOR ALL TABLES`
402    ///   - `FOR ALL TABLES EXCEPT t1, t2, …` (v6.1.3)
403    ///   - `FOR TABLE t1, t2, …` (v6.1.3) — `FOR TABLES …` also
404    ///     accepted (PG accepts both forms in PG 19).
405    fn parse_create_publication_after_keyword(&mut self) -> Result<Statement, ParseError> {
406        let name = self.expect_ident_or_string()?;
407        // Bare DDL maps to FOR ALL TABLES — matches the v6.1.2
408        // shape so existing publications keep parsing identically.
409        let scope = if matches!(self.peek(), Token::For) {
410            self.advance();
411            if matches!(self.peek(), Token::All) {
412                self.advance();
413                if !matches!(self.peek(), Token::Tables) {
414                    return Err(self.err(format!(
415                        "expected TABLES after FOR ALL, got {:?}",
416                        self.peek()
417                    )));
418                }
419                self.advance();
420                if matches!(self.peek(), Token::Except) {
421                    self.advance();
422                    let tables = self.parse_publication_table_list()?;
423                    PublicationScope::AllTablesExcept(tables)
424                } else {
425                    PublicationScope::AllTables
426                }
427            } else if matches!(self.peek(), Token::Table | Token::Tables) {
428                // PG 19 accepts both `FOR TABLE …` (singular) and
429                // `FOR TABLES …` (plural); SPG matches.
430                self.advance();
431                let tables = self.parse_publication_table_list()?;
432                PublicationScope::ForTables(tables)
433            } else {
434                return Err(self.err(format!(
435                    "expected ALL TABLES or TABLE <list> after FOR, got {:?}",
436                    self.peek()
437                )));
438            }
439        } else {
440            PublicationScope::AllTables
441        };
442        Ok(Statement::CreatePublication(CreatePublicationStatement {
443            name,
444            scope,
445        }))
446    }
447
448    /// v6.1.3 — Comma-separated identifier list for the publication
449    /// FOR-clause. Requires at least one entry; empty list is a
450    /// parse error (PG behaviour). Quoted idents are accepted; the
451    /// names round-trip through `Display` as `quote_ident(name)`.
452    fn parse_publication_table_list(&mut self) -> Result<Vec<String>, ParseError> {
453        let first = self.expect_ident_like()?;
454        let mut out = alloc::vec![first];
455        while matches!(self.peek(), Token::Comma) {
456            self.advance();
457            out.push(self.expect_ident_like()?);
458        }
459        Ok(out)
460    }
461
462    /// v6.1.4 — `CREATE SUBSCRIPTION <name>
463    ///                 CONNECTION '<conn>'
464    ///                 PUBLICATION <pub> [, <pub> ...]`.
465    ///
466    /// The clause order is fixed (CONNECTION first, then
467    /// PUBLICATION) to match PG. No WITH-options accepted in
468    /// v6.1.4 — `enabled` defaults to true, no other knobs ship.
469    fn parse_create_subscription_after_keyword(&mut self) -> Result<Statement, ParseError> {
470        let name = self.expect_ident_or_string()?;
471        if !matches!(self.peek(), Token::Connection) {
472            return Err(self.err(format!(
473                "expected CONNECTION after CREATE SUBSCRIPTION <name>, got {:?}",
474                self.peek()
475            )));
476        }
477        self.advance();
478        let conn_str = self.expect_string_literal()?;
479        if !matches!(self.peek(), Token::Publication) {
480            return Err(self.err(format!(
481                "expected PUBLICATION after CONNECTION '<conn>', got {:?}",
482                self.peek()
483            )));
484        }
485        self.advance();
486        // Reuse the publication FOR-list parser shape: at least one
487        // identifier, comma-separated.
488        let first = self.expect_ident_like()?;
489        let mut publications = alloc::vec![first];
490        while matches!(self.peek(), Token::Comma) {
491            self.advance();
492            publications.push(self.expect_ident_like()?);
493        }
494        Ok(Statement::CreateSubscription(
495            CreateSubscriptionStatement {
496                name,
497                conn_str,
498                publications,
499            },
500        ))
501    }
502
503    /// v6.1.7 — `WAIT FOR WAL POSITION <pos> [WITH TIMEOUT <ms>]`.
504    /// All keywords after `WAIT` are bare idents in v6.1.x; no
505    /// lexer churn. Both `<pos>` and `<ms>` are positive integers
506    /// that fit `u64`.
507    fn parse_wait_after_keyword(&mut self) -> Result<Statement, ParseError> {
508        // FOR is a v6.1.2-reserved keyword (Token::For). The
509        // other two are bare idents — they've never needed lexer
510        // support and we keep it that way.
511        if !matches!(self.peek(), Token::For) {
512            return Err(self.err(format!(
513                "expected FOR after WAIT, got {:?}",
514                self.peek()
515            )));
516        }
517        self.advance();
518        self.expect_keyword_ident("wal")?;
519        self.expect_keyword_ident("position")?;
520        let pos = self.expect_u64_literal()?;
521        let timeout_ms = if matches!(self.peek(), Token::Ident(s) | Token::QuotedIdent(s) if s.eq_ignore_ascii_case("with"))
522        {
523            self.advance();
524            self.expect_keyword_ident("timeout")?;
525            Some(self.expect_u64_literal()?)
526        } else {
527            None
528        };
529        Ok(Statement::WaitForWalPosition { pos, timeout_ms })
530    }
531
532    /// v6.1.7 helper — consume a `Token::Integer` and check it
533    /// fits `u64`. WAL positions and millisecond timeouts are
534    /// non-negative.
535    fn expect_u64_literal(&mut self) -> Result<u64, ParseError> {
536        match self.advance() {
537            Token::Integer(n) if n >= 0 => Ok(n as u64),
538            Token::Integer(n) => Err(ParseError {
539                message: format!("expected non-negative integer, got {n}"),
540                token_pos: self.pos.saturating_sub(1),
541            }),
542            other => Err(ParseError {
543                message: format!("expected integer literal, got {other:?}"),
544                token_pos: self.pos.saturating_sub(1),
545            }),
546        }
547    }
548
549    /// `CREATE USER` body — name + WITH PASSWORD '<pw>' + optional
550    /// ROLE '<role>' (defaults to readonly). All string slots accept
551    /// either a quoted ident or a quoted string literal.
552    fn parse_create_user_after_keyword(&mut self) -> Result<Statement, ParseError> {
553        let name = self.expect_ident_or_string()?;
554        self.expect_keyword_ident("with")?;
555        self.expect_keyword_ident("password")?;
556        let password = self.expect_string_literal()?;
557        let role = if let Token::Ident(s) | Token::QuotedIdent(s) = self.peek()
558            && s.eq_ignore_ascii_case("role")
559        {
560            self.advance();
561            self.expect_string_literal()?
562        } else {
563            "readonly".to_string()
564        };
565        Ok(Statement::CreateUser(crate::ast::CreateUserStatement {
566            name,
567            password,
568            role,
569        }))
570    }
571
572    /// v4.4 `UPDATE <table> SET col = expr [, col = expr]* [WHERE cond]`.
573    /// Caller already consumed the leading `UPDATE` ident.
574    fn parse_update_after_keyword(&mut self) -> Result<Statement, ParseError> {
575        let table = self.expect_ident_like()?;
576        self.expect_keyword_ident("set")?;
577        let mut assignments = Vec::new();
578        loop {
579            let col = self.expect_ident_like()?;
580            if !matches!(self.peek(), Token::Eq) {
581                return Err(self.err(format!(
582                    "expected `=` after column name in UPDATE SET, got {:?}",
583                    self.peek()
584                )));
585            }
586            self.advance();
587            let value = self.parse_expr(0)?;
588            assignments.push((col, value));
589            if matches!(self.peek(), Token::Comma) {
590                self.advance();
591                continue;
592            }
593            break;
594        }
595        let where_ = if matches!(self.peek(), Token::Where) {
596            self.advance();
597            Some(self.parse_expr(0)?)
598        } else {
599            None
600        };
601        Ok(Statement::Update(crate::ast::UpdateStatement {
602            table,
603            assignments,
604            where_,
605        }))
606    }
607
608    /// v4.4 `DELETE FROM <table> [WHERE cond]`. Caller already consumed
609    /// the leading `DELETE` ident.
610    fn parse_delete_after_keyword(&mut self) -> Result<Statement, ParseError> {
611        if !matches!(self.peek(), Token::From) {
612            return Err(self.err(format!("expected FROM after DELETE, got {:?}", self.peek())));
613        }
614        self.advance();
615        let table = self.expect_ident_like()?;
616        let where_ = if matches!(self.peek(), Token::Where) {
617            self.advance();
618            Some(self.parse_expr(0)?)
619        } else {
620            None
621        };
622        Ok(Statement::Delete(crate::ast::DeleteStatement {
623            table,
624            where_,
625        }))
626    }
627
628    /// v6.0.4 — parse the tail of an ALTER statement after the
629    /// leading `ALTER` keyword has been consumed. Only one form is
630    /// supported in v6.0.4:
631    ///
632    /// ```text
633    /// ALTER INDEX <name> REBUILD [WITH (encoding = <enc>)]
634    /// ```
635    fn parse_alter_after_keyword(&mut self) -> Result<Statement, ParseError> {
636        // ALTER INDEX <name> ... | ALTER TABLE <name> SET hot_tier_bytes = <n>
637        match self.advance() {
638            Token::Index => {}
639            Token::Ident(s) | Token::QuotedIdent(s) if s.eq_ignore_ascii_case("index") => {}
640            // v6.7.2 — ALTER TABLE t SET hot_tier_bytes = X
641            Token::Table => return self.parse_alter_table_after_keyword(),
642            Token::Ident(s) | Token::QuotedIdent(s) if s.eq_ignore_ascii_case("table") => {
643                return self.parse_alter_table_after_keyword();
644            }
645            other => {
646                return Err(self.err(format!("expected INDEX or TABLE after ALTER, got {other:?}")));
647            }
648        }
649        let name = self.expect_ident_like()?;
650        // REBUILD
651        self.expect_keyword_ident("rebuild")?;
652        // Optional: WITH (encoding = <enc>)
653        let encoding = if matches!(self.peek(), Token::Ident(s) if s.eq_ignore_ascii_case("with")) {
654            self.advance();
655            if !matches!(self.peek(), Token::LParen) {
656                return Err(self.err(format!(
657                    "expected '(' after WITH in ALTER INDEX REBUILD, got {:?}",
658                    self.peek()
659                )));
660            }
661            self.advance();
662            self.expect_keyword_ident("encoding")?;
663            if !matches!(self.peek(), Token::Eq) {
664                return Err(self.err(format!(
665                    "expected '=' after encoding in ALTER INDEX REBUILD, got {:?}",
666                    self.peek()
667                )));
668            }
669            self.advance();
670            let enc_ident = match self.advance() {
671                Token::Ident(s) | Token::QuotedIdent(s) => s,
672                other => {
673                    return Err(self.err(format!("expected encoding name after =, got {other:?}")));
674                }
675            };
676            let enc = match enc_ident.to_ascii_lowercase().as_str() {
677                "f32" => VecEncoding::F32,
678                "sq8" => VecEncoding::Sq8,
679                "half" => VecEncoding::F16,
680                other => {
681                    return Err(self.err(format!(
682                        "unknown vector encoding {other:?} in ALTER INDEX REBUILD; supported: F32, SQ8, HALF"
683                    )));
684                }
685            };
686            if !matches!(self.peek(), Token::RParen) {
687                return Err(self.err(format!(
688                    "expected ')' after encoding value, got {:?}",
689                    self.peek()
690                )));
691            }
692            self.advance();
693            Some(enc)
694        } else {
695            None
696        };
697        Ok(Statement::AlterIndex(crate::ast::AlterIndexStatement {
698            name,
699            target: crate::ast::AlterIndexTarget::Rebuild { encoding },
700        }))
701    }
702
703    /// v6.7.2 — `ALTER TABLE <name> SET hot_tier_bytes = <n>`. The
704    /// only `SET` form currently supported; future v6.7.x can add
705    /// more SET subjects without changing the dispatch shape.
706    fn parse_alter_table_after_keyword(&mut self) -> Result<Statement, ParseError> {
707        let table_name = self.expect_ident_like()?;
708        // v7.6.8 — dispatch on the next keyword: SET / ADD / DROP.
709        // SET kept identical to v6.7.x. ADD / DROP CONSTRAINT routes
710        // to FK installation / removal.
711        match self.peek() {
712            Token::Ident(s) if s.eq_ignore_ascii_case("set") => {
713                self.advance();
714                let setting = self.expect_ident_like()?;
715                if !setting.eq_ignore_ascii_case("hot_tier_bytes") {
716                    return Err(self.err(alloc::format!(
717                        "ALTER TABLE SET: unknown setting {setting:?}; supported: hot_tier_bytes"
718                    )));
719                }
720                if !matches!(self.peek(), Token::Eq) {
721                    return Err(self.err(alloc::format!(
722                        "expected '=' after hot_tier_bytes, got {:?}",
723                        self.peek()
724                    )));
725                }
726                self.advance();
727                let n = self.expect_u64_literal()?;
728                Ok(Statement::AlterTable(crate::ast::AlterTableStatement {
729                    name: table_name,
730                    target: crate::ast::AlterTableTarget::SetHotTierBytes(n),
731                }))
732            }
733            Token::Ident(s) if s.eq_ignore_ascii_case("add") => {
734                self.advance();
735                // Optional `CONSTRAINT <name>` prefix, then the same
736                // FK clause shape as table-level CREATE TABLE FK.
737                let fk = self.parse_table_level_fk()?;
738                Ok(Statement::AlterTable(crate::ast::AlterTableStatement {
739                    name: table_name,
740                    target: crate::ast::AlterTableTarget::AddForeignKey(fk),
741                }))
742            }
743            Token::Drop => {
744                self.advance();
745                match self.advance() {
746                    Token::Ident(s) if s.eq_ignore_ascii_case("constraint") => {}
747                    other => {
748                        return Err(self.err(alloc::format!(
749                            "expected CONSTRAINT after DROP in ALTER TABLE, got {other:?}"
750                        )));
751                    }
752                }
753                let cname = self.expect_ident_like()?;
754                Ok(Statement::AlterTable(crate::ast::AlterTableStatement {
755                    name: table_name,
756                    target: crate::ast::AlterTableTarget::DropForeignKey(cname),
757                }))
758            }
759            other => Err(self.err(alloc::format!(
760                "expected SET / ADD / DROP in ALTER TABLE, got {other:?}"
761            ))),
762        }
763    }
764
765    /// Consume a bare ident if its lowercase matches `kw`, else err.
766    fn expect_keyword_ident(&mut self, kw: &str) -> Result<(), ParseError> {
767        match self.advance() {
768            Token::Ident(s) | Token::QuotedIdent(s) if s.eq_ignore_ascii_case(kw) => Ok(()),
769            other => Err(ParseError {
770                message: format!("expected {kw:?}, got {other:?}"),
771                token_pos: self.pos.saturating_sub(1),
772            }),
773        }
774    }
775
776    /// Accept either a quoted identifier (`"foo"`) or a quoted string
777    /// literal (`'foo'`) — same shape used by CREATE USER for the
778    /// username slot.
779    fn expect_ident_or_string(&mut self) -> Result<String, ParseError> {
780        match self.advance() {
781            Token::Ident(s) | Token::QuotedIdent(s) | Token::String(s) => Ok(s),
782            other => Err(ParseError {
783                message: format!("expected identifier or string, got {other:?}"),
784                token_pos: self.pos.saturating_sub(1),
785            }),
786        }
787    }
788
789    fn expect_string_literal(&mut self) -> Result<String, ParseError> {
790        match self.advance() {
791            Token::String(s) => Ok(s),
792            other => Err(ParseError {
793                message: format!("expected quoted string, got {other:?}"),
794                token_pos: self.pos.saturating_sub(1),
795            }),
796        }
797    }
798
799    fn parse_select_stmt(&mut self) -> Result<Statement, ParseError> {
800        // Caller dispatches on Token::Select; the inner helper handles
801        // the rest. ORDER BY / LIMIT bind at this top level; UNION peers
802        // get a fresh bare-select parse and may not have their own ORDER
803        // BY / LIMIT.
804        let mut head = self.parse_bare_select()?;
805        while matches!(self.peek(), Token::Union) {
806            self.advance();
807            let kind = if matches!(self.peek(), Token::All) {
808                self.advance();
809                UnionKind::All
810            } else {
811                UnionKind::Distinct
812            };
813            let peer = self.parse_bare_select()?;
814            head.unions.push((kind, peer));
815        }
816        head.order_by = if matches!(self.peek(), Token::Order) {
817            self.advance();
818            if !matches!(self.peek(), Token::By) {
819                return Err(self.err(format!("expected BY after ORDER, got {:?}", self.peek())));
820            }
821            self.advance();
822            // v6.4.0 — multi-key ORDER BY. Loop over comma-separated
823            // `<expr> [ASC|DESC]` items.
824            let mut keys = Vec::new();
825            loop {
826                let expr = self.parse_expr(0)?;
827                let desc = if matches!(self.peek(), Token::Desc) {
828                    self.advance();
829                    true
830                } else if matches!(self.peek(), Token::Asc) {
831                    self.advance();
832                    false
833                } else {
834                    false
835                };
836                keys.push(OrderBy { expr, desc });
837                if matches!(self.peek(), Token::Comma) {
838                    self.advance();
839                } else {
840                    break;
841                }
842            }
843            keys
844        } else {
845            Vec::new()
846        };
847        head.limit = if matches!(self.peek(), Token::Limit) {
848            self.advance();
849            let n = self.expect_u32_literal("LIMIT")?;
850            Some(n)
851        } else {
852            None
853        };
854        head.offset = if matches!(self.peek(), Token::Offset) {
855            self.advance();
856            let n = self.expect_u32_literal("OFFSET")?;
857            Some(n)
858        } else {
859            None
860        };
861        Ok(Statement::Select(head))
862    }
863
864    fn expect_u32_literal(&mut self, label: &str) -> Result<u32, ParseError> {
865        match self.advance() {
866            Token::Integer(n) if n >= 0 => u32::try_from(n).map_err(|_| ParseError {
867                message: format!("{label} value too large: {n}"),
868                token_pos: self.pos.saturating_sub(1),
869            }),
870            other => Err(ParseError {
871                message: format!("expected non-negative integer after {label}, got {other:?}"),
872                token_pos: self.pos.saturating_sub(1),
873            }),
874        }
875    }
876
877    /// Parse one SELECT block without ORDER BY / LIMIT / UNION chaining —
878    /// just `[DISTINCT] items [FROM] [WHERE] [GROUP BY]`. Returned with
879    /// `unions` empty and `order_by` / `limit` `None`; the top-level
880    /// `parse_select_stmt` is responsible for filling those in.
881    fn parse_bare_select(&mut self) -> Result<SelectStatement, ParseError> {
882        if !matches!(self.peek(), Token::Select) {
883            return Err(self.err(format!(
884                "expected SELECT to start a query block, got {:?}",
885                self.peek()
886            )));
887        }
888        self.advance();
889        let distinct = if matches!(self.peek(), Token::Distinct) {
890            self.advance();
891            true
892        } else {
893            false
894        };
895        let items = self.parse_select_list()?;
896        let from = if matches!(self.peek(), Token::From) {
897            self.advance();
898            Some(self.parse_from_clause()?)
899        } else {
900            None
901        };
902        let where_ = if matches!(self.peek(), Token::Where) {
903            self.advance();
904            Some(self.parse_expr(0)?)
905        } else {
906            None
907        };
908        let mut group_by_all = false;
909        let group_by = if matches!(self.peek(), Token::Group) {
910            self.advance();
911            if !matches!(self.peek(), Token::By) {
912                return Err(self.err(format!("expected BY after GROUP, got {:?}", self.peek())));
913            }
914            self.advance();
915            // v6.4.1 — `GROUP BY ALL` shortcut. Planner expands to
916            // every non-aggregate SELECT-list item later.
917            if matches!(self.peek(), Token::All) {
918                self.advance();
919                group_by_all = true;
920                None
921            } else {
922                let mut groups = Vec::new();
923                loop {
924                    groups.push(self.parse_expr(0)?);
925                    if matches!(self.peek(), Token::Comma) {
926                        self.advance();
927                    } else {
928                        break;
929                    }
930                }
931                Some(groups)
932            }
933        } else {
934            None
935        };
936        let having = if matches!(self.peek(), Token::Having) {
937            self.advance();
938            Some(self.parse_expr(0)?)
939        } else {
940            None
941        };
942        Ok(SelectStatement {
943            ctes: Vec::new(),
944            distinct,
945            items,
946            from,
947            where_,
948            group_by,
949            group_by_all,
950            having,
951            unions: Vec::new(),
952            order_by: Vec::new(),
953            limit: None,
954            offset: None,
955        })
956    }
957
958    fn parse_create_table_stmt_after_create(&mut self) -> Result<Statement, ParseError> {
959        // Caller already consumed CREATE; we're sitting on TABLE.
960        debug_assert!(matches!(self.peek(), Token::Table));
961        self.advance();
962        let if_not_exists = self.consume_if_not_exists();
963        let name = self.expect_ident_like()?;
964        if !matches!(self.peek(), Token::LParen) {
965            return Err(self.err(format!(
966                "expected '(' after table name, got {:?}",
967                self.peek()
968            )));
969        }
970        self.advance();
971        let mut columns = Vec::new();
972        let mut foreign_keys: Vec<ForeignKeyConstraint> = Vec::new();
973        loop {
974            // v7.6.0 — distinguish a table-level constraint clause
975            // from a column definition. Constraints start with
976            // `CONSTRAINT <name> ...` or with the bare `FOREIGN KEY (...)`
977            // shape. Anything else is a column.
978            if self.peek_constraint_or_fk_start() {
979                foreign_keys.push(self.parse_table_level_fk()?);
980            } else {
981                let (col, col_level_fk) = self.parse_column_def_with_fk()?;
982                columns.push(col);
983                if let Some(fk) = col_level_fk {
984                    foreign_keys.push(fk);
985                }
986            }
987            match self.peek() {
988                Token::Comma => {
989                    self.advance();
990                }
991                Token::RParen => {
992                    self.advance();
993                    break;
994                }
995                other => {
996                    return Err(
997                        self.err(format!("expected ',' or ')' in column list, got {other:?}"))
998                    );
999                }
1000            }
1001        }
1002        if columns.is_empty() {
1003            return Err(self.err("CREATE TABLE requires at least one column".into()));
1004        }
1005        Ok(Statement::CreateTable(CreateTableStatement {
1006            name,
1007            columns,
1008            if_not_exists,
1009            foreign_keys,
1010        }))
1011    }
1012
1013    /// v7.6.0 — true when the next tokens are `CONSTRAINT <name>
1014    /// FOREIGN KEY` or bare `FOREIGN KEY`. Both introduce a
1015    /// table-level FK; a column def never starts with either keyword
1016    /// (column names are not in this reserved set).
1017    fn peek_constraint_or_fk_start(&self) -> bool {
1018        let is_constraint_kw = matches!(
1019            self.peek(),
1020            Token::Ident(s) if s.eq_ignore_ascii_case("constraint")
1021        );
1022        let is_foreign_kw = matches!(
1023            self.peek(),
1024            Token::Ident(s) if s.eq_ignore_ascii_case("foreign")
1025        );
1026        is_constraint_kw || is_foreign_kw
1027    }
1028
1029    /// v7.6.0 — parse a table-level FK clause:
1030    /// `[CONSTRAINT <name>] FOREIGN KEY (<col>[,<col>]*) REFERENCES
1031    /// <tbl> [(<pcol>[,<pcol>]*)] [ON DELETE <action>] [ON UPDATE <action>]`.
1032    fn parse_table_level_fk(&mut self) -> Result<ForeignKeyConstraint, ParseError> {
1033        let mut name: Option<String> = None;
1034        if matches!(self.peek(), Token::Ident(s) if s.eq_ignore_ascii_case("constraint")) {
1035            self.advance();
1036            name = Some(self.expect_ident_like()?);
1037        }
1038        // `FOREIGN`
1039        match self.advance() {
1040            Token::Ident(s) if s.eq_ignore_ascii_case("foreign") => {}
1041            other => return Err(self.err(format!("expected FOREIGN, got {other:?}"))),
1042        }
1043        // `KEY`
1044        match self.advance() {
1045            Token::Ident(s) if s.eq_ignore_ascii_case("key") => {}
1046            other => return Err(self.err(format!("expected KEY after FOREIGN, got {other:?}"))),
1047        }
1048        // `(col, col, ...)`
1049        if !matches!(self.peek(), Token::LParen) {
1050            return Err(self.err(format!("expected '(' after FOREIGN KEY, got {:?}", self.peek())));
1051        }
1052        self.advance();
1053        let mut columns = Vec::new();
1054        loop {
1055            columns.push(self.expect_ident_like()?);
1056            match self.peek() {
1057                Token::Comma => {
1058                    self.advance();
1059                }
1060                Token::RParen => {
1061                    self.advance();
1062                    break;
1063                }
1064                other => return Err(self.err(format!("expected ',' or ')' in FK column list, got {other:?}"))),
1065            }
1066        }
1067        if columns.is_empty() {
1068            return Err(self.err("FOREIGN KEY requires at least one column".into()));
1069        }
1070        let (parent_table, parent_columns, on_delete, on_update) =
1071            self.parse_references_tail(columns.len())?;
1072        Ok(ForeignKeyConstraint {
1073            name,
1074            columns,
1075            parent_table,
1076            parent_columns,
1077            on_delete,
1078            on_update,
1079        })
1080    }
1081
1082    /// v7.6.0 — parse the tail `REFERENCES <tbl> [(<pcol>...)] [ON
1083    /// DELETE <action>] [ON UPDATE <action>]`. `expected_arity` is
1084    /// the local column count, used to default the parent column
1085    /// list when omitted (SQL spec: parent's PK is implied).
1086    fn parse_references_tail(
1087        &mut self,
1088        expected_arity: usize,
1089    ) -> Result<(String, Vec<String>, FkAction, FkAction), ParseError> {
1090        match self.advance() {
1091            Token::Ident(s) if s.eq_ignore_ascii_case("references") => {}
1092            other => return Err(self.err(format!("expected REFERENCES, got {other:?}"))),
1093        }
1094        let parent_table = self.expect_ident_like()?;
1095        let mut parent_columns: Vec<String> = Vec::new();
1096        if matches!(self.peek(), Token::LParen) {
1097            self.advance();
1098            loop {
1099                parent_columns.push(self.expect_ident_like()?);
1100                match self.peek() {
1101                    Token::Comma => {
1102                        self.advance();
1103                    }
1104                    Token::RParen => {
1105                        self.advance();
1106                        break;
1107                    }
1108                    other => return Err(self.err(format!("expected ',' or ')' in REFERENCES column list, got {other:?}"))),
1109                }
1110            }
1111        }
1112        if !parent_columns.is_empty() && parent_columns.len() != expected_arity {
1113            return Err(self.err(format!(
1114                "FK arity mismatch: {} local column(s) vs {} parent column(s)",
1115                expected_arity,
1116                parent_columns.len()
1117            )));
1118        }
1119        // v7.6.7 — accept and reject `[NOT] DEFERRABLE [INITIALLY
1120        // {DEFERRED | IMMEDIATE}]` so existing PG dumps don't fail
1121        // at parse time. SPG's single-writer model has no deferred
1122        // constraint window, so we surface this as a clean
1123        // unsupported-feature error rather than a syntax error.
1124        loop {
1125            if matches!(self.peek(), Token::Ident(s) if s.eq_ignore_ascii_case("deferrable")) {
1126                return Err(self.err(
1127                    "DEFERRABLE constraints are not supported (SPG is single-writer; \
1128                     constraints are always evaluated immediately at commit)"
1129                        .into(),
1130                ));
1131            }
1132            if matches!(self.peek(), Token::Not) {
1133                let look = self.tokens.get(self.pos + 1);
1134                if matches!(look, Some(Token::Ident(s)) if s.eq_ignore_ascii_case("deferrable")) {
1135                    // NOT DEFERRABLE — accept as the SPG default
1136                    // and consume both tokens silently.
1137                    self.advance();
1138                    self.advance();
1139                    // Optional `INITIALLY IMMEDIATE` clause.
1140                    if matches!(self.peek(), Token::Ident(s) if s.eq_ignore_ascii_case("initially"))
1141                    {
1142                        self.advance();
1143                        match self.advance() {
1144                            Token::Ident(s) if s.eq_ignore_ascii_case("immediate") => {}
1145                            other => {
1146                                return Err(self.err(format!(
1147                                    "expected IMMEDIATE after INITIALLY for NOT DEFERRABLE, \
1148                                     got {other:?}"
1149                                )));
1150                            }
1151                        }
1152                    }
1153                    continue;
1154                }
1155                break;
1156            }
1157            break;
1158        }
1159        // Optional `ON DELETE <action>` and `ON UPDATE <action>` in
1160        // either order, each at most once.
1161        let mut on_delete = FkAction::Restrict;
1162        let mut on_update = FkAction::Restrict;
1163        let mut seen_on_delete = false;
1164        let mut seen_on_update = false;
1165        loop {
1166            if !matches!(self.peek(), Token::On) {
1167                break;
1168            }
1169            self.advance();
1170            let which = self.advance();
1171            let action = self.parse_fk_action()?;
1172            match which {
1173                Token::Ident(ref s) if s.eq_ignore_ascii_case("delete") => {
1174                    if seen_on_delete {
1175                        return Err(self.err("ON DELETE specified twice".into()));
1176                    }
1177                    seen_on_delete = true;
1178                    on_delete = action;
1179                }
1180                Token::Ident(ref s) if s.eq_ignore_ascii_case("update") => {
1181                    if seen_on_update {
1182                        return Err(self.err("ON UPDATE specified twice".into()));
1183                    }
1184                    seen_on_update = true;
1185                    on_update = action;
1186                }
1187                other => {
1188                    return Err(self.err(format!(
1189                        "expected DELETE or UPDATE after ON, got {other:?}"
1190                    )));
1191                }
1192            }
1193        }
1194        Ok((parent_table, parent_columns, on_delete, on_update))
1195    }
1196
1197    /// v7.6.0 — parse `CASCADE | RESTRICT | SET NULL | SET DEFAULT |
1198    /// NO ACTION`.
1199    fn parse_fk_action(&mut self) -> Result<FkAction, ParseError> {
1200        match self.advance() {
1201            Token::Ident(s) if s.eq_ignore_ascii_case("cascade") => Ok(FkAction::Cascade),
1202            Token::Ident(s) if s.eq_ignore_ascii_case("restrict") => Ok(FkAction::Restrict),
1203            Token::Ident(s) if s.eq_ignore_ascii_case("set") => {
1204                match self.advance() {
1205                    Token::Null => Ok(FkAction::SetNull),
1206                    Token::Default => Ok(FkAction::SetDefault),
1207                    other => Err(self.err(format!(
1208                        "expected NULL or DEFAULT after SET in FK action, got {other:?}"
1209                    ))),
1210                }
1211            }
1212            Token::Ident(s) if s.eq_ignore_ascii_case("no") => {
1213                match self.advance() {
1214                    Token::Ident(s) if s.eq_ignore_ascii_case("action") => Ok(FkAction::NoAction),
1215                    other => Err(self.err(format!(
1216                        "expected ACTION after NO in FK action, got {other:?}"
1217                    ))),
1218                }
1219            }
1220            other => Err(self.err(format!(
1221                "expected CASCADE | RESTRICT | SET NULL | SET DEFAULT | NO ACTION, got {other:?}"
1222            ))),
1223        }
1224    }
1225
1226    /// Recognise the optional `IF NOT EXISTS` prefix shared by `CREATE
1227    /// TABLE` and `CREATE INDEX`. Returns `true` if consumed.
1228    fn consume_if_not_exists(&mut self) -> bool {
1229        // `IF` arrives as a bare Ident (we don't reserve it because it
1230        // also appears mid-expression in PG, though we don't support
1231        // those forms yet).
1232        let looks_like_if = matches!(self.peek(), Token::Ident(s) if s.eq_ignore_ascii_case("if"));
1233        if !looks_like_if {
1234            return false;
1235        }
1236        // Peek one ahead before committing: only consume IF when it's
1237        // actually `IF NOT EXISTS`.
1238        if !matches!(self.tokens.get(self.pos + 1), Some(Token::Not)) {
1239            return false;
1240        }
1241        if !matches!(
1242            self.tokens.get(self.pos + 2),
1243            Some(Token::Ident(s)) if s.eq_ignore_ascii_case("exists")
1244        ) {
1245            return false;
1246        }
1247        self.advance(); // IF
1248        self.advance(); // NOT
1249        self.advance(); // EXISTS
1250        true
1251    }
1252
1253    fn parse_create_index_stmt_after_create(&mut self) -> Result<Statement, ParseError> {
1254        // Caller consumed CREATE; we're on INDEX.
1255        debug_assert!(matches!(self.peek(), Token::Index));
1256        self.advance();
1257        let if_not_exists = self.consume_if_not_exists();
1258        let name = self.expect_ident_like()?;
1259        if !matches!(self.peek(), Token::On) {
1260            return Err(self.err(format!(
1261                "expected ON after CREATE INDEX <name>, got {:?}",
1262                self.peek()
1263            )));
1264        }
1265        self.advance();
1266        let table = self.expect_ident_like()?;
1267        // Optional `USING <method>` — only recognised method in v2.0 is
1268        // `hnsw` (a single-layer NSW graph for kNN). `USING` is the bare
1269        // ident `using` (we don't promote it to a reserved keyword
1270        // because it isn't reserved anywhere else in our SQL surface).
1271        let method = if matches!(self.peek(), Token::Ident(s) if s.eq_ignore_ascii_case("using")) {
1272            self.advance();
1273            let m = self.expect_ident_like()?;
1274            match m.to_ascii_lowercase().as_str() {
1275                "hnsw" => IndexMethod::Hnsw,
1276                "btree" => IndexMethod::BTree,
1277                "brin" => IndexMethod::Brin,
1278                other => {
1279                    return Err(self.err(alloc::format!(
1280                        "unknown index method {other:?}; supported: hnsw, btree, brin"
1281                    )));
1282                }
1283            }
1284        } else {
1285            IndexMethod::BTree
1286        };
1287        if !matches!(self.peek(), Token::LParen) {
1288            return Err(self.err(format!(
1289                "expected '(' before indexed column, got {:?}",
1290                self.peek()
1291            )));
1292        }
1293        self.advance();
1294        // v6.8.2 — accept either a bare column ident (legacy) or
1295        // an expression `fn(col, …)` for expression indexes.
1296        // Distinguish by peeking the token *after* the current
1297        // ident: `ident )` is the legacy column-only path;
1298        // anything else triggers the Pratt expression parser.
1299        // (`advance()` uses `mem::replace` to nil out the current
1300        // slot, so we can't save+rewind cleanly — peek-ahead via
1301        // direct index avoids the mutation.)
1302        let (column, expression): (String, Option<Expr>) = match self.peek().clone() {
1303            Token::Ident(s) | Token::QuotedIdent(s)
1304                if matches!(self.tokens.get(self.pos + 1), Some(Token::RParen)) =>
1305            {
1306                self.advance();
1307                (s, None)
1308            }
1309            Token::Ident(_) | Token::QuotedIdent(_) => {
1310                let key_expr = self.parse_expr(0)?;
1311                let primary = extract_first_column(&key_expr).ok_or_else(|| {
1312                    self.err(
1313                        "expression index key must reference at least one column".into(),
1314                    )
1315                })?;
1316                (primary, Some(key_expr))
1317            }
1318            other => {
1319                return Err(self.err(format!(
1320                    "expected column ident or expression, got {other:?}"
1321                )));
1322            }
1323        };
1324        if !matches!(self.peek(), Token::RParen) {
1325            return Err(self.err(format!(
1326                "expected ')' after indexed column / expression, got {:?}",
1327                self.peek()
1328            )));
1329        }
1330        self.advance();
1331        // v6.8.0 — optional `INCLUDE (col1, col2, …)` clause for
1332        // index-only-scan annotation. Bare ident (not a reserved
1333        // keyword) so we test by case-insensitive string match.
1334        let included_columns =
1335            if matches!(self.peek(), Token::Ident(s) if s.eq_ignore_ascii_case("include")) {
1336                self.advance();
1337                if !matches!(self.peek(), Token::LParen) {
1338                    return Err(self.err(format!(
1339                        "expected '(' after INCLUDE, got {:?}",
1340                        self.peek()
1341                    )));
1342                }
1343                self.advance();
1344                let mut cols = Vec::new();
1345                loop {
1346                    cols.push(self.expect_ident_like()?);
1347                    match self.peek() {
1348                        Token::Comma => {
1349                            self.advance();
1350                        }
1351                        Token::RParen => {
1352                            self.advance();
1353                            break;
1354                        }
1355                        other => {
1356                            return Err(self.err(format!(
1357                                "expected ',' or ')' in INCLUDE list, got {other:?}"
1358                            )));
1359                        }
1360                    }
1361                }
1362                cols
1363            } else {
1364                Vec::new()
1365            };
1366        // v6.8.1 — optional `WHERE <expr>` partial-index predicate.
1367        let partial_predicate = if matches!(self.peek(), Token::Where) {
1368            self.advance();
1369            Some(self.parse_expr(0)?)
1370        } else {
1371            None
1372        };
1373        Ok(Statement::CreateIndex(CreateIndexStatement {
1374            name,
1375            table,
1376            column,
1377            method,
1378            if_not_exists,
1379            included_columns,
1380            partial_predicate,
1381            expression,
1382        }))
1383    }
1384
1385    /// v7.6.0 — wraps `parse_column_def` and consumes an optional
1386    /// column-level `REFERENCES ...` clause. The trailing FK is
1387    /// normalised into table-level shape (single-element columns +
1388    /// parent_columns) so the engine sees one uniform constraint list.
1389    fn parse_column_def_with_fk(
1390        &mut self,
1391    ) -> Result<(ColumnDef, Option<ForeignKeyConstraint>), ParseError> {
1392        let col = self.parse_column_def()?;
1393        // Inline form: `col INT REFERENCES tbl(pcol) [ON DELETE ...] [ON UPDATE ...]`.
1394        let inline_references = matches!(
1395            self.peek(),
1396            Token::Ident(s) if s.eq_ignore_ascii_case("references")
1397        );
1398        if !inline_references {
1399            return Ok((col, None));
1400        }
1401        let (parent_table, parent_columns, on_delete, on_update) =
1402            self.parse_references_tail(1)?;
1403        let fk = ForeignKeyConstraint {
1404            name: None,
1405            columns: vec![col.name.clone()],
1406            parent_table,
1407            parent_columns,
1408            on_delete,
1409            on_update,
1410        };
1411        Ok((col, Some(fk)))
1412    }
1413
1414    fn parse_column_def(&mut self) -> Result<ColumnDef, ParseError> {
1415        let name = self.expect_ident_like()?;
1416        // Type keyword arrives as a bare Ident (we did not promote type names
1417        // to keyword tokens — see lexer rationale).
1418        let ty_ident = match self.advance() {
1419            Token::Ident(s) => s,
1420            other => {
1421                return Err(ParseError {
1422                    message: format!("expected column type, got {other:?}"),
1423                    token_pos: self.pos.saturating_sub(1),
1424                });
1425            }
1426        };
1427        let ty = match ty_ident.as_str() {
1428            // MySQL flavours we accept by aliasing to the closest SPG
1429            // type. TINYINT covers MySQL's i8 — held inside SMALLINT
1430            // since SPG doesn't have a dedicated i8. MEDIUMINT (MySQL
1431            // 24-bit) → INT. UNSIGNED modifiers are consumed below
1432            // without semantic effect.
1433            "smallint" | "tinyint" => ColumnTypeName::SmallInt,
1434            // INTEGER is MySQL's spelling for INT; MEDIUMINT widens up.
1435            "int" | "integer" | "mediumint" => ColumnTypeName::Int,
1436            "bigint" => ColumnTypeName::BigInt,
1437            // DOUBLE / REAL are 64-bit IEEE — same as our FLOAT.
1438            "float" | "double" | "real" => ColumnTypeName::Float,
1439            "text" => ColumnTypeName::Text,
1440            "bool" | "boolean" => ColumnTypeName::Bool,
1441            "varchar" => ColumnTypeName::Varchar(self.parse_paren_size("VARCHAR")?),
1442            "char" => ColumnTypeName::Char(self.parse_paren_size("CHAR")?),
1443            "vector" => {
1444                let dim = self.parse_paren_size("VECTOR")?;
1445                let encoding = self.parse_optional_vector_encoding()?;
1446                ColumnTypeName::Vector { dim, encoding }
1447            }
1448            "numeric" => {
1449                let (precision, scale) = self.parse_optional_numeric_params()?;
1450                ColumnTypeName::Numeric(precision, scale)
1451            }
1452            "date" => ColumnTypeName::Date,
1453            // MySQL's `DATETIME` is the same domain as standard
1454            // `TIMESTAMP` — accept both spellings.
1455            "timestamp" | "datetime" => ColumnTypeName::Timestamp,
1456            // v4.9: JSON / JSONB. Stored as raw text — no parse-time
1457            // validation. We accept the JSONB spelling too because
1458            // most PG clients default to it; SPG doesn't distinguish
1459            // the two (no path-operator perf advantage to model).
1460            "json" | "jsonb" => ColumnTypeName::Json,
1461            other => {
1462                return Err(ParseError {
1463                    message: format!("unsupported column type {other:?}"),
1464                    token_pos: self.pos.saturating_sub(1),
1465                });
1466            }
1467        };
1468        // MySQL's `UNSIGNED` modifier sits right after the type
1469        // keyword. SPG doesn't carry a separate unsigned variant —
1470        // accepting the keyword keeps existing schemas compatible
1471        // without changing semantics. Drop it silently.
1472        if matches!(self.peek(), Token::Ident(s) if s.eq_ignore_ascii_case("unsigned")) {
1473            self.advance();
1474        }
1475        // Column constraints: `DEFAULT <expr>`, `NOT NULL`, and the
1476        // MySQL-flavoured `AUTO_INCREMENT` may appear in any order;
1477        // each at most once.
1478        let mut default: Option<Expr> = None;
1479        let mut nullable = true;
1480        let mut nullability_seen = false;
1481        let mut auto_increment = false;
1482        loop {
1483            if matches!(self.peek(), Token::Default) {
1484                if default.is_some() {
1485                    return Err(self.err("DEFAULT specified twice".into()));
1486                }
1487                self.advance();
1488                default = Some(self.parse_expr(0)?);
1489                continue;
1490            }
1491            if matches!(self.peek(), Token::Not) {
1492                if nullability_seen {
1493                    return Err(self.err("NOT NULL specified twice".into()));
1494                }
1495                self.advance();
1496                if !matches!(self.peek(), Token::Null) {
1497                    return Err(self.err(format!(
1498                        "expected NULL after NOT in column def, got {:?}",
1499                        self.peek()
1500                    )));
1501                }
1502                self.advance();
1503                nullable = false;
1504                nullability_seen = true;
1505                continue;
1506            }
1507            // `AUTO_INCREMENT` or its abbreviated form `AUTOINCREMENT`
1508            // arrives as a bare Ident. Match either, case-insensitive.
1509            if let Token::Ident(s) = self.peek()
1510                && (s.eq_ignore_ascii_case("auto_increment")
1511                    || s.eq_ignore_ascii_case("autoincrement"))
1512            {
1513                if auto_increment {
1514                    return Err(self.err("AUTO_INCREMENT specified twice".into()));
1515                }
1516                self.advance();
1517                auto_increment = true;
1518                continue;
1519            }
1520            break;
1521        }
1522        Ok(ColumnDef {
1523            name,
1524            ty,
1525            nullable,
1526            default,
1527            auto_increment,
1528        })
1529    }
1530
1531    /// `NUMERIC` may appear without parameters, with one (precision
1532    /// only, scale=0), or with both. Returns `(precision, scale)` with
1533    /// 0 = unspecified for the bare form.
1534    fn parse_optional_numeric_params(&mut self) -> Result<(u8, u8), ParseError> {
1535        if !matches!(self.peek(), Token::LParen) {
1536            // Bare `NUMERIC` — PG treats this as "unlimited precision";
1537            // we surface it as precision=0 to mean "unconstrained" so
1538            // the engine doesn't need a separate variant.
1539            return Ok((0, 0));
1540        }
1541        self.advance();
1542        let precision = match self.advance() {
1543            Token::Integer(n) if (1..=38).contains(&n) => u8::try_from(n).expect("range-checked"),
1544            other => {
1545                return Err(ParseError {
1546                    message: format!(
1547                        "NUMERIC precision must be an integer in 1..=38, got {other:?}"
1548                    ),
1549                    token_pos: self.pos.saturating_sub(1),
1550                });
1551            }
1552        };
1553        let scale = if matches!(self.peek(), Token::Comma) {
1554            self.advance();
1555            match self.advance() {
1556                Token::Integer(n) if (0..=i64::from(precision)).contains(&n) => {
1557                    u8::try_from(n).expect("range-checked")
1558                }
1559                other => {
1560                    return Err(ParseError {
1561                        message: format!(
1562                            "NUMERIC scale must be a non-negative integer ≤ precision, got {other:?}"
1563                        ),
1564                        token_pos: self.pos.saturating_sub(1),
1565                    });
1566                }
1567            }
1568        } else {
1569            0
1570        };
1571        if !matches!(self.peek(), Token::RParen) {
1572            return Err(self.err(format!(
1573                "expected ')' to close NUMERIC params, got {:?}",
1574                self.peek()
1575            )));
1576        }
1577        self.advance();
1578        Ok((precision, scale))
1579    }
1580
1581    /// Parse `(N)` where `N` is a positive integer literal — used by the
1582    /// `VARCHAR`/`CHAR`/`VECTOR` column types. `label` is the type name
1583    /// for the error message.
1584    /// v6.0.1: parse the optional `USING <encoding>` clause that
1585    /// follows `VECTOR(N)` in a column definition. Missing clause
1586    /// → `VecEncoding::F32` (pre-v6 default). Unknown encoding
1587    /// ident → `ParseError` listing the encodings recognised today.
1588    fn parse_optional_vector_encoding(&mut self) -> Result<VecEncoding, ParseError> {
1589        if !matches!(self.peek(), Token::Ident(s) if s.eq_ignore_ascii_case("using")) {
1590            return Ok(VecEncoding::F32);
1591        }
1592        self.advance();
1593        let enc_ident = match self.advance() {
1594            Token::Ident(s) => s,
1595            other => {
1596                return Err(self.err(format!(
1597                    "expected vector encoding after USING, got {other:?}"
1598                )));
1599            }
1600        };
1601        match enc_ident.to_ascii_lowercase().as_str() {
1602            "sq8" => Ok(VecEncoding::Sq8),
1603            // v6.0.3: `HALF` (pgvector convention) selects IEEE-754
1604            // binary16 per-element storage.
1605            "half" => Ok(VecEncoding::F16),
1606            other => Err(self.err(format!(
1607                "unknown vector encoding {other:?}; supported: SQ8, HALF"
1608            ))),
1609        }
1610    }
1611
1612    fn parse_paren_size(&mut self, label: &str) -> Result<u32, ParseError> {
1613        if !matches!(self.peek(), Token::LParen) {
1614            return Err(self.err(format!("{label} type requires (N), got {:?}", self.peek())));
1615        }
1616        self.advance();
1617        let n = match self.advance() {
1618            Token::Integer(n) if n > 0 => u32::try_from(n).map_err(|_| ParseError {
1619                message: format!("{label} size too large: {n}"),
1620                token_pos: self.pos.saturating_sub(1),
1621            })?,
1622            other => {
1623                return Err(ParseError {
1624                    message: format!("expected positive integer {label} size, got {other:?}"),
1625                    token_pos: self.pos.saturating_sub(1),
1626                });
1627            }
1628        };
1629        if !matches!(self.peek(), Token::RParen) {
1630            return Err(self.err(format!(
1631                "expected ')' after {label} size, got {:?}",
1632                self.peek()
1633            )));
1634        }
1635        self.advance();
1636        Ok(n)
1637    }
1638
1639    fn parse_insert_stmt(&mut self) -> Result<Statement, ParseError> {
1640        debug_assert!(matches!(self.peek(), Token::Insert));
1641        self.advance();
1642        if !matches!(self.peek(), Token::Into) {
1643            return Err(self.err(format!("expected INTO after INSERT, got {:?}", self.peek())));
1644        }
1645        self.advance();
1646        let table = self.expect_ident_like()?;
1647        // Optional column list — `INSERT INTO t (a, b) VALUES ...`.
1648        let columns = if matches!(self.peek(), Token::LParen) {
1649            self.advance();
1650            let mut names = Vec::new();
1651            loop {
1652                names.push(self.expect_ident_like()?);
1653                match self.peek() {
1654                    Token::Comma => {
1655                        self.advance();
1656                    }
1657                    Token::RParen => {
1658                        self.advance();
1659                        break;
1660                    }
1661                    other => {
1662                        return Err(self.err(format!(
1663                            "expected ',' or ')' in INSERT column list, got {other:?}"
1664                        )));
1665                    }
1666                }
1667            }
1668            Some(names)
1669        } else {
1670            None
1671        };
1672        if !matches!(self.peek(), Token::Values) {
1673            return Err(self.err(format!(
1674                "expected VALUES after table name, got {:?}",
1675                self.peek()
1676            )));
1677        }
1678        self.advance();
1679        if !matches!(self.peek(), Token::LParen) {
1680            return Err(self.err(format!("expected '(' after VALUES, got {:?}", self.peek())));
1681        }
1682        let mut rows = Vec::new();
1683        loop {
1684            // Each iteration consumes one `(expr, expr, …)` tuple.
1685            if !matches!(self.peek(), Token::LParen) {
1686                return Err(self.err(format!(
1687                    "expected '(' for next VALUES tuple, got {:?}",
1688                    self.peek()
1689                )));
1690            }
1691            self.advance();
1692            let mut tuple = Vec::new();
1693            loop {
1694                tuple.push(self.parse_expr(0)?);
1695                match self.peek() {
1696                    Token::Comma => {
1697                        self.advance();
1698                    }
1699                    Token::RParen => {
1700                        self.advance();
1701                        break;
1702                    }
1703                    other => {
1704                        return Err(self.err(format!(
1705                            "expected ',' or ')' in VALUES tuple, got {other:?}"
1706                        )));
1707                    }
1708                }
1709            }
1710            if tuple.is_empty() {
1711                return Err(self.err("INSERT VALUES tuple requires at least one value".into()));
1712            }
1713            rows.push(tuple);
1714            // Continue with comma-separated tuples.
1715            if matches!(self.peek(), Token::Comma) {
1716                self.advance();
1717            } else {
1718                break;
1719            }
1720        }
1721        Ok(Statement::Insert(InsertStatement {
1722            table,
1723            columns,
1724            rows,
1725        }))
1726    }
1727
1728    fn parse_select_list(&mut self) -> Result<Vec<SelectItem>, ParseError> {
1729        let mut items = Vec::new();
1730        loop {
1731            items.push(self.parse_select_item()?);
1732            if matches!(self.peek(), Token::Comma) {
1733                self.advance();
1734            } else {
1735                break;
1736            }
1737        }
1738        Ok(items)
1739    }
1740
1741    fn parse_select_item(&mut self) -> Result<SelectItem, ParseError> {
1742        if matches!(self.peek(), Token::Star) {
1743            self.advance();
1744            return Ok(SelectItem::Wildcard);
1745        }
1746        let expr = self.parse_expr(0)?;
1747        let alias = self.parse_optional_alias();
1748        Ok(SelectItem::Expr { expr, alias })
1749    }
1750
1751    fn parse_table_ref(&mut self) -> Result<TableRef, ParseError> {
1752        let name = self.expect_ident_like()?;
1753        // v6.10.2 — optional `AS OF SEGMENT '<id>'` cold-tier
1754        // time-travel clause. Parse BEFORE the alias so the
1755        // alias can still ride at the tail (`tbl AS OF SEGMENT
1756        // '5' alias`). `AS` is a reserved keyword token, while
1757        // `OF` and `SEGMENT` are bare idents.
1758        let as_of_segment = if matches!(self.peek(), Token::As)
1759            && matches!(self.tokens.get(self.pos + 1), Some(Token::Ident(s) | Token::QuotedIdent(s)) if s.eq_ignore_ascii_case("of"))
1760        {
1761            self.advance(); // AS
1762            self.advance(); // OF
1763            let kw = match self.peek().clone() {
1764                Token::Ident(s) | Token::QuotedIdent(s) => s,
1765                other => {
1766                    return Err(self.err(format!(
1767                        "expected SEGMENT after AS OF, got {other:?}"
1768                    )));
1769                }
1770            };
1771            if !kw.eq_ignore_ascii_case("segment") {
1772                return Err(self.err(format!(
1773                    "expected SEGMENT after AS OF, got {kw:?}; v6.10.2 supports SEGMENT only"
1774                )));
1775            }
1776            self.advance();
1777            // Segment id literal — accept either a string or
1778            // integer for operator ergonomics.
1779            let id = match self.advance() {
1780                Token::String(s) => s
1781                    .parse::<u32>()
1782                    .map_err(|e| self.err(format!("AS OF SEGMENT id parse: {e}")))?,
1783                Token::Integer(n) => u32::try_from(n).map_err(|e| {
1784                    self.err(format!("AS OF SEGMENT id parse: {e}"))
1785                })?,
1786                other => {
1787                    return Err(self.err(format!(
1788                        "expected segment id literal after AS OF SEGMENT, got {other:?}"
1789                    )));
1790                }
1791            };
1792            Some(id)
1793        } else {
1794            None
1795        };
1796        let alias = self.parse_optional_alias();
1797        Ok(TableRef {
1798            name,
1799            alias,
1800            as_of_segment,
1801        })
1802    }
1803
1804    /// FROM-clause: a primary table reference plus zero-or-more joined
1805    /// peers expressed via either `, <table>` (cross-product, no ON) or
1806    /// `[INNER|LEFT [OUTER]|CROSS] JOIN <table> [ON expr]`. v1.10 keeps
1807    /// the join list flat (left-associative nested-loop semantics).
1808    fn parse_from_clause(&mut self) -> Result<FromClause, ParseError> {
1809        let primary = self.parse_table_ref()?;
1810        let mut joins = Vec::new();
1811        loop {
1812            // `, <table>` — cross-product with no ON.
1813            if matches!(self.peek(), Token::Comma) {
1814                self.advance();
1815                let table = self.parse_table_ref()?;
1816                joins.push(FromJoin {
1817                    kind: JoinKind::Cross,
1818                    table,
1819                    on: None,
1820                });
1821                continue;
1822            }
1823            // Explicit JOIN syntax. Accept INNER JOIN, LEFT [OUTER] JOIN,
1824            // CROSS JOIN, and bare JOIN (defaults to INNER).
1825            let kind =
1826                match self.peek() {
1827                    Token::Inner => {
1828                        self.advance();
1829                        if !matches!(self.peek(), Token::Join) {
1830                            return Err(self
1831                                .err(format!("expected JOIN after INNER, got {:?}", self.peek())));
1832                        }
1833                        self.advance();
1834                        JoinKind::Inner
1835                    }
1836                    Token::Left => {
1837                        self.advance();
1838                        if matches!(self.peek(), Token::Outer) {
1839                            self.advance();
1840                        }
1841                        if !matches!(self.peek(), Token::Join) {
1842                            return Err(self.err(format!(
1843                                "expected JOIN after LEFT [OUTER], got {:?}",
1844                                self.peek()
1845                            )));
1846                        }
1847                        self.advance();
1848                        JoinKind::Left
1849                    }
1850                    Token::Cross => {
1851                        self.advance();
1852                        if !matches!(self.peek(), Token::Join) {
1853                            return Err(self
1854                                .err(format!("expected JOIN after CROSS, got {:?}", self.peek())));
1855                        }
1856                        self.advance();
1857                        JoinKind::Cross
1858                    }
1859                    Token::Join => {
1860                        self.advance();
1861                        JoinKind::Inner
1862                    }
1863                    _ => break,
1864                };
1865            let table = self.parse_table_ref()?;
1866            let on = if matches!(self.peek(), Token::On) {
1867                self.advance();
1868                Some(self.parse_expr(0)?)
1869            } else if kind == JoinKind::Cross {
1870                None
1871            } else {
1872                return Err(self.err(format!(
1873                    "expected ON after {:?} JOIN, got {:?}",
1874                    kind,
1875                    self.peek()
1876                )));
1877            };
1878            joins.push(FromJoin { kind, table, on });
1879        }
1880        Ok(FromClause { primary, joins })
1881    }
1882
1883    /// Optional alias after an expression or table:
1884    /// `AS <ident>` is unambiguous; a bare `<ident>` directly after is also
1885    /// accepted (PG-style implicit alias). Returns `None` if the next token
1886    /// is not alias-shaped (e.g. comma, FROM, WHERE, semicolon, EOF, operator).
1887    fn parse_optional_alias(&mut self) -> Option<String> {
1888        if matches!(self.peek(), Token::As) {
1889            self.advance();
1890            // After AS, the next token MUST be an identifier-like — if not,
1891            // we still return None and let the caller surface the error on the
1892            // next expectation. v0.2 keeps the alias path forgiving; the
1893            // corpus tests don't exercise the malformed case.
1894            if let Token::Ident(_) | Token::QuotedIdent(_) = self.peek() {
1895                return self.expect_ident_like().ok();
1896            }
1897            return None;
1898        }
1899        if let Token::Ident(_) | Token::QuotedIdent(_) = self.peek() {
1900            return self.expect_ident_like().ok();
1901        }
1902        None
1903    }
1904
1905    /// Pratt loop. `min_prec` is the minimum binary-op precedence we'll accept.
1906    fn parse_expr(&mut self, min_prec: u8) -> Result<Expr, ParseError> {
1907        let mut lhs = self.parse_unary()?;
1908        while let Some((op, prec)) = binop_from(self.peek()) {
1909            if prec < min_prec {
1910                break;
1911            }
1912            self.advance();
1913            let rhs = self.parse_expr(prec + 1)?;
1914            lhs = Expr::Binary {
1915                lhs: Box::new(lhs),
1916                op,
1917                rhs: Box::new(rhs),
1918            };
1919        }
1920        Ok(lhs)
1921    }
1922
1923    fn parse_unary(&mut self) -> Result<Expr, ParseError> {
1924        match self.peek() {
1925            Token::Not => {
1926                self.advance();
1927                // NOT sits between AND (2) and comparisons (4) — bind everything
1928                // ≥3, which leaves AND/OR outside.
1929                let e = self.parse_expr(3)?;
1930                Ok(Expr::Unary {
1931                    op: UnOp::Not,
1932                    expr: Box::new(e),
1933                })
1934            }
1935            Token::Minus => {
1936                self.advance();
1937                // Unary minus binds tighter than `*`/`/` (now at prec 7 after
1938                // `<->` slotted into 5 and arithmetic shifted up).
1939                let e = self.parse_expr(8)?;
1940                Ok(Expr::Unary {
1941                    op: UnOp::Neg,
1942                    expr: Box::new(e),
1943                })
1944            }
1945            _ => self.parse_atom(),
1946        }
1947    }
1948
1949    fn parse_atom(&mut self) -> Result<Expr, ParseError> {
1950        let tok_pos = self.pos;
1951        match self.advance() {
1952            Token::Integer(n) => Ok(Expr::Literal(Literal::Integer(n))),
1953            Token::Float(x) => Ok(Expr::Literal(Literal::Float(x))),
1954            Token::String(s) => Ok(Expr::Literal(Literal::String(s))),
1955            Token::True => Ok(Expr::Literal(Literal::Bool(true))),
1956            Token::False => Ok(Expr::Literal(Literal::Bool(false))),
1957            Token::Null => Ok(Expr::Literal(Literal::Null)),
1958            // v6.1.1 — `$N` placeholder. The actual Value lookup
1959            // happens in the engine eval path against the prepared-
1960            // statement bind buffer.
1961            Token::Placeholder(n) => Ok(Expr::Placeholder(n)),
1962            Token::LParen => {
1963                // v4.10: `(SELECT ...)` in expression position is a
1964                // scalar subquery; otherwise it's a parenthesised
1965                // expression. Peek for SELECT keyword to dispatch.
1966                if matches!(self.peek(), Token::Select) {
1967                    let inner = self.parse_select_stmt()?;
1968                    match self.advance() {
1969                        Token::RParen => {
1970                            let Statement::Select(s) = inner else {
1971                                unreachable!("parse_select_stmt returns Select")
1972                            };
1973                            Ok(Expr::ScalarSubquery(Box::new(s)))
1974                        }
1975                        other => Err(ParseError {
1976                            message: format!("expected ')' after scalar subquery, got {other:?}"),
1977                            token_pos: self.pos.saturating_sub(1),
1978                        }),
1979                    }
1980                } else {
1981                    let e = self.parse_expr(0)?;
1982                    match self.advance() {
1983                        Token::RParen => Ok(e),
1984                        other => Err(ParseError {
1985                            message: format!("expected ')', got {other:?}"),
1986                            token_pos: self.pos.saturating_sub(1),
1987                        }),
1988                    }
1989                }
1990            }
1991            Token::LBracket => self.parse_vector_literal_body(),
1992            Token::Extract => self.parse_extract_atom(),
1993            Token::Interval => self.parse_interval_atom(),
1994            // v4.10: EXISTS / NOT EXISTS. EXISTS isn't a reserved
1995            // token; we match on the bare ident. NOT is a token
1996            // (consumed in the comparison rung), but `EXISTS (...)`
1997            // at the top of an expression starts here.
1998            Token::Ident(s) | Token::QuotedIdent(s) if s.eq_ignore_ascii_case("exists") => {
1999                self.parse_exists_atom(false)
2000            }
2001            Token::Ident(s) | Token::QuotedIdent(s) => self.finish_ident_atom(s),
2002            other => Err(ParseError {
2003                message: format!("unexpected token {other:?} in expression"),
2004                token_pos: tok_pos,
2005            }),
2006        }
2007        // After parsing the atom, fold any postfix `::vector` casts.
2008        .and_then(|atom| self.finish_postfix_casts(atom))
2009    }
2010
2011    /// Postfix operators on an atom: `::TYPE` cast and `IS [NOT] NULL`.
2012    /// Both bind tighter than any binary op.
2013    fn finish_postfix_casts(&mut self, mut expr: Expr) -> Result<Expr, ParseError> {
2014        loop {
2015            if matches!(self.peek(), Token::DoubleColon) {
2016                self.advance();
2017                let target = match self.advance() {
2018                    Token::Ident(s) => match s.as_str() {
2019                        "int" => CastTarget::Int,
2020                        "bigint" => CastTarget::BigInt,
2021                        "float" => CastTarget::Float,
2022                        "text" => CastTarget::Text,
2023                        "bool" => CastTarget::Bool,
2024                        "vector" => CastTarget::Vector,
2025                        "date" => CastTarget::Date,
2026                        "timestamp" | "datetime" => CastTarget::Timestamp,
2027                        other => {
2028                            return Err(ParseError {
2029                                message: format!("unsupported cast target `::{other}`"),
2030                                token_pos: self.pos.saturating_sub(1),
2031                            });
2032                        }
2033                    },
2034                    other => {
2035                        return Err(ParseError {
2036                            message: format!("expected type ident after `::`, got {other:?}"),
2037                            token_pos: self.pos.saturating_sub(1),
2038                        });
2039                    }
2040                };
2041                expr = Expr::Cast {
2042                    expr: Box::new(expr),
2043                    target,
2044                };
2045                continue;
2046            }
2047            if matches!(self.peek(), Token::Is) {
2048                self.advance();
2049                let negated = if matches!(self.peek(), Token::Not) {
2050                    self.advance();
2051                    true
2052                } else {
2053                    false
2054                };
2055                if !matches!(self.peek(), Token::Null) {
2056                    return Err(self.err(format!(
2057                        "expected NULL after IS{}, got {:?}",
2058                        if negated { " NOT" } else { "" },
2059                        self.peek()
2060                    )));
2061                }
2062                self.advance();
2063                expr = Expr::IsNull {
2064                    expr: Box::new(expr),
2065                    negated,
2066                };
2067                continue;
2068            }
2069            // `x [NOT] BETWEEN a AND b`, `x [NOT] IN (...)`, `x [NOT] LIKE p`.
2070            // Look one token ahead so a stray `NOT` not followed by any of
2071            // these flows through to the early return below untouched.
2072            let negated = if matches!(self.peek(), Token::Not) {
2073                let next = self.tokens.get(self.pos + 1);
2074                matches!(next, Some(Token::Between | Token::In | Token::Like))
2075            } else {
2076                false
2077            };
2078            if negated {
2079                self.advance();
2080            }
2081            if matches!(self.peek(), Token::Between) {
2082                expr = self.parse_between_tail(expr, negated)?;
2083                continue;
2084            }
2085            if matches!(self.peek(), Token::In) {
2086                expr = self.parse_in_tail(expr, negated)?;
2087                continue;
2088            }
2089            if matches!(self.peek(), Token::Like) {
2090                self.advance();
2091                // Pattern at the same precedence as other comparison RHSes —
2092                // 5 leaves AND/OR alone so `a LIKE 'x%' AND b` parses right.
2093                let pattern = self.parse_expr(5)?;
2094                expr = Expr::Like {
2095                    expr: Box::new(expr),
2096                    pattern: Box::new(pattern),
2097                    negated,
2098                };
2099                continue;
2100            }
2101            return Ok(expr);
2102        }
2103    }
2104
2105    /// `x BETWEEN low AND high`  →  `(x >= low) AND (x <= high)`, wrapped in
2106    /// `NOT` when `negated`. Bounds parse at precedence 5 so the trailing
2107    /// `AND` is not swallowed.
2108    fn parse_between_tail(&mut self, expr: Expr, negated: bool) -> Result<Expr, ParseError> {
2109        self.advance(); // BETWEEN
2110        let low = self.parse_expr(5)?;
2111        if !matches!(self.peek(), Token::And) {
2112            return Err(self.err(format!(
2113                "expected AND after BETWEEN low bound, got {:?}",
2114                self.peek()
2115            )));
2116        }
2117        self.advance();
2118        let high = self.parse_expr(5)?;
2119        let target = Box::new(expr);
2120        let combined = Expr::Binary {
2121            lhs: Box::new(Expr::Binary {
2122                lhs: target.clone(),
2123                op: BinOp::GtEq,
2124                rhs: Box::new(low),
2125            }),
2126            op: BinOp::And,
2127            rhs: Box::new(Expr::Binary {
2128                lhs: target,
2129                op: BinOp::LtEq,
2130                rhs: Box::new(high),
2131            }),
2132        };
2133        Ok(maybe_not(combined, negated))
2134    }
2135
2136    /// `x IN (a, b, c)`  →  chained OR of equalities. Empty list collapses
2137    /// to FALSE (TRUE under NOT IN), matching standard SQL semantics.
2138    /// v4.11: parse `WITH name AS (SELECT ...) [, ...] SELECT ...`.
2139    /// Caller already consumed the leading `WITH` ident.
2140    fn parse_with_cte_then_select(&mut self) -> Result<Statement, ParseError> {
2141        // v4.22: WITH RECURSIVE — optional keyword right after WITH.
2142        // Comes through as an identifier; consume it if present and
2143        // mark every CTE in the clause as recursive (PG semantics —
2144        // the flag is per-WITH, not per-CTE).
2145        let mut recursive = false;
2146        if let Token::Ident(s) | Token::QuotedIdent(s) = self.peek()
2147            && s.eq_ignore_ascii_case("recursive")
2148        {
2149            self.advance();
2150            recursive = true;
2151        }
2152        let mut ctes = Vec::new();
2153        loop {
2154            let name = self.expect_ident_like()?;
2155            // v4.22: optional column-name list — `WITH t(a,b,c) AS ...`.
2156            // PG uses these to rename the body's output columns; we
2157            // do the same below by overriding `columns[i].name`.
2158            let column_overrides: Vec<String> = if matches!(self.peek(), Token::LParen) {
2159                self.advance();
2160                let mut names = Vec::new();
2161                loop {
2162                    names.push(self.expect_ident_like()?);
2163                    if matches!(self.peek(), Token::Comma) {
2164                        self.advance();
2165                        continue;
2166                    }
2167                    break;
2168                }
2169                if !matches!(self.peek(), Token::RParen) {
2170                    return Err(self.err(format!(
2171                        "expected ')' to close CTE column list, got {:?}",
2172                        self.peek()
2173                    )));
2174                }
2175                self.advance();
2176                names
2177            } else {
2178                Vec::new()
2179            };
2180            // AS is a reserved Token::As (used by SELECT-item / FROM
2181            // aliasing) — handle it specially rather than as a bare
2182            // ident.
2183            if !matches!(self.peek(), Token::As) {
2184                return Err(self.err(format!(
2185                    "expected AS after CTE name {name:?}, got {:?}",
2186                    self.peek()
2187                )));
2188            }
2189            self.advance();
2190            if !matches!(self.peek(), Token::LParen) {
2191                return Err(self.err(format!(
2192                    "expected '(' after AS in WITH clause, got {:?}",
2193                    self.peek()
2194                )));
2195            }
2196            self.advance();
2197            if !matches!(self.peek(), Token::Select) {
2198                return Err(self.err(format!("WITH body must be a SELECT, got {:?}", self.peek())));
2199            }
2200            let inner = self.parse_select_stmt()?;
2201            if !matches!(self.peek(), Token::RParen) {
2202                return Err(self.err(format!(
2203                    "expected ')' after CTE body, got {:?}",
2204                    self.peek()
2205                )));
2206            }
2207            self.advance();
2208            let Statement::Select(body) = inner else {
2209                unreachable!("parse_select_stmt returns Select")
2210            };
2211            ctes.push(crate::ast::Cte {
2212                name,
2213                body,
2214                recursive,
2215                column_overrides,
2216            });
2217            if matches!(self.peek(), Token::Comma) {
2218                self.advance();
2219                continue;
2220            }
2221            break;
2222        }
2223        // The body SELECT follows. Must start with SELECT.
2224        if !matches!(self.peek(), Token::Select) {
2225            return Err(self.err(format!(
2226                "expected SELECT after WITH clause, got {:?}",
2227                self.peek()
2228            )));
2229        }
2230        let body_stmt = self.parse_select_stmt()?;
2231        let Statement::Select(mut body) = body_stmt else {
2232            unreachable!()
2233        };
2234        body.ctes = ctes;
2235        Ok(Statement::Select(body))
2236    }
2237
2238    /// v4.10: parse `EXISTS (SELECT ...)`. Caller (`parse_atom`)
2239    /// already consumed the leading `EXISTS` ident via
2240    /// `self.advance()`.
2241    fn parse_exists_atom(&mut self, negated: bool) -> Result<Expr, ParseError> {
2242        if !matches!(self.peek(), Token::LParen) {
2243            return Err(self.err(format!("expected '(' after EXISTS, got {:?}", self.peek())));
2244        }
2245        self.advance();
2246        let inner = self.parse_select_stmt()?;
2247        if !matches!(self.peek(), Token::RParen) {
2248            return Err(self.err(format!(
2249                "expected ')' after EXISTS-subquery, got {:?}",
2250                self.peek()
2251            )));
2252        }
2253        self.advance();
2254        let Statement::Select(s) = inner else {
2255            unreachable!("parse_select_stmt returns Select")
2256        };
2257        Ok(Expr::Exists {
2258            subquery: Box::new(s),
2259            negated,
2260        })
2261    }
2262
2263    fn parse_in_tail(&mut self, expr: Expr, negated: bool) -> Result<Expr, ParseError> {
2264        self.advance(); // IN
2265        if !matches!(self.peek(), Token::LParen) {
2266            return Err(self.err(format!("expected '(' after IN, got {:?}", self.peek())));
2267        }
2268        self.advance();
2269        // v4.10: `IN (SELECT ...)` — subquery branch.
2270        if matches!(self.peek(), Token::Select) {
2271            let inner = self.parse_select_stmt()?;
2272            if !matches!(self.peek(), Token::RParen) {
2273                return Err(self.err(format!(
2274                    "expected ')' after IN-subquery, got {:?}",
2275                    self.peek()
2276                )));
2277            }
2278            self.advance();
2279            let Statement::Select(s) = inner else {
2280                unreachable!("parse_select_stmt always returns Statement::Select")
2281            };
2282            return Ok(Expr::InSubquery {
2283                expr: Box::new(expr),
2284                subquery: Box::new(s),
2285                negated,
2286            });
2287        }
2288        let mut elements = Vec::new();
2289        if !matches!(self.peek(), Token::RParen) {
2290            loop {
2291                elements.push(self.parse_expr(0)?);
2292                match self.peek() {
2293                    Token::Comma => {
2294                        self.advance();
2295                    }
2296                    Token::RParen => break,
2297                    other => {
2298                        return Err(
2299                            self.err(format!("expected ',' or ')' in IN list, got {other:?}"))
2300                        );
2301                    }
2302                }
2303            }
2304        }
2305        self.advance(); // ')'
2306        let target = Box::new(expr);
2307        let combined = if elements.is_empty() {
2308            Expr::Literal(Literal::Bool(false))
2309        } else {
2310            let mut iter = elements.into_iter();
2311            let first = iter.next().unwrap();
2312            let mut acc = Expr::Binary {
2313                lhs: target.clone(),
2314                op: BinOp::Eq,
2315                rhs: Box::new(first),
2316            };
2317            for elt in iter {
2318                acc = Expr::Binary {
2319                    lhs: Box::new(acc),
2320                    op: BinOp::Or,
2321                    rhs: Box::new(Expr::Binary {
2322                        lhs: target.clone(),
2323                        op: BinOp::Eq,
2324                        rhs: Box::new(elt),
2325                    }),
2326                };
2327            }
2328            acc
2329        };
2330        Ok(maybe_not(combined, negated))
2331    }
2332
2333    /// Parse a pgvector array literal `[ x1, x2, ... ]`. The opening `[` is
2334    /// already consumed by the caller. Elements must be numeric literals
2335    /// (with optional unary `-`); any compound expression is rejected at
2336    /// parse time so the runtime never needs to evaluate inside a vector.
2337    /// `EXTRACT(<field> FROM <source>)`. The dispatching `parse_atom`
2338    /// has already consumed the `EXTRACT` token before calling us —
2339    /// we pick up at the opening `(`.
2340    fn parse_extract_atom(&mut self) -> Result<Expr, ParseError> {
2341        if !matches!(self.peek(), Token::LParen) {
2342            return Err(self.err(format!("expected '(' after EXTRACT, got {:?}", self.peek())));
2343        }
2344        self.advance();
2345        let field_name = self.expect_ident_like()?;
2346        let field = match field_name.to_ascii_lowercase().as_str() {
2347            "year" => ExtractField::Year,
2348            "month" => ExtractField::Month,
2349            "day" => ExtractField::Day,
2350            "hour" => ExtractField::Hour,
2351            "minute" => ExtractField::Minute,
2352            "second" => ExtractField::Second,
2353            "microsecond" | "microseconds" => ExtractField::Microsecond,
2354            other => {
2355                return Err(self.err(format!(
2356                    "unknown EXTRACT field {other:?}; \
2357                     supported: YEAR, MONTH, DAY, HOUR, MINUTE, SECOND, MICROSECOND"
2358                )));
2359            }
2360        };
2361        if !matches!(self.peek(), Token::From) {
2362            return Err(self.err(format!(
2363                "expected FROM after EXTRACT field, got {:?}",
2364                self.peek()
2365            )));
2366        }
2367        self.advance();
2368        let source = self.parse_expr(0)?;
2369        if !matches!(self.peek(), Token::RParen) {
2370            return Err(self.err(format!(
2371                "expected ')' to close EXTRACT, got {:?}",
2372                self.peek()
2373            )));
2374        }
2375        self.advance();
2376        Ok(Expr::Extract {
2377            field,
2378            source: Box::new(source),
2379        })
2380    }
2381
2382    /// `INTERVAL '<n> <unit> [<n> <unit> ...]'` — the `INTERVAL` keyword
2383    /// is already consumed; we expect a single string literal next and
2384    /// resolve it into `Literal::Interval` at parse time so the engine
2385    /// never has to re-tokenise inside the string.
2386    fn parse_interval_atom(&mut self) -> Result<Expr, ParseError> {
2387        let tok = self.advance();
2388        let Token::String(text) = tok else {
2389            return Err(self.err(format!(
2390                "expected string literal after INTERVAL, got {tok:?}"
2391            )));
2392        };
2393        let (months, micros) = parse_interval_text(&text).ok_or_else(|| ParseError {
2394            message: format!(
2395                "cannot parse INTERVAL {text:?}; \
2396                     expected `<n> <unit> [<n> <unit> ...]` with units \
2397                     microsecond[s], millisecond[s], second[s], minute[s], \
2398                     hour[s], day[s], week[s], month[s], year[s]"
2399            ),
2400            token_pos: self.pos.saturating_sub(1),
2401        })?;
2402        Ok(Expr::Literal(Literal::Interval {
2403            months,
2404            micros,
2405            text,
2406        }))
2407    }
2408
2409    fn parse_vector_literal_body(&mut self) -> Result<Expr, ParseError> {
2410        let mut elems = Vec::new();
2411        if matches!(self.peek(), Token::RBracket) {
2412            self.advance();
2413            return Ok(Expr::Literal(Literal::Vector(elems)));
2414        }
2415        loop {
2416            let e = self.parse_expr(0)?;
2417            let x = extract_numeric_literal(&e).ok_or_else(|| ParseError {
2418                message: format!("vector element must be a numeric literal, got {e:?}"),
2419                token_pos: self.pos,
2420            })?;
2421            elems.push(x);
2422            match self.peek() {
2423                Token::Comma => {
2424                    self.advance();
2425                }
2426                Token::RBracket => {
2427                    self.advance();
2428                    break;
2429                }
2430                other => {
2431                    return Err(self.err(format!("expected ',' or ']' in vector, got {other:?}")));
2432                }
2433            }
2434        }
2435        Ok(Expr::Literal(Literal::Vector(elems)))
2436    }
2437
2438    /// Atom that started with an identifier: could be `t.col`, `col`, or
2439    /// `func(arg, ...)`. Detect each shape by looking at the next token.
2440    /// v4.12: parse `(PARTITION BY expr, ... ORDER BY expr [DESC]
2441    /// [, ...])`. Caller has already consumed `OVER`. Either clause
2442    /// is optional; an empty `()` is also legal (PG semantics).
2443    /// v6.4.2 — consume an optional `IGNORE NULLS` / `RESPECT NULLS`
2444    /// modifier between `name(args)` and `OVER (...)`. Default is
2445    /// `Respect`. Unrecognised idents leave the stream unchanged.
2446    fn parse_null_treatment_modifier(&mut self) -> NullTreatment {
2447        let Token::Ident(s) = self.peek().clone() else {
2448            return NullTreatment::Respect;
2449        };
2450        let is_ignore = s.eq_ignore_ascii_case("ignore");
2451        let is_respect = s.eq_ignore_ascii_case("respect");
2452        if !is_ignore && !is_respect {
2453            return NullTreatment::Respect;
2454        }
2455        // Lookahead for NULLS — only consume both tokens together.
2456        // pos+1 must hold a "nulls" ident.
2457        if self.pos + 1 < self.tokens.len()
2458            && let Token::Ident(s2) = &self.tokens[self.pos + 1]
2459            && s2.eq_ignore_ascii_case("nulls")
2460        {
2461            self.advance();
2462            self.advance();
2463            return if is_ignore {
2464                NullTreatment::Ignore
2465            } else {
2466                NullTreatment::Respect
2467            };
2468        }
2469        NullTreatment::Respect
2470    }
2471
2472    /// No frame clause is supported.
2473    #[allow(clippy::type_complexity)] // (partitions, ordered-keys-with-desc) is the natural shape
2474    fn parse_over_clause(
2475        &mut self,
2476    ) -> Result<(Vec<Expr>, Vec<(Expr, bool)>, Option<WindowFrame>), ParseError> {
2477        if !matches!(self.peek(), Token::LParen) {
2478            return Err(self.err(format!("expected '(' after OVER, got {:?}", self.peek())));
2479        }
2480        self.advance();
2481        let mut partition_by = Vec::new();
2482        let mut order_by = Vec::new();
2483        // PARTITION BY ?
2484        if let Token::Ident(s) | Token::QuotedIdent(s) = self.peek()
2485            && s.eq_ignore_ascii_case("partition")
2486        {
2487            self.advance();
2488            if !matches!(self.peek(), Token::By) {
2489                return Err(self.err(format!(
2490                    "expected BY after PARTITION, got {:?}",
2491                    self.peek()
2492                )));
2493            }
2494            self.advance();
2495            loop {
2496                partition_by.push(self.parse_expr(0)?);
2497                if matches!(self.peek(), Token::Comma) {
2498                    self.advance();
2499                    continue;
2500                }
2501                break;
2502            }
2503        }
2504        // ORDER BY ?
2505        if matches!(self.peek(), Token::Order) {
2506            self.advance();
2507            if !matches!(self.peek(), Token::By) {
2508                return Err(self.err(format!("expected BY after ORDER, got {:?}", self.peek())));
2509            }
2510            self.advance();
2511            loop {
2512                let e = self.parse_expr(0)?;
2513                let desc = if matches!(self.peek(), Token::Desc) {
2514                    self.advance();
2515                    true
2516                } else if matches!(self.peek(), Token::Asc) {
2517                    self.advance();
2518                    false
2519                } else {
2520                    false
2521                };
2522                order_by.push((e, desc));
2523                if matches!(self.peek(), Token::Comma) {
2524                    self.advance();
2525                    continue;
2526                }
2527                break;
2528            }
2529        }
2530        // v4.20: optional explicit frame, `ROWS ...` / `RANGE ...`.
2531        // Both keywords come through the lexer as identifiers; match
2532        // case-insensitively.
2533        let mut frame: Option<WindowFrame> = None;
2534        if let Token::Ident(s) | Token::QuotedIdent(s) = self.peek() {
2535            let kind = if s.eq_ignore_ascii_case("rows") {
2536                Some(FrameKind::Rows)
2537            } else if s.eq_ignore_ascii_case("range") {
2538                Some(FrameKind::Range)
2539            } else {
2540                None
2541            };
2542            if let Some(kind) = kind {
2543                self.advance();
2544                frame = Some(self.parse_frame_tail(kind)?);
2545            }
2546        }
2547        if !matches!(self.peek(), Token::RParen) {
2548            return Err(self.err(format!(
2549                "expected ')' to close OVER clause, got {:?}",
2550                self.peek()
2551            )));
2552        }
2553        self.advance();
2554        Ok((partition_by, order_by, frame))
2555    }
2556
2557    /// v4.20: parse the tail of an explicit frame, given the `ROWS`
2558    /// or `RANGE` keyword was just consumed. Accepts both
2559    /// `BETWEEN <bound> AND <bound>` and the single-bound shorthand
2560    /// (`ROWS UNBOUNDED PRECEDING`, `ROWS 5 PRECEDING`, etc.) which
2561    /// PG normalises to `BETWEEN <bound> AND CURRENT ROW`.
2562    fn parse_frame_tail(&mut self, kind: FrameKind) -> Result<WindowFrame, ParseError> {
2563        if matches!(self.peek(), Token::Between) {
2564            self.advance();
2565            let start = self.parse_frame_bound()?;
2566            if !matches!(self.peek(), Token::And) {
2567                return Err(self.err(format!("expected AND in frame spec, got {:?}", self.peek())));
2568            }
2569            self.advance();
2570            let end = self.parse_frame_bound()?;
2571            Ok(WindowFrame {
2572                kind,
2573                start,
2574                end: Some(end),
2575            })
2576        } else {
2577            let start = self.parse_frame_bound()?;
2578            Ok(WindowFrame {
2579                kind,
2580                start,
2581                end: None,
2582            })
2583        }
2584    }
2585
2586    /// Parse one frame bound: `UNBOUNDED PRECEDING`, `<n> PRECEDING`,
2587    /// `CURRENT ROW`, `<n> FOLLOWING`, `UNBOUNDED FOLLOWING`.
2588    fn parse_frame_bound(&mut self) -> Result<FrameBound, ParseError> {
2589        // Number-led: "<n> PRECEDING" / "<n> FOLLOWING".
2590        if let Token::Integer(n) = *self.peek() {
2591            self.advance();
2592            let n: u64 = u64::try_from(n).map_err(|_| {
2593                self.err(format!(
2594                    "invalid frame offset {n} — expected non-negative integer"
2595                ))
2596            })?;
2597            let dir = self.expect_ident_like()?;
2598            return if dir.eq_ignore_ascii_case("preceding") {
2599                Ok(FrameBound::OffsetPreceding(n))
2600            } else if dir.eq_ignore_ascii_case("following") {
2601                Ok(FrameBound::OffsetFollowing(n))
2602            } else {
2603                Err(self.err(format!(
2604                    "expected PRECEDING or FOLLOWING after offset, got {dir:?}"
2605                )))
2606            };
2607        }
2608        let first = self.expect_ident_like()?;
2609        if first.eq_ignore_ascii_case("unbounded") {
2610            let dir = self.expect_ident_like()?;
2611            return if dir.eq_ignore_ascii_case("preceding") {
2612                Ok(FrameBound::UnboundedPreceding)
2613            } else if dir.eq_ignore_ascii_case("following") {
2614                Ok(FrameBound::UnboundedFollowing)
2615            } else {
2616                Err(self.err(format!(
2617                    "expected PRECEDING or FOLLOWING after UNBOUNDED, got {dir:?}"
2618                )))
2619            };
2620        }
2621        if first.eq_ignore_ascii_case("current") {
2622            let row = self.expect_ident_like()?;
2623            if !row.eq_ignore_ascii_case("row") {
2624                return Err(self.err(format!("expected ROW after CURRENT, got {row:?}")));
2625            }
2626            return Ok(FrameBound::CurrentRow);
2627        }
2628        Err(self.err(format!(
2629            "expected frame bound (UNBOUNDED/CURRENT/<n>), got {first:?}"
2630        )))
2631    }
2632
2633    fn finish_ident_atom(&mut self, first: String) -> Result<Expr, ParseError> {
2634        if matches!(self.peek(), Token::Dot) {
2635            self.advance();
2636            let name = self.expect_ident_like()?;
2637            return Ok(Expr::Column(ColumnName {
2638                qualifier: Some(first),
2639                name,
2640            }));
2641        }
2642        if matches!(self.peek(), Token::LParen) {
2643            self.advance();
2644            // `COUNT(*)` — special-cased here because `*` isn't a normal
2645            // expression token. Lower-case match on `first` since the lexer
2646            // folds identifiers.
2647            if first.eq_ignore_ascii_case("count") && matches!(self.peek(), Token::Star) {
2648                self.advance();
2649                if !matches!(self.peek(), Token::RParen) {
2650                    return Err(self.err(format!(
2651                        "expected ')' after COUNT(*), got {:?}",
2652                        self.peek()
2653                    )));
2654                }
2655                self.advance();
2656                // v4.12: COUNT(*) OVER (...) — same window tail.
2657                let null_treatment = self.parse_null_treatment_modifier();
2658                if let Token::Ident(s) | Token::QuotedIdent(s) = self.peek()
2659                    && s.eq_ignore_ascii_case("over")
2660                {
2661                    self.advance();
2662                    let (partition_by, order_by, frame) = self.parse_over_clause()?;
2663                    return Ok(Expr::WindowFunction {
2664                        name: "count_star".into(),
2665                        args: Vec::new(),
2666                        partition_by,
2667                        order_by,
2668                        frame,
2669                        null_treatment,
2670                    });
2671                }
2672                return Ok(Expr::FunctionCall {
2673                    name: "count_star".into(),
2674                    args: Vec::new(),
2675                });
2676            }
2677            // Function call. PG-style: zero-or-more comma-separated args.
2678            let mut args = Vec::new();
2679            if !matches!(self.peek(), Token::RParen) {
2680                loop {
2681                    args.push(self.parse_expr(0)?);
2682                    match self.peek() {
2683                        Token::Comma => {
2684                            self.advance();
2685                        }
2686                        Token::RParen => break,
2687                        other => {
2688                            return Err(self.err(format!(
2689                                "expected ',' or ')' in function args, got {other:?}"
2690                            )));
2691                        }
2692                    }
2693                }
2694            }
2695            self.advance(); // consume ')'
2696            // v4.12: window-function tail — `name(args) OVER (...)`.
2697            // Promotes the just-parsed FunctionCall into a
2698            // WindowFunction node carrying partition + order.
2699            // v6.4.2: also accepts `name(args) IGNORE NULLS OVER (...)`
2700            // / `RESPECT NULLS OVER (...)` between the closing paren
2701            // and `OVER`.
2702            let null_treatment = self.parse_null_treatment_modifier();
2703            if let Token::Ident(s) | Token::QuotedIdent(s) = self.peek()
2704                && s.eq_ignore_ascii_case("over")
2705            {
2706                self.advance();
2707                let (partition_by, order_by, frame) = self.parse_over_clause()?;
2708                return Ok(Expr::WindowFunction {
2709                    name: first,
2710                    args,
2711                    partition_by,
2712                    order_by,
2713                    frame,
2714                    null_treatment,
2715                });
2716            }
2717            return Ok(Expr::FunctionCall { name: first, args });
2718        }
2719        Ok(Expr::Column(ColumnName {
2720            qualifier: None,
2721            name: first,
2722        }))
2723    }
2724}
2725
2726/// v6.8.2 — walk an expression tree and return the first column
2727/// reference's bare name. Used by `parse_create_index_stmt_after_create`
2728/// to derive `CreateIndexStatement.column` from an expression
2729/// key (so downstream planner code resolving a primary column
2730/// position keeps working with expression indexes). Returns
2731/// `None` when the expression has no column ref at all — caller
2732/// surfaces that as a parse error.
2733fn extract_first_column(expr: &Expr) -> Option<String> {
2734    match expr {
2735        Expr::Column(cn) => Some(cn.name.clone()),
2736        Expr::FunctionCall { args, .. } => args.iter().find_map(extract_first_column),
2737        Expr::Binary { lhs, rhs, .. } => {
2738            extract_first_column(lhs).or_else(|| extract_first_column(rhs))
2739        }
2740        Expr::Unary { expr: e, .. } => extract_first_column(e),
2741        _ => None,
2742    }
2743}
2744
2745fn maybe_not(expr: Expr, negated: bool) -> Expr {
2746    if negated {
2747        Expr::Unary {
2748            op: UnOp::Not,
2749            expr: Box::new(expr),
2750        }
2751    } else {
2752        expr
2753    }
2754}
2755
2756fn binop_from(tok: &Token) -> Option<(BinOp, u8)> {
2757    let pair = match tok {
2758        Token::Or => (BinOp::Or, 1),
2759        Token::And => (BinOp::And, 2),
2760        Token::Eq => (BinOp::Eq, 4),
2761        Token::NotEq => (BinOp::NotEq, 4),
2762        Token::Lt => (BinOp::Lt, 4),
2763        Token::LtEq => (BinOp::LtEq, 4),
2764        Token::Gt => (BinOp::Gt, 4),
2765        Token::GtEq => (BinOp::GtEq, 4),
2766        // pgvector distance ops all sit on the same rung — tighter than
2767        // comparisons (4) so `col <-> v < threshold` parses correctly.
2768        Token::L2Distance => (BinOp::L2Distance, 5),
2769        Token::InnerProduct => (BinOp::InnerProduct, 5),
2770        Token::CosineDistance => (BinOp::CosineDistance, 5),
2771        Token::Plus => (BinOp::Add, 6),
2772        Token::Minus => (BinOp::Sub, 6),
2773        // `||` sits beside `+`/`-` (matches PG conceptually — concat groups
2774        // by the same level as binary additive arithmetic).
2775        Token::Concat => (BinOp::Concat, 6),
2776        Token::Star => (BinOp::Mul, 7),
2777        Token::Slash => (BinOp::Div, 7),
2778        // v4.14: JSON path ops bind tighter than comparisons (4)
2779        // and additive (6) so `doc->'k' = 'v'` parses correctly.
2780        // Same rung as the multiplicative ops.
2781        Token::JsonGet => (BinOp::JsonGet, 7),
2782        Token::JsonGetText => (BinOp::JsonGetText, 7),
2783        Token::JsonGetPath => (BinOp::JsonGetPath, 7),
2784        Token::JsonGetPathText => (BinOp::JsonGetPathText, 7),
2785        Token::JsonContains => (BinOp::JsonContains, 7),
2786        _ => return None,
2787    };
2788    Some(pair)
2789}
2790
2791#[allow(clippy::cast_possible_truncation, clippy::cast_precision_loss)]
2792// `as f32` here is intentional: vector elements widen / narrow into f32 on
2793// purpose. i64 → f32 loses precision past 2^24, f64 → f32 loses precision
2794// past ~15 decimal digits — both are acceptable for a fixed-precision
2795// pgvector column.
2796fn extract_numeric_literal(e: &Expr) -> Option<f32> {
2797    match e {
2798        Expr::Literal(Literal::Integer(n)) => Some(*n as f32),
2799        Expr::Literal(Literal::Float(x)) => Some(*x as f32),
2800        Expr::Unary {
2801            op: UnOp::Neg,
2802            expr,
2803        } => extract_numeric_literal(expr).map(|x| -x),
2804        _ => None,
2805    }
2806}
2807
/// Parse the text inside `INTERVAL '...'` into `(months, micros)`.
///
/// The text is a whitespace-separated sequence of `<n> <unit>` pairs,
/// where `<n>` is a (possibly negative) integer. Units are matched
/// case-insensitively with one optional trailing `s`: `microsecond`,
/// `millisecond`, `second`, `minute`, `hour`, `day`, `week`, `month`,
/// `year`. Months and years accumulate into the `i32` month count (a year
/// is 12 months); everything else accumulates into the `i64` microsecond
/// count (a day is 86 400 s, a week is 7 days).
///
/// Returns `None` when the text is empty, a count has no unit, a count is
/// not an integer, a unit is unknown, or any accumulation overflows.
pub fn parse_interval_text(s: &str) -> Option<(i32, i64)> {
    /// How one unit contributes to the running totals.
    enum Scale {
        Micros(i64),
        Months(i32),
    }

    let mut words = s.split_whitespace();
    let mut total_months: i32 = 0;
    let mut total_micros: i64 = 0;
    let mut saw_pair = false;

    while let Some(count_text) = words.next() {
        // A count with nothing after it means an odd number of words.
        let unit_text = words.next()?;
        let count: i64 = count_text.parse().ok()?;
        let lowered = unit_text.to_ascii_lowercase();
        let unit = lowered.strip_suffix('s').unwrap_or(&lowered);
        let scale = match unit {
            "microsecond" => Scale::Micros(1),
            "millisecond" => Scale::Micros(1_000),
            "second" => Scale::Micros(1_000_000),
            "minute" => Scale::Micros(60_000_000),
            "hour" => Scale::Micros(3_600_000_000),
            "day" => Scale::Micros(86_400_000_000),
            "week" => Scale::Micros(604_800_000_000),
            "month" => Scale::Months(1),
            "year" => Scale::Months(12),
            _ => return None,
        };
        match scale {
            Scale::Micros(per_unit) => {
                total_micros = total_micros.checked_add(count.checked_mul(per_unit)?)?;
            }
            Scale::Months(per_unit) => {
                let whole = i32::try_from(count).ok()?;
                total_months = total_months.checked_add(whole.checked_mul(per_unit)?)?;
            }
        }
        saw_pair = true;
    }

    saw_pair.then_some((total_months, total_micros))
}
2849
2850#[cfg(test)]
2851mod tests {
2852    use super::*;
2853    use alloc::string::ToString;
2854
2855    fn parse(s: &str) -> Statement {
2856        parse_statement(s).expect("parse ok")
2857    }
2858
2859    fn lit_int(n: i64) -> Expr {
2860        Expr::Literal(Literal::Integer(n))
2861    }
2862
2863    fn col(name: &str) -> Expr {
2864        Expr::Column(ColumnName {
2865            qualifier: None,
2866            name: name.into(),
2867        })
2868    }
2869
2870    #[test]
2871    fn select_single_integer() {
2872        let s = parse("SELECT 1");
2873        let Statement::Select(s) = s else {
2874            panic!("expected SELECT")
2875        };
2876        assert_eq!(s.items.len(), 1);
2877        assert!(s.from.is_none());
2878        assert!(s.where_.is_none());
2879    }
2880
2881    #[test]
2882    fn select_multiple_literal_kinds() {
2883        let s = parse("SELECT 1, 'hi', NULL, TRUE, 1.5");
2884        let Statement::Select(s) = s else {
2885            panic!("expected SELECT")
2886        };
2887        assert_eq!(s.items.len(), 5);
2888    }
2889
2890    #[test]
2891    fn select_wildcard_from_table() {
2892        let s = parse("SELECT * FROM users");
2893        let Statement::Select(s) = s else {
2894            panic!("expected SELECT")
2895        };
2896        assert!(matches!(s.items[..], [SelectItem::Wildcard]));
2897        assert_eq!(s.from.as_ref().unwrap().primary.name, "users");
2898    }
2899
2900    #[test]
2901    fn select_with_table_alias() {
2902        let s = parse("SELECT * FROM users AS u");
2903        let Statement::Select(s) = s else {
2904            panic!("expected SELECT")
2905        };
2906        let t = &s.from.as_ref().unwrap().primary;
2907        assert_eq!(t.name, "users");
2908        assert_eq!(t.alias.as_deref(), Some("u"));
2909    }
2910
2911    #[test]
2912    fn select_with_where_eq() {
2913        let s = parse("SELECT a FROM t WHERE a = 1");
2914        let Statement::Select(s) = s else {
2915            panic!("expected SELECT")
2916        };
2917        let w = s.where_.unwrap();
2918        assert_eq!(
2919            w,
2920            Expr::Binary {
2921                lhs: Box::new(col("a")),
2922                op: BinOp::Eq,
2923                rhs: Box::new(lit_int(1)),
2924            }
2925        );
2926    }
2927
2928    #[test]
2929    fn arithmetic_precedence() {
2930        let s = parse("SELECT 1 + 2 * 3");
2931        let Statement::Select(s) = s else {
2932            panic!("expected SELECT")
2933        };
2934        let SelectItem::Expr { expr, .. } = &s.items[0] else {
2935            panic!("wildcard?")
2936        };
2937        assert_eq!(
2938            expr,
2939            &Expr::Binary {
2940                lhs: Box::new(lit_int(1)),
2941                op: BinOp::Add,
2942                rhs: Box::new(Expr::Binary {
2943                    lhs: Box::new(lit_int(2)),
2944                    op: BinOp::Mul,
2945                    rhs: Box::new(lit_int(3)),
2946                }),
2947            }
2948        );
2949    }
2950
2951    #[test]
2952    fn parentheses_override_precedence() {
2953        let s = parse("SELECT (1 + 2) * 3");
2954        let Statement::Select(s) = s else {
2955            panic!("expected SELECT")
2956        };
2957        let SelectItem::Expr { expr, .. } = &s.items[0] else {
2958            panic!()
2959        };
2960        assert_eq!(
2961            expr,
2962            &Expr::Binary {
2963                lhs: Box::new(Expr::Binary {
2964                    lhs: Box::new(lit_int(1)),
2965                    op: BinOp::Add,
2966                    rhs: Box::new(lit_int(2)),
2967                }),
2968                op: BinOp::Mul,
2969                rhs: Box::new(lit_int(3)),
2970            }
2971        );
2972    }
2973
2974    #[test]
2975    fn not_binds_below_comparison() {
2976        // `NOT a = 1` should parse as `NOT (a = 1)`.
2977        let s = parse("SELECT NOT a = 1 FROM t");
2978        let Statement::Select(s) = s else {
2979            panic!("expected SELECT")
2980        };
2981        let SelectItem::Expr { expr, .. } = &s.items[0] else {
2982            panic!()
2983        };
2984        assert_eq!(
2985            expr,
2986            &Expr::Unary {
2987                op: UnOp::Not,
2988                expr: Box::new(Expr::Binary {
2989                    lhs: Box::new(col("a")),
2990                    op: BinOp::Eq,
2991                    rhs: Box::new(lit_int(1)),
2992                }),
2993            }
2994        );
2995    }
2996
2997    #[test]
2998    fn unary_minus_binds_above_multiplication() {
2999        // `-a * 2` should be `(-a) * 2`.
3000        let s = parse("SELECT -a * 2 FROM t");
3001        let Statement::Select(s) = s else {
3002            panic!("expected SELECT")
3003        };
3004        let SelectItem::Expr { expr, .. } = &s.items[0] else {
3005            panic!()
3006        };
3007        assert_eq!(
3008            expr,
3009            &Expr::Binary {
3010                lhs: Box::new(Expr::Unary {
3011                    op: UnOp::Neg,
3012                    expr: Box::new(col("a")),
3013                }),
3014                op: BinOp::Mul,
3015                rhs: Box::new(lit_int(2)),
3016            }
3017        );
3018    }
3019
3020    #[test]
3021    fn qualified_column() {
3022        let s = parse("SELECT t.col FROM t");
3023        let Statement::Select(s) = s else {
3024            panic!("expected SELECT")
3025        };
3026        let SelectItem::Expr { expr, .. } = &s.items[0] else {
3027            panic!()
3028        };
3029        assert_eq!(
3030            expr,
3031            &Expr::Column(ColumnName {
3032                qualifier: Some("t".into()),
3033                name: "col".into()
3034            })
3035        );
3036    }
3037
3038    #[test]
3039    fn select_item_alias_with_as() {
3040        let s = parse("SELECT a AS y FROM t");
3041        let Statement::Select(s) = s else {
3042            panic!("expected SELECT")
3043        };
3044        let SelectItem::Expr { alias, .. } = &s.items[0] else {
3045            panic!()
3046        };
3047        assert_eq!(alias.as_deref(), Some("y"));
3048    }
3049
3050    #[test]
3051    fn trailing_semicolon_accepted() {
3052        let s = parse("SELECT 1;");
3053        let Statement::Select(s) = s else {
3054            panic!("expected SELECT")
3055        };
3056        assert_eq!(s.items.len(), 1);
3057    }
3058
3059    #[test]
3060    fn boolean_chain_with_and_or_not() {
3061        // (NOT a) OR (b AND (NOT c))
3062        let s = parse("SELECT NOT a OR b AND NOT c FROM t");
3063        let Statement::Select(s) = s else {
3064            panic!("expected SELECT")
3065        };
3066        let SelectItem::Expr { expr, .. } = &s.items[0] else {
3067            panic!()
3068        };
3069        let expected = Expr::Binary {
3070            lhs: Box::new(Expr::Unary {
3071                op: UnOp::Not,
3072                expr: Box::new(col("a")),
3073            }),
3074            op: BinOp::Or,
3075            rhs: Box::new(Expr::Binary {
3076                lhs: Box::new(col("b")),
3077                op: BinOp::And,
3078                rhs: Box::new(Expr::Unary {
3079                    op: UnOp::Not,
3080                    expr: Box::new(col("c")),
3081                }),
3082            }),
3083        };
3084        assert_eq!(expr, &expected);
3085    }
3086
3087    #[test]
3088    fn empty_input_errors() {
3089        let err = parse_statement("").unwrap_err();
3090        assert!(err.message.contains("SELECT"));
3091    }
3092
3093    #[test]
3094    fn unmatched_paren_errors() {
3095        assert!(parse_statement("SELECT (1 + 2").is_err());
3096    }
3097
3098    #[test]
3099    fn display_round_trip_simple_select() {
3100        let original = parse("SELECT a + 1 FROM t WHERE a > 0");
3101        let text = original.to_string();
3102        let again = parse_statement(&text).expect("re-parse");
3103        assert_eq!(original, again);
3104    }
3105
3106    // --- CREATE TABLE & INSERT (v0.3) ---------------------------------------
3107
3108    #[test]
3109    fn create_table_single_column() {
3110        let s = parse("CREATE TABLE foo (a INT)");
3111        let Statement::CreateTable(c) = s else {
3112            panic!("expected CreateTable")
3113        };
3114        assert_eq!(c.name, "foo");
3115        assert_eq!(c.columns.len(), 1);
3116        assert_eq!(c.columns[0].name, "a");
3117        assert_eq!(c.columns[0].ty, ColumnTypeName::Int);
3118        assert!(c.columns[0].nullable);
3119    }
3120
3121    #[test]
3122    fn create_table_multi_column_with_not_null_mix() {
3123        let s = parse("CREATE TABLE u (id INT NOT NULL, name TEXT, score FLOAT NOT NULL, ok BOOL)");
3124        let Statement::CreateTable(c) = s else {
3125            panic!()
3126        };
3127        assert_eq!(c.columns.len(), 4);
3128        assert_eq!(c.columns[0].ty, ColumnTypeName::Int);
3129        assert!(!c.columns[0].nullable);
3130        assert_eq!(c.columns[1].ty, ColumnTypeName::Text);
3131        assert!(c.columns[1].nullable);
3132        assert_eq!(c.columns[2].ty, ColumnTypeName::Float);
3133        assert!(!c.columns[2].nullable);
3134        assert_eq!(c.columns[3].ty, ColumnTypeName::Bool);
3135    }
3136
3137    #[test]
3138    fn create_table_bigint_supported() {
3139        let s = parse("CREATE TABLE accounts (id BIGINT NOT NULL)");
3140        let Statement::CreateTable(c) = s else {
3141            panic!()
3142        };
3143        assert_eq!(c.columns[0].ty, ColumnTypeName::BigInt);
3144    }
3145
3146    #[test]
3147    fn create_table_vector_default_is_f32() {
3148        let s = parse("CREATE TABLE t (v VECTOR(128))");
3149        let Statement::CreateTable(c) = s else {
3150            panic!()
3151        };
3152        assert_eq!(
3153            c.columns[0].ty,
3154            ColumnTypeName::Vector {
3155                dim: 128,
3156                encoding: VecEncoding::F32,
3157            },
3158        );
3159    }
3160
3161    #[test]
3162    fn create_table_vector_using_sq8() {
3163        // v6.0.1: `USING SQ8` selects scalar-quantised encoding.
3164        // Case-insensitive on both `USING` and the encoding name.
3165        for sql in [
3166            "CREATE TABLE t (v VECTOR(128) USING SQ8)",
3167            "CREATE TABLE t (v VECTOR(128) using sq8)",
3168        ] {
3169            let s = parse(sql);
3170            let Statement::CreateTable(c) = s else {
3171                panic!()
3172            };
3173            assert_eq!(
3174                c.columns[0].ty,
3175                ColumnTypeName::Vector {
3176                    dim: 128,
3177                    encoding: VecEncoding::Sq8,
3178                },
3179                "{sql}",
3180            );
3181        }
3182    }
3183
3184    #[test]
3185    fn create_table_vector_using_unknown_errors() {
3186        let err = parse_statement("CREATE TABLE t (v VECTOR(8) USING PQ8)").unwrap_err();
3187        assert!(
3188            err.message.contains("unknown vector encoding"),
3189            "got: {}",
3190            err.message
3191        );
3192    }
3193
3194    #[test]
3195    fn vector_using_sq8_display_roundtrips() {
3196        // The Display impl must produce text that re-parses to the
3197        // same AST. Guard for the v6.0.1 `USING SQ8` suffix.
3198        let s = parse("CREATE TABLE t (v VECTOR(64) USING SQ8)");
3199        let Statement::CreateTable(c) = s else {
3200            panic!()
3201        };
3202        assert_eq!(c.columns[0].ty.to_string(), "VECTOR(64) USING SQ8");
3203    }
3204
3205    #[test]
3206    fn parser_recognises_placeholders() {
3207        use crate::ast::{Expr, SelectItem, Statement};
3208        // $N in expression position parses as Expr::Placeholder(N).
3209        let s = parse("SELECT $1, $2 + 1 FROM t WHERE x = $3");
3210        let Statement::Select(sel) = s else { panic!() };
3211        assert!(matches!(
3212            sel.items[0],
3213            SelectItem::Expr {
3214                expr: Expr::Placeholder(1),
3215                alias: None
3216            }
3217        ));
3218        // $2 + 1
3219        let SelectItem::Expr {
3220            expr: Expr::Binary { lhs, rhs, .. },
3221            ..
3222        } = &sel.items[1]
3223        else {
3224            panic!()
3225        };
3226        assert!(matches!(**lhs, Expr::Placeholder(2)));
3227        assert!(matches!(**rhs, Expr::Literal(Literal::Integer(1))));
3228        // WHERE x = $3
3229        let Some(Expr::Binary { rhs, .. }) = sel.where_.as_ref() else {
3230            panic!()
3231        };
3232        assert!(matches!(**rhs, Expr::Placeholder(3)));
3233    }
3234
3235    #[test]
3236    fn parser_rejects_dollar_zero() {
3237        // $0 is not valid in PG; the lexer rejects it.
3238        assert!(parse_statement("SELECT $0").is_err());
3239    }
3240
3241    #[test]
3242    fn placeholder_display_roundtrips() {
3243        // The Display impl must produce text that re-lexes to the
3244        // same Placeholder token.
3245        let s = parse("SELECT $42 FROM t");
3246        let printed = s.to_string();
3247        assert!(printed.contains("$42"));
3248        let again = parse(&printed);
3249        assert_eq!(s, again);
3250    }
3251
3252    #[test]
3253    fn alter_index_rebuild_bare() {
3254        use crate::ast::{AlterIndexTarget, Statement};
3255        let s = parse("ALTER INDEX my_idx REBUILD");
3256        let Statement::AlterIndex(a) = s else {
3257            panic!("expected AlterIndex, got {s:?}")
3258        };
3259        assert_eq!(a.name, "my_idx");
3260        assert_eq!(a.target, AlterIndexTarget::Rebuild { encoding: None });
3261    }
3262
3263    #[test]
3264    fn alter_index_rebuild_with_encoding() {
3265        use crate::ast::{AlterIndexTarget, Statement};
3266        for (sql, want) in [
3267            (
3268                "ALTER INDEX my_idx REBUILD WITH (encoding = F32)",
3269                VecEncoding::F32,
3270            ),
3271            (
3272                "ALTER INDEX my_idx REBUILD WITH (encoding = sq8)",
3273                VecEncoding::Sq8,
3274            ),
3275            (
3276                "ALTER INDEX my_idx REBUILD WITH (encoding = HALF)",
3277                VecEncoding::F16,
3278            ),
3279        ] {
3280            let s = parse(sql);
3281            let Statement::AlterIndex(a) = s else {
3282                panic!("{sql}: expected AlterIndex")
3283            };
3284            assert_eq!(a.name, "my_idx");
3285            assert_eq!(
3286                a.target,
3287                AlterIndexTarget::Rebuild {
3288                    encoding: Some(want)
3289                },
3290                "{sql}"
3291            );
3292        }
3293    }
3294
3295    #[test]
3296    fn alter_index_rebuild_unknown_encoding_errors() {
3297        let err = parse_statement("ALTER INDEX my_idx REBUILD WITH (encoding = PQ8)").unwrap_err();
3298        assert!(
3299            err.message.contains("unknown vector encoding"),
3300            "got: {}",
3301            err.message
3302        );
3303    }
3304
3305    #[test]
3306    fn alter_index_rebuild_display_roundtrips() {
3307        for (input, want) in [
3308            ("ALTER INDEX my_idx REBUILD", "ALTER INDEX my_idx REBUILD"),
3309            (
3310                "ALTER INDEX my_idx REBUILD WITH (encoding = SQ8)",
3311                "ALTER INDEX my_idx REBUILD WITH (encoding = SQ8)",
3312            ),
3313            (
3314                "ALTER INDEX my_idx REBUILD WITH (encoding = HALF)",
3315                "ALTER INDEX my_idx REBUILD WITH (encoding = HALF)",
3316            ),
3317        ] {
3318            let s = parse(input);
3319            assert_eq!(s.to_string(), want);
3320        }
3321    }
3322
3323    #[test]
3324    fn create_table_unknown_type_errors() {
3325        // v4.9: JSON is now real; pick an actually unsupported keyword
3326        // (XML never landed and isn't planned).
3327        let err = parse_statement("CREATE TABLE x (a xml)").unwrap_err();
3328        assert!(err.message.contains("unsupported column type"));
3329    }
3330
3331    #[test]
3332    fn create_table_missing_table_keyword_errors() {
3333        assert!(parse_statement("CREATE x (a INT)").is_err());
3334    }
3335
3336    #[test]
3337    fn insert_single_value() {
3338        let s = parse("INSERT INTO foo VALUES (42)");
3339        let Statement::Insert(i) = s else {
3340            panic!("expected Insert")
3341        };
3342        assert_eq!(i.table, "foo");
3343        assert_eq!(i.rows.len(), 1);
3344        assert_eq!(i.rows[0].len(), 1);
3345        assert!(matches!(i.rows[0][0], Expr::Literal(Literal::Integer(42))));
3346    }
3347
3348    #[test]
3349    fn insert_multi_value_with_mixed_literals() {
3350        let s = parse("INSERT INTO foo VALUES (1, 'hi', 3.14, TRUE, NULL)");
3351        let Statement::Insert(i) = s else { panic!() };
3352        assert_eq!(i.rows.len(), 1);
3353        assert_eq!(i.rows[0].len(), 5);
3354    }
3355
3356    #[test]
3357    fn insert_missing_into_errors() {
3358        assert!(parse_statement("INSERT foo VALUES (1)").is_err());
3359    }
3360
3361    #[test]
3362    fn create_table_round_trip() {
3363        let original =
3364            parse("CREATE TABLE foo (id BIGINT NOT NULL, label TEXT, score FLOAT NOT NULL)");
3365        let text = original.to_string();
3366        let again = parse_statement(&text).expect("re-parse");
3367        assert_eq!(original, again);
3368    }
3369
3370    #[test]
3371    fn insert_round_trip_with_negation_and_string() {
3372        let original = parse("INSERT INTO t VALUES (-1, 'it''s', NULL)");
3373        let text = original.to_string();
3374        let again = parse_statement(&text).expect("re-parse");
3375        assert_eq!(original, again);
3376    }
3377
3378    #[test]
3379    fn unknown_keyword_at_statement_start_errors() {
3380        // v4.4: UPDATE is real SQL now. Use a fabricated keyword so
3381        // the top-level dispatch still has no branch to take.
3382        let err = parse_statement("FROBNICATE foo SET x = 1").unwrap_err();
3383        assert!(err.message.contains("expected SELECT"));
3384    }
3385
3386    // --- v0.8 CREATE INDEX --------------------------------------------------
3387
3388    #[test]
3389    fn create_index_basic() {
3390        let s = parse("CREATE INDEX idx_id ON users (id)");
3391        let Statement::CreateIndex(c) = s else {
3392            panic!("expected CreateIndex")
3393        };
3394        assert_eq!(c.name, "idx_id");
3395        assert_eq!(c.table, "users");
3396        assert_eq!(c.column, "id");
3397    }
3398
3399    #[test]
3400    fn create_index_missing_on_errors() {
3401        assert!(parse_statement("CREATE INDEX foo users (id)").is_err());
3402    }
3403
3404    #[test]
3405    fn create_index_missing_paren_errors() {
3406        assert!(parse_statement("CREATE INDEX foo ON users id").is_err());
3407    }
3408
3409    #[test]
3410    fn create_index_round_trip() {
3411        let original = parse("CREATE INDEX by_name ON users (name)");
3412        let again = parse_statement(&original.to_string()).unwrap();
3413        assert_eq!(original, again);
3414    }
3415
3416    // --- v0.9 transactions -------------------------------------------------
3417
3418    #[test]
3419    fn begin_commit_rollback_parse_as_unit_variants() {
3420        assert_eq!(parse("BEGIN"), Statement::Begin);
3421        assert_eq!(parse("COMMIT"), Statement::Commit);
3422        assert_eq!(parse("ROLLBACK"), Statement::Rollback);
3423        // Trailing semicolons accepted too.
3424        assert_eq!(parse("BEGIN;"), Statement::Begin);
3425    }
3426
3427    // --- v1.2: pgvector distance ops + ::vector cast --------------------
3428
3429    #[test]
3430    fn inner_product_binop_parses() {
3431        let s = parse("SELECT v <#> [1.0, 2.0] FROM t");
3432        let Statement::Select(s) = s else { panic!() };
3433        let SelectItem::Expr { expr, .. } = &s.items[0] else {
3434            panic!()
3435        };
3436        assert!(matches!(
3437            expr,
3438            Expr::Binary {
3439                op: BinOp::InnerProduct,
3440                ..
3441            }
3442        ));
3443    }
3444
3445    #[test]
3446    fn cosine_distance_binop_parses() {
3447        let s = parse("SELECT v <=> [1.0, 2.0] FROM t");
3448        let Statement::Select(s) = s else { panic!() };
3449        let SelectItem::Expr { expr, .. } = &s.items[0] else {
3450            panic!()
3451        };
3452        assert!(matches!(
3453            expr,
3454            Expr::Binary {
3455                op: BinOp::CosineDistance,
3456                ..
3457            }
3458        ));
3459    }
3460
3461    #[test]
3462    fn vector_cast_postfix_wraps_string_literal() {
3463        let s = parse("SELECT '[1,2,3]'::vector FROM t");
3464        let Statement::Select(s) = s else { panic!() };
3465        let SelectItem::Expr { expr, .. } = &s.items[0] else {
3466            panic!()
3467        };
3468        assert!(matches!(
3469            expr,
3470            Expr::Cast {
3471                target: CastTarget::Vector,
3472                ..
3473            }
3474        ));
3475    }
3476
3477    #[test]
3478    fn unsupported_cast_target_errors() {
3479        // `::numeric` isn't in the v1.3 cast target set.
3480        let err = parse_statement("SELECT 1::numeric FROM t").unwrap_err();
3481        assert!(err.message.contains("unsupported cast target"));
3482    }
3483
3484    #[test]
3485    fn tx_statements_round_trip() {
3486        for q in ["BEGIN", "COMMIT", "ROLLBACK"] {
3487            let original = parse(q);
3488            let again = parse_statement(&original.to_string()).unwrap();
3489            assert_eq!(original, again);
3490        }
3491    }
3492
3493    #[test]
3494    fn interval_text_parsing_units() {
3495        // Single unit.
3496        assert_eq!(parse_interval_text("1 day"), Some((0, 86_400_000_000)));
3497        assert_eq!(parse_interval_text("1 second"), Some((0, 1_000_000)));
3498        assert_eq!(parse_interval_text("1 month"), Some((1, 0)));
3499        assert_eq!(parse_interval_text("2 years"), Some((24, 0)));
3500        // Compound spans accumulate.
3501        assert_eq!(parse_interval_text("1 year 6 months"), Some((18, 0)));
3502        assert_eq!(
3503            parse_interval_text("1 day 2 hours"),
3504            Some((0, 86_400_000_000 + 7_200_000_000))
3505        );
3506        // Negative numbers carry through.
3507        assert_eq!(parse_interval_text("-1 day"), Some((0, -86_400_000_000)));
3508        // Bad shapes return None.
3509        assert_eq!(parse_interval_text(""), None);
3510        assert_eq!(parse_interval_text("garbage"), None);
3511        assert_eq!(parse_interval_text("1 fortnight"), None);
3512        assert_eq!(parse_interval_text("1"), None);
3513    }
3514
3515    #[test]
3516    fn interval_literal_roundtrips_via_display() {
3517        let parsed = parse("SELECT INTERVAL '1 day 2 hours'");
3518        let s = parsed.to_string();
3519        // Display preserves the original text verbatim.
3520        assert!(s.contains("INTERVAL '1 day 2 hours'"), "got: {s}");
3521        // And re-parsing yields a structurally equal statement.
3522        let again = parse_statement(&s).unwrap();
3523        assert_eq!(parsed, again);
3524    }
3525
3526    // ── v6.1.2: CREATE / DROP PUBLICATION ────────────────────
3527
3528    #[test]
3529    fn parser_recognises_create_publication_bare() {
3530        let s = parse("CREATE PUBLICATION pub_a");
3531        let Statement::CreatePublication(p) = s else {
3532            panic!("expected CreatePublication, got {s:?}")
3533        };
3534        assert_eq!(p.name, "pub_a");
3535        assert_eq!(p.scope, PublicationScope::AllTables);
3536    }
3537
3538    #[test]
3539    fn parser_recognises_create_publication_for_all_tables() {
3540        let s = parse("CREATE PUBLICATION pub_a FOR ALL TABLES");
3541        let Statement::CreatePublication(p) = s else {
3542            panic!("expected CreatePublication, got {s:?}")
3543        };
3544        assert_eq!(p.name, "pub_a");
3545        assert_eq!(p.scope, PublicationScope::AllTables);
3546    }
3547
3548    #[test]
3549    fn parser_recognises_drop_publication() {
3550        let s = parse("DROP PUBLICATION pub_a");
3551        let Statement::DropPublication(name) = s else {
3552            panic!("expected DropPublication, got {s:?}")
3553        };
3554        assert_eq!(name, "pub_a");
3555    }
3556
3557    #[test]
3558    fn parser_recognises_for_table_list() {
3559        let s = parse("CREATE PUBLICATION pub_a FOR TABLE t1, t2, t3");
3560        let Statement::CreatePublication(p) = s else {
3561            panic!("expected CreatePublication, got {s:?}")
3562        };
3563        assert_eq!(p.name, "pub_a");
3564        let PublicationScope::ForTables(ts) = p.scope else {
3565            panic!("expected ForTables scope")
3566        };
3567        assert_eq!(ts, alloc::vec!["t1", "t2", "t3"]);
3568    }
3569
3570    #[test]
3571    fn parser_recognises_for_tables_plural() {
3572        // PG 19 accepts both `FOR TABLE` and `FOR TABLES` — match.
3573        let s = parse("CREATE PUBLICATION pub_a FOR TABLES t1, t2");
3574        let Statement::CreatePublication(p) = s else {
3575            panic!("expected CreatePublication, got {s:?}")
3576        };
3577        let PublicationScope::ForTables(ts) = p.scope else {
3578            panic!("expected ForTables")
3579        };
3580        assert_eq!(ts, alloc::vec!["t1", "t2"]);
3581    }
3582
3583    #[test]
3584    fn parser_recognises_for_all_tables_except_list() {
3585        let s = parse("CREATE PUBLICATION p FOR ALL TABLES EXCEPT t1, t2");
3586        let Statement::CreatePublication(p) = s else {
3587            panic!()
3588        };
3589        let PublicationScope::AllTablesExcept(ts) = p.scope else {
3590            panic!("expected AllTablesExcept")
3591        };
3592        assert_eq!(ts, alloc::vec!["t1", "t2"]);
3593    }
3594
3595    #[test]
3596    fn parser_rejects_for_table_with_empty_list() {
3597        // `FOR TABLE` with nothing after is a parse error.
3598        let err = parse_statement("CREATE PUBLICATION p FOR TABLE")
3599            .expect_err("must error on empty list");
3600        // No specific message asserted — the call falls through to
3601        // expect_ident_like which yields "expected identifier, got …".
3602        assert!(!err.message.is_empty());
3603    }
3604
3605    #[test]
3606    fn parser_recognises_show_publications() {
3607        // v6.1.3 — SHOW PUBLICATIONS lands here. PUBLICATIONS is a
3608        // bare ident in this position, NOT a reserved keyword.
3609        let s = parse("SHOW PUBLICATIONS");
3610        assert!(matches!(s, Statement::ShowPublications));
3611    }
3612
3613    // ── v6.1.4: CREATE / DROP SUBSCRIPTION + SHOW SUBSCRIPTIONS ─
3614
3615    #[test]
3616    fn parser_recognises_create_subscription_single_publication() {
3617        let s = parse("CREATE SUBSCRIPTION sub_a CONNECTION 'host=127.0.0.1 port=20002' PUBLICATION pub_a");
3618        let Statement::CreateSubscription(c) = s else {
3619            panic!("expected CreateSubscription, got {s:?}")
3620        };
3621        assert_eq!(c.name, "sub_a");
3622        assert_eq!(c.conn_str, "host=127.0.0.1 port=20002");
3623        assert_eq!(c.publications, alloc::vec!["pub_a"]);
3624    }
3625
3626    #[test]
3627    fn parser_recognises_create_subscription_multi_publication() {
3628        let s = parse(
3629            "CREATE SUBSCRIPTION sub_a CONNECTION 'host=h' PUBLICATION p1, p2, p3",
3630        );
3631        let Statement::CreateSubscription(c) = s else {
3632            panic!()
3633        };
3634        assert_eq!(c.publications, alloc::vec!["p1", "p2", "p3"]);
3635    }
3636
3637    #[test]
3638    fn parser_rejects_create_subscription_missing_connection() {
3639        let err = parse_statement("CREATE SUBSCRIPTION s PUBLICATION p")
3640            .expect_err("must error on missing CONNECTION");
3641        assert!(err.message.contains("CONNECTION"), "got: {}", err.message);
3642    }
3643
3644    #[test]
3645    fn parser_rejects_create_subscription_missing_publication() {
3646        let err = parse_statement("CREATE SUBSCRIPTION s CONNECTION 'host=x'")
3647            .expect_err("must error on missing PUBLICATION");
3648        assert!(err.message.contains("PUBLICATION"), "got: {}", err.message);
3649    }
3650
3651    #[test]
3652    fn parser_recognises_drop_subscription() {
3653        let s = parse("DROP SUBSCRIPTION sub_a");
3654        let Statement::DropSubscription(name) = s else {
3655            panic!("expected DropSubscription, got {s:?}")
3656        };
3657        assert_eq!(name, "sub_a");
3658    }
3659
3660    #[test]
3661    fn parser_recognises_show_subscriptions() {
3662        let s = parse("SHOW SUBSCRIPTIONS");
3663        assert!(matches!(s, Statement::ShowSubscriptions));
3664    }
3665
3666    #[test]
3667    fn parser_recognises_wait_for_wal_position_no_timeout() {
3668        let s = parse("WAIT FOR WAL POSITION 12345");
3669        let Statement::WaitForWalPosition { pos, timeout_ms } = s else {
3670            panic!("expected WaitForWalPosition, got {s:?}")
3671        };
3672        assert_eq!(pos, 12345);
3673        assert!(timeout_ms.is_none());
3674    }
3675
3676    #[test]
3677    fn parser_recognises_wait_for_wal_position_with_timeout() {
3678        let s = parse("WAIT FOR WAL POSITION 67890 WITH TIMEOUT 5000");
3679        let Statement::WaitForWalPosition { pos, timeout_ms } = s else {
3680            panic!()
3681        };
3682        assert_eq!(pos, 67890);
3683        assert_eq!(timeout_ms, Some(5000));
3684    }
3685
3686    #[test]
3687    fn parser_rejects_wait_with_negative_position() {
3688        // The lexer treats `-` as a token; `expect_u64_literal`
3689        // only sees the Integer that follows, so the negative
3690        // arrives as a unary-minus expression at higher levels.
3691        // Bare `WAIT FOR WAL POSITION -1` thus surfaces as a
3692        // parse error one way or another.
3693        let err = parse_statement("WAIT FOR WAL POSITION -1").unwrap_err();
3694        assert!(!err.message.is_empty());
3695    }
3696
3697    #[test]
3698    fn parser_recognises_bare_analyze() {
3699        let s = parse("ANALYZE");
3700        assert!(matches!(s, Statement::Analyze(None)));
3701    }
3702
3703    #[test]
3704    fn parser_recognises_analyze_with_table() {
3705        let s = parse("ANALYZE users");
3706        let Statement::Analyze(Some(name)) = s else {
3707            panic!("expected Analyze, got {s:?}")
3708        };
3709        assert_eq!(name, "users");
3710    }
3711
3712    #[test]
3713    fn parser_recognises_analyze_with_quoted_table() {
3714        let s = parse("ANALYZE \"Mixed Case\"");
3715        let Statement::Analyze(Some(name)) = s else {
3716            panic!()
3717        };
3718        assert_eq!(name, "Mixed Case");
3719    }
3720
3721    #[test]
3722    fn parser_rejects_analyze_with_garbage_token() {
3723        let err = parse_statement("ANALYZE 42").expect_err("must error");
3724        assert!(!err.message.is_empty());
3725    }
3726
3727    #[test]
3728    fn analyze_display_roundtrips() {
3729        for sql in ["ANALYZE", "ANALYZE users"] {
3730            let s = parse(sql);
3731            let printed = s.to_string();
3732            let again = parse_statement(&printed)
3733                .unwrap_or_else(|e| panic!("re-parse failed for {printed:?}: {e}"));
3734            assert_eq!(s, again);
3735        }
3736    }
3737
3738    #[test]
3739    fn wait_for_display_roundtrips() {
3740        for sql in [
3741            "WAIT FOR WAL POSITION 12345",
3742            "WAIT FOR WAL POSITION 67890 WITH TIMEOUT 5000",
3743        ] {
3744            let s = parse(sql);
3745            let printed = s.to_string();
3746            let again = parse_statement(&printed)
3747                .unwrap_or_else(|e| panic!("re-parse failed for {printed:?}: {e}"));
3748            assert_eq!(s, again, "round-trip mismatch for {sql:?}");
3749        }
3750    }
3751
3752    #[test]
3753    fn subscription_ddl_display_roundtrips() {
3754        for sql in [
3755            "CREATE SUBSCRIPTION sub_a CONNECTION 'host=h port=20002' PUBLICATION pub_a",
3756            "CREATE SUBSCRIPTION sub_b CONNECTION 'host=h' PUBLICATION p1, p2",
3757            "DROP SUBSCRIPTION sub_a",
3758            "SHOW SUBSCRIPTIONS",
3759        ] {
3760            let s = parse(sql);
3761            let printed = s.to_string();
3762            let again = parse_statement(&printed)
3763                .unwrap_or_else(|e| panic!("re-parse failed for {printed:?}: {e}"));
3764            assert_eq!(s, again, "round-trip mismatch for {sql:?}");
3765        }
3766    }
3767
3768    #[test]
3769    fn parser_drop_dispatches_user_vs_publication() {
3770        // Pre-v6.1.2 DROP USER took the bare-ident path; v6.1.2
3771        // tokenises DROP. Both targets must still parse.
3772        let s = parse("DROP USER 'alice'");
3773        let Statement::DropUser(name) = s else {
3774            panic!("expected DropUser, got {s:?}")
3775        };
3776        assert_eq!(name, "alice");
3777        // And DROP PUBLICATION lands the new variant.
3778        let s = parse("DROP PUBLICATION p1");
3779        assert!(matches!(s, Statement::DropPublication(_)));
3780    }
3781
3782    #[test]
3783    fn publication_ddl_display_roundtrips() {
3784        // Every CREATE PUBLICATION variant must Display → parse →
3785        // same AST. v6.1.3 covers all three scope shapes.
3786        for sql in [
3787            "CREATE PUBLICATION pub_a",
3788            "CREATE PUBLICATION pub_a FOR ALL TABLES",
3789            "CREATE PUBLICATION pub_a FOR TABLE t1, t2",
3790            "CREATE PUBLICATION pub_a FOR ALL TABLES EXCEPT t1",
3791            "DROP PUBLICATION pub_a",
3792            "SHOW PUBLICATIONS",
3793        ] {
3794            let s = parse(sql);
3795            let printed = s.to_string();
3796            let again = parse_statement(&printed)
3797                .unwrap_or_else(|e| panic!("re-parse failed for {printed:?}: {e}"));
3798            assert_eq!(s, again, "round-trip mismatch for {sql:?}");
3799        }
3800    }
3801}