Skip to main content

spg_sql/
parser.rs

1//! Recursive-descent parser with a Pratt (precedence-climbing) sub-parser for
2//! expressions.
3//!
4//! Precedence (lowest → highest binding):
5//! `OR` (1) `<` `AND` (2) `<` `NOT` unary (3) `<`
6//! comparisons `=` `<>` `<` `<=` `>` `>=` (4) `<`
7//! `+` `-` (5) `<` `*` `/` (6) `<` unary `-` (7) `<` parens / atom.
8//!
9//! This matches PG's behaviour for the operators we support — e.g. `NOT a = b`
10//! parses as `NOT (a = b)` and `-a * b` as `(-a) * b`.
11
12use alloc::boxed::Box;
13use alloc::format;
14use alloc::string::{String, ToString};
15use alloc::vec;
16use alloc::vec::Vec;
17use core::fmt;
18use core::mem;
19
20use crate::ast::{
21    BinOp, CastTarget, ColumnDef, ColumnName, ColumnTypeName, CreateIndexStatement,
22    CreatePublicationStatement, CreateSubscriptionStatement, CreateTableStatement, Expr,
23    ExtractField, FkAction, ForeignKeyConstraint, FrameBound, FrameKind, FromClause, FromJoin,
24    IndexMethod, InsertStatement, JoinKind, Literal, NullTreatment, OrderBy, PublicationScope,
25    SelectItem, SelectStatement, Statement, TableRef, UnOp, UnionKind, VecEncoding, WindowFrame,
26};
27use crate::lexer::{self, LexError, Token};
28
/// v7.9.22 — reports whether `name` is one of the pgvector / SPG
/// vector-index opclass names recognised in CREATE INDEX. The
/// comparison ignores ASCII case. SPG's HNSW already routes by query
/// operator; the opclass is accepted for `pg_dump` compatibility
/// (mailrs migration follow-up G5).
fn is_vector_opclass_name(name: &str) -> bool {
    // Every entry is lowercase ASCII, so an ASCII-case-insensitive
    // comparison needs no temporary lowercase copy of `name`.
    const KNOWN_OPCLASSES: [&str; 9] = [
        "vector_cosine_ops",
        "vector_l2_ops",
        "vector_ip_ops",
        "halfvec_cosine_ops",
        "halfvec_l2_ops",
        "halfvec_ip_ops",
        "sq8_cosine_ops",
        "sq8_l2_ops",
        "sq8_ip_ops",
    ];
    KNOWN_OPCLASSES
        .iter()
        .any(|opclass| name.eq_ignore_ascii_case(opclass))
}
48
/// Error reported when the token stream cannot be parsed into a
/// statement.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ParseError {
    /// Human-readable description of the failure.
    pub message: String,
    /// Index into the token stream where parsing tripped. Not a byte offset.
    pub token_pos: usize,
}

impl fmt::Display for ParseError {
    /// Renders the error as `parse error at token #<token_pos>: <message>`.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let Self { message, token_pos } = self;
        write!(f, "parse error at token #{token_pos}: {message}")
    }
}
65
66impl From<LexError> for ParseError {
67    fn from(e: LexError) -> Self {
68        Self {
69            message: format!("lex: {e}"),
70            token_pos: 0,
71        }
72    }
73}
74
75/// Parse exactly one statement, swallow an optional trailing `;`, and require
76/// the token stream to end there.
77pub fn parse_statement(input: &str) -> Result<Statement, ParseError> {
78    let tokens = lexer::tokenize(input)?;
79    let mut p = Parser::new(tokens);
80    let stmt = p.parse_one_statement()?;
81    if matches!(p.peek(), Token::Semicolon) {
82        p.advance();
83    }
84    p.expect_eof()?;
85    Ok(stmt)
86}
87
/// Cursor over the lexed token stream; every parsing routine is a
/// method on this type.
struct Parser {
    /// Tokens being parsed. `peek()` indexes this directly and expects
    /// the stream to end with `Token::Eof`; `advance()` swaps each
    /// consumed token out for `Token::Eof`.
    tokens: Vec<Token>,
    /// Index of the current (not yet consumed) token in `tokens`.
    pos: usize,
}
92
93impl Parser {
94    fn new(tokens: Vec<Token>) -> Self {
95        Self { tokens, pos: 0 }
96    }
97
98    fn peek(&self) -> &Token {
99        // tokens always ends with Eof; pos is clamped in advance().
100        &self.tokens[self.pos]
101    }
102
103    fn advance(&mut self) -> Token {
104        let t = mem::replace(&mut self.tokens[self.pos], Token::Eof);
105        if self.pos + 1 < self.tokens.len() {
106            self.pos += 1;
107        }
108        t
109    }
110
111    fn err(&self, message: String) -> ParseError {
112        ParseError {
113            message,
114            token_pos: self.pos,
115        }
116    }
117
118    fn expect_eof(&self) -> Result<(), ParseError> {
119        if matches!(self.peek(), Token::Eof) {
120            Ok(())
121        } else {
122            Err(self.err(format!("expected end of input, got {:?}", self.peek())))
123        }
124    }
125
126    fn expect_ident_like(&mut self) -> Result<String, ParseError> {
127        match self.advance() {
128            Token::Ident(s) | Token::QuotedIdent(s) => Ok(s),
129            other => Err(ParseError {
130                message: format!("expected identifier, got {other:?}"),
131                token_pos: self.pos.saturating_sub(1),
132            }),
133        }
134    }
135
136    #[allow(clippy::too_many_lines)]
137    fn parse_one_statement(&mut self) -> Result<Statement, ParseError> {
138        match self.peek() {
139            Token::Select => self.parse_select_stmt(),
140            // v4.11: `WITH name AS (SELECT ...) [, ...] SELECT ...`.
141            // WITH isn't a reserved token in our lexer — comes through
142            // as `Token::Ident("with")` (case-insensitive).
143            Token::Ident(s) | Token::QuotedIdent(s) if s.eq_ignore_ascii_case("with") => {
144                self.advance();
145                self.parse_with_cte_then_select()
146            }
147            // v4.26: `EXPLAIN [ANALYZE] <select>`. Comes through as
148            // an identifier — not a reserved keyword.
149            Token::Ident(s) | Token::QuotedIdent(s) if s.eq_ignore_ascii_case("explain") => {
150                self.advance();
151                let mut analyze = false;
152                let mut suggest = false;
153                // v6.8.3 — `EXPLAIN (SUGGEST)` opt-in.
154                if matches!(self.peek(), Token::LParen) {
155                    self.advance();
156                    let opt = match self.peek().clone() {
157                        Token::Ident(s) | Token::QuotedIdent(s) => s,
158                        other => {
159                            return Err(self.err(format!(
160                                "expected option keyword inside EXPLAIN (…), got {other:?}"
161                            )));
162                        }
163                    };
164                    if !opt.eq_ignore_ascii_case("suggest") {
165                        return Err(self.err(format!(
166                            "unknown EXPLAIN option {opt:?}; v6.8.3 supports SUGGEST"
167                        )));
168                    }
169                    self.advance();
170                    if !matches!(self.peek(), Token::RParen) {
171                        return Err(self.err(format!(
172                            "expected ')' after EXPLAIN option, got {:?}",
173                            self.peek()
174                        )));
175                    }
176                    self.advance();
177                    suggest = true;
178                } else if let Token::Ident(s) | Token::QuotedIdent(s) = self.peek()
179                    && (s.eq_ignore_ascii_case("analyze") || s.eq_ignore_ascii_case("analyse"))
180                {
181                    self.advance();
182                    analyze = true;
183                }
184                let inner = self.parse_select_stmt()?;
185                let Statement::Select(s) = inner else {
186                    return Err(self.err(format!("EXPLAIN body must be a SELECT, got {inner:?}")));
187                };
188                Ok(Statement::Explain(crate::ast::ExplainStatement {
189                    analyze,
190                    inner: Box::new(s),
191                    suggest,
192                }))
193            }
194            Token::Create => self.parse_create_stmt(),
195            Token::Insert => self.parse_insert_stmt(),
196            Token::Begin => {
197                self.advance();
198                Ok(Statement::Begin)
199            }
200            Token::Commit => {
201                self.advance();
202                Ok(Statement::Commit)
203            }
204            Token::Rollback => {
205                self.advance();
206                // `ROLLBACK TO [SAVEPOINT] <name>` returns to that
207                // savepoint without ending the transaction. Bare
208                // `ROLLBACK` drops the whole TX.
209                if matches!(self.peek(), Token::To) {
210                    self.advance();
211                    if matches!(self.peek(), Token::Savepoint) {
212                        self.advance();
213                    }
214                    let name = self.expect_ident_like()?;
215                    Ok(Statement::RollbackToSavepoint(name))
216                } else {
217                    Ok(Statement::Rollback)
218                }
219            }
220            Token::Savepoint => {
221                self.advance();
222                let name = self.expect_ident_like()?;
223                Ok(Statement::Savepoint(name))
224            }
225            Token::Release => {
226                self.advance();
227                // `RELEASE [SAVEPOINT] <name>` — the `SAVEPOINT` keyword
228                // is optional in standard SQL.
229                if matches!(self.peek(), Token::Savepoint) {
230                    self.advance();
231                }
232                let name = self.expect_ident_like()?;
233                Ok(Statement::ReleaseSavepoint(name))
234            }
235            Token::Show => {
236                self.advance();
237                // `SHOW TABLES` / `SHOW USERS` / `SHOW COLUMNS FROM <table>`.
238                // v6.1.2 promoted TABLES to a reserved keyword (for
239                // `CREATE PUBLICATION … FOR ALL TABLES`), so it now
240                // arrives as `Token::Tables` rather than a bare ident.
241                // USERS / COLUMNS remain bare idents.
242                let target = match self.advance() {
243                    Token::Tables => "tables".to_string(),
244                    Token::Ident(s) | Token::QuotedIdent(s) => s.to_ascii_lowercase(),
245                    other => {
246                        return Err(self.err(format!(
247                            "expected SHOW target, got {other:?}"
248                        )));
249                    }
250                };
251                match target.as_str() {
252                    "tables" => Ok(Statement::ShowTables),
253                    "users" => Ok(Statement::ShowUsers),
254                    // v6.1.3 — PUBLICATIONS plural is NOT a reserved
255                    // keyword on its own; it lands here as a bare
256                    // ident. Returning all publications + their
257                    // scope summary.
258                    "publications" => Ok(Statement::ShowPublications),
259                    // v6.1.4 — same shape for SUBSCRIPTIONS plural.
260                    "subscriptions" => Ok(Statement::ShowSubscriptions),
261                    "columns" => {
262                        if !matches!(self.peek(), Token::From) {
263                            return Err(self.err(format!(
264                                "expected FROM after SHOW COLUMNS, got {:?}",
265                                self.peek()
266                            )));
267                        }
268                        self.advance();
269                        let table = self.expect_ident_like()?;
270                        Ok(Statement::ShowColumns(table))
271                    }
272                    other => Err(self.err(format!(
273                        "unknown SHOW target {other:?}; supported: TABLES, COLUMNS, USERS, PUBLICATIONS"
274                    ))),
275                }
276            }
277            // v6.1.2: `DROP` is now a reserved keyword (it dispatches
278            // to DROP USER and DROP PUBLICATION today; DROP TABLE /
279            // DROP INDEX are still SHOW-shaped admin ops). Pre-6.1.2
280            // arrived as a bare ident; tokenising it dedicatedly
281            // keeps the dispatch tree small.
282            Token::Drop => {
283                self.advance();
284                match self.peek() {
285                    Token::Publication => {
286                        self.advance();
287                        let name = self.expect_ident_or_string()?;
288                        Ok(Statement::DropPublication(name))
289                    }
290                    Token::Subscription => {
291                        self.advance();
292                        let name = self.expect_ident_or_string()?;
293                        Ok(Statement::DropSubscription(name))
294                    }
295                    Token::Ident(s) | Token::QuotedIdent(s) if s.eq_ignore_ascii_case("user") => {
296                        self.advance();
297                        let name = self.expect_ident_or_string()?;
298                        Ok(Statement::DropUser(name))
299                    }
300                    other => Err(self.err(format!(
301                        "expected USER / PUBLICATION / SUBSCRIPTION after DROP, got {other:?}"
302                    ))),
303                }
304            }
305            Token::Ident(s) | Token::QuotedIdent(s) if s.eq_ignore_ascii_case("update") => {
306                self.advance();
307                self.parse_update_after_keyword()
308            }
309            Token::Ident(s) | Token::QuotedIdent(s) if s.eq_ignore_ascii_case("delete") => {
310                self.advance();
311                self.parse_delete_after_keyword()
312            }
313            // v6.0.4: ALTER INDEX <name> REBUILD [WITH (encoding = ...)].
314            // ALTER is not a reserved keyword in the lexer — handled
315            // as a bare ident here.
316            Token::Ident(s) | Token::QuotedIdent(s) if s.eq_ignore_ascii_case("alter") => {
317                self.advance();
318                self.parse_alter_after_keyword()
319            }
320            // v6.1.7: WAIT FOR WAL POSITION <pos> [WITH TIMEOUT <ms>].
321            // WAIT / POSITION / TIMEOUT are bare idents — no lexer
322            // additions needed.
323            Token::Ident(s) | Token::QuotedIdent(s) if s.eq_ignore_ascii_case("wait") => {
324                self.advance();
325                self.parse_wait_after_keyword()
326            }
327            // v6.2.0: ANALYZE [<table>]. ANALYZE is a bare ident.
328            // Bare ANALYZE → analyse every user table; ANALYZE
329            // <name> → re-stats one. The argument is an optional
330            // ident (or quoted ident); anything else is a parse
331            // error.
332            // v6.7.3 — `COMPACT COLD SEGMENTS`. No arguments, no
333            // `WHERE` filter (carved out per V6_7_DESIGN.md
334            // STABILITY). Lex order: identifier "compact" → "cold"
335            // → "segments". Anything else after `COMPACT` is a
336            // parse error.
337            Token::Ident(s) | Token::QuotedIdent(s) if s.eq_ignore_ascii_case("compact") => {
338                self.advance();
339                let next = self.peek().clone();
340                let cold = match next {
341                    Token::Ident(s) | Token::QuotedIdent(s) => s,
342                    _ => {
343                        return Err(
344                            self.err(format!("expected COLD after COMPACT, got {:?}", self.peek()))
345                        );
346                    }
347                };
348                if !cold.eq_ignore_ascii_case("cold") {
349                    return Err(self.err(format!("expected COLD after COMPACT, got {cold:?}")));
350                }
351                self.advance();
352                let next = self.peek().clone();
353                let segments = match next {
354                    Token::Ident(s) | Token::QuotedIdent(s) => s,
355                    _ => {
356                        return Err(self.err(format!(
357                            "expected SEGMENTS after COMPACT COLD, got {:?}",
358                            self.peek()
359                        )));
360                    }
361                };
362                if !segments.eq_ignore_ascii_case("segments") {
363                    return Err(self.err(format!(
364                        "expected SEGMENTS after COMPACT COLD, got {segments:?}"
365                    )));
366                }
367                self.advance();
368                Ok(Statement::CompactColdSegments)
369            }
370            Token::Ident(s) | Token::QuotedIdent(s) if s.eq_ignore_ascii_case("analyze") => {
371                self.advance();
372                let target = match self.peek() {
373                    Token::Eof | Token::Semicolon => None,
374                    Token::Ident(_) | Token::QuotedIdent(_) => {
375                        Some(self.expect_ident_like()?)
376                    }
377                    other => {
378                        return Err(self.err(format!(
379                            "expected table name or end of statement after ANALYZE, got {other:?}"
380                        )));
381                    }
382                };
383                Ok(Statement::Analyze(target))
384            }
385            other => Err(self.err(format!(
386                "expected SELECT / CREATE / DROP / INSERT / UPDATE / DELETE / ALTER / BEGIN / COMMIT / \
387                 ROLLBACK / SAVEPOINT / RELEASE / SHOW at start of statement, got {other:?}"
388            ))),
389        }
390    }
391
392    fn parse_create_stmt(&mut self) -> Result<Statement, ParseError> {
393        debug_assert!(matches!(self.peek(), Token::Create));
394        self.advance();
395        match self.peek() {
396            Token::Table => self.parse_create_table_stmt_after_create(),
397            Token::Index => self.parse_create_index_stmt_after_create(),
398            Token::Publication => {
399                self.advance();
400                self.parse_create_publication_after_keyword()
401            }
402            Token::Subscription => {
403                self.advance();
404                self.parse_create_subscription_after_keyword()
405            }
406            // v4.1: CREATE USER 'name' WITH PASSWORD 'pw' [ROLE 'role'].
407            // USER isn't a reserved keyword — we look for the bare
408            // identifier so the lexer doesn't have to grow a token.
409            Token::Ident(s) | Token::QuotedIdent(s) if s.eq_ignore_ascii_case("user") => {
410                self.advance();
411                self.parse_create_user_after_keyword()
412            }
413            // v7.9.15 — `CREATE EXTENSION [IF NOT EXISTS] <name>
414            // [WITH SCHEMA …] [VERSION '…'] [CASCADE]` as a
415            // no-op. mailrs follow-up F3.
416            Token::Ident(s) | Token::QuotedIdent(s) if s.eq_ignore_ascii_case("extension") => {
417                self.advance();
418                self.parse_create_extension_after_keyword()
419            }
420            other => Err(self.err(format!(
421                "expected TABLE / INDEX / USER / EXTENSION / PUBLICATION / SUBSCRIPTION after CREATE, got {other:?}"
422            ))),
423        }
424    }
425
426    /// v7.9.15 — accept and discard `CREATE EXTENSION` DDL.
427    /// SPG doesn't have a registry; pgvector / similar are
428    /// either builtin (VECTOR(N) ↔ pgvector) or n/a. Parsing
429    /// the syntax lets dual-target schemas keep the line.
430    fn parse_create_extension_after_keyword(&mut self) -> Result<Statement, ParseError> {
431        // Optional `IF NOT EXISTS`.
432        self.consume_if_not_exists();
433        let name = self.expect_ident_like()?;
434        // Drain optional WITH SCHEMA <ident> / VERSION '<v>' /
435        // CASCADE / FROM '<v>' clauses; we don't model them.
436        loop {
437            match self.peek() {
438                Token::Ident(s) if s.eq_ignore_ascii_case("with") => {
439                    self.advance();
440                    continue;
441                }
442                Token::Ident(s) if s.eq_ignore_ascii_case("schema") => {
443                    self.advance();
444                    let _ = self.expect_ident_like()?;
445                    continue;
446                }
447                Token::Ident(s) if s.eq_ignore_ascii_case("version") => {
448                    self.advance();
449                    // String or ident literal.
450                    let _ = self.advance();
451                    continue;
452                }
453                Token::Ident(s) if s.eq_ignore_ascii_case("from") => {
454                    self.advance();
455                    let _ = self.advance();
456                    continue;
457                }
458                Token::Ident(s) if s.eq_ignore_ascii_case("cascade") => {
459                    self.advance();
460                    continue;
461                }
462                _ => break,
463            }
464        }
465        Ok(Statement::CreateExtension(name))
466    }
467
468    /// v6.1.2 → v6.1.3 — `CREATE PUBLICATION <name>` body. Accepts:
469    ///   - (no clause) → implicit `FOR ALL TABLES`
470    ///   - `FOR ALL TABLES`
471    ///   - `FOR ALL TABLES EXCEPT t1, t2, …` (v6.1.3)
472    ///   - `FOR TABLE t1, t2, …` (v6.1.3) — `FOR TABLES …` also
473    ///     accepted (PG accepts both forms in PG 19).
474    fn parse_create_publication_after_keyword(&mut self) -> Result<Statement, ParseError> {
475        let name = self.expect_ident_or_string()?;
476        // Bare DDL maps to FOR ALL TABLES — matches the v6.1.2
477        // shape so existing publications keep parsing identically.
478        let scope = if matches!(self.peek(), Token::For) {
479            self.advance();
480            if matches!(self.peek(), Token::All) {
481                self.advance();
482                if !matches!(self.peek(), Token::Tables) {
483                    return Err(self.err(format!(
484                        "expected TABLES after FOR ALL, got {:?}",
485                        self.peek()
486                    )));
487                }
488                self.advance();
489                if matches!(self.peek(), Token::Except) {
490                    self.advance();
491                    let tables = self.parse_publication_table_list()?;
492                    PublicationScope::AllTablesExcept(tables)
493                } else {
494                    PublicationScope::AllTables
495                }
496            } else if matches!(self.peek(), Token::Table | Token::Tables) {
497                // PG 19 accepts both `FOR TABLE …` (singular) and
498                // `FOR TABLES …` (plural); SPG matches.
499                self.advance();
500                let tables = self.parse_publication_table_list()?;
501                PublicationScope::ForTables(tables)
502            } else {
503                return Err(self.err(format!(
504                    "expected ALL TABLES or TABLE <list> after FOR, got {:?}",
505                    self.peek()
506                )));
507            }
508        } else {
509            PublicationScope::AllTables
510        };
511        Ok(Statement::CreatePublication(CreatePublicationStatement {
512            name,
513            scope,
514        }))
515    }
516
517    /// v6.1.3 — Comma-separated identifier list for the publication
518    /// FOR-clause. Requires at least one entry; empty list is a
519    /// parse error (PG behaviour). Quoted idents are accepted; the
520    /// names round-trip through `Display` as `quote_ident(name)`.
521    fn parse_publication_table_list(&mut self) -> Result<Vec<String>, ParseError> {
522        let first = self.expect_ident_like()?;
523        let mut out = alloc::vec![first];
524        while matches!(self.peek(), Token::Comma) {
525            self.advance();
526            out.push(self.expect_ident_like()?);
527        }
528        Ok(out)
529    }
530
531    /// v6.1.4 — `CREATE SUBSCRIPTION <name>
532    ///                 CONNECTION '<conn>'
533    ///                 PUBLICATION <pub> [, <pub> ...]`.
534    ///
535    /// The clause order is fixed (CONNECTION first, then
536    /// PUBLICATION) to match PG. No WITH-options accepted in
537    /// v6.1.4 — `enabled` defaults to true, no other knobs ship.
538    fn parse_create_subscription_after_keyword(&mut self) -> Result<Statement, ParseError> {
539        let name = self.expect_ident_or_string()?;
540        if !matches!(self.peek(), Token::Connection) {
541            return Err(self.err(format!(
542                "expected CONNECTION after CREATE SUBSCRIPTION <name>, got {:?}",
543                self.peek()
544            )));
545        }
546        self.advance();
547        let conn_str = self.expect_string_literal()?;
548        if !matches!(self.peek(), Token::Publication) {
549            return Err(self.err(format!(
550                "expected PUBLICATION after CONNECTION '<conn>', got {:?}",
551                self.peek()
552            )));
553        }
554        self.advance();
555        // Reuse the publication FOR-list parser shape: at least one
556        // identifier, comma-separated.
557        let first = self.expect_ident_like()?;
558        let mut publications = alloc::vec![first];
559        while matches!(self.peek(), Token::Comma) {
560            self.advance();
561            publications.push(self.expect_ident_like()?);
562        }
563        Ok(Statement::CreateSubscription(
564            CreateSubscriptionStatement {
565                name,
566                conn_str,
567                publications,
568            },
569        ))
570    }
571
572    /// v6.1.7 — `WAIT FOR WAL POSITION <pos> [WITH TIMEOUT <ms>]`.
573    /// All keywords after `WAIT` are bare idents in v6.1.x; no
574    /// lexer churn. Both `<pos>` and `<ms>` are positive integers
575    /// that fit `u64`.
576    fn parse_wait_after_keyword(&mut self) -> Result<Statement, ParseError> {
577        // FOR is a v6.1.2-reserved keyword (Token::For). The
578        // other two are bare idents — they've never needed lexer
579        // support and we keep it that way.
580        if !matches!(self.peek(), Token::For) {
581            return Err(self.err(format!(
582                "expected FOR after WAIT, got {:?}",
583                self.peek()
584            )));
585        }
586        self.advance();
587        self.expect_keyword_ident("wal")?;
588        self.expect_keyword_ident("position")?;
589        let pos = self.expect_u64_literal()?;
590        let timeout_ms = if matches!(self.peek(), Token::Ident(s) | Token::QuotedIdent(s) if s.eq_ignore_ascii_case("with"))
591        {
592            self.advance();
593            self.expect_keyword_ident("timeout")?;
594            Some(self.expect_u64_literal()?)
595        } else {
596            None
597        };
598        Ok(Statement::WaitForWalPosition { pos, timeout_ms })
599    }
600
601    /// v6.1.7 helper — consume a `Token::Integer` and check it
602    /// fits `u64`. WAL positions and millisecond timeouts are
603    /// non-negative.
604    fn expect_u64_literal(&mut self) -> Result<u64, ParseError> {
605        match self.advance() {
606            Token::Integer(n) if n >= 0 => Ok(n as u64),
607            Token::Integer(n) => Err(ParseError {
608                message: format!("expected non-negative integer, got {n}"),
609                token_pos: self.pos.saturating_sub(1),
610            }),
611            other => Err(ParseError {
612                message: format!("expected integer literal, got {other:?}"),
613                token_pos: self.pos.saturating_sub(1),
614            }),
615        }
616    }
617
618    /// `CREATE USER` body — name + WITH PASSWORD '<pw>' + optional
619    /// ROLE '<role>' (defaults to readonly). All string slots accept
620    /// either a quoted ident or a quoted string literal.
621    fn parse_create_user_after_keyword(&mut self) -> Result<Statement, ParseError> {
622        let name = self.expect_ident_or_string()?;
623        self.expect_keyword_ident("with")?;
624        self.expect_keyword_ident("password")?;
625        let password = self.expect_string_literal()?;
626        let role = if let Token::Ident(s) | Token::QuotedIdent(s) = self.peek()
627            && s.eq_ignore_ascii_case("role")
628        {
629            self.advance();
630            self.expect_string_literal()?
631        } else {
632            "readonly".to_string()
633        };
634        Ok(Statement::CreateUser(crate::ast::CreateUserStatement {
635            name,
636            password,
637            role,
638        }))
639    }
640
641    /// v4.4 `UPDATE <table> SET col = expr [, col = expr]* [WHERE cond]`.
642    /// Caller already consumed the leading `UPDATE` ident.
643    fn parse_update_after_keyword(&mut self) -> Result<Statement, ParseError> {
644        let table = self.expect_ident_like()?;
645        self.expect_keyword_ident("set")?;
646        let mut assignments = Vec::new();
647        loop {
648            let col = self.expect_ident_like()?;
649            if !matches!(self.peek(), Token::Eq) {
650                return Err(self.err(format!(
651                    "expected `=` after column name in UPDATE SET, got {:?}",
652                    self.peek()
653                )));
654            }
655            self.advance();
656            let value = self.parse_expr(0)?;
657            assignments.push((col, value));
658            if matches!(self.peek(), Token::Comma) {
659                self.advance();
660                continue;
661            }
662            break;
663        }
664        let where_ = if matches!(self.peek(), Token::Where) {
665            self.advance();
666            Some(self.parse_expr(0)?)
667        } else {
668            None
669        };
670        let returning = self.parse_optional_returning()?;
671        Ok(Statement::Update(crate::ast::UpdateStatement {
672            table,
673            assignments,
674            where_,
675            returning,
676        }))
677    }
678
679    /// v4.4 `DELETE FROM <table> [WHERE cond]`. Caller already consumed
680    /// the leading `DELETE` ident.
681    fn parse_delete_after_keyword(&mut self) -> Result<Statement, ParseError> {
682        if !matches!(self.peek(), Token::From) {
683            return Err(self.err(format!("expected FROM after DELETE, got {:?}", self.peek())));
684        }
685        self.advance();
686        let table = self.expect_ident_like()?;
687        let where_ = if matches!(self.peek(), Token::Where) {
688            self.advance();
689            Some(self.parse_expr(0)?)
690        } else {
691            None
692        };
693        let returning = self.parse_optional_returning()?;
694        Ok(Statement::Delete(crate::ast::DeleteStatement {
695            table,
696            where_,
697            returning,
698        }))
699    }
700
701    /// v7.9.4 — parse the optional trailing `RETURNING <projection>`
702    /// clause on INSERT / UPDATE / DELETE. Same projection grammar
703    /// as SELECT, so `RETURNING *`, `RETURNING col`,
704    /// `RETURNING expr AS alias`, and `RETURNING a, b, c` all work.
705    fn parse_optional_returning(&mut self) -> Result<Option<Vec<crate::ast::SelectItem>>, ParseError> {
706        let is_returning_kw = matches!(
707            self.peek(),
708            Token::Ident(s) if s.eq_ignore_ascii_case("returning")
709        );
710        if !is_returning_kw {
711            return Ok(None);
712        }
713        self.advance();
714        let mut items = Vec::new();
715        loop {
716            items.push(self.parse_select_item()?);
717            if matches!(self.peek(), Token::Comma) {
718                self.advance();
719                continue;
720            }
721            break;
722        }
723        Ok(Some(items))
724    }
725
726    /// v6.0.4 — parse the tail of an ALTER statement after the
727    /// leading `ALTER` keyword has been consumed. Only one form is
728    /// supported in v6.0.4:
729    ///
730    /// ```text
731    /// ALTER INDEX <name> REBUILD [WITH (encoding = <enc>)]
732    /// ```
733    fn parse_alter_after_keyword(&mut self) -> Result<Statement, ParseError> {
734        // ALTER INDEX <name> ... | ALTER TABLE <name> SET hot_tier_bytes = <n>
735        match self.advance() {
736            Token::Index => {}
737            Token::Ident(s) | Token::QuotedIdent(s) if s.eq_ignore_ascii_case("index") => {}
738            // v6.7.2 — ALTER TABLE t SET hot_tier_bytes = X
739            Token::Table => return self.parse_alter_table_after_keyword(),
740            Token::Ident(s) | Token::QuotedIdent(s) if s.eq_ignore_ascii_case("table") => {
741                return self.parse_alter_table_after_keyword();
742            }
743            other => {
744                return Err(self.err(format!("expected INDEX or TABLE after ALTER, got {other:?}")));
745            }
746        }
747        let name = self.expect_ident_like()?;
748        // REBUILD
749        self.expect_keyword_ident("rebuild")?;
750        // Optional: WITH (encoding = <enc>)
751        let encoding = if matches!(self.peek(), Token::Ident(s) if s.eq_ignore_ascii_case("with")) {
752            self.advance();
753            if !matches!(self.peek(), Token::LParen) {
754                return Err(self.err(format!(
755                    "expected '(' after WITH in ALTER INDEX REBUILD, got {:?}",
756                    self.peek()
757                )));
758            }
759            self.advance();
760            self.expect_keyword_ident("encoding")?;
761            if !matches!(self.peek(), Token::Eq) {
762                return Err(self.err(format!(
763                    "expected '=' after encoding in ALTER INDEX REBUILD, got {:?}",
764                    self.peek()
765                )));
766            }
767            self.advance();
768            let enc_ident = match self.advance() {
769                Token::Ident(s) | Token::QuotedIdent(s) => s,
770                other => {
771                    return Err(self.err(format!("expected encoding name after =, got {other:?}")));
772                }
773            };
774            let enc = match enc_ident.to_ascii_lowercase().as_str() {
775                "f32" => VecEncoding::F32,
776                "sq8" => VecEncoding::Sq8,
777                "half" => VecEncoding::F16,
778                other => {
779                    return Err(self.err(format!(
780                        "unknown vector encoding {other:?} in ALTER INDEX REBUILD; supported: F32, SQ8, HALF"
781                    )));
782                }
783            };
784            if !matches!(self.peek(), Token::RParen) {
785                return Err(self.err(format!(
786                    "expected ')' after encoding value, got {:?}",
787                    self.peek()
788                )));
789            }
790            self.advance();
791            Some(enc)
792        } else {
793            None
794        };
795        Ok(Statement::AlterIndex(crate::ast::AlterIndexStatement {
796            name,
797            target: crate::ast::AlterIndexTarget::Rebuild { encoding },
798        }))
799    }
800
801    /// v6.7.2 — `ALTER TABLE <name> SET hot_tier_bytes = <n>`. The
802    /// only `SET` form currently supported; future v6.7.x can add
803    /// more SET subjects without changing the dispatch shape.
804    fn parse_alter_table_after_keyword(&mut self) -> Result<Statement, ParseError> {
805        let table_name = self.expect_ident_like()?;
806        // v7.6.8 — dispatch on the next keyword: SET / ADD / DROP.
807        // SET kept identical to v6.7.x. ADD / DROP CONSTRAINT routes
808        // to FK installation / removal.
809        match self.peek() {
810            Token::Ident(s) if s.eq_ignore_ascii_case("set") => {
811                self.advance();
812                let setting = self.expect_ident_like()?;
813                if !setting.eq_ignore_ascii_case("hot_tier_bytes") {
814                    return Err(self.err(alloc::format!(
815                        "ALTER TABLE SET: unknown setting {setting:?}; supported: hot_tier_bytes"
816                    )));
817                }
818                if !matches!(self.peek(), Token::Eq) {
819                    return Err(self.err(alloc::format!(
820                        "expected '=' after hot_tier_bytes, got {:?}",
821                        self.peek()
822                    )));
823                }
824                self.advance();
825                let n = self.expect_u64_literal()?;
826                Ok(Statement::AlterTable(crate::ast::AlterTableStatement {
827                    name: table_name,
828                    target: crate::ast::AlterTableTarget::SetHotTierBytes(n),
829                }))
830            }
831            Token::Ident(s) if s.eq_ignore_ascii_case("add") => {
832                self.advance();
833                // Optional `CONSTRAINT <name>` prefix, then the same
834                // FK clause shape as table-level CREATE TABLE FK.
835                let fk = self.parse_table_level_fk()?;
836                Ok(Statement::AlterTable(crate::ast::AlterTableStatement {
837                    name: table_name,
838                    target: crate::ast::AlterTableTarget::AddForeignKey(fk),
839                }))
840            }
841            Token::Drop => {
842                self.advance();
843                match self.advance() {
844                    Token::Ident(s) if s.eq_ignore_ascii_case("constraint") => {}
845                    other => {
846                        return Err(self.err(alloc::format!(
847                            "expected CONSTRAINT after DROP in ALTER TABLE, got {other:?}"
848                        )));
849                    }
850                }
851                let cname = self.expect_ident_like()?;
852                Ok(Statement::AlterTable(crate::ast::AlterTableStatement {
853                    name: table_name,
854                    target: crate::ast::AlterTableTarget::DropForeignKey(cname),
855                }))
856            }
857            other => Err(self.err(alloc::format!(
858                "expected SET / ADD / DROP in ALTER TABLE, got {other:?}"
859            ))),
860        }
861    }
862
863    /// Consume a bare ident if its lowercase matches `kw`, else err.
864    fn expect_keyword_ident(&mut self, kw: &str) -> Result<(), ParseError> {
865        match self.advance() {
866            Token::Ident(s) | Token::QuotedIdent(s) if s.eq_ignore_ascii_case(kw) => Ok(()),
867            other => Err(ParseError {
868                message: format!("expected {kw:?}, got {other:?}"),
869                token_pos: self.pos.saturating_sub(1),
870            }),
871        }
872    }
873
874    /// Accept either a quoted identifier (`"foo"`) or a quoted string
875    /// literal (`'foo'`) — same shape used by CREATE USER for the
876    /// username slot.
877    fn expect_ident_or_string(&mut self) -> Result<String, ParseError> {
878        match self.advance() {
879            Token::Ident(s) | Token::QuotedIdent(s) | Token::String(s) => Ok(s),
880            other => Err(ParseError {
881                message: format!("expected identifier or string, got {other:?}"),
882                token_pos: self.pos.saturating_sub(1),
883            }),
884        }
885    }
886
887    fn expect_string_literal(&mut self) -> Result<String, ParseError> {
888        match self.advance() {
889            Token::String(s) => Ok(s),
890            other => Err(ParseError {
891                message: format!("expected quoted string, got {other:?}"),
892                token_pos: self.pos.saturating_sub(1),
893            }),
894        }
895    }
896
897    fn parse_select_stmt(&mut self) -> Result<Statement, ParseError> {
898        // Caller dispatches on Token::Select; the inner helper handles
899        // the rest. ORDER BY / LIMIT bind at this top level; UNION peers
900        // get a fresh bare-select parse and may not have their own ORDER
901        // BY / LIMIT.
902        let mut head = self.parse_bare_select()?;
903        while matches!(self.peek(), Token::Union) {
904            self.advance();
905            let kind = if matches!(self.peek(), Token::All) {
906                self.advance();
907                UnionKind::All
908            } else {
909                UnionKind::Distinct
910            };
911            let peer = self.parse_bare_select()?;
912            head.unions.push((kind, peer));
913        }
914        head.order_by = if matches!(self.peek(), Token::Order) {
915            self.advance();
916            if !matches!(self.peek(), Token::By) {
917                return Err(self.err(format!("expected BY after ORDER, got {:?}", self.peek())));
918            }
919            self.advance();
920            // v6.4.0 — multi-key ORDER BY. Loop over comma-separated
921            // `<expr> [ASC|DESC]` items.
922            let mut keys = Vec::new();
923            loop {
924                let expr = self.parse_expr(0)?;
925                let desc = if matches!(self.peek(), Token::Desc) {
926                    self.advance();
927                    true
928                } else if matches!(self.peek(), Token::Asc) {
929                    self.advance();
930                    false
931                } else {
932                    false
933                };
934                keys.push(OrderBy { expr, desc });
935                if matches!(self.peek(), Token::Comma) {
936                    self.advance();
937                } else {
938                    break;
939                }
940            }
941            keys
942        } else {
943            Vec::new()
944        };
945        head.limit = if matches!(self.peek(), Token::Limit) {
946            self.advance();
947            let n = self.expect_u32_literal("LIMIT")?;
948            Some(n)
949        } else {
950            None
951        };
952        head.offset = if matches!(self.peek(), Token::Offset) {
953            self.advance();
954            let n = self.expect_u32_literal("OFFSET")?;
955            Some(n)
956        } else {
957            None
958        };
959        Ok(Statement::Select(head))
960    }
961
962    fn expect_u32_literal(&mut self, label: &str) -> Result<u32, ParseError> {
963        match self.advance() {
964            Token::Integer(n) if n >= 0 => u32::try_from(n).map_err(|_| ParseError {
965                message: format!("{label} value too large: {n}"),
966                token_pos: self.pos.saturating_sub(1),
967            }),
968            other => Err(ParseError {
969                message: format!("expected non-negative integer after {label}, got {other:?}"),
970                token_pos: self.pos.saturating_sub(1),
971            }),
972        }
973    }
974
975    /// Parse one SELECT block without ORDER BY / LIMIT / UNION chaining —
976    /// just `[DISTINCT] items [FROM] [WHERE] [GROUP BY]`. Returned with
977    /// `unions` empty and `order_by` / `limit` `None`; the top-level
978    /// `parse_select_stmt` is responsible for filling those in.
979    fn parse_bare_select(&mut self) -> Result<SelectStatement, ParseError> {
980        if !matches!(self.peek(), Token::Select) {
981            return Err(self.err(format!(
982                "expected SELECT to start a query block, got {:?}",
983                self.peek()
984            )));
985        }
986        self.advance();
987        let distinct = if matches!(self.peek(), Token::Distinct) {
988            self.advance();
989            true
990        } else {
991            false
992        };
993        let items = self.parse_select_list()?;
994        let from = if matches!(self.peek(), Token::From) {
995            self.advance();
996            Some(self.parse_from_clause()?)
997        } else {
998            None
999        };
1000        let where_ = if matches!(self.peek(), Token::Where) {
1001            self.advance();
1002            Some(self.parse_expr(0)?)
1003        } else {
1004            None
1005        };
1006        let mut group_by_all = false;
1007        let group_by = if matches!(self.peek(), Token::Group) {
1008            self.advance();
1009            if !matches!(self.peek(), Token::By) {
1010                return Err(self.err(format!("expected BY after GROUP, got {:?}", self.peek())));
1011            }
1012            self.advance();
1013            // v6.4.1 — `GROUP BY ALL` shortcut. Planner expands to
1014            // every non-aggregate SELECT-list item later.
1015            if matches!(self.peek(), Token::All) {
1016                self.advance();
1017                group_by_all = true;
1018                None
1019            } else {
1020                let mut groups = Vec::new();
1021                loop {
1022                    groups.push(self.parse_expr(0)?);
1023                    if matches!(self.peek(), Token::Comma) {
1024                        self.advance();
1025                    } else {
1026                        break;
1027                    }
1028                }
1029                Some(groups)
1030            }
1031        } else {
1032            None
1033        };
1034        let having = if matches!(self.peek(), Token::Having) {
1035            self.advance();
1036            Some(self.parse_expr(0)?)
1037        } else {
1038            None
1039        };
1040        Ok(SelectStatement {
1041            ctes: Vec::new(),
1042            distinct,
1043            items,
1044            from,
1045            where_,
1046            group_by,
1047            group_by_all,
1048            having,
1049            unions: Vec::new(),
1050            order_by: Vec::new(),
1051            limit: None,
1052            offset: None,
1053        })
1054    }
1055
1056    fn parse_create_table_stmt_after_create(&mut self) -> Result<Statement, ParseError> {
1057        // Caller already consumed CREATE; we're sitting on TABLE.
1058        debug_assert!(matches!(self.peek(), Token::Table));
1059        self.advance();
1060        let if_not_exists = self.consume_if_not_exists();
1061        let name = self.expect_ident_like()?;
1062        if !matches!(self.peek(), Token::LParen) {
1063            return Err(self.err(format!(
1064                "expected '(' after table name, got {:?}",
1065                self.peek()
1066            )));
1067        }
1068        self.advance();
1069        let mut columns = Vec::new();
1070        let mut foreign_keys: Vec<ForeignKeyConstraint> = Vec::new();
1071        let mut table_constraints: Vec<crate::ast::TableConstraint> = Vec::new();
1072        loop {
1073            // v7.6.0 / v7.9.18 — distinguish table-level constraint
1074            // clauses from column definitions. Constraints start
1075            // with `CONSTRAINT <name> …`, `FOREIGN KEY (…)`,
1076            // `PRIMARY KEY (…)`, or `UNIQUE (…)`. Anything else is
1077            // a column.
1078            if self.peek_table_level_pk_start() {
1079                table_constraints.push(self.parse_table_level_primary_key()?);
1080            } else if self.peek_table_level_unique_start() {
1081                table_constraints.push(self.parse_table_level_unique()?);
1082            } else if self.peek_constraint_or_fk_start() {
1083                foreign_keys.push(self.parse_table_level_fk()?);
1084            } else {
1085                let (col, col_level_fk) = self.parse_column_def_with_fk()?;
1086                columns.push(col);
1087                if let Some(fk) = col_level_fk {
1088                    foreign_keys.push(fk);
1089                }
1090            }
1091            match self.peek() {
1092                Token::Comma => {
1093                    self.advance();
1094                }
1095                Token::RParen => {
1096                    self.advance();
1097                    break;
1098                }
1099                other => {
1100                    return Err(
1101                        self.err(format!("expected ',' or ')' in column list, got {other:?}"))
1102                    );
1103                }
1104            }
1105        }
1106        if columns.is_empty() {
1107            return Err(self.err("CREATE TABLE requires at least one column".into()));
1108        }
1109        Ok(Statement::CreateTable(CreateTableStatement {
1110            name,
1111            columns,
1112            if_not_exists,
1113            foreign_keys,
1114            table_constraints,
1115        }))
1116    }
1117
1118    /// v7.9.18 — true when the next tokens are `PRIMARY KEY (…)`.
1119    /// PRIMARY and KEY are bare idents; we look-ahead 2 to be
1120    /// sure (otherwise a column literally named `primary` would
1121    /// be mistaken).
1122    fn peek_table_level_pk_start(&self) -> bool {
1123        let cur = self.peek();
1124        let nxt = self.tokens.get(self.pos + 1);
1125        let nxt2 = self.tokens.get(self.pos + 2);
1126        let is_primary = matches!(cur, Token::Ident(s) if s.eq_ignore_ascii_case("primary"));
1127        let is_key = matches!(nxt, Some(Token::Ident(s)) if s.eq_ignore_ascii_case("key"));
1128        let is_lparen = matches!(nxt2, Some(Token::LParen));
1129        is_primary && is_key && is_lparen
1130    }
1131
1132    /// v7.9.18 — true when the next tokens are `UNIQUE (…)`.
1133    fn peek_table_level_unique_start(&self) -> bool {
1134        let cur = self.peek();
1135        let nxt = self.tokens.get(self.pos + 1);
1136        let is_unique = matches!(cur, Token::Ident(s) if s.eq_ignore_ascii_case("unique"));
1137        let is_lparen = matches!(nxt, Some(Token::LParen));
1138        is_unique && is_lparen
1139    }
1140
1141    fn parse_table_level_primary_key(
1142        &mut self,
1143    ) -> Result<crate::ast::TableConstraint, ParseError> {
1144        self.advance(); // PRIMARY
1145        self.advance(); // KEY
1146        let columns = self.parse_paren_ident_list("PRIMARY KEY")?;
1147        Ok(crate::ast::TableConstraint::PrimaryKey {
1148            name: None,
1149            columns,
1150        })
1151    }
1152
1153    fn parse_table_level_unique(
1154        &mut self,
1155    ) -> Result<crate::ast::TableConstraint, ParseError> {
1156        self.advance(); // UNIQUE
1157        let columns = self.parse_paren_ident_list("UNIQUE")?;
1158        Ok(crate::ast::TableConstraint::Unique {
1159            name: None,
1160            columns,
1161        })
1162    }
1163
1164    fn parse_paren_ident_list(
1165        &mut self,
1166        ctx: &str,
1167    ) -> Result<Vec<String>, ParseError> {
1168        if !matches!(self.peek(), Token::LParen) {
1169            return Err(self.err(alloc::format!(
1170                "expected '(' after {ctx}, got {:?}",
1171                self.peek()
1172            )));
1173        }
1174        self.advance();
1175        let mut out = Vec::new();
1176        loop {
1177            out.push(self.expect_ident_like()?);
1178            match self.peek() {
1179                Token::Comma => {
1180                    self.advance();
1181                }
1182                Token::RParen => {
1183                    self.advance();
1184                    break;
1185                }
1186                other => {
1187                    return Err(self.err(alloc::format!(
1188                        "expected ',' or ')' in {ctx} list, got {other:?}"
1189                    )));
1190                }
1191            }
1192        }
1193        if out.is_empty() {
1194            return Err(self.err(alloc::format!("{ctx} requires at least one column")));
1195        }
1196        Ok(out)
1197    }
1198
1199    /// v7.6.0 — true when the next tokens are `CONSTRAINT <name>
1200    /// FOREIGN KEY` or bare `FOREIGN KEY`. Both introduce a
1201    /// table-level FK; a column def never starts with either keyword
1202    /// (column names are not in this reserved set).
1203    fn peek_constraint_or_fk_start(&self) -> bool {
1204        let is_constraint_kw = matches!(
1205            self.peek(),
1206            Token::Ident(s) if s.eq_ignore_ascii_case("constraint")
1207        );
1208        let is_foreign_kw = matches!(
1209            self.peek(),
1210            Token::Ident(s) if s.eq_ignore_ascii_case("foreign")
1211        );
1212        is_constraint_kw || is_foreign_kw
1213    }
1214
1215    /// v7.6.0 — parse a table-level FK clause:
1216    /// `[CONSTRAINT <name>] FOREIGN KEY (<col>[,<col>]*) REFERENCES
1217    /// <tbl> [(<pcol>[,<pcol>]*)] [ON DELETE <action>] [ON UPDATE <action>]`.
1218    fn parse_table_level_fk(&mut self) -> Result<ForeignKeyConstraint, ParseError> {
1219        let mut name: Option<String> = None;
1220        if matches!(self.peek(), Token::Ident(s) if s.eq_ignore_ascii_case("constraint")) {
1221            self.advance();
1222            name = Some(self.expect_ident_like()?);
1223        }
1224        // `FOREIGN`
1225        match self.advance() {
1226            Token::Ident(s) if s.eq_ignore_ascii_case("foreign") => {}
1227            other => return Err(self.err(format!("expected FOREIGN, got {other:?}"))),
1228        }
1229        // `KEY`
1230        match self.advance() {
1231            Token::Ident(s) if s.eq_ignore_ascii_case("key") => {}
1232            other => return Err(self.err(format!("expected KEY after FOREIGN, got {other:?}"))),
1233        }
1234        // `(col, col, ...)`
1235        if !matches!(self.peek(), Token::LParen) {
1236            return Err(self.err(format!("expected '(' after FOREIGN KEY, got {:?}", self.peek())));
1237        }
1238        self.advance();
1239        let mut columns = Vec::new();
1240        loop {
1241            columns.push(self.expect_ident_like()?);
1242            match self.peek() {
1243                Token::Comma => {
1244                    self.advance();
1245                }
1246                Token::RParen => {
1247                    self.advance();
1248                    break;
1249                }
1250                other => return Err(self.err(format!("expected ',' or ')' in FK column list, got {other:?}"))),
1251            }
1252        }
1253        if columns.is_empty() {
1254            return Err(self.err("FOREIGN KEY requires at least one column".into()));
1255        }
1256        let (parent_table, parent_columns, on_delete, on_update) =
1257            self.parse_references_tail(columns.len())?;
1258        Ok(ForeignKeyConstraint {
1259            name,
1260            columns,
1261            parent_table,
1262            parent_columns,
1263            on_delete,
1264            on_update,
1265        })
1266    }
1267
1268    /// v7.6.0 — parse the tail `REFERENCES <tbl> [(<pcol>...)] [ON
1269    /// DELETE <action>] [ON UPDATE <action>]`. `expected_arity` is
1270    /// the local column count, used to default the parent column
1271    /// list when omitted (SQL spec: parent's PK is implied).
1272    fn parse_references_tail(
1273        &mut self,
1274        expected_arity: usize,
1275    ) -> Result<(String, Vec<String>, FkAction, FkAction), ParseError> {
1276        match self.advance() {
1277            Token::Ident(s) if s.eq_ignore_ascii_case("references") => {}
1278            other => return Err(self.err(format!("expected REFERENCES, got {other:?}"))),
1279        }
1280        let parent_table = self.expect_ident_like()?;
1281        let mut parent_columns: Vec<String> = Vec::new();
1282        if matches!(self.peek(), Token::LParen) {
1283            self.advance();
1284            loop {
1285                parent_columns.push(self.expect_ident_like()?);
1286                match self.peek() {
1287                    Token::Comma => {
1288                        self.advance();
1289                    }
1290                    Token::RParen => {
1291                        self.advance();
1292                        break;
1293                    }
1294                    other => return Err(self.err(format!("expected ',' or ')' in REFERENCES column list, got {other:?}"))),
1295                }
1296            }
1297        }
1298        if !parent_columns.is_empty() && parent_columns.len() != expected_arity {
1299            return Err(self.err(format!(
1300                "FK arity mismatch: {} local column(s) vs {} parent column(s)",
1301                expected_arity,
1302                parent_columns.len()
1303            )));
1304        }
1305        // v7.6.7 — accept and reject `[NOT] DEFERRABLE [INITIALLY
1306        // {DEFERRED | IMMEDIATE}]` so existing PG dumps don't fail
1307        // at parse time. SPG's single-writer model has no deferred
1308        // constraint window, so we surface this as a clean
1309        // unsupported-feature error rather than a syntax error.
1310        loop {
1311            if matches!(self.peek(), Token::Ident(s) if s.eq_ignore_ascii_case("deferrable")) {
1312                return Err(self.err(
1313                    "DEFERRABLE constraints are not supported (SPG is single-writer; \
1314                     constraints are always evaluated immediately at commit)"
1315                        .into(),
1316                ));
1317            }
1318            if matches!(self.peek(), Token::Not) {
1319                let look = self.tokens.get(self.pos + 1);
1320                if matches!(look, Some(Token::Ident(s)) if s.eq_ignore_ascii_case("deferrable")) {
1321                    // NOT DEFERRABLE — accept as the SPG default
1322                    // and consume both tokens silently.
1323                    self.advance();
1324                    self.advance();
1325                    // Optional `INITIALLY IMMEDIATE` clause.
1326                    if matches!(self.peek(), Token::Ident(s) if s.eq_ignore_ascii_case("initially"))
1327                    {
1328                        self.advance();
1329                        match self.advance() {
1330                            Token::Ident(s) if s.eq_ignore_ascii_case("immediate") => {}
1331                            other => {
1332                                return Err(self.err(format!(
1333                                    "expected IMMEDIATE after INITIALLY for NOT DEFERRABLE, \
1334                                     got {other:?}"
1335                                )));
1336                            }
1337                        }
1338                    }
1339                    continue;
1340                }
1341                break;
1342            }
1343            break;
1344        }
1345        // Optional `ON DELETE <action>` and `ON UPDATE <action>` in
1346        // either order, each at most once.
1347        let mut on_delete = FkAction::Restrict;
1348        let mut on_update = FkAction::Restrict;
1349        let mut seen_on_delete = false;
1350        let mut seen_on_update = false;
1351        loop {
1352            if !matches!(self.peek(), Token::On) {
1353                break;
1354            }
1355            self.advance();
1356            let which = self.advance();
1357            let action = self.parse_fk_action()?;
1358            match which {
1359                Token::Ident(ref s) if s.eq_ignore_ascii_case("delete") => {
1360                    if seen_on_delete {
1361                        return Err(self.err("ON DELETE specified twice".into()));
1362                    }
1363                    seen_on_delete = true;
1364                    on_delete = action;
1365                }
1366                Token::Ident(ref s) if s.eq_ignore_ascii_case("update") => {
1367                    if seen_on_update {
1368                        return Err(self.err("ON UPDATE specified twice".into()));
1369                    }
1370                    seen_on_update = true;
1371                    on_update = action;
1372                }
1373                other => {
1374                    return Err(self.err(format!(
1375                        "expected DELETE or UPDATE after ON, got {other:?}"
1376                    )));
1377                }
1378            }
1379        }
1380        Ok((parent_table, parent_columns, on_delete, on_update))
1381    }
1382
1383    /// v7.6.0 — parse `CASCADE | RESTRICT | SET NULL | SET DEFAULT |
1384    /// NO ACTION`.
1385    fn parse_fk_action(&mut self) -> Result<FkAction, ParseError> {
1386        match self.advance() {
1387            Token::Ident(s) if s.eq_ignore_ascii_case("cascade") => Ok(FkAction::Cascade),
1388            Token::Ident(s) if s.eq_ignore_ascii_case("restrict") => Ok(FkAction::Restrict),
1389            Token::Ident(s) if s.eq_ignore_ascii_case("set") => {
1390                match self.advance() {
1391                    Token::Null => Ok(FkAction::SetNull),
1392                    Token::Default => Ok(FkAction::SetDefault),
1393                    other => Err(self.err(format!(
1394                        "expected NULL or DEFAULT after SET in FK action, got {other:?}"
1395                    ))),
1396                }
1397            }
1398            Token::Ident(s) if s.eq_ignore_ascii_case("no") => {
1399                match self.advance() {
1400                    Token::Ident(s) if s.eq_ignore_ascii_case("action") => Ok(FkAction::NoAction),
1401                    other => Err(self.err(format!(
1402                        "expected ACTION after NO in FK action, got {other:?}"
1403                    ))),
1404                }
1405            }
1406            other => Err(self.err(format!(
1407                "expected CASCADE | RESTRICT | SET NULL | SET DEFAULT | NO ACTION, got {other:?}"
1408            ))),
1409        }
1410    }
1411
1412    /// Recognise the optional `IF NOT EXISTS` prefix shared by `CREATE
1413    /// TABLE` and `CREATE INDEX`. Returns `true` if consumed.
1414    fn consume_if_not_exists(&mut self) -> bool {
1415        // `IF` arrives as a bare Ident (we don't reserve it because it
1416        // also appears mid-expression in PG, though we don't support
1417        // those forms yet).
1418        let looks_like_if = matches!(self.peek(), Token::Ident(s) if s.eq_ignore_ascii_case("if"));
1419        if !looks_like_if {
1420            return false;
1421        }
1422        // Peek one ahead before committing: only consume IF when it's
1423        // actually `IF NOT EXISTS`.
1424        if !matches!(self.tokens.get(self.pos + 1), Some(Token::Not)) {
1425            return false;
1426        }
1427        if !matches!(
1428            self.tokens.get(self.pos + 2),
1429            Some(Token::Ident(s)) if s.eq_ignore_ascii_case("exists")
1430        ) {
1431            return false;
1432        }
1433        self.advance(); // IF
1434        self.advance(); // NOT
1435        self.advance(); // EXISTS
1436        true
1437    }
1438
1439    /// v7.9.14 — consume `ASC | DESC | NULLS FIRST | NULLS LAST`
1440    /// qualifiers after an index column ref. ASC / DESC are
1441    /// reserved tokens; NULLS / FIRST / LAST are bare idents.
1442    /// We accept and discard them since single-column BTree
1443    /// stores rows in natural key order today.
1444    fn consume_optional_index_column_qualifiers(&mut self) {
1445        loop {
1446            match self.peek() {
1447                Token::Asc | Token::Desc => {
1448                    self.advance();
1449                }
1450                Token::Ident(s) if s.eq_ignore_ascii_case("nulls") => {
1451                    let look = self.tokens.get(self.pos + 1);
1452                    if matches!(
1453                        look,
1454                        Some(Token::Ident(k)) if k.eq_ignore_ascii_case("first")
1455                            || k.eq_ignore_ascii_case("last")
1456                    ) {
1457                        self.advance();
1458                        self.advance();
1459                    } else {
1460                        break;
1461                    }
1462                }
1463                _ => break,
1464            }
1465        }
1466    }
1467
1468    fn parse_create_index_stmt_after_create(&mut self) -> Result<Statement, ParseError> {
1469        // Caller consumed CREATE; we're on INDEX.
1470        debug_assert!(matches!(self.peek(), Token::Index));
1471        self.advance();
1472        let if_not_exists = self.consume_if_not_exists();
1473        let name = self.expect_ident_like()?;
1474        if !matches!(self.peek(), Token::On) {
1475            return Err(self.err(format!(
1476                "expected ON after CREATE INDEX <name>, got {:?}",
1477                self.peek()
1478            )));
1479        }
1480        self.advance();
1481        let table = self.expect_ident_like()?;
1482        // Optional `USING <method>` — only recognised method in v2.0 is
1483        // `hnsw` (a single-layer NSW graph for kNN). `USING` is the bare
1484        // ident `using` (we don't promote it to a reserved keyword
1485        // because it isn't reserved anywhere else in our SQL surface).
1486        let method = if matches!(self.peek(), Token::Ident(s) if s.eq_ignore_ascii_case("using")) {
1487            self.advance();
1488            let m = self.expect_ident_like()?;
1489            match m.to_ascii_lowercase().as_str() {
1490                "hnsw" => IndexMethod::Hnsw,
1491                "btree" => IndexMethod::BTree,
1492                "brin" => IndexMethod::Brin,
1493                other => {
1494                    return Err(self.err(alloc::format!(
1495                        "unknown index method {other:?}; supported: hnsw, btree, brin"
1496                    )));
1497                }
1498            }
1499        } else {
1500            IndexMethod::BTree
1501        };
1502        if !matches!(self.peek(), Token::LParen) {
1503            return Err(self.err(format!(
1504                "expected '(' before indexed column, got {:?}",
1505                self.peek()
1506            )));
1507        }
1508        self.advance();
1509        // v6.8.2 — accept either a bare column ident (legacy) or
1510        // an expression `fn(col, …)` for expression indexes.
1511        // Distinguish by peeking the token *after* the current
1512        // ident: `ident )` is the legacy column-only path;
1513        // anything else triggers the Pratt expression parser.
1514        // (`advance()` uses `mem::replace` to nil out the current
1515        // slot, so we can't save+rewind cleanly — peek-ahead via
1516        // direct index avoids the mutation.)
1517        let (column, expression): (String, Option<Expr>) = match self.peek().clone() {
1518            // Single column with `)` immediately after — fast path.
1519            Token::Ident(s) | Token::QuotedIdent(s)
1520                if matches!(self.tokens.get(self.pos + 1), Some(Token::RParen)) =>
1521            {
1522                self.advance();
1523                (s, None)
1524            }
1525            // v7.9.22 — single column followed by a pgvector
1526            // opclass ident: `(col vector_cosine_ops)`. mailrs G5.
1527            // SPG's HNSW currently picks its distance metric from
1528            // the query's operator (`<->` / `<#>` / `<=>`), so the
1529            // opclass is informational — accepted and discarded.
1530            // Recognised opclasses: vector_cosine_ops, vector_l2_ops,
1531            // vector_ip_ops, halfvec_*_ops, sq8_*_ops.
1532            Token::Ident(s) | Token::QuotedIdent(s)
1533                if matches!(
1534                    self.tokens.get(self.pos + 1),
1535                    Some(Token::Ident(op) | Token::QuotedIdent(op))
1536                        if is_vector_opclass_name(op)
1537                ) =>
1538            {
1539                self.advance(); // column name
1540                self.advance(); // opclass ident — drop
1541                (s, None)
1542            }
1543            Token::Ident(_) | Token::QuotedIdent(_) => {
1544                let key_expr = self.parse_expr(0)?;
1545                let primary = extract_first_column(&key_expr).ok_or_else(|| {
1546                    self.err(
1547                        "expression index key must reference at least one column".into(),
1548                    )
1549                })?;
1550                (primary, Some(key_expr))
1551            }
1552            other => {
1553                return Err(self.err(format!(
1554                    "expected column ident or expression, got {other:?}"
1555                )));
1556            }
1557        };
1558        // v7.9.14 — accept extra comma-separated columns inside
1559        // the index key parens (`CREATE INDEX … (a, b, c)`).
1560        // mailrs F2. Each extra column may carry an optional
1561        // `ASC` / `DESC` / `NULLS FIRST` / `NULLS LAST` clause
1562        // — parsed and discarded; SPG doesn't honour direction
1563        // on a BTree index today (column ordering is intrinsic
1564        // to the storage). v7.10 will widen to genuine composite
1565        // index keys.
1566        let mut extra_columns: Vec<String> = Vec::new();
1567        // The leading column may also have ASC/DESC after it.
1568        self.consume_optional_index_column_qualifiers();
1569        while matches!(self.peek(), Token::Comma) {
1570            self.advance();
1571            let extra = self.expect_ident_like()?;
1572            self.consume_optional_index_column_qualifiers();
1573            extra_columns.push(extra);
1574        }
1575        if !matches!(self.peek(), Token::RParen) {
1576            return Err(self.err(format!(
1577                "expected ')' after indexed column / expression, got {:?}",
1578                self.peek()
1579            )));
1580        }
1581        self.advance();
1582        // v6.8.0 — optional `INCLUDE (col1, col2, …)` clause for
1583        // index-only-scan annotation. Bare ident (not a reserved
1584        // keyword) so we test by case-insensitive string match.
1585        let included_columns =
1586            if matches!(self.peek(), Token::Ident(s) if s.eq_ignore_ascii_case("include")) {
1587                self.advance();
1588                if !matches!(self.peek(), Token::LParen) {
1589                    return Err(self.err(format!(
1590                        "expected '(' after INCLUDE, got {:?}",
1591                        self.peek()
1592                    )));
1593                }
1594                self.advance();
1595                let mut cols = Vec::new();
1596                loop {
1597                    cols.push(self.expect_ident_like()?);
1598                    match self.peek() {
1599                        Token::Comma => {
1600                            self.advance();
1601                        }
1602                        Token::RParen => {
1603                            self.advance();
1604                            break;
1605                        }
1606                        other => {
1607                            return Err(self.err(format!(
1608                                "expected ',' or ')' in INCLUDE list, got {other:?}"
1609                            )));
1610                        }
1611                    }
1612                }
1613                cols
1614            } else {
1615                Vec::new()
1616            };
1617        // v6.8.1 — optional `WHERE <expr>` partial-index predicate.
1618        let partial_predicate = if matches!(self.peek(), Token::Where) {
1619            self.advance();
1620            Some(self.parse_expr(0)?)
1621        } else {
1622            None
1623        };
1624        Ok(Statement::CreateIndex(CreateIndexStatement {
1625            name,
1626            table,
1627            column,
1628            method,
1629            if_not_exists,
1630            included_columns,
1631            partial_predicate,
1632            extra_columns: extra_columns.clone(),
1633            expression,
1634        }))
1635    }
1636
1637    /// v7.6.0 — wraps `parse_column_def` and consumes an optional
1638    /// column-level `REFERENCES ...` clause. The trailing FK is
1639    /// normalised into table-level shape (single-element columns +
1640    /// parent_columns) so the engine sees one uniform constraint list.
1641    fn parse_column_def_with_fk(
1642        &mut self,
1643    ) -> Result<(ColumnDef, Option<ForeignKeyConstraint>), ParseError> {
1644        let col = self.parse_column_def()?;
1645        // Inline form: `col INT REFERENCES tbl(pcol) [ON DELETE ...] [ON UPDATE ...]`.
1646        let inline_references = matches!(
1647            self.peek(),
1648            Token::Ident(s) if s.eq_ignore_ascii_case("references")
1649        );
1650        if !inline_references {
1651            return Ok((col, None));
1652        }
1653        let (parent_table, parent_columns, on_delete, on_update) =
1654            self.parse_references_tail(1)?;
1655        let fk = ForeignKeyConstraint {
1656            name: None,
1657            columns: vec![col.name.clone()],
1658            parent_table,
1659            parent_columns,
1660            on_delete,
1661            on_update,
1662        };
1663        Ok((col, Some(fk)))
1664    }
1665
1666    fn parse_column_def(&mut self) -> Result<ColumnDef, ParseError> {
1667        let name = self.expect_ident_like()?;
1668        // Type keyword arrives as a bare Ident (we did not promote type names
1669        // to keyword tokens — see lexer rationale).
1670        let ty_ident = match self.advance() {
1671            Token::Ident(s) => s,
1672            other => {
1673                return Err(ParseError {
1674                    message: format!("expected column type, got {other:?}"),
1675                    token_pos: self.pos.saturating_sub(1),
1676                });
1677            }
1678        };
1679        // v7.9.6 — PG `SERIAL` / `BIGSERIAL` shorthand for
1680        // `INT/BIGINT NOT NULL AUTO_INCREMENT`. PG also defines
1681        // SMALLSERIAL → SMALLINT; we accept that too. The implicit
1682        // NOT NULL + AUTO_INCREMENT flags get baked in after the
1683        // type tag so the rest of the constraint-loop parser sees
1684        // them as if user-supplied (rejecting duplicates).
1685        let mut implied_auto_increment = false;
1686        let mut implied_not_null = false;
1687        let ty = match ty_ident.as_str() {
1688            // PG SERIAL family. Implies NOT NULL + AUTO_INCREMENT.
1689            "smallserial" | "serial2" => {
1690                implied_auto_increment = true;
1691                implied_not_null = true;
1692                ColumnTypeName::SmallInt
1693            }
1694            "serial" | "serial4" => {
1695                implied_auto_increment = true;
1696                implied_not_null = true;
1697                ColumnTypeName::Int
1698            }
1699            "bigserial" | "serial8" => {
1700                implied_auto_increment = true;
1701                implied_not_null = true;
1702                ColumnTypeName::BigInt
1703            }
1704            // MySQL flavours we accept by aliasing to the closest SPG
1705            // type. TINYINT covers MySQL's i8 — held inside SMALLINT
1706            // since SPG doesn't have a dedicated i8. MEDIUMINT (MySQL
1707            // 24-bit) → INT. UNSIGNED modifiers are consumed below
1708            // without semantic effect.
1709            "smallint" | "tinyint" => ColumnTypeName::SmallInt,
1710            // INTEGER is MySQL's spelling for INT; MEDIUMINT widens up.
1711            "int" | "integer" | "mediumint" => ColumnTypeName::Int,
1712            "bigint" => ColumnTypeName::BigInt,
1713            // DOUBLE / REAL are 64-bit IEEE — same as our FLOAT.
1714            "float" | "double" | "real" => ColumnTypeName::Float,
1715            "text" => ColumnTypeName::Text,
1716            "bool" | "boolean" => ColumnTypeName::Bool,
1717            "varchar" => ColumnTypeName::Varchar(self.parse_paren_size("VARCHAR")?),
1718            "char" => ColumnTypeName::Char(self.parse_paren_size("CHAR")?),
1719            "vector" => {
1720                let dim = self.parse_paren_size("VECTOR")?;
1721                let encoding = self.parse_optional_vector_encoding()?;
1722                ColumnTypeName::Vector { dim, encoding }
1723            }
1724            "numeric" => {
1725                let (precision, scale) = self.parse_optional_numeric_params()?;
1726                ColumnTypeName::Numeric(precision, scale)
1727            }
1728            "date" => ColumnTypeName::Date,
1729            // MySQL's `DATETIME` is the same domain as standard
1730            // `TIMESTAMP` — accept both spellings.
1731            "timestamp" | "datetime" => ColumnTypeName::Timestamp,
1732            // v7.9.2 — `TIMESTAMPTZ` and full PG spelling
1733            // `TIMESTAMP WITH TIME ZONE`. Same storage as TIMESTAMP;
1734            // only PG-wire OID differs.
1735            "timestamptz" => ColumnTypeName::Timestamptz,
1736            // v4.9: JSON / JSONB. Stored as raw text — no parse-time
1737            // validation. We accept the JSONB spelling too because
1738            // most PG clients default to it; SPG doesn't distinguish
1739            // the two (no path-operator perf advantage to model).
1740            "json" => ColumnTypeName::Json,
1741            "jsonb" => ColumnTypeName::Jsonb,
1742            other => {
1743                return Err(ParseError {
1744                    message: format!("unsupported column type {other:?}"),
1745                    token_pos: self.pos.saturating_sub(1),
1746                });
1747            }
1748        };
1749        // MySQL's `UNSIGNED` modifier sits right after the type
1750        // keyword. SPG doesn't carry a separate unsigned variant —
1751        // accepting the keyword keeps existing schemas compatible
1752        // without changing semantics. Drop it silently.
1753        if matches!(self.peek(), Token::Ident(s) if s.eq_ignore_ascii_case("unsigned")) {
1754            self.advance();
1755        }
1756        // Column constraints: `DEFAULT <expr>`, `NOT NULL`, and the
1757        // MySQL-flavoured `AUTO_INCREMENT` may appear in any order;
1758        // each at most once.
1759        let mut default: Option<Expr> = None;
1760        let mut nullable = !implied_not_null;
1761        let mut nullability_seen = implied_not_null;
1762        let mut auto_increment = implied_auto_increment;
1763        let mut is_primary_key = false;
1764        loop {
1765            if matches!(self.peek(), Token::Default) {
1766                if default.is_some() {
1767                    return Err(self.err("DEFAULT specified twice".into()));
1768                }
1769                self.advance();
1770                default = Some(self.parse_expr(0)?);
1771                continue;
1772            }
1773            if matches!(self.peek(), Token::Not) {
1774                if nullability_seen {
1775                    return Err(self.err("NOT NULL specified twice".into()));
1776                }
1777                self.advance();
1778                if !matches!(self.peek(), Token::Null) {
1779                    return Err(self.err(format!(
1780                        "expected NULL after NOT in column def, got {:?}",
1781                        self.peek()
1782                    )));
1783                }
1784                self.advance();
1785                nullable = false;
1786                nullability_seen = true;
1787                continue;
1788            }
1789            // `AUTO_INCREMENT` or its abbreviated form `AUTOINCREMENT`
1790            // arrives as a bare Ident. Match either, case-insensitive.
1791            if let Token::Ident(s) = self.peek()
1792                && (s.eq_ignore_ascii_case("auto_increment")
1793                    || s.eq_ignore_ascii_case("autoincrement"))
1794            {
1795                if auto_increment {
1796                    return Err(self.err("AUTO_INCREMENT specified twice".into()));
1797                }
1798                self.advance();
1799                auto_increment = true;
1800                continue;
1801            }
1802            // v7.9.13 — inline `PRIMARY KEY` column constraint
1803            // (mailrs F1). Implies `NOT NULL`. The engine creates
1804            // a BTree index for the PK column at CREATE TABLE time
1805            // so FK parent-side index lookups resolve.
1806            if let Token::Ident(s) = self.peek()
1807                && s.eq_ignore_ascii_case("primary")
1808            {
1809                if is_primary_key {
1810                    return Err(self.err("PRIMARY KEY specified twice".into()));
1811                }
1812                // Peek-ahead for the required `KEY` token.
1813                let next = self.tokens.get(self.pos + 1);
1814                let next_is_key = matches!(
1815                    next,
1816                    Some(Token::Ident(k)) if k.eq_ignore_ascii_case("key")
1817                );
1818                if !next_is_key {
1819                    return Err(self.err(format!(
1820                        "expected KEY after PRIMARY in column def, got {:?}",
1821                        next
1822                    )));
1823                }
1824                self.advance(); // PRIMARY
1825                self.advance(); // KEY
1826                is_primary_key = true;
1827                if nullability_seen && nullable {
1828                    return Err(self.err(
1829                        "column declared NULL but inline PRIMARY KEY implies NOT NULL".into(),
1830                    ));
1831                }
1832                nullable = false;
1833                nullability_seen = true;
1834                continue;
1835            }
1836            break;
1837        }
1838        Ok(ColumnDef {
1839            name,
1840            ty,
1841            nullable,
1842            default,
1843            auto_increment,
1844            is_primary_key,
1845        })
1846    }
1847
1848    /// `NUMERIC` may appear without parameters, with one (precision
1849    /// only, scale=0), or with both. Returns `(precision, scale)` with
1850    /// 0 = unspecified for the bare form.
1851    fn parse_optional_numeric_params(&mut self) -> Result<(u8, u8), ParseError> {
1852        if !matches!(self.peek(), Token::LParen) {
1853            // Bare `NUMERIC` — PG treats this as "unlimited precision";
1854            // we surface it as precision=0 to mean "unconstrained" so
1855            // the engine doesn't need a separate variant.
1856            return Ok((0, 0));
1857        }
1858        self.advance();
1859        let precision = match self.advance() {
1860            Token::Integer(n) if (1..=38).contains(&n) => u8::try_from(n).expect("range-checked"),
1861            other => {
1862                return Err(ParseError {
1863                    message: format!(
1864                        "NUMERIC precision must be an integer in 1..=38, got {other:?}"
1865                    ),
1866                    token_pos: self.pos.saturating_sub(1),
1867                });
1868            }
1869        };
1870        let scale = if matches!(self.peek(), Token::Comma) {
1871            self.advance();
1872            match self.advance() {
1873                Token::Integer(n) if (0..=i64::from(precision)).contains(&n) => {
1874                    u8::try_from(n).expect("range-checked")
1875                }
1876                other => {
1877                    return Err(ParseError {
1878                        message: format!(
1879                            "NUMERIC scale must be a non-negative integer ≤ precision, got {other:?}"
1880                        ),
1881                        token_pos: self.pos.saturating_sub(1),
1882                    });
1883                }
1884            }
1885        } else {
1886            0
1887        };
1888        if !matches!(self.peek(), Token::RParen) {
1889            return Err(self.err(format!(
1890                "expected ')' to close NUMERIC params, got {:?}",
1891                self.peek()
1892            )));
1893        }
1894        self.advance();
1895        Ok((precision, scale))
1896    }
1897
1898    /// Parse `(N)` where `N` is a positive integer literal — used by the
1899    /// `VARCHAR`/`CHAR`/`VECTOR` column types. `label` is the type name
1900    /// for the error message.
1901    /// v6.0.1: parse the optional `USING <encoding>` clause that
1902    /// follows `VECTOR(N)` in a column definition. Missing clause
1903    /// → `VecEncoding::F32` (pre-v6 default). Unknown encoding
1904    /// ident → `ParseError` listing the encodings recognised today.
1905    fn parse_optional_vector_encoding(&mut self) -> Result<VecEncoding, ParseError> {
1906        if !matches!(self.peek(), Token::Ident(s) if s.eq_ignore_ascii_case("using")) {
1907            return Ok(VecEncoding::F32);
1908        }
1909        self.advance();
1910        let enc_ident = match self.advance() {
1911            Token::Ident(s) => s,
1912            other => {
1913                return Err(self.err(format!(
1914                    "expected vector encoding after USING, got {other:?}"
1915                )));
1916            }
1917        };
1918        match enc_ident.to_ascii_lowercase().as_str() {
1919            "sq8" => Ok(VecEncoding::Sq8),
1920            // v6.0.3: `HALF` (pgvector convention) selects IEEE-754
1921            // binary16 per-element storage.
1922            "half" => Ok(VecEncoding::F16),
1923            other => Err(self.err(format!(
1924                "unknown vector encoding {other:?}; supported: SQ8, HALF"
1925            ))),
1926        }
1927    }
1928
1929    fn parse_paren_size(&mut self, label: &str) -> Result<u32, ParseError> {
1930        if !matches!(self.peek(), Token::LParen) {
1931            return Err(self.err(format!("{label} type requires (N), got {:?}", self.peek())));
1932        }
1933        self.advance();
1934        let n = match self.advance() {
1935            Token::Integer(n) if n > 0 => u32::try_from(n).map_err(|_| ParseError {
1936                message: format!("{label} size too large: {n}"),
1937                token_pos: self.pos.saturating_sub(1),
1938            })?,
1939            other => {
1940                return Err(ParseError {
1941                    message: format!("expected positive integer {label} size, got {other:?}"),
1942                    token_pos: self.pos.saturating_sub(1),
1943                });
1944            }
1945        };
1946        if !matches!(self.peek(), Token::RParen) {
1947            return Err(self.err(format!(
1948                "expected ')' after {label} size, got {:?}",
1949                self.peek()
1950            )));
1951        }
1952        self.advance();
1953        Ok(n)
1954    }
1955
1956    fn parse_insert_stmt(&mut self) -> Result<Statement, ParseError> {
1957        debug_assert!(matches!(self.peek(), Token::Insert));
1958        self.advance();
1959        if !matches!(self.peek(), Token::Into) {
1960            return Err(self.err(format!("expected INTO after INSERT, got {:?}", self.peek())));
1961        }
1962        self.advance();
1963        let table = self.expect_ident_like()?;
1964        // Optional column list — `INSERT INTO t (a, b) VALUES ...`.
1965        let columns = if matches!(self.peek(), Token::LParen) {
1966            self.advance();
1967            let mut names = Vec::new();
1968            loop {
1969                names.push(self.expect_ident_like()?);
1970                match self.peek() {
1971                    Token::Comma => {
1972                        self.advance();
1973                    }
1974                    Token::RParen => {
1975                        self.advance();
1976                        break;
1977                    }
1978                    other => {
1979                        return Err(self.err(format!(
1980                            "expected ',' or ')' in INSERT column list, got {other:?}"
1981                        )));
1982                    }
1983                }
1984            }
1985            Some(names)
1986        } else {
1987            None
1988        };
1989        if !matches!(self.peek(), Token::Values) {
1990            return Err(self.err(format!(
1991                "expected VALUES after table name, got {:?}",
1992                self.peek()
1993            )));
1994        }
1995        self.advance();
1996        if !matches!(self.peek(), Token::LParen) {
1997            return Err(self.err(format!("expected '(' after VALUES, got {:?}", self.peek())));
1998        }
1999        let mut rows = Vec::new();
2000        loop {
2001            // Each iteration consumes one `(expr, expr, …)` tuple.
2002            if !matches!(self.peek(), Token::LParen) {
2003                return Err(self.err(format!(
2004                    "expected '(' for next VALUES tuple, got {:?}",
2005                    self.peek()
2006                )));
2007            }
2008            self.advance();
2009            let mut tuple = Vec::new();
2010            loop {
2011                tuple.push(self.parse_expr(0)?);
2012                match self.peek() {
2013                    Token::Comma => {
2014                        self.advance();
2015                    }
2016                    Token::RParen => {
2017                        self.advance();
2018                        break;
2019                    }
2020                    other => {
2021                        return Err(self.err(format!(
2022                            "expected ',' or ')' in VALUES tuple, got {other:?}"
2023                        )));
2024                    }
2025                }
2026            }
2027            if tuple.is_empty() {
2028                return Err(self.err("INSERT VALUES tuple requires at least one value".into()));
2029            }
2030            rows.push(tuple);
2031            // Continue with comma-separated tuples.
2032            if matches!(self.peek(), Token::Comma) {
2033                self.advance();
2034            } else {
2035                break;
2036            }
2037        }
2038        let on_conflict = self.parse_optional_on_conflict()?;
2039        let returning = self.parse_optional_returning()?;
2040        Ok(Statement::Insert(InsertStatement {
2041            table,
2042            columns,
2043            rows,
2044            on_conflict,
2045            returning,
2046        }))
2047    }
2048
2049    /// v7.9.7 — parse the optional `ON CONFLICT (cols) DO …`
2050    /// clause sitting between the INSERT body and the trailing
2051    /// RETURNING. All keywords come in as bare idents; `ON` is
2052    /// a reserved Token though.
2053    fn parse_optional_on_conflict(
2054        &mut self,
2055    ) -> Result<Option<crate::ast::OnConflictClause>, ParseError> {
2056        if !matches!(self.peek(), Token::On) {
2057            return Ok(None);
2058        }
2059        // Peek further: we want exactly "ON CONFLICT ...". If the
2060        // next ident isn't "conflict", let some other parser handle.
2061        let next_is_conflict = matches!(
2062            self.tokens.get(self.pos + 1),
2063            Some(Token::Ident(s) | Token::QuotedIdent(s)) if s.eq_ignore_ascii_case("conflict")
2064        );
2065        if !next_is_conflict {
2066            return Ok(None);
2067        }
2068        self.advance(); // ON
2069        self.advance(); // CONFLICT
2070        // Optional `(col [, col]*)` target list.
2071        let mut target_columns: Vec<String> = Vec::new();
2072        if matches!(self.peek(), Token::LParen) {
2073            self.advance();
2074            loop {
2075                target_columns.push(self.expect_ident_like()?);
2076                match self.peek() {
2077                    Token::Comma => {
2078                        self.advance();
2079                    }
2080                    Token::RParen => {
2081                        self.advance();
2082                        break;
2083                    }
2084                    other => {
2085                        return Err(self.err(alloc::format!(
2086                            "expected ',' or ')' in ON CONFLICT target list, got {other:?}"
2087                        )));
2088                    }
2089                }
2090            }
2091        }
2092        // Required `DO`.
2093        match self.advance() {
2094            Token::Ident(s) | Token::QuotedIdent(s) if s.eq_ignore_ascii_case("do") => {}
2095            other => {
2096                return Err(self.err(alloc::format!(
2097                    "expected DO after ON CONFLICT [(…)], got {other:?}"
2098                )));
2099            }
2100        }
2101        // Action: NOTHING | UPDATE SET …
2102        let action = match self.advance() {
2103            Token::Ident(s) | Token::QuotedIdent(s) if s.eq_ignore_ascii_case("nothing") => {
2104                crate::ast::OnConflictAction::Nothing
2105            }
2106            Token::Ident(s) | Token::QuotedIdent(s) if s.eq_ignore_ascii_case("update") => {
2107                self.parse_on_conflict_update_action()?
2108            }
2109            other => {
2110                return Err(self.err(alloc::format!(
2111                    "expected NOTHING or UPDATE after ON CONFLICT DO, got {other:?}"
2112                )));
2113            }
2114        };
2115        Ok(Some(crate::ast::OnConflictClause {
2116            target_columns,
2117            action,
2118        }))
2119    }
2120
2121    /// v7.9.7 — tail of `ON CONFLICT … DO UPDATE`: parse
2122    /// `SET col = expr [, …] [WHERE cond]`. Caller already
2123    /// consumed `UPDATE`.
2124    fn parse_on_conflict_update_action(
2125        &mut self,
2126    ) -> Result<crate::ast::OnConflictAction, ParseError> {
2127        // `SET`
2128        match self.advance() {
2129            Token::Ident(s) | Token::QuotedIdent(s) if s.eq_ignore_ascii_case("set") => {}
2130            other => {
2131                return Err(self.err(alloc::format!(
2132                    "expected SET after ON CONFLICT DO UPDATE, got {other:?}"
2133                )));
2134            }
2135        }
2136        let mut assignments: Vec<(String, Expr)> = Vec::new();
2137        loop {
2138            let col = self.expect_ident_like()?;
2139            if !matches!(self.peek(), Token::Eq) {
2140                return Err(self.err(alloc::format!(
2141                    "expected `=` after column in ON CONFLICT DO UPDATE SET, got {:?}",
2142                    self.peek()
2143                )));
2144            }
2145            self.advance();
2146            let value = self.parse_expr(0)?;
2147            assignments.push((col, value));
2148            if matches!(self.peek(), Token::Comma) {
2149                self.advance();
2150                continue;
2151            }
2152            break;
2153        }
2154        let where_ = if matches!(self.peek(), Token::Where) {
2155            self.advance();
2156            Some(self.parse_expr(0)?)
2157        } else {
2158            None
2159        };
2160        Ok(crate::ast::OnConflictAction::Update {
2161            assignments,
2162            where_,
2163        })
2164    }
2165
2166    fn parse_select_list(&mut self) -> Result<Vec<SelectItem>, ParseError> {
2167        let mut items = Vec::new();
2168        loop {
2169            items.push(self.parse_select_item()?);
2170            if matches!(self.peek(), Token::Comma) {
2171                self.advance();
2172            } else {
2173                break;
2174            }
2175        }
2176        Ok(items)
2177    }
2178
2179    fn parse_select_item(&mut self) -> Result<SelectItem, ParseError> {
2180        if matches!(self.peek(), Token::Star) {
2181            self.advance();
2182            return Ok(SelectItem::Wildcard);
2183        }
2184        let expr = self.parse_expr(0)?;
2185        let alias = self.parse_optional_alias();
2186        Ok(SelectItem::Expr { expr, alias })
2187    }
2188
2189    fn parse_table_ref(&mut self) -> Result<TableRef, ParseError> {
2190        let name = self.expect_ident_like()?;
2191        // v6.10.2 — optional `AS OF SEGMENT '<id>'` cold-tier
2192        // time-travel clause. Parse BEFORE the alias so the
2193        // alias can still ride at the tail (`tbl AS OF SEGMENT
2194        // '5' alias`). `AS` is a reserved keyword token, while
2195        // `OF` and `SEGMENT` are bare idents.
2196        let as_of_segment = if matches!(self.peek(), Token::As)
2197            && matches!(self.tokens.get(self.pos + 1), Some(Token::Ident(s) | Token::QuotedIdent(s)) if s.eq_ignore_ascii_case("of"))
2198        {
2199            self.advance(); // AS
2200            self.advance(); // OF
2201            let kw = match self.peek().clone() {
2202                Token::Ident(s) | Token::QuotedIdent(s) => s,
2203                other => {
2204                    return Err(self.err(format!(
2205                        "expected SEGMENT after AS OF, got {other:?}"
2206                    )));
2207                }
2208            };
2209            if !kw.eq_ignore_ascii_case("segment") {
2210                return Err(self.err(format!(
2211                    "expected SEGMENT after AS OF, got {kw:?}; v6.10.2 supports SEGMENT only"
2212                )));
2213            }
2214            self.advance();
2215            // Segment id literal — accept either a string or
2216            // integer for operator ergonomics.
2217            let id = match self.advance() {
2218                Token::String(s) => s
2219                    .parse::<u32>()
2220                    .map_err(|e| self.err(format!("AS OF SEGMENT id parse: {e}")))?,
2221                Token::Integer(n) => u32::try_from(n).map_err(|e| {
2222                    self.err(format!("AS OF SEGMENT id parse: {e}"))
2223                })?,
2224                other => {
2225                    return Err(self.err(format!(
2226                        "expected segment id literal after AS OF SEGMENT, got {other:?}"
2227                    )));
2228                }
2229            };
2230            Some(id)
2231        } else {
2232            None
2233        };
2234        let alias = self.parse_optional_alias();
2235        Ok(TableRef {
2236            name,
2237            alias,
2238            as_of_segment,
2239        })
2240    }
2241
2242    /// FROM-clause: a primary table reference plus zero-or-more joined
2243    /// peers expressed via either `, <table>` (cross-product, no ON) or
2244    /// `[INNER|LEFT [OUTER]|CROSS] JOIN <table> [ON expr]`. v1.10 keeps
2245    /// the join list flat (left-associative nested-loop semantics).
2246    fn parse_from_clause(&mut self) -> Result<FromClause, ParseError> {
2247        let primary = self.parse_table_ref()?;
2248        let mut joins = Vec::new();
2249        loop {
2250            // `, <table>` — cross-product with no ON.
2251            if matches!(self.peek(), Token::Comma) {
2252                self.advance();
2253                let table = self.parse_table_ref()?;
2254                joins.push(FromJoin {
2255                    kind: JoinKind::Cross,
2256                    table,
2257                    on: None,
2258                });
2259                continue;
2260            }
2261            // Explicit JOIN syntax. Accept INNER JOIN, LEFT [OUTER] JOIN,
2262            // CROSS JOIN, and bare JOIN (defaults to INNER).
2263            let kind =
2264                match self.peek() {
2265                    Token::Inner => {
2266                        self.advance();
2267                        if !matches!(self.peek(), Token::Join) {
2268                            return Err(self
2269                                .err(format!("expected JOIN after INNER, got {:?}", self.peek())));
2270                        }
2271                        self.advance();
2272                        JoinKind::Inner
2273                    }
2274                    Token::Left => {
2275                        self.advance();
2276                        if matches!(self.peek(), Token::Outer) {
2277                            self.advance();
2278                        }
2279                        if !matches!(self.peek(), Token::Join) {
2280                            return Err(self.err(format!(
2281                                "expected JOIN after LEFT [OUTER], got {:?}",
2282                                self.peek()
2283                            )));
2284                        }
2285                        self.advance();
2286                        JoinKind::Left
2287                    }
2288                    Token::Cross => {
2289                        self.advance();
2290                        if !matches!(self.peek(), Token::Join) {
2291                            return Err(self
2292                                .err(format!("expected JOIN after CROSS, got {:?}", self.peek())));
2293                        }
2294                        self.advance();
2295                        JoinKind::Cross
2296                    }
2297                    Token::Join => {
2298                        self.advance();
2299                        JoinKind::Inner
2300                    }
2301                    _ => break,
2302                };
2303            let table = self.parse_table_ref()?;
2304            let on = if matches!(self.peek(), Token::On) {
2305                self.advance();
2306                Some(self.parse_expr(0)?)
2307            } else if kind == JoinKind::Cross {
2308                None
2309            } else {
2310                return Err(self.err(format!(
2311                    "expected ON after {:?} JOIN, got {:?}",
2312                    kind,
2313                    self.peek()
2314                )));
2315            };
2316            joins.push(FromJoin { kind, table, on });
2317        }
2318        Ok(FromClause { primary, joins })
2319    }
2320
2321    /// Optional alias after an expression or table:
2322    /// `AS <ident>` is unambiguous; a bare `<ident>` directly after is also
2323    /// accepted (PG-style implicit alias). Returns `None` if the next token
2324    /// is not alias-shaped (e.g. comma, FROM, WHERE, semicolon, EOF, operator).
2325    fn parse_optional_alias(&mut self) -> Option<String> {
2326        if matches!(self.peek(), Token::As) {
2327            self.advance();
2328            // After AS, the next token MUST be an identifier-like — if not,
2329            // we still return None and let the caller surface the error on the
2330            // next expectation. v0.2 keeps the alias path forgiving; the
2331            // corpus tests don't exercise the malformed case.
2332            if let Token::Ident(_) | Token::QuotedIdent(_) = self.peek() {
2333                return self.expect_ident_like().ok();
2334            }
2335            return None;
2336        }
2337        if let Token::Ident(_) | Token::QuotedIdent(_) = self.peek() {
2338            return self.expect_ident_like().ok();
2339        }
2340        None
2341    }
2342
2343    /// Pratt loop. `min_prec` is the minimum binary-op precedence we'll accept.
2344    fn parse_expr(&mut self, min_prec: u8) -> Result<Expr, ParseError> {
2345        let mut lhs = self.parse_unary()?;
2346        while let Some((op, prec)) = binop_from(self.peek()) {
2347            if prec < min_prec {
2348                break;
2349            }
2350            self.advance();
2351            let rhs = self.parse_expr(prec + 1)?;
2352            lhs = Expr::Binary {
2353                lhs: Box::new(lhs),
2354                op,
2355                rhs: Box::new(rhs),
2356            };
2357        }
2358        Ok(lhs)
2359    }
2360
2361    fn parse_unary(&mut self) -> Result<Expr, ParseError> {
2362        match self.peek() {
2363            Token::Not => {
2364                self.advance();
2365                // NOT sits between AND (2) and comparisons (4) — bind everything
2366                // ≥3, which leaves AND/OR outside.
2367                let e = self.parse_expr(3)?;
2368                Ok(Expr::Unary {
2369                    op: UnOp::Not,
2370                    expr: Box::new(e),
2371                })
2372            }
2373            Token::Minus => {
2374                self.advance();
2375                // Unary minus binds tighter than `*`/`/` (now at prec 7 after
2376                // `<->` slotted into 5 and arithmetic shifted up).
2377                let e = self.parse_expr(8)?;
2378                Ok(Expr::Unary {
2379                    op: UnOp::Neg,
2380                    expr: Box::new(e),
2381                })
2382            }
2383            _ => self.parse_atom(),
2384        }
2385    }
2386
2387    fn parse_atom(&mut self) -> Result<Expr, ParseError> {
2388        let tok_pos = self.pos;
2389        match self.advance() {
2390            Token::Integer(n) => Ok(Expr::Literal(Literal::Integer(n))),
2391            Token::Float(x) => Ok(Expr::Literal(Literal::Float(x))),
2392            Token::String(s) => Ok(Expr::Literal(Literal::String(s))),
2393            Token::True => Ok(Expr::Literal(Literal::Bool(true))),
2394            Token::False => Ok(Expr::Literal(Literal::Bool(false))),
2395            Token::Null => Ok(Expr::Literal(Literal::Null)),
2396            // v6.1.1 — `$N` placeholder. The actual Value lookup
2397            // happens in the engine eval path against the prepared-
2398            // statement bind buffer.
2399            Token::Placeholder(n) => Ok(Expr::Placeholder(n)),
2400            Token::LParen => {
2401                // v4.10: `(SELECT ...)` in expression position is a
2402                // scalar subquery; otherwise it's a parenthesised
2403                // expression. Peek for SELECT keyword to dispatch.
2404                if matches!(self.peek(), Token::Select) {
2405                    let inner = self.parse_select_stmt()?;
2406                    match self.advance() {
2407                        Token::RParen => {
2408                            let Statement::Select(s) = inner else {
2409                                unreachable!("parse_select_stmt returns Select")
2410                            };
2411                            Ok(Expr::ScalarSubquery(Box::new(s)))
2412                        }
2413                        other => Err(ParseError {
2414                            message: format!("expected ')' after scalar subquery, got {other:?}"),
2415                            token_pos: self.pos.saturating_sub(1),
2416                        }),
2417                    }
2418                } else {
2419                    let e = self.parse_expr(0)?;
2420                    match self.advance() {
2421                        Token::RParen => Ok(e),
2422                        other => Err(ParseError {
2423                            message: format!("expected ')', got {other:?}"),
2424                            token_pos: self.pos.saturating_sub(1),
2425                        }),
2426                    }
2427                }
2428            }
2429            Token::LBracket => self.parse_vector_literal_body(),
2430            Token::Extract => self.parse_extract_atom(),
2431            Token::Interval => self.parse_interval_atom(),
2432            // v4.10: EXISTS / NOT EXISTS. EXISTS isn't a reserved
2433            // token; we match on the bare ident. NOT is a token
2434            // (consumed in the comparison rung), but `EXISTS (...)`
2435            // at the top of an expression starts here.
2436            Token::Ident(s) | Token::QuotedIdent(s) if s.eq_ignore_ascii_case("exists") => {
2437                self.parse_exists_atom(false)
2438            }
2439            Token::Ident(s) | Token::QuotedIdent(s) => self.finish_ident_atom(s),
2440            other => Err(ParseError {
2441                message: format!("unexpected token {other:?} in expression"),
2442                token_pos: tok_pos,
2443            }),
2444        }
2445        // After parsing the atom, fold any postfix `::vector` casts.
2446        .and_then(|atom| self.finish_postfix_casts(atom))
2447    }
2448
2449    /// Postfix operators on an atom: `::TYPE` cast and `IS [NOT] NULL`.
2450    /// Both bind tighter than any binary op.
2451    fn finish_postfix_casts(&mut self, mut expr: Expr) -> Result<Expr, ParseError> {
2452        loop {
2453            if matches!(self.peek(), Token::DoubleColon) {
2454                self.advance();
2455                let target = match self.advance() {
2456                    Token::Ident(s) => match s.as_str() {
2457                        "int" => CastTarget::Int,
2458                        "bigint" => CastTarget::BigInt,
2459                        "float" => CastTarget::Float,
2460                        "text" => CastTarget::Text,
2461                        "bool" => CastTarget::Bool,
2462                        "vector" => CastTarget::Vector,
2463                        "date" => CastTarget::Date,
2464                        "timestamp" | "datetime" => CastTarget::Timestamp,
2465                        other => {
2466                            return Err(ParseError {
2467                                message: format!("unsupported cast target `::{other}`"),
2468                                token_pos: self.pos.saturating_sub(1),
2469                            });
2470                        }
2471                    },
2472                    other => {
2473                        return Err(ParseError {
2474                            message: format!("expected type ident after `::`, got {other:?}"),
2475                            token_pos: self.pos.saturating_sub(1),
2476                        });
2477                    }
2478                };
2479                expr = Expr::Cast {
2480                    expr: Box::new(expr),
2481                    target,
2482                };
2483                continue;
2484            }
2485            if matches!(self.peek(), Token::Is) {
2486                self.advance();
2487                let negated = if matches!(self.peek(), Token::Not) {
2488                    self.advance();
2489                    true
2490                } else {
2491                    false
2492                };
2493                if !matches!(self.peek(), Token::Null) {
2494                    return Err(self.err(format!(
2495                        "expected NULL after IS{}, got {:?}",
2496                        if negated { " NOT" } else { "" },
2497                        self.peek()
2498                    )));
2499                }
2500                self.advance();
2501                expr = Expr::IsNull {
2502                    expr: Box::new(expr),
2503                    negated,
2504                };
2505                continue;
2506            }
2507            // `x [NOT] BETWEEN a AND b`, `x [NOT] IN (...)`, `x [NOT] LIKE p`.
2508            // Look one token ahead so a stray `NOT` not followed by any of
2509            // these flows through to the early return below untouched.
2510            let negated = if matches!(self.peek(), Token::Not) {
2511                let next = self.tokens.get(self.pos + 1);
2512                matches!(next, Some(Token::Between | Token::In | Token::Like))
2513            } else {
2514                false
2515            };
2516            if negated {
2517                self.advance();
2518            }
2519            if matches!(self.peek(), Token::Between) {
2520                expr = self.parse_between_tail(expr, negated)?;
2521                continue;
2522            }
2523            if matches!(self.peek(), Token::In) {
2524                expr = self.parse_in_tail(expr, negated)?;
2525                continue;
2526            }
2527            if matches!(self.peek(), Token::Like) {
2528                self.advance();
2529                // Pattern at the same precedence as other comparison RHSes —
2530                // 5 leaves AND/OR alone so `a LIKE 'x%' AND b` parses right.
2531                let pattern = self.parse_expr(5)?;
2532                expr = Expr::Like {
2533                    expr: Box::new(expr),
2534                    pattern: Box::new(pattern),
2535                    negated,
2536                };
2537                continue;
2538            }
2539            return Ok(expr);
2540        }
2541    }
2542
2543    /// `x BETWEEN low AND high`  →  `(x >= low) AND (x <= high)`, wrapped in
2544    /// `NOT` when `negated`. Bounds parse at precedence 5 so the trailing
2545    /// `AND` is not swallowed.
2546    fn parse_between_tail(&mut self, expr: Expr, negated: bool) -> Result<Expr, ParseError> {
2547        self.advance(); // BETWEEN
2548        let low = self.parse_expr(5)?;
2549        if !matches!(self.peek(), Token::And) {
2550            return Err(self.err(format!(
2551                "expected AND after BETWEEN low bound, got {:?}",
2552                self.peek()
2553            )));
2554        }
2555        self.advance();
2556        let high = self.parse_expr(5)?;
2557        let target = Box::new(expr);
2558        let combined = Expr::Binary {
2559            lhs: Box::new(Expr::Binary {
2560                lhs: target.clone(),
2561                op: BinOp::GtEq,
2562                rhs: Box::new(low),
2563            }),
2564            op: BinOp::And,
2565            rhs: Box::new(Expr::Binary {
2566                lhs: target,
2567                op: BinOp::LtEq,
2568                rhs: Box::new(high),
2569            }),
2570        };
2571        Ok(maybe_not(combined, negated))
2572    }
2573
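    // NOTE(review): this paragraph describes `parse_in_tail`, not the
    // function directly below; it is kept as a plain comment so it stays
    // out of the rustdoc for `parse_with_cte_then_select`.
    // `x IN (a, b, c)`  →  chained OR of equalities. Empty list collapses
    // to FALSE (TRUE under NOT IN), matching standard SQL semantics.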
2576    /// v4.11: parse `WITH name AS (SELECT ...) [, ...] SELECT ...`.
2577    /// Caller already consumed the leading `WITH` ident.
2578    fn parse_with_cte_then_select(&mut self) -> Result<Statement, ParseError> {
2579        // v4.22: WITH RECURSIVE — optional keyword right after WITH.
2580        // Comes through as an identifier; consume it if present and
2581        // mark every CTE in the clause as recursive (PG semantics —
2582        // the flag is per-WITH, not per-CTE).
2583        let mut recursive = false;
2584        if let Token::Ident(s) | Token::QuotedIdent(s) = self.peek()
2585            && s.eq_ignore_ascii_case("recursive")
2586        {
2587            self.advance();
2588            recursive = true;
2589        }
2590        let mut ctes = Vec::new();
2591        loop {
2592            let name = self.expect_ident_like()?;
2593            // v4.22: optional column-name list — `WITH t(a,b,c) AS ...`.
2594            // PG uses these to rename the body's output columns; we
2595            // do the same below by overriding `columns[i].name`.
2596            let column_overrides: Vec<String> = if matches!(self.peek(), Token::LParen) {
2597                self.advance();
2598                let mut names = Vec::new();
2599                loop {
2600                    names.push(self.expect_ident_like()?);
2601                    if matches!(self.peek(), Token::Comma) {
2602                        self.advance();
2603                        continue;
2604                    }
2605                    break;
2606                }
2607                if !matches!(self.peek(), Token::RParen) {
2608                    return Err(self.err(format!(
2609                        "expected ')' to close CTE column list, got {:?}",
2610                        self.peek()
2611                    )));
2612                }
2613                self.advance();
2614                names
2615            } else {
2616                Vec::new()
2617            };
2618            // AS is a reserved Token::As (used by SELECT-item / FROM
2619            // aliasing) — handle it specially rather than as a bare
2620            // ident.
2621            if !matches!(self.peek(), Token::As) {
2622                return Err(self.err(format!(
2623                    "expected AS after CTE name {name:?}, got {:?}",
2624                    self.peek()
2625                )));
2626            }
2627            self.advance();
2628            if !matches!(self.peek(), Token::LParen) {
2629                return Err(self.err(format!(
2630                    "expected '(' after AS in WITH clause, got {:?}",
2631                    self.peek()
2632                )));
2633            }
2634            self.advance();
2635            if !matches!(self.peek(), Token::Select) {
2636                return Err(self.err(format!("WITH body must be a SELECT, got {:?}", self.peek())));
2637            }
2638            let inner = self.parse_select_stmt()?;
2639            if !matches!(self.peek(), Token::RParen) {
2640                return Err(self.err(format!(
2641                    "expected ')' after CTE body, got {:?}",
2642                    self.peek()
2643                )));
2644            }
2645            self.advance();
2646            let Statement::Select(body) = inner else {
2647                unreachable!("parse_select_stmt returns Select")
2648            };
2649            ctes.push(crate::ast::Cte {
2650                name,
2651                body,
2652                recursive,
2653                column_overrides,
2654            });
2655            if matches!(self.peek(), Token::Comma) {
2656                self.advance();
2657                continue;
2658            }
2659            break;
2660        }
2661        // The body SELECT follows. Must start with SELECT.
2662        if !matches!(self.peek(), Token::Select) {
2663            return Err(self.err(format!(
2664                "expected SELECT after WITH clause, got {:?}",
2665                self.peek()
2666            )));
2667        }
2668        let body_stmt = self.parse_select_stmt()?;
2669        let Statement::Select(mut body) = body_stmt else {
2670            unreachable!()
2671        };
2672        body.ctes = ctes;
2673        Ok(Statement::Select(body))
2674    }
2675
2676    /// v4.10: parse `EXISTS (SELECT ...)`. Caller (`parse_atom`)
2677    /// already consumed the leading `EXISTS` ident via
2678    /// `self.advance()`.
2679    fn parse_exists_atom(&mut self, negated: bool) -> Result<Expr, ParseError> {
2680        if !matches!(self.peek(), Token::LParen) {
2681            return Err(self.err(format!("expected '(' after EXISTS, got {:?}", self.peek())));
2682        }
2683        self.advance();
2684        let inner = self.parse_select_stmt()?;
2685        if !matches!(self.peek(), Token::RParen) {
2686            return Err(self.err(format!(
2687                "expected ')' after EXISTS-subquery, got {:?}",
2688                self.peek()
2689            )));
2690        }
2691        self.advance();
2692        let Statement::Select(s) = inner else {
2693            unreachable!("parse_select_stmt returns Select")
2694        };
2695        Ok(Expr::Exists {
2696            subquery: Box::new(s),
2697            negated,
2698        })
2699    }
2700
2701    fn parse_in_tail(&mut self, expr: Expr, negated: bool) -> Result<Expr, ParseError> {
2702        self.advance(); // IN
2703        if !matches!(self.peek(), Token::LParen) {
2704            return Err(self.err(format!("expected '(' after IN, got {:?}", self.peek())));
2705        }
2706        self.advance();
2707        // v4.10: `IN (SELECT ...)` — subquery branch.
2708        if matches!(self.peek(), Token::Select) {
2709            let inner = self.parse_select_stmt()?;
2710            if !matches!(self.peek(), Token::RParen) {
2711                return Err(self.err(format!(
2712                    "expected ')' after IN-subquery, got {:?}",
2713                    self.peek()
2714                )));
2715            }
2716            self.advance();
2717            let Statement::Select(s) = inner else {
2718                unreachable!("parse_select_stmt always returns Statement::Select")
2719            };
2720            return Ok(Expr::InSubquery {
2721                expr: Box::new(expr),
2722                subquery: Box::new(s),
2723                negated,
2724            });
2725        }
2726        let mut elements = Vec::new();
2727        if !matches!(self.peek(), Token::RParen) {
2728            loop {
2729                elements.push(self.parse_expr(0)?);
2730                match self.peek() {
2731                    Token::Comma => {
2732                        self.advance();
2733                    }
2734                    Token::RParen => break,
2735                    other => {
2736                        return Err(
2737                            self.err(format!("expected ',' or ')' in IN list, got {other:?}"))
2738                        );
2739                    }
2740                }
2741            }
2742        }
2743        self.advance(); // ')'
2744        let target = Box::new(expr);
2745        let combined = if elements.is_empty() {
2746            Expr::Literal(Literal::Bool(false))
2747        } else {
2748            let mut iter = elements.into_iter();
2749            let first = iter.next().unwrap();
2750            let mut acc = Expr::Binary {
2751                lhs: target.clone(),
2752                op: BinOp::Eq,
2753                rhs: Box::new(first),
2754            };
2755            for elt in iter {
2756                acc = Expr::Binary {
2757                    lhs: Box::new(acc),
2758                    op: BinOp::Or,
2759                    rhs: Box::new(Expr::Binary {
2760                        lhs: target.clone(),
2761                        op: BinOp::Eq,
2762                        rhs: Box::new(elt),
2763                    }),
2764                };
2765            }
2766            acc
2767        };
2768        Ok(maybe_not(combined, negated))
2769    }
2770
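    // NOTE(review): this paragraph describes `parse_vector_literal_body`,
    // not the function directly below; it is kept as a plain comment so it
    // stays out of the rustdoc for `parse_extract_atom`.
    // Parse a pgvector array literal `[ x1, x2, ... ]`. The opening `[` is
    // already consumed by the caller. Elements must be numeric literals
    // (with optional unary `-`); any compound expression is rejected at
    // parse time so the runtime never needs to evaluate inside a vector.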
2775    /// `EXTRACT(<field> FROM <source>)`. The dispatching `parse_atom`
2776    /// has already consumed the `EXTRACT` token before calling us —
2777    /// we pick up at the opening `(`.
2778    fn parse_extract_atom(&mut self) -> Result<Expr, ParseError> {
2779        if !matches!(self.peek(), Token::LParen) {
2780            return Err(self.err(format!("expected '(' after EXTRACT, got {:?}", self.peek())));
2781        }
2782        self.advance();
2783        let field_name = self.expect_ident_like()?;
2784        let field = match field_name.to_ascii_lowercase().as_str() {
2785            "year" => ExtractField::Year,
2786            "month" => ExtractField::Month,
2787            "day" => ExtractField::Day,
2788            "hour" => ExtractField::Hour,
2789            "minute" => ExtractField::Minute,
2790            "second" => ExtractField::Second,
2791            "microsecond" | "microseconds" => ExtractField::Microsecond,
2792            other => {
2793                return Err(self.err(format!(
2794                    "unknown EXTRACT field {other:?}; \
2795                     supported: YEAR, MONTH, DAY, HOUR, MINUTE, SECOND, MICROSECOND"
2796                )));
2797            }
2798        };
2799        if !matches!(self.peek(), Token::From) {
2800            return Err(self.err(format!(
2801                "expected FROM after EXTRACT field, got {:?}",
2802                self.peek()
2803            )));
2804        }
2805        self.advance();
2806        let source = self.parse_expr(0)?;
2807        if !matches!(self.peek(), Token::RParen) {
2808            return Err(self.err(format!(
2809                "expected ')' to close EXTRACT, got {:?}",
2810                self.peek()
2811            )));
2812        }
2813        self.advance();
2814        Ok(Expr::Extract {
2815            field,
2816            source: Box::new(source),
2817        })
2818    }
2819
2820    /// `INTERVAL '<n> <unit> [<n> <unit> ...]'` — the `INTERVAL` keyword
2821    /// is already consumed; we expect a single string literal next and
2822    /// resolve it into `Literal::Interval` at parse time so the engine
2823    /// never has to re-tokenise inside the string.
2824    fn parse_interval_atom(&mut self) -> Result<Expr, ParseError> {
2825        let tok = self.advance();
2826        let Token::String(text) = tok else {
2827            return Err(self.err(format!(
2828                "expected string literal after INTERVAL, got {tok:?}"
2829            )));
2830        };
2831        let (months, micros) = parse_interval_text(&text).ok_or_else(|| ParseError {
2832            message: format!(
2833                "cannot parse INTERVAL {text:?}; \
2834                     expected `<n> <unit> [<n> <unit> ...]` with units \
2835                     microsecond[s], millisecond[s], second[s], minute[s], \
2836                     hour[s], day[s], week[s], month[s], year[s]"
2837            ),
2838            token_pos: self.pos.saturating_sub(1),
2839        })?;
2840        Ok(Expr::Literal(Literal::Interval {
2841            months,
2842            micros,
2843            text,
2844        }))
2845    }
2846
2847    fn parse_vector_literal_body(&mut self) -> Result<Expr, ParseError> {
2848        let mut elems = Vec::new();
2849        if matches!(self.peek(), Token::RBracket) {
2850            self.advance();
2851            return Ok(Expr::Literal(Literal::Vector(elems)));
2852        }
2853        loop {
2854            let e = self.parse_expr(0)?;
2855            let x = extract_numeric_literal(&e).ok_or_else(|| ParseError {
2856                message: format!("vector element must be a numeric literal, got {e:?}"),
2857                token_pos: self.pos,
2858            })?;
2859            elems.push(x);
2860            match self.peek() {
2861                Token::Comma => {
2862                    self.advance();
2863                }
2864                Token::RBracket => {
2865                    self.advance();
2866                    break;
2867                }
2868                other => {
2869                    return Err(self.err(format!("expected ',' or ']' in vector, got {other:?}")));
2870                }
2871            }
2872        }
2873        Ok(Expr::Literal(Literal::Vector(elems)))
2874    }
2875
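    // NOTE(review): the two paragraphs below do not describe the function
    // that follows. The first presumably documents `finish_ident_atom`
    // (confirm); the second documents `parse_over_clause`. They are kept as
    // plain comments so they stay out of the rustdoc for
    // `parse_null_treatment_modifier`.
    //
    // Atom that started with an identifier: could be `t.col`, `col`, or
    // `func(arg, ...)`. Detect each shape by looking at the next token.
    //
    // v4.12: parse `(PARTITION BY expr, ... ORDER BY expr [DESC]
    // [, ...])`. Caller has already consumed `OVER`. Either clause
    // is optional; an empty `()` is also legal (PG semantics).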
2881    /// v6.4.2 — consume an optional `IGNORE NULLS` / `RESPECT NULLS`
2882    /// modifier between `name(args)` and `OVER (...)`. Default is
2883    /// `Respect`. Unrecognised idents leave the stream unchanged.
2884    fn parse_null_treatment_modifier(&mut self) -> NullTreatment {
2885        let Token::Ident(s) = self.peek().clone() else {
2886            return NullTreatment::Respect;
2887        };
2888        let is_ignore = s.eq_ignore_ascii_case("ignore");
2889        let is_respect = s.eq_ignore_ascii_case("respect");
2890        if !is_ignore && !is_respect {
2891            return NullTreatment::Respect;
2892        }
2893        // Lookahead for NULLS — only consume both tokens together.
2894        // pos+1 must hold a "nulls" ident.
2895        if self.pos + 1 < self.tokens.len()
2896            && let Token::Ident(s2) = &self.tokens[self.pos + 1]
2897            && s2.eq_ignore_ascii_case("nulls")
2898        {
2899            self.advance();
2900            self.advance();
2901            return if is_ignore {
2902                NullTreatment::Ignore
2903            } else {
2904                NullTreatment::Respect
2905            };
2906        }
2907        NullTreatment::Respect
2908    }
2909
2910    /// No frame clause is supported.
2911    #[allow(clippy::type_complexity)] // (partitions, ordered-keys-with-desc) is the natural shape
2912    fn parse_over_clause(
2913        &mut self,
2914    ) -> Result<(Vec<Expr>, Vec<(Expr, bool)>, Option<WindowFrame>), ParseError> {
2915        if !matches!(self.peek(), Token::LParen) {
2916            return Err(self.err(format!("expected '(' after OVER, got {:?}", self.peek())));
2917        }
2918        self.advance();
2919        let mut partition_by = Vec::new();
2920        let mut order_by = Vec::new();
2921        // PARTITION BY ?
2922        if let Token::Ident(s) | Token::QuotedIdent(s) = self.peek()
2923            && s.eq_ignore_ascii_case("partition")
2924        {
2925            self.advance();
2926            if !matches!(self.peek(), Token::By) {
2927                return Err(self.err(format!(
2928                    "expected BY after PARTITION, got {:?}",
2929                    self.peek()
2930                )));
2931            }
2932            self.advance();
2933            loop {
2934                partition_by.push(self.parse_expr(0)?);
2935                if matches!(self.peek(), Token::Comma) {
2936                    self.advance();
2937                    continue;
2938                }
2939                break;
2940            }
2941        }
2942        // ORDER BY ?
2943        if matches!(self.peek(), Token::Order) {
2944            self.advance();
2945            if !matches!(self.peek(), Token::By) {
2946                return Err(self.err(format!("expected BY after ORDER, got {:?}", self.peek())));
2947            }
2948            self.advance();
2949            loop {
2950                let e = self.parse_expr(0)?;
2951                let desc = if matches!(self.peek(), Token::Desc) {
2952                    self.advance();
2953                    true
2954                } else if matches!(self.peek(), Token::Asc) {
2955                    self.advance();
2956                    false
2957                } else {
2958                    false
2959                };
2960                order_by.push((e, desc));
2961                if matches!(self.peek(), Token::Comma) {
2962                    self.advance();
2963                    continue;
2964                }
2965                break;
2966            }
2967        }
2968        // v4.20: optional explicit frame, `ROWS ...` / `RANGE ...`.
2969        // Both keywords come through the lexer as identifiers; match
2970        // case-insensitively.
2971        let mut frame: Option<WindowFrame> = None;
2972        if let Token::Ident(s) | Token::QuotedIdent(s) = self.peek() {
2973            let kind = if s.eq_ignore_ascii_case("rows") {
2974                Some(FrameKind::Rows)
2975            } else if s.eq_ignore_ascii_case("range") {
2976                Some(FrameKind::Range)
2977            } else {
2978                None
2979            };
2980            if let Some(kind) = kind {
2981                self.advance();
2982                frame = Some(self.parse_frame_tail(kind)?);
2983            }
2984        }
2985        if !matches!(self.peek(), Token::RParen) {
2986            return Err(self.err(format!(
2987                "expected ')' to close OVER clause, got {:?}",
2988                self.peek()
2989            )));
2990        }
2991        self.advance();
2992        Ok((partition_by, order_by, frame))
2993    }
2994
2995    /// v4.20: parse the tail of an explicit frame, given the `ROWS`
2996    /// or `RANGE` keyword was just consumed. Accepts both
2997    /// `BETWEEN <bound> AND <bound>` and the single-bound shorthand
2998    /// (`ROWS UNBOUNDED PRECEDING`, `ROWS 5 PRECEDING`, etc.) which
2999    /// PG normalises to `BETWEEN <bound> AND CURRENT ROW`.
3000    fn parse_frame_tail(&mut self, kind: FrameKind) -> Result<WindowFrame, ParseError> {
3001        if matches!(self.peek(), Token::Between) {
3002            self.advance();
3003            let start = self.parse_frame_bound()?;
3004            if !matches!(self.peek(), Token::And) {
3005                return Err(self.err(format!("expected AND in frame spec, got {:?}", self.peek())));
3006            }
3007            self.advance();
3008            let end = self.parse_frame_bound()?;
3009            Ok(WindowFrame {
3010                kind,
3011                start,
3012                end: Some(end),
3013            })
3014        } else {
3015            let start = self.parse_frame_bound()?;
3016            Ok(WindowFrame {
3017                kind,
3018                start,
3019                end: None,
3020            })
3021        }
3022    }
3023
3024    /// Parse one frame bound: `UNBOUNDED PRECEDING`, `<n> PRECEDING`,
3025    /// `CURRENT ROW`, `<n> FOLLOWING`, `UNBOUNDED FOLLOWING`.
3026    fn parse_frame_bound(&mut self) -> Result<FrameBound, ParseError> {
3027        // Number-led: "<n> PRECEDING" / "<n> FOLLOWING".
3028        if let Token::Integer(n) = *self.peek() {
3029            self.advance();
3030            let n: u64 = u64::try_from(n).map_err(|_| {
3031                self.err(format!(
3032                    "invalid frame offset {n} — expected non-negative integer"
3033                ))
3034            })?;
3035            let dir = self.expect_ident_like()?;
3036            return if dir.eq_ignore_ascii_case("preceding") {
3037                Ok(FrameBound::OffsetPreceding(n))
3038            } else if dir.eq_ignore_ascii_case("following") {
3039                Ok(FrameBound::OffsetFollowing(n))
3040            } else {
3041                Err(self.err(format!(
3042                    "expected PRECEDING or FOLLOWING after offset, got {dir:?}"
3043                )))
3044            };
3045        }
3046        let first = self.expect_ident_like()?;
3047        if first.eq_ignore_ascii_case("unbounded") {
3048            let dir = self.expect_ident_like()?;
3049            return if dir.eq_ignore_ascii_case("preceding") {
3050                Ok(FrameBound::UnboundedPreceding)
3051            } else if dir.eq_ignore_ascii_case("following") {
3052                Ok(FrameBound::UnboundedFollowing)
3053            } else {
3054                Err(self.err(format!(
3055                    "expected PRECEDING or FOLLOWING after UNBOUNDED, got {dir:?}"
3056                )))
3057            };
3058        }
3059        if first.eq_ignore_ascii_case("current") {
3060            let row = self.expect_ident_like()?;
3061            if !row.eq_ignore_ascii_case("row") {
3062                return Err(self.err(format!("expected ROW after CURRENT, got {row:?}")));
3063            }
3064            return Ok(FrameBound::CurrentRow);
3065        }
3066        Err(self.err(format!(
3067            "expected frame bound (UNBOUNDED/CURRENT/<n>), got {first:?}"
3068        )))
3069    }
3070
3071    fn finish_ident_atom(&mut self, first: String) -> Result<Expr, ParseError> {
3072        if matches!(self.peek(), Token::Dot) {
3073            self.advance();
3074            let name = self.expect_ident_like()?;
3075            return Ok(Expr::Column(ColumnName {
3076                qualifier: Some(first),
3077                name,
3078            }));
3079        }
3080        if matches!(self.peek(), Token::LParen) {
3081            self.advance();
3082            // `COUNT(*)` — special-cased here because `*` isn't a normal
3083            // expression token. Lower-case match on `first` since the lexer
3084            // folds identifiers.
3085            if first.eq_ignore_ascii_case("count") && matches!(self.peek(), Token::Star) {
3086                self.advance();
3087                if !matches!(self.peek(), Token::RParen) {
3088                    return Err(self.err(format!(
3089                        "expected ')' after COUNT(*), got {:?}",
3090                        self.peek()
3091                    )));
3092                }
3093                self.advance();
3094                // v4.12: COUNT(*) OVER (...) — same window tail.
3095                let null_treatment = self.parse_null_treatment_modifier();
3096                if let Token::Ident(s) | Token::QuotedIdent(s) = self.peek()
3097                    && s.eq_ignore_ascii_case("over")
3098                {
3099                    self.advance();
3100                    let (partition_by, order_by, frame) = self.parse_over_clause()?;
3101                    return Ok(Expr::WindowFunction {
3102                        name: "count_star".into(),
3103                        args: Vec::new(),
3104                        partition_by,
3105                        order_by,
3106                        frame,
3107                        null_treatment,
3108                    });
3109                }
3110                return Ok(Expr::FunctionCall {
3111                    name: "count_star".into(),
3112                    args: Vec::new(),
3113                });
3114            }
3115            // Function call. PG-style: zero-or-more comma-separated args.
3116            let mut args = Vec::new();
3117            if !matches!(self.peek(), Token::RParen) {
3118                loop {
3119                    args.push(self.parse_expr(0)?);
3120                    match self.peek() {
3121                        Token::Comma => {
3122                            self.advance();
3123                        }
3124                        Token::RParen => break,
3125                        other => {
3126                            return Err(self.err(format!(
3127                                "expected ',' or ')' in function args, got {other:?}"
3128                            )));
3129                        }
3130                    }
3131                }
3132            }
3133            self.advance(); // consume ')'
3134            // v4.12: window-function tail — `name(args) OVER (...)`.
3135            // Promotes the just-parsed FunctionCall into a
3136            // WindowFunction node carrying partition + order.
3137            // v6.4.2: also accepts `name(args) IGNORE NULLS OVER (...)`
3138            // / `RESPECT NULLS OVER (...)` between the closing paren
3139            // and `OVER`.
3140            let null_treatment = self.parse_null_treatment_modifier();
3141            if let Token::Ident(s) | Token::QuotedIdent(s) = self.peek()
3142                && s.eq_ignore_ascii_case("over")
3143            {
3144                self.advance();
3145                let (partition_by, order_by, frame) = self.parse_over_clause()?;
3146                return Ok(Expr::WindowFunction {
3147                    name: first,
3148                    args,
3149                    partition_by,
3150                    order_by,
3151                    frame,
3152                    null_treatment,
3153                });
3154            }
3155            return Ok(Expr::FunctionCall { name: first, args });
3156        }
3157        // v7.9.20 — SQL-standard parenless keyword expressions
3158        // (PG treats these as functions called without parens).
3159        // Resolve to a synthetic FunctionCall so the engine's
3160        // eval path reuses the existing function-call routing.
3161        // mailrs G3.
3162        let lc = first.to_ascii_lowercase();
3163        if matches!(
3164            lc.as_str(),
3165            "current_date"
3166                | "current_time"
3167                | "current_timestamp"
3168                | "localtimestamp"
3169                | "localtime"
3170        ) {
3171            return Ok(Expr::FunctionCall {
3172                name: lc,
3173                args: Vec::new(),
3174            });
3175        }
3176        Ok(Expr::Column(ColumnName {
3177            qualifier: None,
3178            name: first,
3179        }))
3180    }
3181}
3182
3183/// v6.8.2 — walk an expression tree and return the first column
3184/// reference's bare name. Used by `parse_create_index_stmt_after_create`
3185/// to derive `CreateIndexStatement.column` from an expression
3186/// key (so downstream planner code resolving a primary column
3187/// position keeps working with expression indexes). Returns
3188/// `None` when the expression has no column ref at all — caller
3189/// surfaces that as a parse error.
3190fn extract_first_column(expr: &Expr) -> Option<String> {
3191    match expr {
3192        Expr::Column(cn) => Some(cn.name.clone()),
3193        Expr::FunctionCall { args, .. } => args.iter().find_map(extract_first_column),
3194        Expr::Binary { lhs, rhs, .. } => {
3195            extract_first_column(lhs).or_else(|| extract_first_column(rhs))
3196        }
3197        Expr::Unary { expr: e, .. } => extract_first_column(e),
3198        _ => None,
3199    }
3200}
3201
3202fn maybe_not(expr: Expr, negated: bool) -> Expr {
3203    if negated {
3204        Expr::Unary {
3205            op: UnOp::Not,
3206            expr: Box::new(expr),
3207        }
3208    } else {
3209        expr
3210    }
3211}
3212
3213fn binop_from(tok: &Token) -> Option<(BinOp, u8)> {
3214    let pair = match tok {
3215        Token::Or => (BinOp::Or, 1),
3216        Token::And => (BinOp::And, 2),
3217        Token::Eq => (BinOp::Eq, 4),
3218        Token::NotEq => (BinOp::NotEq, 4),
3219        Token::Lt => (BinOp::Lt, 4),
3220        Token::LtEq => (BinOp::LtEq, 4),
3221        Token::Gt => (BinOp::Gt, 4),
3222        Token::GtEq => (BinOp::GtEq, 4),
3223        // pgvector distance ops all sit on the same rung — tighter than
3224        // comparisons (4) so `col <-> v < threshold` parses correctly.
3225        Token::L2Distance => (BinOp::L2Distance, 5),
3226        Token::InnerProduct => (BinOp::InnerProduct, 5),
3227        Token::CosineDistance => (BinOp::CosineDistance, 5),
3228        Token::Plus => (BinOp::Add, 6),
3229        Token::Minus => (BinOp::Sub, 6),
3230        // `||` sits beside `+`/`-` (matches PG conceptually — concat groups
3231        // by the same level as binary additive arithmetic).
3232        Token::Concat => (BinOp::Concat, 6),
3233        Token::Star => (BinOp::Mul, 7),
3234        Token::Slash => (BinOp::Div, 7),
3235        // v4.14: JSON path ops bind tighter than comparisons (4)
3236        // and additive (6) so `doc->'k' = 'v'` parses correctly.
3237        // Same rung as the multiplicative ops.
3238        Token::JsonGet => (BinOp::JsonGet, 7),
3239        Token::JsonGetText => (BinOp::JsonGetText, 7),
3240        Token::JsonGetPath => (BinOp::JsonGetPath, 7),
3241        Token::JsonGetPathText => (BinOp::JsonGetPathText, 7),
3242        Token::JsonContains => (BinOp::JsonContains, 7),
3243        _ => return None,
3244    };
3245    Some(pair)
3246}
3247
3248#[allow(clippy::cast_possible_truncation, clippy::cast_precision_loss)]
3249// `as f32` here is intentional: vector elements widen / narrow into f32 on
3250// purpose. i64 → f32 loses precision past 2^24, f64 → f32 loses precision
3251// past ~15 decimal digits — both are acceptable for a fixed-precision
3252// pgvector column.
3253fn extract_numeric_literal(e: &Expr) -> Option<f32> {
3254    match e {
3255        Expr::Literal(Literal::Integer(n)) => Some(*n as f32),
3256        Expr::Literal(Literal::Float(x)) => Some(*x as f32),
3257        Expr::Unary {
3258            op: UnOp::Neg,
3259            expr,
3260        } => extract_numeric_literal(expr).map(|x| -x),
3261        _ => None,
3262    }
3263}
3264
/// Convert the text inside `INTERVAL '...'` into `(months, micros)`.
///
/// The text must consist of one or more whitespace-separated `<n> <unit>`
/// pairs, where `<n>` is a (possibly negative) integer. Pairs accumulate:
/// month-based units add to `months`, everything else adds to `micros`.
///
/// Recognised units (case-insensitive, optional trailing `s`):
/// `microsecond`, `millisecond`, `second`, `minute`, `hour`, `day`, `week`,
/// `month`, `year`. `week` widens to 7 days; `year` widens to 12 months.
///
/// Returns `None` when the text is empty, a pair is incomplete, a number or
/// unit fails to parse, or any accumulation overflows.
pub fn parse_interval_text(s: &str) -> Option<(i32, i64)> {
    let tokens: Vec<&str> = s.split_whitespace().collect();
    let pairs = tokens.chunks_exact(2);
    // A dangling token means some `<n>` has no unit (or vice versa).
    if tokens.is_empty() || !pairs.remainder().is_empty() {
        return None;
    }

    let mut total_months: i32 = 0;
    let mut total_micros: i64 = 0;
    for pair in pairs {
        let count: i64 = pair[0].parse().ok()?;
        let unit_lc = pair[1].to_ascii_lowercase();
        let unit = unit_lc.strip_suffix('s').unwrap_or(&unit_lc);

        let micros_per_unit: i64 = match unit {
            "microsecond" => 1,
            "millisecond" => 1_000,
            "second" => 1_000_000,
            "minute" => 60_000_000,
            "hour" => 3_600_000_000,
            "day" => 86_400_000_000,
            "week" => 604_800_000_000,
            "month" | "year" => {
                // Calendar units go to the month counter, which is i32.
                let months_per_unit: i32 = if unit == "year" { 12 } else { 1 };
                let whole = i32::try_from(count).ok()?;
                total_months = total_months.checked_add(whole.checked_mul(months_per_unit)?)?;
                continue;
            }
            _ => return None,
        };
        total_micros = total_micros.checked_add(count.checked_mul(micros_per_unit)?)?;
    }
    Some((total_months, total_micros))
}
3306
3307#[cfg(test)]
3308mod tests {
3309    use super::*;
3310    use alloc::string::ToString;
3311
3312    fn parse(s: &str) -> Statement {
3313        parse_statement(s).expect("parse ok")
3314    }
3315
3316    fn lit_int(n: i64) -> Expr {
3317        Expr::Literal(Literal::Integer(n))
3318    }
3319
3320    fn col(name: &str) -> Expr {
3321        Expr::Column(ColumnName {
3322            qualifier: None,
3323            name: name.into(),
3324        })
3325    }
3326
3327    #[test]
3328    fn select_single_integer() {
3329        let s = parse("SELECT 1");
3330        let Statement::Select(s) = s else {
3331            panic!("expected SELECT")
3332        };
3333        assert_eq!(s.items.len(), 1);
3334        assert!(s.from.is_none());
3335        assert!(s.where_.is_none());
3336    }
3337
3338    #[test]
3339    fn select_multiple_literal_kinds() {
3340        let s = parse("SELECT 1, 'hi', NULL, TRUE, 1.5");
3341        let Statement::Select(s) = s else {
3342            panic!("expected SELECT")
3343        };
3344        assert_eq!(s.items.len(), 5);
3345    }
3346
3347    #[test]
3348    fn select_wildcard_from_table() {
3349        let s = parse("SELECT * FROM users");
3350        let Statement::Select(s) = s else {
3351            panic!("expected SELECT")
3352        };
3353        assert!(matches!(s.items[..], [SelectItem::Wildcard]));
3354        assert_eq!(s.from.as_ref().unwrap().primary.name, "users");
3355    }
3356
3357    #[test]
3358    fn select_with_table_alias() {
3359        let s = parse("SELECT * FROM users AS u");
3360        let Statement::Select(s) = s else {
3361            panic!("expected SELECT")
3362        };
3363        let t = &s.from.as_ref().unwrap().primary;
3364        assert_eq!(t.name, "users");
3365        assert_eq!(t.alias.as_deref(), Some("u"));
3366    }
3367
3368    #[test]
3369    fn select_with_where_eq() {
3370        let s = parse("SELECT a FROM t WHERE a = 1");
3371        let Statement::Select(s) = s else {
3372            panic!("expected SELECT")
3373        };
3374        let w = s.where_.unwrap();
3375        assert_eq!(
3376            w,
3377            Expr::Binary {
3378                lhs: Box::new(col("a")),
3379                op: BinOp::Eq,
3380                rhs: Box::new(lit_int(1)),
3381            }
3382        );
3383    }
3384
3385    #[test]
3386    fn arithmetic_precedence() {
3387        let s = parse("SELECT 1 + 2 * 3");
3388        let Statement::Select(s) = s else {
3389            panic!("expected SELECT")
3390        };
3391        let SelectItem::Expr { expr, .. } = &s.items[0] else {
3392            panic!("wildcard?")
3393        };
3394        assert_eq!(
3395            expr,
3396            &Expr::Binary {
3397                lhs: Box::new(lit_int(1)),
3398                op: BinOp::Add,
3399                rhs: Box::new(Expr::Binary {
3400                    lhs: Box::new(lit_int(2)),
3401                    op: BinOp::Mul,
3402                    rhs: Box::new(lit_int(3)),
3403                }),
3404            }
3405        );
3406    }
3407
3408    #[test]
3409    fn parentheses_override_precedence() {
3410        let s = parse("SELECT (1 + 2) * 3");
3411        let Statement::Select(s) = s else {
3412            panic!("expected SELECT")
3413        };
3414        let SelectItem::Expr { expr, .. } = &s.items[0] else {
3415            panic!()
3416        };
3417        assert_eq!(
3418            expr,
3419            &Expr::Binary {
3420                lhs: Box::new(Expr::Binary {
3421                    lhs: Box::new(lit_int(1)),
3422                    op: BinOp::Add,
3423                    rhs: Box::new(lit_int(2)),
3424                }),
3425                op: BinOp::Mul,
3426                rhs: Box::new(lit_int(3)),
3427            }
3428        );
3429    }
3430
3431    #[test]
3432    fn not_binds_below_comparison() {
3433        // `NOT a = 1` should parse as `NOT (a = 1)`.
3434        let s = parse("SELECT NOT a = 1 FROM t");
3435        let Statement::Select(s) = s else {
3436            panic!("expected SELECT")
3437        };
3438        let SelectItem::Expr { expr, .. } = &s.items[0] else {
3439            panic!()
3440        };
3441        assert_eq!(
3442            expr,
3443            &Expr::Unary {
3444                op: UnOp::Not,
3445                expr: Box::new(Expr::Binary {
3446                    lhs: Box::new(col("a")),
3447                    op: BinOp::Eq,
3448                    rhs: Box::new(lit_int(1)),
3449                }),
3450            }
3451        );
3452    }
3453
3454    #[test]
3455    fn unary_minus_binds_above_multiplication() {
3456        // `-a * 2` should be `(-a) * 2`.
3457        let s = parse("SELECT -a * 2 FROM t");
3458        let Statement::Select(s) = s else {
3459            panic!("expected SELECT")
3460        };
3461        let SelectItem::Expr { expr, .. } = &s.items[0] else {
3462            panic!()
3463        };
3464        assert_eq!(
3465            expr,
3466            &Expr::Binary {
3467                lhs: Box::new(Expr::Unary {
3468                    op: UnOp::Neg,
3469                    expr: Box::new(col("a")),
3470                }),
3471                op: BinOp::Mul,
3472                rhs: Box::new(lit_int(2)),
3473            }
3474        );
3475    }
3476
3477    #[test]
3478    fn qualified_column() {
3479        let s = parse("SELECT t.col FROM t");
3480        let Statement::Select(s) = s else {
3481            panic!("expected SELECT")
3482        };
3483        let SelectItem::Expr { expr, .. } = &s.items[0] else {
3484            panic!()
3485        };
3486        assert_eq!(
3487            expr,
3488            &Expr::Column(ColumnName {
3489                qualifier: Some("t".into()),
3490                name: "col".into()
3491            })
3492        );
3493    }
3494
3495    #[test]
3496    fn select_item_alias_with_as() {
3497        let s = parse("SELECT a AS y FROM t");
3498        let Statement::Select(s) = s else {
3499            panic!("expected SELECT")
3500        };
3501        let SelectItem::Expr { alias, .. } = &s.items[0] else {
3502            panic!()
3503        };
3504        assert_eq!(alias.as_deref(), Some("y"));
3505    }
3506
3507    #[test]
3508    fn trailing_semicolon_accepted() {
3509        let s = parse("SELECT 1;");
3510        let Statement::Select(s) = s else {
3511            panic!("expected SELECT")
3512        };
3513        assert_eq!(s.items.len(), 1);
3514    }
3515
3516    #[test]
3517    fn boolean_chain_with_and_or_not() {
3518        // (NOT a) OR (b AND (NOT c))
3519        let s = parse("SELECT NOT a OR b AND NOT c FROM t");
3520        let Statement::Select(s) = s else {
3521            panic!("expected SELECT")
3522        };
3523        let SelectItem::Expr { expr, .. } = &s.items[0] else {
3524            panic!()
3525        };
3526        let expected = Expr::Binary {
3527            lhs: Box::new(Expr::Unary {
3528                op: UnOp::Not,
3529                expr: Box::new(col("a")),
3530            }),
3531            op: BinOp::Or,
3532            rhs: Box::new(Expr::Binary {
3533                lhs: Box::new(col("b")),
3534                op: BinOp::And,
3535                rhs: Box::new(Expr::Unary {
3536                    op: UnOp::Not,
3537                    expr: Box::new(col("c")),
3538                }),
3539            }),
3540        };
3541        assert_eq!(expr, &expected);
3542    }
3543
3544    #[test]
3545    fn empty_input_errors() {
3546        let err = parse_statement("").unwrap_err();
3547        assert!(err.message.contains("SELECT"));
3548    }
3549
3550    #[test]
3551    fn unmatched_paren_errors() {
3552        assert!(parse_statement("SELECT (1 + 2").is_err());
3553    }
3554
3555    #[test]
3556    fn display_round_trip_simple_select() {
3557        let original = parse("SELECT a + 1 FROM t WHERE a > 0");
3558        let text = original.to_string();
3559        let again = parse_statement(&text).expect("re-parse");
3560        assert_eq!(original, again);
3561    }
3562
3563    // --- CREATE TABLE & INSERT (v0.3) ---------------------------------------
3564
3565    #[test]
3566    fn create_table_single_column() {
3567        let s = parse("CREATE TABLE foo (a INT)");
3568        let Statement::CreateTable(c) = s else {
3569            panic!("expected CreateTable")
3570        };
3571        assert_eq!(c.name, "foo");
3572        assert_eq!(c.columns.len(), 1);
3573        assert_eq!(c.columns[0].name, "a");
3574        assert_eq!(c.columns[0].ty, ColumnTypeName::Int);
3575        assert!(c.columns[0].nullable);
3576    }
3577
3578    #[test]
3579    fn create_table_multi_column_with_not_null_mix() {
3580        let s = parse("CREATE TABLE u (id INT NOT NULL, name TEXT, score FLOAT NOT NULL, ok BOOL)");
3581        let Statement::CreateTable(c) = s else {
3582            panic!()
3583        };
3584        assert_eq!(c.columns.len(), 4);
3585        assert_eq!(c.columns[0].ty, ColumnTypeName::Int);
3586        assert!(!c.columns[0].nullable);
3587        assert_eq!(c.columns[1].ty, ColumnTypeName::Text);
3588        assert!(c.columns[1].nullable);
3589        assert_eq!(c.columns[2].ty, ColumnTypeName::Float);
3590        assert!(!c.columns[2].nullable);
3591        assert_eq!(c.columns[3].ty, ColumnTypeName::Bool);
3592    }
3593
3594    #[test]
3595    fn create_table_bigint_supported() {
3596        let s = parse("CREATE TABLE accounts (id BIGINT NOT NULL)");
3597        let Statement::CreateTable(c) = s else {
3598            panic!()
3599        };
3600        assert_eq!(c.columns[0].ty, ColumnTypeName::BigInt);
3601    }
3602
3603    #[test]
3604    fn create_table_vector_default_is_f32() {
3605        let s = parse("CREATE TABLE t (v VECTOR(128))");
3606        let Statement::CreateTable(c) = s else {
3607            panic!()
3608        };
3609        assert_eq!(
3610            c.columns[0].ty,
3611            ColumnTypeName::Vector {
3612                dim: 128,
3613                encoding: VecEncoding::F32,
3614            },
3615        );
3616    }
3617
3618    #[test]
3619    fn create_table_vector_using_sq8() {
3620        // v6.0.1: `USING SQ8` selects scalar-quantised encoding.
3621        // Case-insensitive on both `USING` and the encoding name.
3622        for sql in [
3623            "CREATE TABLE t (v VECTOR(128) USING SQ8)",
3624            "CREATE TABLE t (v VECTOR(128) using sq8)",
3625        ] {
3626            let s = parse(sql);
3627            let Statement::CreateTable(c) = s else {
3628                panic!()
3629            };
3630            assert_eq!(
3631                c.columns[0].ty,
3632                ColumnTypeName::Vector {
3633                    dim: 128,
3634                    encoding: VecEncoding::Sq8,
3635                },
3636                "{sql}",
3637            );
3638        }
3639    }
3640
3641    #[test]
3642    fn create_table_vector_using_unknown_errors() {
3643        let err = parse_statement("CREATE TABLE t (v VECTOR(8) USING PQ8)").unwrap_err();
3644        assert!(
3645            err.message.contains("unknown vector encoding"),
3646            "got: {}",
3647            err.message
3648        );
3649    }
3650
3651    #[test]
3652    fn vector_using_sq8_display_roundtrips() {
3653        // The Display impl must produce text that re-parses to the
3654        // same AST. Guard for the v6.0.1 `USING SQ8` suffix.
3655        let s = parse("CREATE TABLE t (v VECTOR(64) USING SQ8)");
3656        let Statement::CreateTable(c) = s else {
3657            panic!()
3658        };
3659        assert_eq!(c.columns[0].ty.to_string(), "VECTOR(64) USING SQ8");
3660    }
3661
3662    #[test]
3663    fn parser_recognises_placeholders() {
3664        use crate::ast::{Expr, SelectItem, Statement};
3665        // $N in expression position parses as Expr::Placeholder(N).
3666        let s = parse("SELECT $1, $2 + 1 FROM t WHERE x = $3");
3667        let Statement::Select(sel) = s else { panic!() };
3668        assert!(matches!(
3669            sel.items[0],
3670            SelectItem::Expr {
3671                expr: Expr::Placeholder(1),
3672                alias: None
3673            }
3674        ));
3675        // $2 + 1
3676        let SelectItem::Expr {
3677            expr: Expr::Binary { lhs, rhs, .. },
3678            ..
3679        } = &sel.items[1]
3680        else {
3681            panic!()
3682        };
3683        assert!(matches!(**lhs, Expr::Placeholder(2)));
3684        assert!(matches!(**rhs, Expr::Literal(Literal::Integer(1))));
3685        // WHERE x = $3
3686        let Some(Expr::Binary { rhs, .. }) = sel.where_.as_ref() else {
3687            panic!()
3688        };
3689        assert!(matches!(**rhs, Expr::Placeholder(3)));
3690    }
3691
3692    #[test]
3693    fn parser_rejects_dollar_zero() {
3694        // $0 is not valid in PG; the lexer rejects it.
3695        assert!(parse_statement("SELECT $0").is_err());
3696    }
3697
3698    #[test]
3699    fn placeholder_display_roundtrips() {
3700        // The Display impl must produce text that re-lexes to the
3701        // same Placeholder token.
3702        let s = parse("SELECT $42 FROM t");
3703        let printed = s.to_string();
3704        assert!(printed.contains("$42"));
3705        let again = parse(&printed);
3706        assert_eq!(s, again);
3707    }
3708
3709    #[test]
3710    fn alter_index_rebuild_bare() {
3711        use crate::ast::{AlterIndexTarget, Statement};
3712        let s = parse("ALTER INDEX my_idx REBUILD");
3713        let Statement::AlterIndex(a) = s else {
3714            panic!("expected AlterIndex, got {s:?}")
3715        };
3716        assert_eq!(a.name, "my_idx");
3717        assert_eq!(a.target, AlterIndexTarget::Rebuild { encoding: None });
3718    }
3719
#[test]
fn alter_index_rebuild_with_encoding() {
    use crate::ast::{AlterIndexTarget, Statement};
    // Each accepted spelling of the `encoding` option, paired with the
    // `VecEncoding` it has to produce. Note the mixed letter case and
    // that `HALF` maps to `F16`.
    let cases = [
        (
            "ALTER INDEX my_idx REBUILD WITH (encoding = F32)",
            VecEncoding::F32,
        ),
        (
            "ALTER INDEX my_idx REBUILD WITH (encoding = sq8)",
            VecEncoding::Sq8,
        ),
        (
            "ALTER INDEX my_idx REBUILD WITH (encoding = HALF)",
            VecEncoding::F16,
        ),
    ];
    for (sql, want) in cases {
        let alter = match parse(sql) {
            Statement::AlterIndex(inner) => inner,
            _ => panic!("{sql}: expected AlterIndex"),
        };
        assert_eq!(alter.name, "my_idx");
        let expected = AlterIndexTarget::Rebuild {
            encoding: Some(want),
        };
        assert_eq!(alter.target, expected, "{sql}");
    }
}
3751
#[test]
fn alter_index_rebuild_unknown_encoding_errors() {
    // `PQ8` is not a recognised encoding name; the error text has to
    // say so.
    let sql = "ALTER INDEX my_idx REBUILD WITH (encoding = PQ8)";
    let message = parse_statement(sql).unwrap_err().message;
    assert!(
        message.contains("unknown vector encoding"),
        "got: {}",
        message
    );
}
3761
#[test]
fn alter_index_rebuild_display_roundtrips() {
    // (input SQL, text Display is expected to print for it).
    let cases = [
        ("ALTER INDEX my_idx REBUILD", "ALTER INDEX my_idx REBUILD"),
        (
            "ALTER INDEX my_idx REBUILD WITH (encoding = SQ8)",
            "ALTER INDEX my_idx REBUILD WITH (encoding = SQ8)",
        ),
        (
            "ALTER INDEX my_idx REBUILD WITH (encoding = HALF)",
            "ALTER INDEX my_idx REBUILD WITH (encoding = HALF)",
        ),
    ];
    for (input, want) in cases {
        let printed = parse(input).to_string();
        assert_eq!(printed, want);
    }
}
3779
#[test]
fn create_table_unknown_type_errors() {
    // JSON became a real column type in v4.9, so this test uses `xml`
    // as the unsupported type (XML never landed and isn't planned).
    let failure = parse_statement("CREATE TABLE x (a xml)").unwrap_err();
    let mentions_type = failure.message.contains("unsupported column type");
    assert!(mentions_type);
}
3787
#[test]
fn create_table_missing_table_keyword_errors() {
    // `CREATE` followed directly by a name (no TABLE keyword) is rejected.
    let outcome = parse_statement("CREATE x (a INT)");
    assert!(outcome.is_err());
}
3792
#[test]
fn insert_single_value() {
    // One row holding one integer literal.
    let insert = match parse("INSERT INTO foo VALUES (42)") {
        Statement::Insert(inner) => inner,
        _ => panic!("expected Insert"),
    };
    assert_eq!(insert.table, "foo");
    assert_eq!(insert.rows.len(), 1);

    let row = &insert.rows[0];
    assert_eq!(row.len(), 1);
    assert!(matches!(row[0], Expr::Literal(Literal::Integer(42))));
}
3804
#[test]
fn insert_multi_value_with_mixed_literals() {
    // A single VALUES tuple mixing integer, string, float, boolean and
    // NULL literals must come back as one row with five expressions.
    let insert = match parse("INSERT INTO foo VALUES (1, 'hi', 3.14, TRUE, NULL)") {
        Statement::Insert(inner) => inner,
        // Report what was parsed instead so a failure is diagnosable,
        // as the sibling single-value test does.
        s => panic!("expected Insert, got {s:?}"),
    };
    assert_eq!(insert.rows.len(), 1);
    assert_eq!(insert.rows[0].len(), 5);
}
3812
#[test]
fn insert_missing_into_errors() {
    // `INSERT` without the `INTO` keyword is rejected.
    let outcome = parse_statement("INSERT foo VALUES (1)");
    assert!(outcome.is_err());
}
3817
#[test]
fn create_table_round_trip() {
    // Parse, print, parse again: both statements must compare equal.
    let sql = "CREATE TABLE foo (id BIGINT NOT NULL, label TEXT, score FLOAT NOT NULL)";
    let original = parse(sql);
    let reparsed = parse_statement(&original.to_string()).expect("re-parse");
    assert_eq!(original, reparsed);
}
3826
#[test]
fn insert_round_trip_with_negation_and_string() {
    // Covers a negated integer, a string with a doubled quote, and NULL
    // in a parse -> print -> parse cycle.
    let original = parse("INSERT INTO t VALUES (-1, 'it''s', NULL)");
    let reparsed = parse_statement(&original.to_string()).expect("re-parse");
    assert_eq!(original, reparsed);
}
3834
#[test]
fn unknown_keyword_at_statement_start_errors() {
    // UPDATE has been real SQL since v4.4, so a made-up leading keyword
    // is used to make sure no top-level branch matches.
    let failure = parse_statement("FROBNICATE foo SET x = 1").unwrap_err();
    let mentions_select = failure.message.contains("expected SELECT");
    assert!(mentions_select);
}
3842
3843    // --- v0.8 CREATE INDEX --------------------------------------------------
3844
#[test]
fn create_index_basic() {
    // Index name, table and the single indexed column are all captured.
    let index = match parse("CREATE INDEX idx_id ON users (id)") {
        Statement::CreateIndex(inner) => inner,
        _ => panic!("expected CreateIndex"),
    };
    assert_eq!(index.name, "idx_id");
    assert_eq!(index.table, "users");
    assert_eq!(index.column, "id");
}
3855
#[test]
fn create_index_missing_on_errors() {
    // The `ON` keyword between index name and table is mandatory.
    let outcome = parse_statement("CREATE INDEX foo users (id)");
    assert!(outcome.is_err());
}
3860
#[test]
fn create_index_missing_paren_errors() {
    // The indexed column has to be parenthesised.
    let outcome = parse_statement("CREATE INDEX foo ON users id");
    assert!(outcome.is_err());
}
3865
#[test]
fn create_index_round_trip() {
    // The Display output of a parsed CREATE INDEX parses back to an
    // equal statement.
    let original = parse("CREATE INDEX by_name ON users (name)");
    let text = original.to_string();
    let reparsed = parse_statement(&text).unwrap();
    assert_eq!(original, reparsed);
}
3872
3873    // --- v0.9 transactions -------------------------------------------------
3874
#[test]
fn begin_commit_rollback_parse_as_unit_variants() {
    // Each transaction keyword maps to its own field-less variant; the
    // last entry repeats BEGIN with a trailing semicolon, which is
    // accepted too.
    let cases = [
        ("BEGIN", Statement::Begin),
        ("COMMIT", Statement::Commit),
        ("ROLLBACK", Statement::Rollback),
        ("BEGIN;", Statement::Begin),
    ];
    for (sql, want) in cases {
        assert_eq!(parse(sql), want);
    }
}
3883
3884    // --- v1.2: pgvector distance ops + ::vector cast --------------------
3885
#[test]
fn inner_product_binop_parses() {
    // `<#>` in the projection must become a Binary node whose operator
    // is InnerProduct.
    let select = match parse("SELECT v <#> [1.0, 2.0] FROM t") {
        Statement::Select(inner) => inner,
        _ => panic!(),
    };
    let projected = match &select.items[0] {
        SelectItem::Expr { expr, .. } => expr,
        _ => panic!(),
    };
    let is_inner_product = matches!(
        projected,
        Expr::Binary {
            op: BinOp::InnerProduct,
            ..
        }
    );
    assert!(is_inner_product);
}
3901
#[test]
fn cosine_distance_binop_parses() {
    // `<=>` in the projection must become a Binary node whose operator
    // is CosineDistance.
    let select = match parse("SELECT v <=> [1.0, 2.0] FROM t") {
        Statement::Select(inner) => inner,
        _ => panic!(),
    };
    let projected = match &select.items[0] {
        SelectItem::Expr { expr, .. } => expr,
        _ => panic!(),
    };
    let is_cosine = matches!(
        projected,
        Expr::Binary {
            op: BinOp::CosineDistance,
            ..
        }
    );
    assert!(is_cosine);
}
3917
#[test]
fn vector_cast_postfix_wraps_string_literal() {
    // A postfix `::vector` on a string literal must produce a Cast node
    // targeting CastTarget::Vector.
    let select = match parse("SELECT '[1,2,3]'::vector FROM t") {
        Statement::Select(inner) => inner,
        _ => panic!(),
    };
    let projected = match &select.items[0] {
        SelectItem::Expr { expr, .. } => expr,
        _ => panic!(),
    };
    let is_vector_cast = matches!(
        projected,
        Expr::Cast {
            target: CastTarget::Vector,
            ..
        }
    );
    assert!(is_vector_cast);
}
3933
#[test]
fn unsupported_cast_target_errors() {
    // `numeric` is outside the v1.3 set of cast targets, so the cast
    // must be rejected with a dedicated message.
    let failure = parse_statement("SELECT 1::numeric FROM t").unwrap_err();
    let mentions_target = failure.message.contains("unsupported cast target");
    assert!(mentions_target);
}
3940
#[test]
fn tx_statements_round_trip() {
    // Every transaction-control statement survives print -> parse.
    let keywords = ["BEGIN", "COMMIT", "ROLLBACK"];
    for sql in keywords {
        let first = parse(sql);
        let text = first.to_string();
        let second = parse_statement(&text).unwrap();
        assert_eq!(first, second);
    }
}
3949
#[test]
fn interval_text_parsing_units() {
    // Inputs that must parse, each paired with the expected
    // (first, second) tuple. The cases below show month/year units
    // landing in the first slot and day/hour/second units in the second.
    let accepted = [
        // Single unit.
        ("1 day", (0, 86_400_000_000)),
        ("1 second", (0, 1_000_000)),
        ("1 month", (1, 0)),
        ("2 years", (24, 0)),
        // Compound spans accumulate.
        ("1 year 6 months", (18, 0)),
        ("1 day 2 hours", (0, 86_400_000_000 + 7_200_000_000)),
        // Negative numbers carry through.
        ("-1 day", (0, -86_400_000_000)),
    ];
    for (text, want) in accepted {
        assert_eq!(parse_interval_text(text), Some(want));
    }

    // Bad shapes return None: empty input, no number, unknown unit, and
    // a number with no unit.
    let rejected = ["", "garbage", "1 fortnight", "1"];
    for text in rejected {
        assert_eq!(parse_interval_text(text), None);
    }
}
3971
#[test]
fn interval_literal_roundtrips_via_display() {
    let first = parse("SELECT INTERVAL '1 day 2 hours'");
    // The printed form keeps the interval text exactly as written.
    let s = first.to_string();
    assert!(s.contains("INTERVAL '1 day 2 hours'"), "got: {s}");
    // Parsing the printed form gives back an equal statement.
    let second = parse_statement(&s).unwrap();
    assert_eq!(first, second);
}
3982
3983    // ── v6.1.2: CREATE / DROP PUBLICATION ────────────────────
3984
#[test]
fn parser_recognises_create_publication_bare() {
    // With no FOR clause the scope is AllTables.
    let publication = match parse("CREATE PUBLICATION pub_a") {
        Statement::CreatePublication(inner) => inner,
        s => panic!("expected CreatePublication, got {s:?}"),
    };
    assert_eq!(publication.name, "pub_a");
    assert_eq!(publication.scope, PublicationScope::AllTables);
}
3994
#[test]
fn parser_recognises_create_publication_for_all_tables() {
    // The explicit `FOR ALL TABLES` form yields the AllTables scope.
    let publication = match parse("CREATE PUBLICATION pub_a FOR ALL TABLES") {
        Statement::CreatePublication(inner) => inner,
        s => panic!("expected CreatePublication, got {s:?}"),
    };
    assert_eq!(publication.name, "pub_a");
    assert_eq!(publication.scope, PublicationScope::AllTables);
}
4004
#[test]
fn parser_recognises_drop_publication() {
    // The variant carries just the publication name.
    let dropped = match parse("DROP PUBLICATION pub_a") {
        Statement::DropPublication(name) => name,
        s => panic!("expected DropPublication, got {s:?}"),
    };
    assert_eq!(dropped, "pub_a");
}
4013
#[test]
fn parser_recognises_for_table_list() {
    // A comma-separated list after `FOR TABLE` is kept in source order.
    let publication = match parse("CREATE PUBLICATION pub_a FOR TABLE t1, t2, t3") {
        Statement::CreatePublication(inner) => inner,
        s => panic!("expected CreatePublication, got {s:?}"),
    };
    assert_eq!(publication.name, "pub_a");
    let tables = match publication.scope {
        PublicationScope::ForTables(list) => list,
        _ => panic!("expected ForTables scope"),
    };
    assert_eq!(tables, alloc::vec!["t1", "t2", "t3"]);
}
4026
#[test]
fn parser_recognises_for_tables_plural() {
    // The plural `FOR TABLES` spelling is accepted alongside
    // `FOR TABLE`, matching PG 19.
    let publication = match parse("CREATE PUBLICATION pub_a FOR TABLES t1, t2") {
        Statement::CreatePublication(inner) => inner,
        s => panic!("expected CreatePublication, got {s:?}"),
    };
    let tables = match publication.scope {
        PublicationScope::ForTables(list) => list,
        _ => panic!("expected ForTables"),
    };
    assert_eq!(tables, alloc::vec!["t1", "t2"]);
}
4039
#[test]
fn parser_recognises_for_all_tables_except_list() {
    // `FOR ALL TABLES EXCEPT a, b` must produce the AllTablesExcept
    // scope holding the excluded tables in source order.
    let publication = match parse("CREATE PUBLICATION p FOR ALL TABLES EXCEPT t1, t2") {
        Statement::CreatePublication(inner) => inner,
        // Print the statement that was produced, as the other
        // publication tests do, so a mismatch is diagnosable.
        s => panic!("expected CreatePublication, got {s:?}"),
    };
    let excluded = match publication.scope {
        PublicationScope::AllTablesExcept(list) => list,
        _ => panic!("expected AllTablesExcept"),
    };
    assert_eq!(excluded, alloc::vec!["t1", "t2"]);
}
4051
#[test]
fn parser_rejects_for_table_with_empty_list() {
    // `FOR TABLE` followed by nothing has to fail.
    let outcome = parse_statement("CREATE PUBLICATION p FOR TABLE");
    let failure = outcome.expect_err("must error on empty list");
    // The exact wording is deliberately not pinned (per the original
    // note it comes from expect_ident_like: "expected identifier,
    // got …"); only require that some message is present.
    assert!(!failure.message.is_empty());
}
4061
#[test]
fn parser_recognises_show_publications() {
    // v6.1.3: `SHOW PUBLICATIONS`. In this position PUBLICATIONS is a
    // bare identifier, not a reserved keyword.
    let parsed = parse("SHOW PUBLICATIONS");
    let is_show = matches!(parsed, Statement::ShowPublications);
    assert!(is_show);
}
4069
4070    // ── v6.1.4: CREATE / DROP SUBSCRIPTION + SHOW SUBSCRIPTIONS ─
4071
#[test]
fn parser_recognises_create_subscription_single_publication() {
    // Name, connection string (quotes stripped) and the single
    // publication are all captured.
    let sql =
        "CREATE SUBSCRIPTION sub_a CONNECTION 'host=127.0.0.1 port=20002' PUBLICATION pub_a";
    let subscription = match parse(sql) {
        Statement::CreateSubscription(inner) => inner,
        s => panic!("expected CreateSubscription, got {s:?}"),
    };
    assert_eq!(subscription.name, "sub_a");
    assert_eq!(subscription.conn_str, "host=127.0.0.1 port=20002");
    assert_eq!(subscription.publications, alloc::vec!["pub_a"]);
}
4082
#[test]
fn parser_recognises_create_subscription_multi_publication() {
    // A comma-separated PUBLICATION list is kept in source order.
    let sql = "CREATE SUBSCRIPTION sub_a CONNECTION 'host=h' PUBLICATION p1, p2, p3";
    let subscription = match parse(sql) {
        Statement::CreateSubscription(inner) => inner,
        // Same diagnostic as the single-publication test, so a failure
        // shows what was actually parsed.
        s => panic!("expected CreateSubscription, got {s:?}"),
    };
    assert_eq!(subscription.publications, alloc::vec!["p1", "p2", "p3"]);
}
4093
#[test]
fn parser_rejects_create_subscription_missing_connection() {
    // Leaving out the CONNECTION clause must fail, and the message has
    // to name the missing keyword.
    let outcome = parse_statement("CREATE SUBSCRIPTION s PUBLICATION p");
    let failure = outcome.expect_err("must error on missing CONNECTION");
    assert!(
        failure.message.contains("CONNECTION"),
        "got: {}",
        failure.message
    );
}
4100
#[test]
fn parser_rejects_create_subscription_missing_publication() {
    // Leaving out the PUBLICATION clause must fail, and the message has
    // to name the missing keyword.
    let outcome = parse_statement("CREATE SUBSCRIPTION s CONNECTION 'host=x'");
    let failure = outcome.expect_err("must error on missing PUBLICATION");
    assert!(
        failure.message.contains("PUBLICATION"),
        "got: {}",
        failure.message
    );
}
4107
#[test]
fn parser_recognises_drop_subscription() {
    // The variant carries just the subscription name.
    let dropped = match parse("DROP SUBSCRIPTION sub_a") {
        Statement::DropSubscription(name) => name,
        s => panic!("expected DropSubscription, got {s:?}"),
    };
    assert_eq!(dropped, "sub_a");
}
4116
#[test]
fn parser_recognises_show_subscriptions() {
    // `SHOW SUBSCRIPTIONS` maps to its own field-less variant.
    let parsed = parse("SHOW SUBSCRIPTIONS");
    let is_show = matches!(parsed, Statement::ShowSubscriptions);
    assert!(is_show);
}
4122
#[test]
fn parser_recognises_wait_for_wal_position_no_timeout() {
    // Without `WITH TIMEOUT` the timeout field stays unset.
    let (pos, timeout_ms) = match parse("WAIT FOR WAL POSITION 12345") {
        Statement::WaitForWalPosition { pos, timeout_ms } => (pos, timeout_ms),
        s => panic!("expected WaitForWalPosition, got {s:?}"),
    };
    assert_eq!(pos, 12345);
    assert!(timeout_ms.is_none());
}
4132
#[test]
fn parser_recognises_wait_for_wal_position_with_timeout() {
    // `WITH TIMEOUT n` fills the optional timeout next to the position.
    let (pos, timeout_ms) = match parse("WAIT FOR WAL POSITION 67890 WITH TIMEOUT 5000") {
        Statement::WaitForWalPosition { pos, timeout_ms } => (pos, timeout_ms),
        // Same diagnostic as the no-timeout test, so a failure shows
        // what was actually parsed.
        s => panic!("expected WaitForWalPosition, got {s:?}"),
    };
    assert_eq!(pos, 67890);
    assert_eq!(timeout_ms, Some(5000));
}
4142
#[test]
fn parser_rejects_wait_with_negative_position() {
    // Per the original note, `-` reaches the parser as its own token
    // ahead of the integer, so `-1` is not a position literal. The
    // test only requires that parsing fails with some message; it does
    // not pin which rule rejects the input.
    let outcome = parse_statement("WAIT FOR WAL POSITION -1");
    let failure = outcome.unwrap_err();
    assert!(!failure.message.is_empty());
}
4153
#[test]
fn parser_recognises_bare_analyze() {
    // ANALYZE with no table name carries `None`.
    let parsed = parse("ANALYZE");
    let is_bare = matches!(parsed, Statement::Analyze(None));
    assert!(is_bare);
}
4159
#[test]
fn parser_recognises_analyze_with_table() {
    // ANALYZE followed by an identifier carries that table name.
    let table = match parse("ANALYZE users") {
        Statement::Analyze(Some(name)) => name,
        s => panic!("expected Analyze, got {s:?}"),
    };
    assert_eq!(table, "users");
}
4168
#[test]
fn parser_recognises_analyze_with_quoted_table() {
    // A double-quoted identifier keeps its case and inner space; the
    // surrounding quotes are not part of the stored name.
    let table = match parse("ANALYZE \"Mixed Case\"") {
        Statement::Analyze(Some(name)) => name,
        // Same diagnostic as the unquoted-table test, so a failure
        // shows what was actually parsed.
        s => panic!("expected Analyze, got {s:?}"),
    };
    assert_eq!(table, "Mixed Case");
}
4177
#[test]
fn parser_rejects_analyze_with_garbage_token() {
    // An integer where the table name belongs is a parse error.
    let outcome = parse_statement("ANALYZE 42");
    let failure = outcome.expect_err("must error");
    assert!(!failure.message.is_empty());
}
4183
#[test]
fn analyze_display_roundtrips() {
    // Both ANALYZE shapes must survive print -> parse unchanged.
    let inputs = ["ANALYZE", "ANALYZE users"];
    for sql in inputs {
        let first = parse(sql);
        let printed = first.to_string();
        let second = match parse_statement(&printed) {
            Ok(stmt) => stmt,
            Err(e) => panic!("re-parse failed for {printed:?}: {e}"),
        };
        assert_eq!(first, second);
    }
}
4194
#[test]
fn wait_for_display_roundtrips() {
    // WAIT FOR WAL POSITION, with and without a timeout, must survive
    // print -> parse unchanged.
    let inputs = [
        "WAIT FOR WAL POSITION 12345",
        "WAIT FOR WAL POSITION 67890 WITH TIMEOUT 5000",
    ];
    for sql in inputs {
        let first = parse(sql);
        let printed = first.to_string();
        let second = match parse_statement(&printed) {
            Ok(stmt) => stmt,
            Err(e) => panic!("re-parse failed for {printed:?}: {e}"),
        };
        assert_eq!(first, second, "round-trip mismatch for {sql:?}");
    }
}
4208
#[test]
fn subscription_ddl_display_roundtrips() {
    // CREATE (one and several publications), DROP and SHOW must each
    // survive print -> parse unchanged.
    let inputs = [
        "CREATE SUBSCRIPTION sub_a CONNECTION 'host=h port=20002' PUBLICATION pub_a",
        "CREATE SUBSCRIPTION sub_b CONNECTION 'host=h' PUBLICATION p1, p2",
        "DROP SUBSCRIPTION sub_a",
        "SHOW SUBSCRIPTIONS",
    ];
    for sql in inputs {
        let first = parse(sql);
        let printed = first.to_string();
        let second = match parse_statement(&printed) {
            Ok(stmt) => stmt,
            Err(e) => panic!("re-parse failed for {printed:?}: {e}"),
        };
        assert_eq!(first, second, "round-trip mismatch for {sql:?}");
    }
}
4224
#[test]
fn parser_drop_dispatches_user_vs_publication() {
    // Before v6.1.2 DROP USER went through the bare-identifier path;
    // since v6.1.2 DROP is tokenised. Both targets must still parse.
    let user = match parse("DROP USER 'alice'") {
        Statement::DropUser(name) => name,
        s => panic!("expected DropUser, got {s:?}"),
    };
    assert_eq!(user, "alice");

    // DROP PUBLICATION resolves to the newer variant.
    let publication = parse("DROP PUBLICATION p1");
    assert!(matches!(publication, Statement::DropPublication(_)));
}
4238
#[test]
fn publication_ddl_display_roundtrips() {
    // Display -> parse must reproduce the same AST for every
    // publication statement; as of v6.1.3 this covers all three
    // CREATE scope shapes plus DROP and SHOW.
    let inputs = [
        "CREATE PUBLICATION pub_a",
        "CREATE PUBLICATION pub_a FOR ALL TABLES",
        "CREATE PUBLICATION pub_a FOR TABLE t1, t2",
        "CREATE PUBLICATION pub_a FOR ALL TABLES EXCEPT t1",
        "DROP PUBLICATION pub_a",
        "SHOW PUBLICATIONS",
    ];
    for sql in inputs {
        let first = parse(sql);
        let printed = first.to_string();
        let second = match parse_statement(&printed) {
            Ok(stmt) => stmt,
            Err(e) => panic!("re-parse failed for {printed:?}: {e}"),
        };
        assert_eq!(first, second, "round-trip mismatch for {sql:?}");
    }
}
4258}