Skip to main content

spg_sql/
parser.rs

1//! Recursive-descent parser with a Pratt (precedence-climbing) sub-parser for
2//! expressions.
3//!
4//! Precedence (lowest → highest binding):
5//! `OR` (1) `<` `AND` (2) `<` `NOT` unary (3) `<`
6//! comparisons `=` `<>` `<` `<=` `>` `>=` (4) `<`
7//! `+` `-` (5) `<` `*` `/` (6) `<` unary `-` (7) `<` parens / atom.
8//!
9//! This matches PG's behaviour for the operators we support — e.g. `NOT a = b`
10//! parses as `NOT (a = b)` and `-a * b` as `(-a) * b`.
11
12use alloc::boxed::Box;
13use alloc::format;
14use alloc::string::{String, ToString};
15use alloc::vec;
16use alloc::vec::Vec;
17use core::fmt;
18use core::mem;
19
20use crate::ast::{
21    BinOp, CastTarget, ColumnDef, ColumnName, ColumnTypeName, CreateIndexStatement,
22    CreatePublicationStatement, CreateSubscriptionStatement, CreateTableStatement, Expr,
23    ExtractField, FkAction, ForeignKeyConstraint, FrameBound, FrameKind, FromClause, FromJoin,
24    IndexMethod, InsertStatement, JoinKind, Literal, NullTreatment, OrderBy, PublicationScope,
25    SelectItem, SelectStatement, Statement, TableRef, UnOp, UnionKind, VecEncoding, WindowFrame,
26};
27use crate::lexer::{self, LexError, Token};
28
/// v7.9.22 — recognise pgvector / SPG vector-index opclass names
/// in CREATE INDEX. SPG's HNSW already routes by query operator;
/// the opclass is accepted for `pg_dump` compatibility (mailrs
/// migration follow-up G5).
///
/// The comparison is ASCII case-insensitive and matches the whole
/// name only. Returns `true` when `name` is one of the known
/// `vector_*` / `halfvec_*` / `sq8_*` opclasses.
fn is_vector_opclass_name(name: &str) -> bool {
    // Compared in place with `eq_ignore_ascii_case`, so no lowercased
    // copy of `name` has to be allocated per call.
    const VECTOR_OPCLASSES: [&str; 9] = [
        "vector_cosine_ops",
        "vector_l2_ops",
        "vector_ip_ops",
        "halfvec_cosine_ops",
        "halfvec_l2_ops",
        "halfvec_ip_ops",
        "sq8_cosine_ops",
        "sq8_l2_ops",
        "sq8_ip_ops",
    ];
    VECTOR_OPCLASSES
        .iter()
        .any(|known| name.eq_ignore_ascii_case(known))
}
48
/// Error returned when a statement cannot be lexed or parsed.
///
/// Lexer failures are converted into this type as well; those carry a
/// `"lex: "`-prefixed message and `token_pos` 0.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ParseError {
    /// Human-readable description of the failure.
    pub message: String,
    /// Index into the token stream where parsing tripped. Not a byte offset.
    pub token_pos: usize,
}
55
56impl fmt::Display for ParseError {
57    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
58        write!(
59            f,
60            "parse error at token #{}: {}",
61            self.token_pos, self.message
62        )
63    }
64}
65
66impl From<LexError> for ParseError {
67    fn from(e: LexError) -> Self {
68        Self {
69            message: format!("lex: {e}"),
70            token_pos: 0,
71        }
72    }
73}
74
75/// Parse exactly one statement, swallow an optional trailing `;`, and require
76/// the token stream to end there.
77pub fn parse_statement(input: &str) -> Result<Statement, ParseError> {
78    let tokens = lexer::tokenize(input)?;
79    let mut p = Parser::new(tokens);
80    let stmt = p.parse_one_statement()?;
81    if matches!(p.peek(), Token::Semicolon) {
82        p.advance();
83    }
84    p.expect_eof()?;
85    Ok(stmt)
86}
87
/// Cursor over an owned token stream.
struct Parser {
    /// Tokens being parsed. `peek` indexes this unconditionally and relies
    /// on the stream ending with `Token::Eof`; `advance` overwrites each
    /// consumed slot with `Token::Eof`.
    tokens: Vec<Token>,
    /// Index of the current (not yet consumed) token.
    pos: usize,
}
92
93impl Parser {
94    fn new(tokens: Vec<Token>) -> Self {
95        Self { tokens, pos: 0 }
96    }
97
98    fn peek(&self) -> &Token {
99        // tokens always ends with Eof; pos is clamped in advance().
100        &self.tokens[self.pos]
101    }
102
103    fn advance(&mut self) -> Token {
104        let t = mem::replace(&mut self.tokens[self.pos], Token::Eof);
105        if self.pos + 1 < self.tokens.len() {
106            self.pos += 1;
107        }
108        t
109    }
110
111    fn err(&self, message: String) -> ParseError {
112        ParseError {
113            message,
114            token_pos: self.pos,
115        }
116    }
117
118    fn expect_eof(&self) -> Result<(), ParseError> {
119        if matches!(self.peek(), Token::Eof) {
120            Ok(())
121        } else {
122            Err(self.err(format!("expected end of input, got {:?}", self.peek())))
123        }
124    }
125
126    fn expect_ident_like(&mut self) -> Result<String, ParseError> {
127        match self.advance() {
128            Token::Ident(s) | Token::QuotedIdent(s) => Ok(s),
129            other => Err(ParseError {
130                message: format!("expected identifier, got {other:?}"),
131                token_pos: self.pos.saturating_sub(1),
132            }),
133        }
134    }
135
136    #[allow(clippy::too_many_lines)]
137    fn parse_one_statement(&mut self) -> Result<Statement, ParseError> {
138        match self.peek() {
139            Token::Select => self.parse_select_stmt(),
140            // v7.9.27 — `DO $$ … $$ [LANGUAGE plpgsql]`. PG-only;
141            // SPG has no PL/pgSQL so the body is consumed (lexer
142            // already turned it into a Token::String) and the whole
143            // DO statement returns CommandOk no-op. mailrs H1 +
144            // pg_dump compat.
145            Token::Ident(s) | Token::QuotedIdent(s) if s.eq_ignore_ascii_case("do") => {
146                self.advance();
147                // Body — single string token (dollar-quoted or
148                // ordinary).
149                match self.advance() {
150                    Token::String(_) => {}
151                    other => {
152                        return Err(self.err(alloc::format!(
153                            "expected dollar-quoted body after DO, got {other:?}"
154                        )));
155                    }
156                }
157                // Optional `LANGUAGE <name>` trailer (idents only).
158                if matches!(self.peek(), Token::Ident(s) if s.eq_ignore_ascii_case("language")) {
159                    self.advance();
160                    let _ = self.expect_ident_like()?;
161                }
162                Ok(Statement::DoBlock)
163            }
164            // v4.11: `WITH name AS (SELECT ...) [, ...] SELECT ...`.
165            // WITH isn't a reserved token in our lexer — comes through
166            // as `Token::Ident("with")` (case-insensitive).
167            Token::Ident(s) | Token::QuotedIdent(s) if s.eq_ignore_ascii_case("with") => {
168                self.advance();
169                self.parse_with_cte_then_select()
170            }
171            // v4.26: `EXPLAIN [ANALYZE] <select>`. Comes through as
172            // an identifier — not a reserved keyword.
173            Token::Ident(s) | Token::QuotedIdent(s) if s.eq_ignore_ascii_case("explain") => {
174                self.advance();
175                let mut analyze = false;
176                let mut suggest = false;
177                // v6.8.3 — `EXPLAIN (SUGGEST)` opt-in.
178                if matches!(self.peek(), Token::LParen) {
179                    self.advance();
180                    let opt = match self.peek().clone() {
181                        Token::Ident(s) | Token::QuotedIdent(s) => s,
182                        other => {
183                            return Err(self.err(format!(
184                                "expected option keyword inside EXPLAIN (…), got {other:?}"
185                            )));
186                        }
187                    };
188                    if !opt.eq_ignore_ascii_case("suggest") {
189                        return Err(self.err(format!(
190                            "unknown EXPLAIN option {opt:?}; v6.8.3 supports SUGGEST"
191                        )));
192                    }
193                    self.advance();
194                    if !matches!(self.peek(), Token::RParen) {
195                        return Err(self.err(format!(
196                            "expected ')' after EXPLAIN option, got {:?}",
197                            self.peek()
198                        )));
199                    }
200                    self.advance();
201                    suggest = true;
202                } else if let Token::Ident(s) | Token::QuotedIdent(s) = self.peek()
203                    && (s.eq_ignore_ascii_case("analyze") || s.eq_ignore_ascii_case("analyse"))
204                {
205                    self.advance();
206                    analyze = true;
207                }
208                let inner = self.parse_select_stmt()?;
209                let Statement::Select(s) = inner else {
210                    return Err(self.err(format!("EXPLAIN body must be a SELECT, got {inner:?}")));
211                };
212                Ok(Statement::Explain(crate::ast::ExplainStatement {
213                    analyze,
214                    inner: Box::new(s),
215                    suggest,
216                }))
217            }
218            Token::Create => self.parse_create_stmt(),
219            Token::Insert => self.parse_insert_stmt(),
220            Token::Begin => {
221                self.advance();
222                Ok(Statement::Begin)
223            }
224            Token::Commit => {
225                self.advance();
226                Ok(Statement::Commit)
227            }
228            Token::Rollback => {
229                self.advance();
230                // `ROLLBACK TO [SAVEPOINT] <name>` returns to that
231                // savepoint without ending the transaction. Bare
232                // `ROLLBACK` drops the whole TX.
233                if matches!(self.peek(), Token::To) {
234                    self.advance();
235                    if matches!(self.peek(), Token::Savepoint) {
236                        self.advance();
237                    }
238                    let name = self.expect_ident_like()?;
239                    Ok(Statement::RollbackToSavepoint(name))
240                } else {
241                    Ok(Statement::Rollback)
242                }
243            }
244            Token::Savepoint => {
245                self.advance();
246                let name = self.expect_ident_like()?;
247                Ok(Statement::Savepoint(name))
248            }
249            Token::Release => {
250                self.advance();
251                // `RELEASE [SAVEPOINT] <name>` — the `SAVEPOINT` keyword
252                // is optional in standard SQL.
253                if matches!(self.peek(), Token::Savepoint) {
254                    self.advance();
255                }
256                let name = self.expect_ident_like()?;
257                Ok(Statement::ReleaseSavepoint(name))
258            }
259            Token::Show => {
260                self.advance();
261                // `SHOW TABLES` / `SHOW USERS` / `SHOW COLUMNS FROM <table>`.
262                // v6.1.2 promoted TABLES to a reserved keyword (for
263                // `CREATE PUBLICATION … FOR ALL TABLES`), so it now
264                // arrives as `Token::Tables` rather than a bare ident.
265                // USERS / COLUMNS remain bare idents.
266                let target = match self.advance() {
267                    Token::Tables => "tables".to_string(),
268                    Token::Ident(s) | Token::QuotedIdent(s) => s.to_ascii_lowercase(),
269                    other => {
270                        return Err(self.err(format!(
271                            "expected SHOW target, got {other:?}"
272                        )));
273                    }
274                };
275                match target.as_str() {
276                    "tables" => Ok(Statement::ShowTables),
277                    "users" => Ok(Statement::ShowUsers),
278                    // v6.1.3 — PUBLICATIONS plural is NOT a reserved
279                    // keyword on its own; it lands here as a bare
280                    // ident. Returning all publications + their
281                    // scope summary.
282                    "publications" => Ok(Statement::ShowPublications),
283                    // v6.1.4 — same shape for SUBSCRIPTIONS plural.
284                    "subscriptions" => Ok(Statement::ShowSubscriptions),
285                    "columns" => {
286                        if !matches!(self.peek(), Token::From) {
287                            return Err(self.err(format!(
288                                "expected FROM after SHOW COLUMNS, got {:?}",
289                                self.peek()
290                            )));
291                        }
292                        self.advance();
293                        let table = self.expect_ident_like()?;
294                        Ok(Statement::ShowColumns(table))
295                    }
296                    other => Err(self.err(format!(
297                        "unknown SHOW target {other:?}; supported: TABLES, COLUMNS, USERS, PUBLICATIONS"
298                    ))),
299                }
300            }
301            // v6.1.2: `DROP` is now a reserved keyword (it dispatches
302            // to DROP USER and DROP PUBLICATION today; DROP TABLE /
303            // DROP INDEX are still SHOW-shaped admin ops). Pre-6.1.2
304            // arrived as a bare ident; tokenising it dedicatedly
305            // keeps the dispatch tree small.
306            Token::Drop => {
307                self.advance();
308                match self.peek() {
309                    Token::Publication => {
310                        self.advance();
311                        let name = self.expect_ident_or_string()?;
312                        Ok(Statement::DropPublication(name))
313                    }
314                    Token::Subscription => {
315                        self.advance();
316                        let name = self.expect_ident_or_string()?;
317                        Ok(Statement::DropSubscription(name))
318                    }
319                    Token::Ident(s) | Token::QuotedIdent(s) if s.eq_ignore_ascii_case("user") => {
320                        self.advance();
321                        let name = self.expect_ident_or_string()?;
322                        Ok(Statement::DropUser(name))
323                    }
324                    other => Err(self.err(format!(
325                        "expected USER / PUBLICATION / SUBSCRIPTION after DROP, got {other:?}"
326                    ))),
327                }
328            }
329            Token::Ident(s) | Token::QuotedIdent(s) if s.eq_ignore_ascii_case("update") => {
330                self.advance();
331                self.parse_update_after_keyword()
332            }
333            Token::Ident(s) | Token::QuotedIdent(s) if s.eq_ignore_ascii_case("delete") => {
334                self.advance();
335                self.parse_delete_after_keyword()
336            }
337            // v6.0.4: ALTER INDEX <name> REBUILD [WITH (encoding = ...)].
338            // ALTER is not a reserved keyword in the lexer — handled
339            // as a bare ident here.
340            Token::Ident(s) | Token::QuotedIdent(s) if s.eq_ignore_ascii_case("alter") => {
341                self.advance();
342                self.parse_alter_after_keyword()
343            }
344            // v6.1.7: WAIT FOR WAL POSITION <pos> [WITH TIMEOUT <ms>].
345            // WAIT / POSITION / TIMEOUT are bare idents — no lexer
346            // additions needed.
347            Token::Ident(s) | Token::QuotedIdent(s) if s.eq_ignore_ascii_case("wait") => {
348                self.advance();
349                self.parse_wait_after_keyword()
350            }
351            // v6.2.0: ANALYZE [<table>]. ANALYZE is a bare ident.
352            // Bare ANALYZE → analyse every user table; ANALYZE
353            // <name> → re-stats one. The argument is an optional
354            // ident (or quoted ident); anything else is a parse
355            // error.
356            // v6.7.3 — `COMPACT COLD SEGMENTS`. No arguments, no
357            // `WHERE` filter (carved out per V6_7_DESIGN.md
358            // STABILITY). Lex order: identifier "compact" → "cold"
359            // → "segments". Anything else after `COMPACT` is a
360            // parse error.
361            Token::Ident(s) | Token::QuotedIdent(s) if s.eq_ignore_ascii_case("compact") => {
362                self.advance();
363                let next = self.peek().clone();
364                let cold = match next {
365                    Token::Ident(s) | Token::QuotedIdent(s) => s,
366                    _ => {
367                        return Err(
368                            self.err(format!("expected COLD after COMPACT, got {:?}", self.peek()))
369                        );
370                    }
371                };
372                if !cold.eq_ignore_ascii_case("cold") {
373                    return Err(self.err(format!("expected COLD after COMPACT, got {cold:?}")));
374                }
375                self.advance();
376                let next = self.peek().clone();
377                let segments = match next {
378                    Token::Ident(s) | Token::QuotedIdent(s) => s,
379                    _ => {
380                        return Err(self.err(format!(
381                            "expected SEGMENTS after COMPACT COLD, got {:?}",
382                            self.peek()
383                        )));
384                    }
385                };
386                if !segments.eq_ignore_ascii_case("segments") {
387                    return Err(self.err(format!(
388                        "expected SEGMENTS after COMPACT COLD, got {segments:?}"
389                    )));
390                }
391                self.advance();
392                Ok(Statement::CompactColdSegments)
393            }
394            Token::Ident(s) | Token::QuotedIdent(s) if s.eq_ignore_ascii_case("analyze") => {
395                self.advance();
396                let target = match self.peek() {
397                    Token::Eof | Token::Semicolon => None,
398                    Token::Ident(_) | Token::QuotedIdent(_) => {
399                        Some(self.expect_ident_like()?)
400                    }
401                    other => {
402                        return Err(self.err(format!(
403                            "expected table name or end of statement after ANALYZE, got {other:?}"
404                        )));
405                    }
406                };
407                Ok(Statement::Analyze(target))
408            }
409            other => Err(self.err(format!(
410                "expected SELECT / CREATE / DROP / INSERT / UPDATE / DELETE / ALTER / BEGIN / COMMIT / \
411                 ROLLBACK / SAVEPOINT / RELEASE / SHOW at start of statement, got {other:?}"
412            ))),
413        }
414    }
415
416    fn parse_create_stmt(&mut self) -> Result<Statement, ParseError> {
417        debug_assert!(matches!(self.peek(), Token::Create));
418        self.advance();
419        match self.peek() {
420            Token::Table => self.parse_create_table_stmt_after_create(),
421            Token::Index => self.parse_create_index_stmt_after_create(),
422            Token::Publication => {
423                self.advance();
424                self.parse_create_publication_after_keyword()
425            }
426            Token::Subscription => {
427                self.advance();
428                self.parse_create_subscription_after_keyword()
429            }
430            // v4.1: CREATE USER 'name' WITH PASSWORD 'pw' [ROLE 'role'].
431            // USER isn't a reserved keyword — we look for the bare
432            // identifier so the lexer doesn't have to grow a token.
433            Token::Ident(s) | Token::QuotedIdent(s) if s.eq_ignore_ascii_case("user") => {
434                self.advance();
435                self.parse_create_user_after_keyword()
436            }
437            // v7.9.15 — `CREATE EXTENSION [IF NOT EXISTS] <name>
438            // [WITH SCHEMA …] [VERSION '…'] [CASCADE]` as a
439            // no-op. mailrs follow-up F3.
440            Token::Ident(s) | Token::QuotedIdent(s) if s.eq_ignore_ascii_case("extension") => {
441                self.advance();
442                self.parse_create_extension_after_keyword()
443            }
444            other => Err(self.err(format!(
445                "expected TABLE / INDEX / USER / EXTENSION / PUBLICATION / SUBSCRIPTION after CREATE, got {other:?}"
446            ))),
447        }
448    }
449
450    /// v7.9.15 — accept and discard `CREATE EXTENSION` DDL.
451    /// SPG doesn't have a registry; pgvector / similar are
452    /// either builtin (VECTOR(N) ↔ pgvector) or n/a. Parsing
453    /// the syntax lets dual-target schemas keep the line.
454    fn parse_create_extension_after_keyword(&mut self) -> Result<Statement, ParseError> {
455        // Optional `IF NOT EXISTS`.
456        self.consume_if_not_exists();
457        let name = self.expect_ident_like()?;
458        // Drain optional WITH SCHEMA <ident> / VERSION '<v>' /
459        // CASCADE / FROM '<v>' clauses; we don't model them.
460        loop {
461            match self.peek() {
462                Token::Ident(s) if s.eq_ignore_ascii_case("with") => {
463                    self.advance();
464                    continue;
465                }
466                Token::Ident(s) if s.eq_ignore_ascii_case("schema") => {
467                    self.advance();
468                    let _ = self.expect_ident_like()?;
469                    continue;
470                }
471                Token::Ident(s) if s.eq_ignore_ascii_case("version") => {
472                    self.advance();
473                    // String or ident literal.
474                    let _ = self.advance();
475                    continue;
476                }
477                Token::Ident(s) if s.eq_ignore_ascii_case("from") => {
478                    self.advance();
479                    let _ = self.advance();
480                    continue;
481                }
482                Token::Ident(s) if s.eq_ignore_ascii_case("cascade") => {
483                    self.advance();
484                    continue;
485                }
486                _ => break,
487            }
488        }
489        Ok(Statement::CreateExtension(name))
490    }
491
492    /// v6.1.2 → v6.1.3 — `CREATE PUBLICATION <name>` body. Accepts:
493    ///   - (no clause) → implicit `FOR ALL TABLES`
494    ///   - `FOR ALL TABLES`
495    ///   - `FOR ALL TABLES EXCEPT t1, t2, …` (v6.1.3)
496    ///   - `FOR TABLE t1, t2, …` (v6.1.3) — `FOR TABLES …` also
497    ///     accepted (PG accepts both forms in PG 19).
498    fn parse_create_publication_after_keyword(&mut self) -> Result<Statement, ParseError> {
499        let name = self.expect_ident_or_string()?;
500        // Bare DDL maps to FOR ALL TABLES — matches the v6.1.2
501        // shape so existing publications keep parsing identically.
502        let scope = if matches!(self.peek(), Token::For) {
503            self.advance();
504            if matches!(self.peek(), Token::All) {
505                self.advance();
506                if !matches!(self.peek(), Token::Tables) {
507                    return Err(self.err(format!(
508                        "expected TABLES after FOR ALL, got {:?}",
509                        self.peek()
510                    )));
511                }
512                self.advance();
513                if matches!(self.peek(), Token::Except) {
514                    self.advance();
515                    let tables = self.parse_publication_table_list()?;
516                    PublicationScope::AllTablesExcept(tables)
517                } else {
518                    PublicationScope::AllTables
519                }
520            } else if matches!(self.peek(), Token::Table | Token::Tables) {
521                // PG 19 accepts both `FOR TABLE …` (singular) and
522                // `FOR TABLES …` (plural); SPG matches.
523                self.advance();
524                let tables = self.parse_publication_table_list()?;
525                PublicationScope::ForTables(tables)
526            } else {
527                return Err(self.err(format!(
528                    "expected ALL TABLES or TABLE <list> after FOR, got {:?}",
529                    self.peek()
530                )));
531            }
532        } else {
533            PublicationScope::AllTables
534        };
535        Ok(Statement::CreatePublication(CreatePublicationStatement {
536            name,
537            scope,
538        }))
539    }
540
541    /// v6.1.3 — Comma-separated identifier list for the publication
542    /// FOR-clause. Requires at least one entry; empty list is a
543    /// parse error (PG behaviour). Quoted idents are accepted; the
544    /// names round-trip through `Display` as `quote_ident(name)`.
545    fn parse_publication_table_list(&mut self) -> Result<Vec<String>, ParseError> {
546        let first = self.expect_ident_like()?;
547        let mut out = alloc::vec![first];
548        while matches!(self.peek(), Token::Comma) {
549            self.advance();
550            out.push(self.expect_ident_like()?);
551        }
552        Ok(out)
553    }
554
555    /// v6.1.4 — `CREATE SUBSCRIPTION <name>
556    ///                 CONNECTION '<conn>'
557    ///                 PUBLICATION <pub> [, <pub> ...]`.
558    ///
559    /// The clause order is fixed (CONNECTION first, then
560    /// PUBLICATION) to match PG. No WITH-options accepted in
561    /// v6.1.4 — `enabled` defaults to true, no other knobs ship.
562    fn parse_create_subscription_after_keyword(&mut self) -> Result<Statement, ParseError> {
563        let name = self.expect_ident_or_string()?;
564        if !matches!(self.peek(), Token::Connection) {
565            return Err(self.err(format!(
566                "expected CONNECTION after CREATE SUBSCRIPTION <name>, got {:?}",
567                self.peek()
568            )));
569        }
570        self.advance();
571        let conn_str = self.expect_string_literal()?;
572        if !matches!(self.peek(), Token::Publication) {
573            return Err(self.err(format!(
574                "expected PUBLICATION after CONNECTION '<conn>', got {:?}",
575                self.peek()
576            )));
577        }
578        self.advance();
579        // Reuse the publication FOR-list parser shape: at least one
580        // identifier, comma-separated.
581        let first = self.expect_ident_like()?;
582        let mut publications = alloc::vec![first];
583        while matches!(self.peek(), Token::Comma) {
584            self.advance();
585            publications.push(self.expect_ident_like()?);
586        }
587        Ok(Statement::CreateSubscription(
588            CreateSubscriptionStatement {
589                name,
590                conn_str,
591                publications,
592            },
593        ))
594    }
595
596    /// v6.1.7 — `WAIT FOR WAL POSITION <pos> [WITH TIMEOUT <ms>]`.
597    /// All keywords after `WAIT` are bare idents in v6.1.x; no
598    /// lexer churn. Both `<pos>` and `<ms>` are positive integers
599    /// that fit `u64`.
600    fn parse_wait_after_keyword(&mut self) -> Result<Statement, ParseError> {
601        // FOR is a v6.1.2-reserved keyword (Token::For). The
602        // other two are bare idents — they've never needed lexer
603        // support and we keep it that way.
604        if !matches!(self.peek(), Token::For) {
605            return Err(self.err(format!(
606                "expected FOR after WAIT, got {:?}",
607                self.peek()
608            )));
609        }
610        self.advance();
611        self.expect_keyword_ident("wal")?;
612        self.expect_keyword_ident("position")?;
613        let pos = self.expect_u64_literal()?;
614        let timeout_ms = if matches!(self.peek(), Token::Ident(s) | Token::QuotedIdent(s) if s.eq_ignore_ascii_case("with"))
615        {
616            self.advance();
617            self.expect_keyword_ident("timeout")?;
618            Some(self.expect_u64_literal()?)
619        } else {
620            None
621        };
622        Ok(Statement::WaitForWalPosition { pos, timeout_ms })
623    }
624
625    /// v6.1.7 helper — consume a `Token::Integer` and check it
626    /// fits `u64`. WAL positions and millisecond timeouts are
627    /// non-negative.
628    fn expect_u64_literal(&mut self) -> Result<u64, ParseError> {
629        match self.advance() {
630            Token::Integer(n) if n >= 0 => Ok(n as u64),
631            Token::Integer(n) => Err(ParseError {
632                message: format!("expected non-negative integer, got {n}"),
633                token_pos: self.pos.saturating_sub(1),
634            }),
635            other => Err(ParseError {
636                message: format!("expected integer literal, got {other:?}"),
637                token_pos: self.pos.saturating_sub(1),
638            }),
639        }
640    }
641
642    /// `CREATE USER` body — name + WITH PASSWORD '<pw>' + optional
643    /// ROLE '<role>' (defaults to readonly). All string slots accept
644    /// either a quoted ident or a quoted string literal.
645    fn parse_create_user_after_keyword(&mut self) -> Result<Statement, ParseError> {
646        let name = self.expect_ident_or_string()?;
647        self.expect_keyword_ident("with")?;
648        self.expect_keyword_ident("password")?;
649        let password = self.expect_string_literal()?;
650        let role = if let Token::Ident(s) | Token::QuotedIdent(s) = self.peek()
651            && s.eq_ignore_ascii_case("role")
652        {
653            self.advance();
654            self.expect_string_literal()?
655        } else {
656            "readonly".to_string()
657        };
658        Ok(Statement::CreateUser(crate::ast::CreateUserStatement {
659            name,
660            password,
661            role,
662        }))
663    }
664
665    /// v4.4 `UPDATE <table> SET col = expr [, col = expr]* [WHERE cond]`.
666    /// Caller already consumed the leading `UPDATE` ident.
667    fn parse_update_after_keyword(&mut self) -> Result<Statement, ParseError> {
668        let table = self.expect_ident_like()?;
669        self.expect_keyword_ident("set")?;
670        let mut assignments = Vec::new();
671        loop {
672            let col = self.expect_ident_like()?;
673            if !matches!(self.peek(), Token::Eq) {
674                return Err(self.err(format!(
675                    "expected `=` after column name in UPDATE SET, got {:?}",
676                    self.peek()
677                )));
678            }
679            self.advance();
680            let value = self.parse_expr(0)?;
681            assignments.push((col, value));
682            if matches!(self.peek(), Token::Comma) {
683                self.advance();
684                continue;
685            }
686            break;
687        }
688        let where_ = if matches!(self.peek(), Token::Where) {
689            self.advance();
690            Some(self.parse_expr(0)?)
691        } else {
692            None
693        };
694        let returning = self.parse_optional_returning()?;
695        Ok(Statement::Update(crate::ast::UpdateStatement {
696            table,
697            assignments,
698            where_,
699            returning,
700        }))
701    }
702
703    /// v4.4 `DELETE FROM <table> [WHERE cond]`. Caller already consumed
704    /// the leading `DELETE` ident.
705    fn parse_delete_after_keyword(&mut self) -> Result<Statement, ParseError> {
706        if !matches!(self.peek(), Token::From) {
707            return Err(self.err(format!("expected FROM after DELETE, got {:?}", self.peek())));
708        }
709        self.advance();
710        let table = self.expect_ident_like()?;
711        let where_ = if matches!(self.peek(), Token::Where) {
712            self.advance();
713            Some(self.parse_expr(0)?)
714        } else {
715            None
716        };
717        let returning = self.parse_optional_returning()?;
718        Ok(Statement::Delete(crate::ast::DeleteStatement {
719            table,
720            where_,
721            returning,
722        }))
723    }
724
725    /// v7.9.4 — parse the optional trailing `RETURNING <projection>`
726    /// clause on INSERT / UPDATE / DELETE. Same projection grammar
727    /// as SELECT, so `RETURNING *`, `RETURNING col`,
728    /// `RETURNING expr AS alias`, and `RETURNING a, b, c` all work.
729    fn parse_optional_returning(&mut self) -> Result<Option<Vec<crate::ast::SelectItem>>, ParseError> {
730        let is_returning_kw = matches!(
731            self.peek(),
732            Token::Ident(s) if s.eq_ignore_ascii_case("returning")
733        );
734        if !is_returning_kw {
735            return Ok(None);
736        }
737        self.advance();
738        let mut items = Vec::new();
739        loop {
740            items.push(self.parse_select_item()?);
741            if matches!(self.peek(), Token::Comma) {
742                self.advance();
743                continue;
744            }
745            break;
746        }
747        Ok(Some(items))
748    }
749
750    /// v6.0.4 — parse the tail of an ALTER statement after the
751    /// leading `ALTER` keyword has been consumed. Only one form is
752    /// supported in v6.0.4:
753    ///
754    /// ```text
755    /// ALTER INDEX <name> REBUILD [WITH (encoding = <enc>)]
756    /// ```
757    fn parse_alter_after_keyword(&mut self) -> Result<Statement, ParseError> {
758        // ALTER INDEX <name> ... | ALTER TABLE <name> SET hot_tier_bytes = <n>
759        match self.advance() {
760            Token::Index => {}
761            Token::Ident(s) | Token::QuotedIdent(s) if s.eq_ignore_ascii_case("index") => {}
762            // v6.7.2 — ALTER TABLE t SET hot_tier_bytes = X
763            Token::Table => return self.parse_alter_table_after_keyword(),
764            Token::Ident(s) | Token::QuotedIdent(s) if s.eq_ignore_ascii_case("table") => {
765                return self.parse_alter_table_after_keyword();
766            }
767            other => {
768                return Err(self.err(format!("expected INDEX or TABLE after ALTER, got {other:?}")));
769            }
770        }
771        let name = self.expect_ident_like()?;
772        // REBUILD
773        self.expect_keyword_ident("rebuild")?;
774        // Optional: WITH (encoding = <enc>)
775        let encoding = if matches!(self.peek(), Token::Ident(s) if s.eq_ignore_ascii_case("with")) {
776            self.advance();
777            if !matches!(self.peek(), Token::LParen) {
778                return Err(self.err(format!(
779                    "expected '(' after WITH in ALTER INDEX REBUILD, got {:?}",
780                    self.peek()
781                )));
782            }
783            self.advance();
784            self.expect_keyword_ident("encoding")?;
785            if !matches!(self.peek(), Token::Eq) {
786                return Err(self.err(format!(
787                    "expected '=' after encoding in ALTER INDEX REBUILD, got {:?}",
788                    self.peek()
789                )));
790            }
791            self.advance();
792            let enc_ident = match self.advance() {
793                Token::Ident(s) | Token::QuotedIdent(s) => s,
794                other => {
795                    return Err(self.err(format!("expected encoding name after =, got {other:?}")));
796                }
797            };
798            let enc = match enc_ident.to_ascii_lowercase().as_str() {
799                "f32" => VecEncoding::F32,
800                "sq8" => VecEncoding::Sq8,
801                "half" => VecEncoding::F16,
802                other => {
803                    return Err(self.err(format!(
804                        "unknown vector encoding {other:?} in ALTER INDEX REBUILD; supported: F32, SQ8, HALF"
805                    )));
806                }
807            };
808            if !matches!(self.peek(), Token::RParen) {
809                return Err(self.err(format!(
810                    "expected ')' after encoding value, got {:?}",
811                    self.peek()
812                )));
813            }
814            self.advance();
815            Some(enc)
816        } else {
817            None
818        };
819        Ok(Statement::AlterIndex(crate::ast::AlterIndexStatement {
820            name,
821            target: crate::ast::AlterIndexTarget::Rebuild { encoding },
822        }))
823    }
824
825    /// v6.7.2 — `ALTER TABLE <name> SET hot_tier_bytes = <n>`. The
826    /// only `SET` form currently supported; future v6.7.x can add
827    /// more SET subjects without changing the dispatch shape.
828    fn parse_alter_table_after_keyword(&mut self) -> Result<Statement, ParseError> {
829        let table_name = self.expect_ident_like()?;
830        // v7.6.8 — dispatch on the next keyword: SET / ADD / DROP.
831        // SET kept identical to v6.7.x. ADD / DROP CONSTRAINT routes
832        // to FK installation / removal.
833        match self.peek() {
834            Token::Ident(s) if s.eq_ignore_ascii_case("set") => {
835                self.advance();
836                let setting = self.expect_ident_like()?;
837                if !setting.eq_ignore_ascii_case("hot_tier_bytes") {
838                    return Err(self.err(alloc::format!(
839                        "ALTER TABLE SET: unknown setting {setting:?}; supported: hot_tier_bytes"
840                    )));
841                }
842                if !matches!(self.peek(), Token::Eq) {
843                    return Err(self.err(alloc::format!(
844                        "expected '=' after hot_tier_bytes, got {:?}",
845                        self.peek()
846                    )));
847                }
848                self.advance();
849                let n = self.expect_u64_literal()?;
850                Ok(Statement::AlterTable(crate::ast::AlterTableStatement {
851                    name: table_name,
852                    target: crate::ast::AlterTableTarget::SetHotTierBytes(n),
853                }))
854            }
855            Token::Ident(s) if s.eq_ignore_ascii_case("add") => {
856                self.advance();
857                // Optional `CONSTRAINT <name>` prefix, then the same
858                // FK clause shape as table-level CREATE TABLE FK.
859                let fk = self.parse_table_level_fk()?;
860                Ok(Statement::AlterTable(crate::ast::AlterTableStatement {
861                    name: table_name,
862                    target: crate::ast::AlterTableTarget::AddForeignKey(fk),
863                }))
864            }
865            Token::Drop => {
866                self.advance();
867                match self.advance() {
868                    Token::Ident(s) if s.eq_ignore_ascii_case("constraint") => {}
869                    other => {
870                        return Err(self.err(alloc::format!(
871                            "expected CONSTRAINT after DROP in ALTER TABLE, got {other:?}"
872                        )));
873                    }
874                }
875                let cname = self.expect_ident_like()?;
876                Ok(Statement::AlterTable(crate::ast::AlterTableStatement {
877                    name: table_name,
878                    target: crate::ast::AlterTableTarget::DropForeignKey(cname),
879                }))
880            }
881            other => Err(self.err(alloc::format!(
882                "expected SET / ADD / DROP in ALTER TABLE, got {other:?}"
883            ))),
884        }
885    }
886
887    /// Consume a bare ident if its lowercase matches `kw`, else err.
888    fn expect_keyword_ident(&mut self, kw: &str) -> Result<(), ParseError> {
889        match self.advance() {
890            Token::Ident(s) | Token::QuotedIdent(s) if s.eq_ignore_ascii_case(kw) => Ok(()),
891            other => Err(ParseError {
892                message: format!("expected {kw:?}, got {other:?}"),
893                token_pos: self.pos.saturating_sub(1),
894            }),
895        }
896    }
897
898    /// Accept either a quoted identifier (`"foo"`) or a quoted string
899    /// literal (`'foo'`) — same shape used by CREATE USER for the
900    /// username slot.
901    fn expect_ident_or_string(&mut self) -> Result<String, ParseError> {
902        match self.advance() {
903            Token::Ident(s) | Token::QuotedIdent(s) | Token::String(s) => Ok(s),
904            other => Err(ParseError {
905                message: format!("expected identifier or string, got {other:?}"),
906                token_pos: self.pos.saturating_sub(1),
907            }),
908        }
909    }
910
911    fn expect_string_literal(&mut self) -> Result<String, ParseError> {
912        match self.advance() {
913            Token::String(s) => Ok(s),
914            other => Err(ParseError {
915                message: format!("expected quoted string, got {other:?}"),
916                token_pos: self.pos.saturating_sub(1),
917            }),
918        }
919    }
920
921    fn parse_select_stmt(&mut self) -> Result<Statement, ParseError> {
922        // Caller dispatches on Token::Select; the inner helper handles
923        // the rest. ORDER BY / LIMIT bind at this top level; UNION peers
924        // get a fresh bare-select parse and may not have their own ORDER
925        // BY / LIMIT.
926        let mut head = self.parse_bare_select()?;
927        while matches!(self.peek(), Token::Union) {
928            self.advance();
929            let kind = if matches!(self.peek(), Token::All) {
930                self.advance();
931                UnionKind::All
932            } else {
933                UnionKind::Distinct
934            };
935            let peer = self.parse_bare_select()?;
936            head.unions.push((kind, peer));
937        }
938        head.order_by = if matches!(self.peek(), Token::Order) {
939            self.advance();
940            if !matches!(self.peek(), Token::By) {
941                return Err(self.err(format!("expected BY after ORDER, got {:?}", self.peek())));
942            }
943            self.advance();
944            // v6.4.0 — multi-key ORDER BY. Loop over comma-separated
945            // `<expr> [ASC|DESC]` items.
946            let mut keys = Vec::new();
947            loop {
948                let expr = self.parse_expr(0)?;
949                let desc = if matches!(self.peek(), Token::Desc) {
950                    self.advance();
951                    true
952                } else if matches!(self.peek(), Token::Asc) {
953                    self.advance();
954                    false
955                } else {
956                    false
957                };
958                keys.push(OrderBy { expr, desc });
959                if matches!(self.peek(), Token::Comma) {
960                    self.advance();
961                } else {
962                    break;
963                }
964            }
965            keys
966        } else {
967            Vec::new()
968        };
969        head.limit = if matches!(self.peek(), Token::Limit) {
970            self.advance();
971            Some(self.parse_limit_expr("LIMIT")?)
972        } else {
973            None
974        };
975        head.offset = if matches!(self.peek(), Token::Offset) {
976            self.advance();
977            Some(self.parse_limit_expr("OFFSET")?)
978        } else {
979            None
980        };
981        Ok(Statement::Select(head))
982    }
983
984    /// v7.9.24 — accept `LIMIT <int>` or `LIMIT $N`. mailrs H2.
985    /// Bind value gets resolved during prepared-statement Execute;
986    /// the Pratt expression parser would over-accept here (e.g.
987    /// `LIMIT 5 + 5`), so we narrowly accept only the two PG forms.
988    fn parse_limit_expr(&mut self, label: &str) -> Result<crate::ast::LimitExpr, ParseError> {
989        match self.advance() {
990            Token::Integer(n) if n >= 0 => u32::try_from(n)
991                .map(crate::ast::LimitExpr::Literal)
992                .map_err(|_| ParseError {
993                    message: alloc::format!("{label} value too large: {n}"),
994                    token_pos: self.pos.saturating_sub(1),
995                }),
996            Token::Placeholder(n) => Ok(crate::ast::LimitExpr::Placeholder(n)),
997            other => Err(ParseError {
998                message: alloc::format!(
999                    "expected non-negative integer or $N placeholder after {label}, got {other:?}"
1000                ),
1001                token_pos: self.pos.saturating_sub(1),
1002            }),
1003        }
1004    }
1005
1006    fn expect_u32_literal(&mut self, label: &str) -> Result<u32, ParseError> {
1007        match self.advance() {
1008            Token::Integer(n) if n >= 0 => u32::try_from(n).map_err(|_| ParseError {
1009                message: format!("{label} value too large: {n}"),
1010                token_pos: self.pos.saturating_sub(1),
1011            }),
1012            other => Err(ParseError {
1013                message: format!("expected non-negative integer after {label}, got {other:?}"),
1014                token_pos: self.pos.saturating_sub(1),
1015            }),
1016        }
1017    }
1018
1019    /// Parse one SELECT block without ORDER BY / LIMIT / UNION chaining —
1020    /// just `[DISTINCT] items [FROM] [WHERE] [GROUP BY]`. Returned with
1021    /// `unions` empty and `order_by` / `limit` `None`; the top-level
1022    /// `parse_select_stmt` is responsible for filling those in.
1023    fn parse_bare_select(&mut self) -> Result<SelectStatement, ParseError> {
1024        if !matches!(self.peek(), Token::Select) {
1025            return Err(self.err(format!(
1026                "expected SELECT to start a query block, got {:?}",
1027                self.peek()
1028            )));
1029        }
1030        self.advance();
1031        let distinct = if matches!(self.peek(), Token::Distinct) {
1032            self.advance();
1033            true
1034        } else {
1035            false
1036        };
1037        let items = self.parse_select_list()?;
1038        let from = if matches!(self.peek(), Token::From) {
1039            self.advance();
1040            Some(self.parse_from_clause()?)
1041        } else {
1042            None
1043        };
1044        let where_ = if matches!(self.peek(), Token::Where) {
1045            self.advance();
1046            Some(self.parse_expr(0)?)
1047        } else {
1048            None
1049        };
1050        let mut group_by_all = false;
1051        let group_by = if matches!(self.peek(), Token::Group) {
1052            self.advance();
1053            if !matches!(self.peek(), Token::By) {
1054                return Err(self.err(format!("expected BY after GROUP, got {:?}", self.peek())));
1055            }
1056            self.advance();
1057            // v6.4.1 — `GROUP BY ALL` shortcut. Planner expands to
1058            // every non-aggregate SELECT-list item later.
1059            if matches!(self.peek(), Token::All) {
1060                self.advance();
1061                group_by_all = true;
1062                None
1063            } else {
1064                let mut groups = Vec::new();
1065                loop {
1066                    groups.push(self.parse_expr(0)?);
1067                    if matches!(self.peek(), Token::Comma) {
1068                        self.advance();
1069                    } else {
1070                        break;
1071                    }
1072                }
1073                Some(groups)
1074            }
1075        } else {
1076            None
1077        };
1078        let having = if matches!(self.peek(), Token::Having) {
1079            self.advance();
1080            Some(self.parse_expr(0)?)
1081        } else {
1082            None
1083        };
1084        Ok(SelectStatement {
1085            ctes: Vec::new(),
1086            distinct,
1087            items,
1088            from,
1089            where_,
1090            group_by,
1091            group_by_all,
1092            having,
1093            unions: Vec::new(),
1094            order_by: Vec::new(),
1095            limit: None,
1096            offset: None,
1097        })
1098    }
1099
1100    fn parse_create_table_stmt_after_create(&mut self) -> Result<Statement, ParseError> {
1101        // Caller already consumed CREATE; we're sitting on TABLE.
1102        debug_assert!(matches!(self.peek(), Token::Table));
1103        self.advance();
1104        let if_not_exists = self.consume_if_not_exists();
1105        let name = self.expect_ident_like()?;
1106        if !matches!(self.peek(), Token::LParen) {
1107            return Err(self.err(format!(
1108                "expected '(' after table name, got {:?}",
1109                self.peek()
1110            )));
1111        }
1112        self.advance();
1113        let mut columns = Vec::new();
1114        let mut foreign_keys: Vec<ForeignKeyConstraint> = Vec::new();
1115        let mut table_constraints: Vec<crate::ast::TableConstraint> = Vec::new();
1116        loop {
1117            // v7.6.0 / v7.9.18 — distinguish table-level constraint
1118            // clauses from column definitions. Constraints start
1119            // with `CONSTRAINT <name> …`, `FOREIGN KEY (…)`,
1120            // `PRIMARY KEY (…)`, or `UNIQUE (…)`. Anything else is
1121            // a column.
1122            if self.peek_table_level_pk_start() {
1123                table_constraints.push(self.parse_table_level_primary_key()?);
1124            } else if self.peek_table_level_unique_start() {
1125                table_constraints.push(self.parse_table_level_unique()?);
1126            } else if self.peek_constraint_or_fk_start() {
1127                foreign_keys.push(self.parse_table_level_fk()?);
1128            } else {
1129                let (col, col_level_fk) = self.parse_column_def_with_fk()?;
1130                columns.push(col);
1131                if let Some(fk) = col_level_fk {
1132                    foreign_keys.push(fk);
1133                }
1134            }
1135            match self.peek() {
1136                Token::Comma => {
1137                    self.advance();
1138                }
1139                Token::RParen => {
1140                    self.advance();
1141                    break;
1142                }
1143                other => {
1144                    return Err(
1145                        self.err(format!("expected ',' or ')' in column list, got {other:?}"))
1146                    );
1147                }
1148            }
1149        }
1150        if columns.is_empty() {
1151            return Err(self.err("CREATE TABLE requires at least one column".into()));
1152        }
1153        Ok(Statement::CreateTable(CreateTableStatement {
1154            name,
1155            columns,
1156            if_not_exists,
1157            foreign_keys,
1158            table_constraints,
1159        }))
1160    }
1161
1162    /// v7.9.18 — true when the next tokens are `PRIMARY KEY (…)`.
1163    /// PRIMARY and KEY are bare idents; we look-ahead 2 to be
1164    /// sure (otherwise a column literally named `primary` would
1165    /// be mistaken).
1166    fn peek_table_level_pk_start(&self) -> bool {
1167        let cur = self.peek();
1168        let nxt = self.tokens.get(self.pos + 1);
1169        let nxt2 = self.tokens.get(self.pos + 2);
1170        let is_primary = matches!(cur, Token::Ident(s) if s.eq_ignore_ascii_case("primary"));
1171        let is_key = matches!(nxt, Some(Token::Ident(s)) if s.eq_ignore_ascii_case("key"));
1172        let is_lparen = matches!(nxt2, Some(Token::LParen));
1173        is_primary && is_key && is_lparen
1174    }
1175
1176    /// v7.9.18 — true when the next tokens are `UNIQUE (…)`.
1177    fn peek_table_level_unique_start(&self) -> bool {
1178        let cur = self.peek();
1179        let nxt = self.tokens.get(self.pos + 1);
1180        let is_unique = matches!(cur, Token::Ident(s) if s.eq_ignore_ascii_case("unique"));
1181        let is_lparen = matches!(nxt, Some(Token::LParen));
1182        is_unique && is_lparen
1183    }
1184
1185    fn parse_table_level_primary_key(
1186        &mut self,
1187    ) -> Result<crate::ast::TableConstraint, ParseError> {
1188        self.advance(); // PRIMARY
1189        self.advance(); // KEY
1190        let columns = self.parse_paren_ident_list("PRIMARY KEY")?;
1191        Ok(crate::ast::TableConstraint::PrimaryKey {
1192            name: None,
1193            columns,
1194        })
1195    }
1196
1197    fn parse_table_level_unique(
1198        &mut self,
1199    ) -> Result<crate::ast::TableConstraint, ParseError> {
1200        self.advance(); // UNIQUE
1201        let columns = self.parse_paren_ident_list("UNIQUE")?;
1202        Ok(crate::ast::TableConstraint::Unique {
1203            name: None,
1204            columns,
1205        })
1206    }
1207
1208    fn parse_paren_ident_list(
1209        &mut self,
1210        ctx: &str,
1211    ) -> Result<Vec<String>, ParseError> {
1212        if !matches!(self.peek(), Token::LParen) {
1213            return Err(self.err(alloc::format!(
1214                "expected '(' after {ctx}, got {:?}",
1215                self.peek()
1216            )));
1217        }
1218        self.advance();
1219        let mut out = Vec::new();
1220        loop {
1221            out.push(self.expect_ident_like()?);
1222            match self.peek() {
1223                Token::Comma => {
1224                    self.advance();
1225                }
1226                Token::RParen => {
1227                    self.advance();
1228                    break;
1229                }
1230                other => {
1231                    return Err(self.err(alloc::format!(
1232                        "expected ',' or ')' in {ctx} list, got {other:?}"
1233                    )));
1234                }
1235            }
1236        }
1237        if out.is_empty() {
1238            return Err(self.err(alloc::format!("{ctx} requires at least one column")));
1239        }
1240        Ok(out)
1241    }
1242
1243    /// v7.6.0 — true when the next tokens are `CONSTRAINT <name>
1244    /// FOREIGN KEY` or bare `FOREIGN KEY`. Both introduce a
1245    /// table-level FK; a column def never starts with either keyword
1246    /// (column names are not in this reserved set).
1247    fn peek_constraint_or_fk_start(&self) -> bool {
1248        let is_constraint_kw = matches!(
1249            self.peek(),
1250            Token::Ident(s) if s.eq_ignore_ascii_case("constraint")
1251        );
1252        let is_foreign_kw = matches!(
1253            self.peek(),
1254            Token::Ident(s) if s.eq_ignore_ascii_case("foreign")
1255        );
1256        is_constraint_kw || is_foreign_kw
1257    }
1258
1259    /// v7.6.0 — parse a table-level FK clause:
1260    /// `[CONSTRAINT <name>] FOREIGN KEY (<col>[,<col>]*) REFERENCES
1261    /// <tbl> [(<pcol>[,<pcol>]*)] [ON DELETE <action>] [ON UPDATE <action>]`.
1262    fn parse_table_level_fk(&mut self) -> Result<ForeignKeyConstraint, ParseError> {
1263        let mut name: Option<String> = None;
1264        if matches!(self.peek(), Token::Ident(s) if s.eq_ignore_ascii_case("constraint")) {
1265            self.advance();
1266            name = Some(self.expect_ident_like()?);
1267        }
1268        // `FOREIGN`
1269        match self.advance() {
1270            Token::Ident(s) if s.eq_ignore_ascii_case("foreign") => {}
1271            other => return Err(self.err(format!("expected FOREIGN, got {other:?}"))),
1272        }
1273        // `KEY`
1274        match self.advance() {
1275            Token::Ident(s) if s.eq_ignore_ascii_case("key") => {}
1276            other => return Err(self.err(format!("expected KEY after FOREIGN, got {other:?}"))),
1277        }
1278        // `(col, col, ...)`
1279        if !matches!(self.peek(), Token::LParen) {
1280            return Err(self.err(format!("expected '(' after FOREIGN KEY, got {:?}", self.peek())));
1281        }
1282        self.advance();
1283        let mut columns = Vec::new();
1284        loop {
1285            columns.push(self.expect_ident_like()?);
1286            match self.peek() {
1287                Token::Comma => {
1288                    self.advance();
1289                }
1290                Token::RParen => {
1291                    self.advance();
1292                    break;
1293                }
1294                other => return Err(self.err(format!("expected ',' or ')' in FK column list, got {other:?}"))),
1295            }
1296        }
1297        if columns.is_empty() {
1298            return Err(self.err("FOREIGN KEY requires at least one column".into()));
1299        }
1300        let (parent_table, parent_columns, on_delete, on_update) =
1301            self.parse_references_tail(columns.len())?;
1302        Ok(ForeignKeyConstraint {
1303            name,
1304            columns,
1305            parent_table,
1306            parent_columns,
1307            on_delete,
1308            on_update,
1309        })
1310    }
1311
1312    /// v7.6.0 — parse the tail `REFERENCES <tbl> [(<pcol>...)] [ON
1313    /// DELETE <action>] [ON UPDATE <action>]`. `expected_arity` is
1314    /// the local column count, used to default the parent column
1315    /// list when omitted (SQL spec: parent's PK is implied).
1316    fn parse_references_tail(
1317        &mut self,
1318        expected_arity: usize,
1319    ) -> Result<(String, Vec<String>, FkAction, FkAction), ParseError> {
1320        match self.advance() {
1321            Token::Ident(s) if s.eq_ignore_ascii_case("references") => {}
1322            other => return Err(self.err(format!("expected REFERENCES, got {other:?}"))),
1323        }
1324        let parent_table = self.expect_ident_like()?;
1325        let mut parent_columns: Vec<String> = Vec::new();
1326        if matches!(self.peek(), Token::LParen) {
1327            self.advance();
1328            loop {
1329                parent_columns.push(self.expect_ident_like()?);
1330                match self.peek() {
1331                    Token::Comma => {
1332                        self.advance();
1333                    }
1334                    Token::RParen => {
1335                        self.advance();
1336                        break;
1337                    }
1338                    other => return Err(self.err(format!("expected ',' or ')' in REFERENCES column list, got {other:?}"))),
1339                }
1340            }
1341        }
1342        if !parent_columns.is_empty() && parent_columns.len() != expected_arity {
1343            return Err(self.err(format!(
1344                "FK arity mismatch: {} local column(s) vs {} parent column(s)",
1345                expected_arity,
1346                parent_columns.len()
1347            )));
1348        }
1349        // v7.6.7 — accept and reject `[NOT] DEFERRABLE [INITIALLY
1350        // {DEFERRED | IMMEDIATE}]` so existing PG dumps don't fail
1351        // at parse time. SPG's single-writer model has no deferred
1352        // constraint window, so we surface this as a clean
1353        // unsupported-feature error rather than a syntax error.
1354        loop {
1355            if matches!(self.peek(), Token::Ident(s) if s.eq_ignore_ascii_case("deferrable")) {
1356                return Err(self.err(
1357                    "DEFERRABLE constraints are not supported (SPG is single-writer; \
1358                     constraints are always evaluated immediately at commit)"
1359                        .into(),
1360                ));
1361            }
1362            if matches!(self.peek(), Token::Not) {
1363                let look = self.tokens.get(self.pos + 1);
1364                if matches!(look, Some(Token::Ident(s)) if s.eq_ignore_ascii_case("deferrable")) {
1365                    // NOT DEFERRABLE — accept as the SPG default
1366                    // and consume both tokens silently.
1367                    self.advance();
1368                    self.advance();
1369                    // Optional `INITIALLY IMMEDIATE` clause.
1370                    if matches!(self.peek(), Token::Ident(s) if s.eq_ignore_ascii_case("initially"))
1371                    {
1372                        self.advance();
1373                        match self.advance() {
1374                            Token::Ident(s) if s.eq_ignore_ascii_case("immediate") => {}
1375                            other => {
1376                                return Err(self.err(format!(
1377                                    "expected IMMEDIATE after INITIALLY for NOT DEFERRABLE, \
1378                                     got {other:?}"
1379                                )));
1380                            }
1381                        }
1382                    }
1383                    continue;
1384                }
1385                break;
1386            }
1387            break;
1388        }
1389        // Optional `ON DELETE <action>` and `ON UPDATE <action>` in
1390        // either order, each at most once.
1391        let mut on_delete = FkAction::Restrict;
1392        let mut on_update = FkAction::Restrict;
1393        let mut seen_on_delete = false;
1394        let mut seen_on_update = false;
1395        loop {
1396            if !matches!(self.peek(), Token::On) {
1397                break;
1398            }
1399            self.advance();
1400            let which = self.advance();
1401            let action = self.parse_fk_action()?;
1402            match which {
1403                Token::Ident(ref s) if s.eq_ignore_ascii_case("delete") => {
1404                    if seen_on_delete {
1405                        return Err(self.err("ON DELETE specified twice".into()));
1406                    }
1407                    seen_on_delete = true;
1408                    on_delete = action;
1409                }
1410                Token::Ident(ref s) if s.eq_ignore_ascii_case("update") => {
1411                    if seen_on_update {
1412                        return Err(self.err("ON UPDATE specified twice".into()));
1413                    }
1414                    seen_on_update = true;
1415                    on_update = action;
1416                }
1417                other => {
1418                    return Err(self.err(format!(
1419                        "expected DELETE or UPDATE after ON, got {other:?}"
1420                    )));
1421                }
1422            }
1423        }
1424        Ok((parent_table, parent_columns, on_delete, on_update))
1425    }
1426
1427    /// v7.6.0 — parse `CASCADE | RESTRICT | SET NULL | SET DEFAULT |
1428    /// NO ACTION`.
1429    fn parse_fk_action(&mut self) -> Result<FkAction, ParseError> {
1430        match self.advance() {
1431            Token::Ident(s) if s.eq_ignore_ascii_case("cascade") => Ok(FkAction::Cascade),
1432            Token::Ident(s) if s.eq_ignore_ascii_case("restrict") => Ok(FkAction::Restrict),
1433            Token::Ident(s) if s.eq_ignore_ascii_case("set") => {
1434                match self.advance() {
1435                    Token::Null => Ok(FkAction::SetNull),
1436                    Token::Default => Ok(FkAction::SetDefault),
1437                    other => Err(self.err(format!(
1438                        "expected NULL or DEFAULT after SET in FK action, got {other:?}"
1439                    ))),
1440                }
1441            }
1442            Token::Ident(s) if s.eq_ignore_ascii_case("no") => {
1443                match self.advance() {
1444                    Token::Ident(s) if s.eq_ignore_ascii_case("action") => Ok(FkAction::NoAction),
1445                    other => Err(self.err(format!(
1446                        "expected ACTION after NO in FK action, got {other:?}"
1447                    ))),
1448                }
1449            }
1450            other => Err(self.err(format!(
1451                "expected CASCADE | RESTRICT | SET NULL | SET DEFAULT | NO ACTION, got {other:?}"
1452            ))),
1453        }
1454    }
1455
1456    /// Recognise the optional `IF NOT EXISTS` prefix shared by `CREATE
1457    /// TABLE` and `CREATE INDEX`. Returns `true` if consumed.
1458    fn consume_if_not_exists(&mut self) -> bool {
1459        // `IF` arrives as a bare Ident (we don't reserve it because it
1460        // also appears mid-expression in PG, though we don't support
1461        // those forms yet).
1462        let looks_like_if = matches!(self.peek(), Token::Ident(s) if s.eq_ignore_ascii_case("if"));
1463        if !looks_like_if {
1464            return false;
1465        }
1466        // Peek one ahead before committing: only consume IF when it's
1467        // actually `IF NOT EXISTS`.
1468        if !matches!(self.tokens.get(self.pos + 1), Some(Token::Not)) {
1469            return false;
1470        }
1471        if !matches!(
1472            self.tokens.get(self.pos + 2),
1473            Some(Token::Ident(s)) if s.eq_ignore_ascii_case("exists")
1474        ) {
1475            return false;
1476        }
1477        self.advance(); // IF
1478        self.advance(); // NOT
1479        self.advance(); // EXISTS
1480        true
1481    }
1482
1483    /// v7.9.14 — consume `ASC | DESC | NULLS FIRST | NULLS LAST`
1484    /// qualifiers after an index column ref. ASC / DESC are
1485    /// reserved tokens; NULLS / FIRST / LAST are bare idents.
1486    /// We accept and discard them since single-column BTree
1487    /// stores rows in natural key order today.
1488    fn consume_optional_index_column_qualifiers(&mut self) {
1489        loop {
1490            match self.peek() {
1491                Token::Asc | Token::Desc => {
1492                    self.advance();
1493                }
1494                Token::Ident(s) if s.eq_ignore_ascii_case("nulls") => {
1495                    let look = self.tokens.get(self.pos + 1);
1496                    if matches!(
1497                        look,
1498                        Some(Token::Ident(k)) if k.eq_ignore_ascii_case("first")
1499                            || k.eq_ignore_ascii_case("last")
1500                    ) {
1501                        self.advance();
1502                        self.advance();
1503                    } else {
1504                        break;
1505                    }
1506                }
1507                _ => break,
1508            }
1509        }
1510    }
1511
1512    fn parse_create_index_stmt_after_create(&mut self) -> Result<Statement, ParseError> {
1513        // Caller consumed CREATE; we're on INDEX.
1514        debug_assert!(matches!(self.peek(), Token::Index));
1515        self.advance();
1516        let if_not_exists = self.consume_if_not_exists();
1517        let name = self.expect_ident_like()?;
1518        if !matches!(self.peek(), Token::On) {
1519            return Err(self.err(format!(
1520                "expected ON after CREATE INDEX <name>, got {:?}",
1521                self.peek()
1522            )));
1523        }
1524        self.advance();
1525        let table = self.expect_ident_like()?;
1526        // Optional `USING <method>` — only recognised method in v2.0 is
1527        // `hnsw` (a single-layer NSW graph for kNN). `USING` is the bare
1528        // ident `using` (we don't promote it to a reserved keyword
1529        // because it isn't reserved anywhere else in our SQL surface).
1530        let method = if matches!(self.peek(), Token::Ident(s) if s.eq_ignore_ascii_case("using")) {
1531            self.advance();
1532            let m = self.expect_ident_like()?;
1533            match m.to_ascii_lowercase().as_str() {
1534                "hnsw" => IndexMethod::Hnsw,
1535                "btree" => IndexMethod::BTree,
1536                "brin" => IndexMethod::Brin,
1537                // v7.9.26b — PG `pg_dump` emits `USING gin` /
1538                // `USING gist` / `USING spgist` / `USING hash` for
1539                // their built-in index AMs. SPG doesn't have a
1540                // matching implementation; degrade to BTree on the
1541                // leading column so the schema loads + the index
1542                // catalogue stays consistent. Operator pays the
1543                // planner cost only for the queries that would have
1544                // used the specialised AM.
1545                "gin" | "gist" | "spgist" | "hash" => IndexMethod::BTree,
1546                other => {
1547                    return Err(self.err(alloc::format!(
1548                        "unknown index method {other:?}; supported: hnsw, btree, brin (gin/gist/spgist/hash accepted as BTree fallback)"
1549                    )));
1550                }
1551            }
1552        } else {
1553            IndexMethod::BTree
1554        };
1555        if !matches!(self.peek(), Token::LParen) {
1556            return Err(self.err(format!(
1557                "expected '(' before indexed column, got {:?}",
1558                self.peek()
1559            )));
1560        }
1561        self.advance();
1562        // v6.8.2 — accept either a bare column ident (legacy) or
1563        // an expression `fn(col, …)` for expression indexes.
1564        // Distinguish by peeking the token *after* the current
1565        // ident: `ident )` is the legacy column-only path;
1566        // anything else triggers the Pratt expression parser.
1567        // (`advance()` uses `mem::replace` to nil out the current
1568        // slot, so we can't save+rewind cleanly — peek-ahead via
1569        // direct index avoids the mutation.)
1570        let (column, expression): (String, Option<Expr>) = match self.peek().clone() {
1571            // Single column with `)` immediately after — fast path.
1572            Token::Ident(s) | Token::QuotedIdent(s)
1573                if matches!(self.tokens.get(self.pos + 1), Some(Token::RParen)) =>
1574            {
1575                self.advance();
1576                (s, None)
1577            }
1578            // v7.9.22 — single column followed by a pgvector
1579            // opclass ident: `(col vector_cosine_ops)`. mailrs G5.
1580            // SPG's HNSW currently picks its distance metric from
1581            // the query's operator (`<->` / `<#>` / `<=>`), so the
1582            // opclass is informational — accepted and discarded.
1583            // Recognised opclasses: vector_cosine_ops, vector_l2_ops,
1584            // vector_ip_ops, halfvec_*_ops, sq8_*_ops.
1585            Token::Ident(s) | Token::QuotedIdent(s)
1586                if matches!(
1587                    self.tokens.get(self.pos + 1),
1588                    Some(Token::Ident(op) | Token::QuotedIdent(op))
1589                        if is_vector_opclass_name(op)
1590                ) =>
1591            {
1592                self.advance(); // column name
1593                self.advance(); // opclass ident — drop
1594                (s, None)
1595            }
1596            Token::Ident(_) | Token::QuotedIdent(_) => {
1597                let key_expr = self.parse_expr(0)?;
1598                let primary = extract_first_column(&key_expr).ok_or_else(|| {
1599                    self.err(
1600                        "expression index key must reference at least one column".into(),
1601                    )
1602                })?;
1603                (primary, Some(key_expr))
1604            }
1605            other => {
1606                return Err(self.err(format!(
1607                    "expected column ident or expression, got {other:?}"
1608                )));
1609            }
1610        };
1611        // v7.9.14 — accept extra comma-separated columns inside
1612        // the index key parens (`CREATE INDEX … (a, b, c)`).
1613        // mailrs F2. Each extra column may carry an optional
1614        // `ASC` / `DESC` / `NULLS FIRST` / `NULLS LAST` clause
1615        // — parsed and discarded; SPG doesn't honour direction
1616        // on a BTree index today (column ordering is intrinsic
1617        // to the storage). v7.10 will widen to genuine composite
1618        // index keys.
1619        let mut extra_columns: Vec<String> = Vec::new();
1620        // The leading column may also have ASC/DESC after it.
1621        self.consume_optional_index_column_qualifiers();
1622        while matches!(self.peek(), Token::Comma) {
1623            self.advance();
1624            let extra = self.expect_ident_like()?;
1625            self.consume_optional_index_column_qualifiers();
1626            extra_columns.push(extra);
1627        }
1628        if !matches!(self.peek(), Token::RParen) {
1629            return Err(self.err(format!(
1630                "expected ')' after indexed column / expression, got {:?}",
1631                self.peek()
1632            )));
1633        }
1634        self.advance();
1635        // v6.8.0 — optional `INCLUDE (col1, col2, …)` clause for
1636        // index-only-scan annotation. Bare ident (not a reserved
1637        // keyword) so we test by case-insensitive string match.
1638        let included_columns =
1639            if matches!(self.peek(), Token::Ident(s) if s.eq_ignore_ascii_case("include")) {
1640                self.advance();
1641                if !matches!(self.peek(), Token::LParen) {
1642                    return Err(self.err(format!(
1643                        "expected '(' after INCLUDE, got {:?}",
1644                        self.peek()
1645                    )));
1646                }
1647                self.advance();
1648                let mut cols = Vec::new();
1649                loop {
1650                    cols.push(self.expect_ident_like()?);
1651                    match self.peek() {
1652                        Token::Comma => {
1653                            self.advance();
1654                        }
1655                        Token::RParen => {
1656                            self.advance();
1657                            break;
1658                        }
1659                        other => {
1660                            return Err(self.err(format!(
1661                                "expected ',' or ')' in INCLUDE list, got {other:?}"
1662                            )));
1663                        }
1664                    }
1665                }
1666                cols
1667            } else {
1668                Vec::new()
1669            };
1670        // v6.8.1 — optional `WHERE <expr>` partial-index predicate.
1671        let partial_predicate = if matches!(self.peek(), Token::Where) {
1672            self.advance();
1673            Some(self.parse_expr(0)?)
1674        } else {
1675            None
1676        };
1677        Ok(Statement::CreateIndex(CreateIndexStatement {
1678            name,
1679            table,
1680            column,
1681            method,
1682            if_not_exists,
1683            included_columns,
1684            partial_predicate,
1685            extra_columns: extra_columns.clone(),
1686            expression,
1687        }))
1688    }
1689
1690    /// v7.6.0 — wraps `parse_column_def` and consumes an optional
1691    /// column-level `REFERENCES ...` clause. The trailing FK is
1692    /// normalised into table-level shape (single-element columns +
1693    /// parent_columns) so the engine sees one uniform constraint list.
1694    fn parse_column_def_with_fk(
1695        &mut self,
1696    ) -> Result<(ColumnDef, Option<ForeignKeyConstraint>), ParseError> {
1697        let col = self.parse_column_def()?;
1698        // Inline form: `col INT REFERENCES tbl(pcol) [ON DELETE ...] [ON UPDATE ...]`.
1699        let inline_references = matches!(
1700            self.peek(),
1701            Token::Ident(s) if s.eq_ignore_ascii_case("references")
1702        );
1703        if !inline_references {
1704            return Ok((col, None));
1705        }
1706        let (parent_table, parent_columns, on_delete, on_update) =
1707            self.parse_references_tail(1)?;
1708        let fk = ForeignKeyConstraint {
1709            name: None,
1710            columns: vec![col.name.clone()],
1711            parent_table,
1712            parent_columns,
1713            on_delete,
1714            on_update,
1715        };
1716        Ok((col, Some(fk)))
1717    }
1718
1719    fn parse_column_def(&mut self) -> Result<ColumnDef, ParseError> {
1720        let name = self.expect_ident_like()?;
1721        // Type keyword arrives as a bare Ident (we did not promote type names
1722        // to keyword tokens — see lexer rationale).
1723        let ty_ident = match self.advance() {
1724            Token::Ident(s) => s,
1725            other => {
1726                return Err(ParseError {
1727                    message: format!("expected column type, got {other:?}"),
1728                    token_pos: self.pos.saturating_sub(1),
1729                });
1730            }
1731        };
1732        // v7.9.6 — PG `SERIAL` / `BIGSERIAL` shorthand for
1733        // `INT/BIGINT NOT NULL AUTO_INCREMENT`. PG also defines
1734        // SMALLSERIAL → SMALLINT; we accept that too. The implicit
1735        // NOT NULL + AUTO_INCREMENT flags get baked in after the
1736        // type tag so the rest of the constraint-loop parser sees
1737        // them as if user-supplied (rejecting duplicates).
1738        let mut implied_auto_increment = false;
1739        let mut implied_not_null = false;
1740        let ty = match ty_ident.as_str() {
1741            // PG SERIAL family. Implies NOT NULL + AUTO_INCREMENT.
1742            "smallserial" | "serial2" => {
1743                implied_auto_increment = true;
1744                implied_not_null = true;
1745                ColumnTypeName::SmallInt
1746            }
1747            "serial" | "serial4" => {
1748                implied_auto_increment = true;
1749                implied_not_null = true;
1750                ColumnTypeName::Int
1751            }
1752            "bigserial" | "serial8" => {
1753                implied_auto_increment = true;
1754                implied_not_null = true;
1755                ColumnTypeName::BigInt
1756            }
1757            // MySQL flavours we accept by aliasing to the closest SPG
1758            // type. TINYINT covers MySQL's i8 — held inside SMALLINT
1759            // since SPG doesn't have a dedicated i8. MEDIUMINT (MySQL
1760            // 24-bit) → INT. UNSIGNED modifiers are consumed below
1761            // without semantic effect.
1762            "smallint" | "tinyint" => ColumnTypeName::SmallInt,
1763            // INTEGER is MySQL's spelling for INT; MEDIUMINT widens up.
1764            "int" | "integer" | "mediumint" => ColumnTypeName::Int,
1765            "bigint" => ColumnTypeName::BigInt,
1766            // DOUBLE / REAL are 64-bit IEEE — same as our FLOAT.
1767            "float" | "double" | "real" => ColumnTypeName::Float,
1768            "text" => ColumnTypeName::Text,
1769            "bool" | "boolean" => ColumnTypeName::Bool,
1770            "varchar" => ColumnTypeName::Varchar(self.parse_paren_size("VARCHAR")?),
1771            "char" => ColumnTypeName::Char(self.parse_paren_size("CHAR")?),
1772            "vector" => {
1773                let dim = self.parse_paren_size("VECTOR")?;
1774                let encoding = self.parse_optional_vector_encoding()?;
1775                ColumnTypeName::Vector { dim, encoding }
1776            }
1777            "numeric" => {
1778                let (precision, scale) = self.parse_optional_numeric_params()?;
1779                ColumnTypeName::Numeric(precision, scale)
1780            }
1781            "date" => ColumnTypeName::Date,
1782            // MySQL's `DATETIME` is the same domain as standard
1783            // `TIMESTAMP` — accept both spellings.
1784            "timestamp" | "datetime" => ColumnTypeName::Timestamp,
1785            // v7.9.2 — `TIMESTAMPTZ` and full PG spelling
1786            // `TIMESTAMP WITH TIME ZONE`. Same storage as TIMESTAMP;
1787            // only PG-wire OID differs.
1788            "timestamptz" => ColumnTypeName::Timestamptz,
1789            // v4.9: JSON / JSONB. Stored as raw text — no parse-time
1790            // validation. We accept the JSONB spelling too because
1791            // most PG clients default to it; SPG doesn't distinguish
1792            // the two (no path-operator perf advantage to model).
1793            "json" => ColumnTypeName::Json,
1794            "jsonb" => ColumnTypeName::Jsonb,
1795            other => {
1796                return Err(ParseError {
1797                    message: format!("unsupported column type {other:?}"),
1798                    token_pos: self.pos.saturating_sub(1),
1799                });
1800            }
1801        };
1802        // MySQL's `UNSIGNED` modifier sits right after the type
1803        // keyword. SPG doesn't carry a separate unsigned variant —
1804        // accepting the keyword keeps existing schemas compatible
1805        // without changing semantics. Drop it silently.
1806        if matches!(self.peek(), Token::Ident(s) if s.eq_ignore_ascii_case("unsigned")) {
1807            self.advance();
1808        }
1809        // Column constraints: `DEFAULT <expr>`, `NOT NULL`, and the
1810        // MySQL-flavoured `AUTO_INCREMENT` may appear in any order;
1811        // each at most once.
1812        let mut default: Option<Expr> = None;
1813        let mut nullable = !implied_not_null;
1814        let mut nullability_seen = implied_not_null;
1815        let mut auto_increment = implied_auto_increment;
1816        let mut is_primary_key = false;
1817        loop {
1818            if matches!(self.peek(), Token::Default) {
1819                if default.is_some() {
1820                    return Err(self.err("DEFAULT specified twice".into()));
1821                }
1822                self.advance();
1823                default = Some(self.parse_expr(0)?);
1824                continue;
1825            }
1826            if matches!(self.peek(), Token::Not) {
1827                if nullability_seen {
1828                    return Err(self.err("NOT NULL specified twice".into()));
1829                }
1830                self.advance();
1831                if !matches!(self.peek(), Token::Null) {
1832                    return Err(self.err(format!(
1833                        "expected NULL after NOT in column def, got {:?}",
1834                        self.peek()
1835                    )));
1836                }
1837                self.advance();
1838                nullable = false;
1839                nullability_seen = true;
1840                continue;
1841            }
1842            // `AUTO_INCREMENT` or its abbreviated form `AUTOINCREMENT`
1843            // arrives as a bare Ident. Match either, case-insensitive.
1844            if let Token::Ident(s) = self.peek()
1845                && (s.eq_ignore_ascii_case("auto_increment")
1846                    || s.eq_ignore_ascii_case("autoincrement"))
1847            {
1848                if auto_increment {
1849                    return Err(self.err("AUTO_INCREMENT specified twice".into()));
1850                }
1851                self.advance();
1852                auto_increment = true;
1853                continue;
1854            }
1855            // v7.9.13 — inline `PRIMARY KEY` column constraint
1856            // (mailrs F1). Implies `NOT NULL`. The engine creates
1857            // a BTree index for the PK column at CREATE TABLE time
1858            // so FK parent-side index lookups resolve.
1859            if let Token::Ident(s) = self.peek()
1860                && s.eq_ignore_ascii_case("primary")
1861            {
1862                if is_primary_key {
1863                    return Err(self.err("PRIMARY KEY specified twice".into()));
1864                }
1865                // Peek-ahead for the required `KEY` token.
1866                let next = self.tokens.get(self.pos + 1);
1867                let next_is_key = matches!(
1868                    next,
1869                    Some(Token::Ident(k)) if k.eq_ignore_ascii_case("key")
1870                );
1871                if !next_is_key {
1872                    return Err(self.err(format!(
1873                        "expected KEY after PRIMARY in column def, got {:?}",
1874                        next
1875                    )));
1876                }
1877                self.advance(); // PRIMARY
1878                self.advance(); // KEY
1879                is_primary_key = true;
1880                if nullability_seen && nullable {
1881                    return Err(self.err(
1882                        "column declared NULL but inline PRIMARY KEY implies NOT NULL".into(),
1883                    ));
1884                }
1885                nullable = false;
1886                nullability_seen = true;
1887                continue;
1888            }
1889            break;
1890        }
1891        Ok(ColumnDef {
1892            name,
1893            ty,
1894            nullable,
1895            default,
1896            auto_increment,
1897            is_primary_key,
1898        })
1899    }
1900
1901    /// `NUMERIC` may appear without parameters, with one (precision
1902    /// only, scale=0), or with both. Returns `(precision, scale)` with
1903    /// 0 = unspecified for the bare form.
1904    fn parse_optional_numeric_params(&mut self) -> Result<(u8, u8), ParseError> {
1905        if !matches!(self.peek(), Token::LParen) {
1906            // Bare `NUMERIC` — PG treats this as "unlimited precision";
1907            // we surface it as precision=0 to mean "unconstrained" so
1908            // the engine doesn't need a separate variant.
1909            return Ok((0, 0));
1910        }
1911        self.advance();
1912        let precision = match self.advance() {
1913            Token::Integer(n) if (1..=38).contains(&n) => u8::try_from(n).expect("range-checked"),
1914            other => {
1915                return Err(ParseError {
1916                    message: format!(
1917                        "NUMERIC precision must be an integer in 1..=38, got {other:?}"
1918                    ),
1919                    token_pos: self.pos.saturating_sub(1),
1920                });
1921            }
1922        };
1923        let scale = if matches!(self.peek(), Token::Comma) {
1924            self.advance();
1925            match self.advance() {
1926                Token::Integer(n) if (0..=i64::from(precision)).contains(&n) => {
1927                    u8::try_from(n).expect("range-checked")
1928                }
1929                other => {
1930                    return Err(ParseError {
1931                        message: format!(
1932                            "NUMERIC scale must be a non-negative integer ≤ precision, got {other:?}"
1933                        ),
1934                        token_pos: self.pos.saturating_sub(1),
1935                    });
1936                }
1937            }
1938        } else {
1939            0
1940        };
1941        if !matches!(self.peek(), Token::RParen) {
1942            return Err(self.err(format!(
1943                "expected ')' to close NUMERIC params, got {:?}",
1944                self.peek()
1945            )));
1946        }
1947        self.advance();
1948        Ok((precision, scale))
1949    }
1950
1951    /// Parse `(N)` where `N` is a positive integer literal — used by the
1952    /// `VARCHAR`/`CHAR`/`VECTOR` column types. `label` is the type name
1953    /// for the error message.
1954    /// v6.0.1: parse the optional `USING <encoding>` clause that
1955    /// follows `VECTOR(N)` in a column definition. Missing clause
1956    /// → `VecEncoding::F32` (pre-v6 default). Unknown encoding
1957    /// ident → `ParseError` listing the encodings recognised today.
1958    fn parse_optional_vector_encoding(&mut self) -> Result<VecEncoding, ParseError> {
1959        if !matches!(self.peek(), Token::Ident(s) if s.eq_ignore_ascii_case("using")) {
1960            return Ok(VecEncoding::F32);
1961        }
1962        self.advance();
1963        let enc_ident = match self.advance() {
1964            Token::Ident(s) => s,
1965            other => {
1966                return Err(self.err(format!(
1967                    "expected vector encoding after USING, got {other:?}"
1968                )));
1969            }
1970        };
1971        match enc_ident.to_ascii_lowercase().as_str() {
1972            "sq8" => Ok(VecEncoding::Sq8),
1973            // v6.0.3: `HALF` (pgvector convention) selects IEEE-754
1974            // binary16 per-element storage.
1975            "half" => Ok(VecEncoding::F16),
1976            other => Err(self.err(format!(
1977                "unknown vector encoding {other:?}; supported: SQ8, HALF"
1978            ))),
1979        }
1980    }
1981
1982    fn parse_paren_size(&mut self, label: &str) -> Result<u32, ParseError> {
1983        if !matches!(self.peek(), Token::LParen) {
1984            return Err(self.err(format!("{label} type requires (N), got {:?}", self.peek())));
1985        }
1986        self.advance();
1987        let n = match self.advance() {
1988            Token::Integer(n) if n > 0 => u32::try_from(n).map_err(|_| ParseError {
1989                message: format!("{label} size too large: {n}"),
1990                token_pos: self.pos.saturating_sub(1),
1991            })?,
1992            other => {
1993                return Err(ParseError {
1994                    message: format!("expected positive integer {label} size, got {other:?}"),
1995                    token_pos: self.pos.saturating_sub(1),
1996                });
1997            }
1998        };
1999        if !matches!(self.peek(), Token::RParen) {
2000            return Err(self.err(format!(
2001                "expected ')' after {label} size, got {:?}",
2002                self.peek()
2003            )));
2004        }
2005        self.advance();
2006        Ok(n)
2007    }
2008
2009    fn parse_insert_stmt(&mut self) -> Result<Statement, ParseError> {
2010        debug_assert!(matches!(self.peek(), Token::Insert));
2011        self.advance();
2012        if !matches!(self.peek(), Token::Into) {
2013            return Err(self.err(format!("expected INTO after INSERT, got {:?}", self.peek())));
2014        }
2015        self.advance();
2016        let table = self.expect_ident_like()?;
2017        // Optional column list — `INSERT INTO t (a, b) VALUES ...`.
2018        let columns = if matches!(self.peek(), Token::LParen) {
2019            self.advance();
2020            let mut names = Vec::new();
2021            loop {
2022                names.push(self.expect_ident_like()?);
2023                match self.peek() {
2024                    Token::Comma => {
2025                        self.advance();
2026                    }
2027                    Token::RParen => {
2028                        self.advance();
2029                        break;
2030                    }
2031                    other => {
2032                        return Err(self.err(format!(
2033                            "expected ',' or ')' in INSERT column list, got {other:?}"
2034                        )));
2035                    }
2036                }
2037            }
2038            Some(names)
2039        } else {
2040            None
2041        };
2042        if !matches!(self.peek(), Token::Values) {
2043            return Err(self.err(format!(
2044                "expected VALUES after table name, got {:?}",
2045                self.peek()
2046            )));
2047        }
2048        self.advance();
2049        if !matches!(self.peek(), Token::LParen) {
2050            return Err(self.err(format!("expected '(' after VALUES, got {:?}", self.peek())));
2051        }
2052        let mut rows = Vec::new();
2053        loop {
2054            // Each iteration consumes one `(expr, expr, …)` tuple.
2055            if !matches!(self.peek(), Token::LParen) {
2056                return Err(self.err(format!(
2057                    "expected '(' for next VALUES tuple, got {:?}",
2058                    self.peek()
2059                )));
2060            }
2061            self.advance();
2062            let mut tuple = Vec::new();
2063            loop {
2064                tuple.push(self.parse_expr(0)?);
2065                match self.peek() {
2066                    Token::Comma => {
2067                        self.advance();
2068                    }
2069                    Token::RParen => {
2070                        self.advance();
2071                        break;
2072                    }
2073                    other => {
2074                        return Err(self.err(format!(
2075                            "expected ',' or ')' in VALUES tuple, got {other:?}"
2076                        )));
2077                    }
2078                }
2079            }
2080            if tuple.is_empty() {
2081                return Err(self.err("INSERT VALUES tuple requires at least one value".into()));
2082            }
2083            rows.push(tuple);
2084            // Continue with comma-separated tuples.
2085            if matches!(self.peek(), Token::Comma) {
2086                self.advance();
2087            } else {
2088                break;
2089            }
2090        }
2091        let on_conflict = self.parse_optional_on_conflict()?;
2092        let returning = self.parse_optional_returning()?;
2093        Ok(Statement::Insert(InsertStatement {
2094            table,
2095            columns,
2096            rows,
2097            on_conflict,
2098            returning,
2099        }))
2100    }
2101
2102    /// v7.9.7 — parse the optional `ON CONFLICT (cols) DO …`
2103    /// clause sitting between the INSERT body and the trailing
2104    /// RETURNING. All keywords come in as bare idents; `ON` is
2105    /// a reserved Token though.
2106    fn parse_optional_on_conflict(
2107        &mut self,
2108    ) -> Result<Option<crate::ast::OnConflictClause>, ParseError> {
2109        if !matches!(self.peek(), Token::On) {
2110            return Ok(None);
2111        }
2112        // Peek further: we want exactly "ON CONFLICT ...". If the
2113        // next ident isn't "conflict", let some other parser handle.
2114        let next_is_conflict = matches!(
2115            self.tokens.get(self.pos + 1),
2116            Some(Token::Ident(s) | Token::QuotedIdent(s)) if s.eq_ignore_ascii_case("conflict")
2117        );
2118        if !next_is_conflict {
2119            return Ok(None);
2120        }
2121        self.advance(); // ON
2122        self.advance(); // CONFLICT
2123        // Optional `(col [, col]*)` target list.
2124        let mut target_columns: Vec<String> = Vec::new();
2125        if matches!(self.peek(), Token::LParen) {
2126            self.advance();
2127            loop {
2128                target_columns.push(self.expect_ident_like()?);
2129                match self.peek() {
2130                    Token::Comma => {
2131                        self.advance();
2132                    }
2133                    Token::RParen => {
2134                        self.advance();
2135                        break;
2136                    }
2137                    other => {
2138                        return Err(self.err(alloc::format!(
2139                            "expected ',' or ')' in ON CONFLICT target list, got {other:?}"
2140                        )));
2141                    }
2142                }
2143            }
2144        }
2145        // Required `DO`.
2146        match self.advance() {
2147            Token::Ident(s) | Token::QuotedIdent(s) if s.eq_ignore_ascii_case("do") => {}
2148            other => {
2149                return Err(self.err(alloc::format!(
2150                    "expected DO after ON CONFLICT [(…)], got {other:?}"
2151                )));
2152            }
2153        }
2154        // Action: NOTHING | UPDATE SET …
2155        let action = match self.advance() {
2156            Token::Ident(s) | Token::QuotedIdent(s) if s.eq_ignore_ascii_case("nothing") => {
2157                crate::ast::OnConflictAction::Nothing
2158            }
2159            Token::Ident(s) | Token::QuotedIdent(s) if s.eq_ignore_ascii_case("update") => {
2160                self.parse_on_conflict_update_action()?
2161            }
2162            other => {
2163                return Err(self.err(alloc::format!(
2164                    "expected NOTHING or UPDATE after ON CONFLICT DO, got {other:?}"
2165                )));
2166            }
2167        };
2168        Ok(Some(crate::ast::OnConflictClause {
2169            target_columns,
2170            action,
2171        }))
2172    }
2173
2174    /// v7.9.7 — tail of `ON CONFLICT … DO UPDATE`: parse
2175    /// `SET col = expr [, …] [WHERE cond]`. Caller already
2176    /// consumed `UPDATE`.
2177    fn parse_on_conflict_update_action(
2178        &mut self,
2179    ) -> Result<crate::ast::OnConflictAction, ParseError> {
2180        // `SET`
2181        match self.advance() {
2182            Token::Ident(s) | Token::QuotedIdent(s) if s.eq_ignore_ascii_case("set") => {}
2183            other => {
2184                return Err(self.err(alloc::format!(
2185                    "expected SET after ON CONFLICT DO UPDATE, got {other:?}"
2186                )));
2187            }
2188        }
2189        let mut assignments: Vec<(String, Expr)> = Vec::new();
2190        loop {
2191            let col = self.expect_ident_like()?;
2192            if !matches!(self.peek(), Token::Eq) {
2193                return Err(self.err(alloc::format!(
2194                    "expected `=` after column in ON CONFLICT DO UPDATE SET, got {:?}",
2195                    self.peek()
2196                )));
2197            }
2198            self.advance();
2199            let value = self.parse_expr(0)?;
2200            assignments.push((col, value));
2201            if matches!(self.peek(), Token::Comma) {
2202                self.advance();
2203                continue;
2204            }
2205            break;
2206        }
2207        let where_ = if matches!(self.peek(), Token::Where) {
2208            self.advance();
2209            Some(self.parse_expr(0)?)
2210        } else {
2211            None
2212        };
2213        Ok(crate::ast::OnConflictAction::Update {
2214            assignments,
2215            where_,
2216        })
2217    }
2218
2219    fn parse_select_list(&mut self) -> Result<Vec<SelectItem>, ParseError> {
2220        let mut items = Vec::new();
2221        loop {
2222            items.push(self.parse_select_item()?);
2223            if matches!(self.peek(), Token::Comma) {
2224                self.advance();
2225            } else {
2226                break;
2227            }
2228        }
2229        Ok(items)
2230    }
2231
2232    fn parse_select_item(&mut self) -> Result<SelectItem, ParseError> {
2233        if matches!(self.peek(), Token::Star) {
2234            self.advance();
2235            return Ok(SelectItem::Wildcard);
2236        }
2237        let expr = self.parse_expr(0)?;
2238        let alias = self.parse_optional_alias();
2239        Ok(SelectItem::Expr { expr, alias })
2240    }
2241
2242    fn parse_table_ref(&mut self) -> Result<TableRef, ParseError> {
2243        let name = self.expect_ident_like()?;
2244        // v6.10.2 — optional `AS OF SEGMENT '<id>'` cold-tier
2245        // time-travel clause. Parse BEFORE the alias so the
2246        // alias can still ride at the tail (`tbl AS OF SEGMENT
2247        // '5' alias`). `AS` is a reserved keyword token, while
2248        // `OF` and `SEGMENT` are bare idents.
2249        let as_of_segment = if matches!(self.peek(), Token::As)
2250            && matches!(self.tokens.get(self.pos + 1), Some(Token::Ident(s) | Token::QuotedIdent(s)) if s.eq_ignore_ascii_case("of"))
2251        {
2252            self.advance(); // AS
2253            self.advance(); // OF
2254            let kw = match self.peek().clone() {
2255                Token::Ident(s) | Token::QuotedIdent(s) => s,
2256                other => {
2257                    return Err(self.err(format!(
2258                        "expected SEGMENT after AS OF, got {other:?}"
2259                    )));
2260                }
2261            };
2262            if !kw.eq_ignore_ascii_case("segment") {
2263                return Err(self.err(format!(
2264                    "expected SEGMENT after AS OF, got {kw:?}; v6.10.2 supports SEGMENT only"
2265                )));
2266            }
2267            self.advance();
2268            // Segment id literal — accept either a string or
2269            // integer for operator ergonomics.
2270            let id = match self.advance() {
2271                Token::String(s) => s
2272                    .parse::<u32>()
2273                    .map_err(|e| self.err(format!("AS OF SEGMENT id parse: {e}")))?,
2274                Token::Integer(n) => u32::try_from(n).map_err(|e| {
2275                    self.err(format!("AS OF SEGMENT id parse: {e}"))
2276                })?,
2277                other => {
2278                    return Err(self.err(format!(
2279                        "expected segment id literal after AS OF SEGMENT, got {other:?}"
2280                    )));
2281                }
2282            };
2283            Some(id)
2284        } else {
2285            None
2286        };
2287        let alias = self.parse_optional_alias();
2288        Ok(TableRef {
2289            name,
2290            alias,
2291            as_of_segment,
2292        })
2293    }
2294
2295    /// FROM-clause: a primary table reference plus zero-or-more joined
2296    /// peers expressed via either `, <table>` (cross-product, no ON) or
2297    /// `[INNER|LEFT [OUTER]|CROSS] JOIN <table> [ON expr]`. v1.10 keeps
2298    /// the join list flat (left-associative nested-loop semantics).
2299    fn parse_from_clause(&mut self) -> Result<FromClause, ParseError> {
2300        let primary = self.parse_table_ref()?;
2301        let mut joins = Vec::new();
2302        loop {
2303            // `, <table>` — cross-product with no ON.
2304            if matches!(self.peek(), Token::Comma) {
2305                self.advance();
2306                let table = self.parse_table_ref()?;
2307                joins.push(FromJoin {
2308                    kind: JoinKind::Cross,
2309                    table,
2310                    on: None,
2311                });
2312                continue;
2313            }
2314            // Explicit JOIN syntax. Accept INNER JOIN, LEFT [OUTER] JOIN,
2315            // CROSS JOIN, and bare JOIN (defaults to INNER).
2316            let kind =
2317                match self.peek() {
2318                    Token::Inner => {
2319                        self.advance();
2320                        if !matches!(self.peek(), Token::Join) {
2321                            return Err(self
2322                                .err(format!("expected JOIN after INNER, got {:?}", self.peek())));
2323                        }
2324                        self.advance();
2325                        JoinKind::Inner
2326                    }
2327                    Token::Left => {
2328                        self.advance();
2329                        if matches!(self.peek(), Token::Outer) {
2330                            self.advance();
2331                        }
2332                        if !matches!(self.peek(), Token::Join) {
2333                            return Err(self.err(format!(
2334                                "expected JOIN after LEFT [OUTER], got {:?}",
2335                                self.peek()
2336                            )));
2337                        }
2338                        self.advance();
2339                        JoinKind::Left
2340                    }
2341                    Token::Cross => {
2342                        self.advance();
2343                        if !matches!(self.peek(), Token::Join) {
2344                            return Err(self
2345                                .err(format!("expected JOIN after CROSS, got {:?}", self.peek())));
2346                        }
2347                        self.advance();
2348                        JoinKind::Cross
2349                    }
2350                    Token::Join => {
2351                        self.advance();
2352                        JoinKind::Inner
2353                    }
2354                    _ => break,
2355                };
2356            let table = self.parse_table_ref()?;
2357            let on = if matches!(self.peek(), Token::On) {
2358                self.advance();
2359                Some(self.parse_expr(0)?)
2360            } else if kind == JoinKind::Cross {
2361                None
2362            } else {
2363                return Err(self.err(format!(
2364                    "expected ON after {:?} JOIN, got {:?}",
2365                    kind,
2366                    self.peek()
2367                )));
2368            };
2369            joins.push(FromJoin { kind, table, on });
2370        }
2371        Ok(FromClause { primary, joins })
2372    }
2373
2374    /// Optional alias after an expression or table:
2375    /// `AS <ident>` is unambiguous; a bare `<ident>` directly after is also
2376    /// accepted (PG-style implicit alias). Returns `None` if the next token
2377    /// is not alias-shaped (e.g. comma, FROM, WHERE, semicolon, EOF, operator).
2378    fn parse_optional_alias(&mut self) -> Option<String> {
2379        if matches!(self.peek(), Token::As) {
2380            self.advance();
2381            // After AS, the next token MUST be an identifier-like — if not,
2382            // we still return None and let the caller surface the error on the
2383            // next expectation. v0.2 keeps the alias path forgiving; the
2384            // corpus tests don't exercise the malformed case.
2385            if let Token::Ident(_) | Token::QuotedIdent(_) = self.peek() {
2386                return self.expect_ident_like().ok();
2387            }
2388            return None;
2389        }
2390        if let Token::Ident(_) | Token::QuotedIdent(_) = self.peek() {
2391            return self.expect_ident_like().ok();
2392        }
2393        None
2394    }
2395
2396    /// Pratt loop. `min_prec` is the minimum binary-op precedence we'll accept.
2397    fn parse_expr(&mut self, min_prec: u8) -> Result<Expr, ParseError> {
2398        let mut lhs = self.parse_unary()?;
2399        while let Some((op, prec)) = binop_from(self.peek()) {
2400            if prec < min_prec {
2401                break;
2402            }
2403            self.advance();
2404            let rhs = self.parse_expr(prec + 1)?;
2405            lhs = Expr::Binary {
2406                lhs: Box::new(lhs),
2407                op,
2408                rhs: Box::new(rhs),
2409            };
2410        }
2411        Ok(lhs)
2412    }
2413
2414    fn parse_unary(&mut self) -> Result<Expr, ParseError> {
2415        match self.peek() {
2416            Token::Not => {
2417                self.advance();
2418                // NOT sits between AND (2) and comparisons (4) — bind everything
2419                // ≥3, which leaves AND/OR outside.
2420                let e = self.parse_expr(3)?;
2421                Ok(Expr::Unary {
2422                    op: UnOp::Not,
2423                    expr: Box::new(e),
2424                })
2425            }
2426            Token::Minus => {
2427                self.advance();
2428                // Unary minus binds tighter than `*`/`/` (now at prec 7 after
2429                // `<->` slotted into 5 and arithmetic shifted up).
2430                let e = self.parse_expr(8)?;
2431                Ok(Expr::Unary {
2432                    op: UnOp::Neg,
2433                    expr: Box::new(e),
2434                })
2435            }
2436            _ => self.parse_atom(),
2437        }
2438    }
2439
2440    fn parse_atom(&mut self) -> Result<Expr, ParseError> {
2441        let tok_pos = self.pos;
2442        match self.advance() {
2443            Token::Integer(n) => Ok(Expr::Literal(Literal::Integer(n))),
2444            Token::Float(x) => Ok(Expr::Literal(Literal::Float(x))),
2445            Token::String(s) => Ok(Expr::Literal(Literal::String(s))),
2446            Token::True => Ok(Expr::Literal(Literal::Bool(true))),
2447            Token::False => Ok(Expr::Literal(Literal::Bool(false))),
2448            Token::Null => Ok(Expr::Literal(Literal::Null)),
2449            // v6.1.1 — `$N` placeholder. The actual Value lookup
2450            // happens in the engine eval path against the prepared-
2451            // statement bind buffer.
2452            Token::Placeholder(n) => Ok(Expr::Placeholder(n)),
2453            Token::LParen => {
2454                // v4.10: `(SELECT ...)` in expression position is a
2455                // scalar subquery; otherwise it's a parenthesised
2456                // expression. Peek for SELECT keyword to dispatch.
2457                if matches!(self.peek(), Token::Select) {
2458                    let inner = self.parse_select_stmt()?;
2459                    match self.advance() {
2460                        Token::RParen => {
2461                            let Statement::Select(s) = inner else {
2462                                unreachable!("parse_select_stmt returns Select")
2463                            };
2464                            Ok(Expr::ScalarSubquery(Box::new(s)))
2465                        }
2466                        other => Err(ParseError {
2467                            message: format!("expected ')' after scalar subquery, got {other:?}"),
2468                            token_pos: self.pos.saturating_sub(1),
2469                        }),
2470                    }
2471                } else {
2472                    let e = self.parse_expr(0)?;
2473                    match self.advance() {
2474                        Token::RParen => Ok(e),
2475                        other => Err(ParseError {
2476                            message: format!("expected ')', got {other:?}"),
2477                            token_pos: self.pos.saturating_sub(1),
2478                        }),
2479                    }
2480                }
2481            }
2482            Token::LBracket => self.parse_vector_literal_body(),
2483            Token::Extract => self.parse_extract_atom(),
2484            Token::Interval => self.parse_interval_atom(),
2485            // v4.10: EXISTS / NOT EXISTS. EXISTS isn't a reserved
2486            // token; we match on the bare ident. NOT is a token
2487            // (consumed in the comparison rung), but `EXISTS (...)`
2488            // at the top of an expression starts here.
2489            Token::Ident(s) | Token::QuotedIdent(s) if s.eq_ignore_ascii_case("exists") => {
2490                self.parse_exists_atom(false)
2491            }
2492            Token::Ident(s) | Token::QuotedIdent(s) => self.finish_ident_atom(s),
2493            other => Err(ParseError {
2494                message: format!("unexpected token {other:?} in expression"),
2495                token_pos: tok_pos,
2496            }),
2497        }
2498        // After parsing the atom, fold any postfix `::vector` casts.
2499        .and_then(|atom| self.finish_postfix_casts(atom))
2500    }
2501
2502    /// Postfix operators on an atom: `::TYPE` cast and `IS [NOT] NULL`.
2503    /// Both bind tighter than any binary op.
2504    fn finish_postfix_casts(&mut self, mut expr: Expr) -> Result<Expr, ParseError> {
2505        loop {
2506            if matches!(self.peek(), Token::DoubleColon) {
2507                self.advance();
2508                // v7.9.25 / v7.9.26 — broaden the postfix `::` cast
2509                // target set to include INTERVAL (reserved Token),
2510                // TIMESTAMPTZ, and PG catalog regtype / regclass.
2511                // mailrs follow-up H3a + H3b.
2512                let target = match self.advance() {
2513                    Token::Ident(s) => match s.to_ascii_lowercase().as_str() {
2514                        "int" | "integer" | "int4" => CastTarget::Int,
2515                        "bigint" | "int8" => CastTarget::BigInt,
2516                        "float" | "double" | "real" => CastTarget::Float,
2517                        "text" => CastTarget::Text,
2518                        "bool" | "boolean" => CastTarget::Bool,
2519                        "vector" => CastTarget::Vector,
2520                        "date" => CastTarget::Date,
2521                        "timestamp" | "datetime" => CastTarget::Timestamp,
2522                        "timestamptz" => CastTarget::Timestamptz,
2523                        "interval" => CastTarget::Interval,
2524                        "json" => CastTarget::Json,
2525                        "jsonb" => CastTarget::Jsonb,
2526                        "regtype" => CastTarget::RegType,
2527                        "regclass" => CastTarget::RegClass,
2528                        other => {
2529                            return Err(ParseError {
2530                                message: format!("unsupported cast target `::{other}`"),
2531                                token_pos: self.pos.saturating_sub(1),
2532                            });
2533                        }
2534                    },
2535                    Token::Interval => CastTarget::Interval,
2536                    other => {
2537                        return Err(ParseError {
2538                            message: format!("expected type ident after `::`, got {other:?}"),
2539                            token_pos: self.pos.saturating_sub(1),
2540                        });
2541                    }
2542                };
2543                expr = Expr::Cast {
2544                    expr: Box::new(expr),
2545                    target,
2546                };
2547                continue;
2548            }
2549            if matches!(self.peek(), Token::Is) {
2550                self.advance();
2551                let negated = if matches!(self.peek(), Token::Not) {
2552                    self.advance();
2553                    true
2554                } else {
2555                    false
2556                };
2557                // v7.9.27b — `IS [NOT] DISTINCT FROM <rhs>`.
2558                // mailrs pg_dump.
2559                if matches!(self.peek(), Token::Distinct) {
2560                    self.advance();
2561                    if !matches!(self.peek(), Token::From) {
2562                        return Err(self.err(format!(
2563                            "expected FROM after IS{} DISTINCT, got {:?}",
2564                            if negated { " NOT" } else { "" },
2565                            self.peek()
2566                        )));
2567                    }
2568                    self.advance();
2569                    // Right-hand side: parse at the same precedence
2570                    // tier as comparison so `x IS DISTINCT FROM a + b`
2571                    // groups as `x IS DISTINCT FROM (a + b)`.
2572                    let rhs = self.parse_expr(20)?;
2573                    let op = if negated {
2574                        BinOp::IsNotDistinctFrom
2575                    } else {
2576                        BinOp::IsDistinctFrom
2577                    };
2578                    expr = Expr::Binary {
2579                        op,
2580                        lhs: Box::new(expr),
2581                        rhs: Box::new(rhs),
2582                    };
2583                    continue;
2584                }
2585                if !matches!(self.peek(), Token::Null) {
2586                    return Err(self.err(format!(
2587                        "expected NULL or DISTINCT after IS{}, got {:?}",
2588                        if negated { " NOT" } else { "" },
2589                        self.peek()
2590                    )));
2591                }
2592                self.advance();
2593                expr = Expr::IsNull {
2594                    expr: Box::new(expr),
2595                    negated,
2596                };
2597                continue;
2598            }
2599            // `x [NOT] BETWEEN a AND b`, `x [NOT] IN (...)`, `x [NOT] LIKE p`.
2600            // Look one token ahead so a stray `NOT` not followed by any of
2601            // these flows through to the early return below untouched.
2602            let negated = if matches!(self.peek(), Token::Not) {
2603                let next = self.tokens.get(self.pos + 1);
2604                matches!(next, Some(Token::Between | Token::In | Token::Like))
2605            } else {
2606                false
2607            };
2608            if negated {
2609                self.advance();
2610            }
2611            if matches!(self.peek(), Token::Between) {
2612                expr = self.parse_between_tail(expr, negated)?;
2613                continue;
2614            }
2615            if matches!(self.peek(), Token::In) {
2616                expr = self.parse_in_tail(expr, negated)?;
2617                continue;
2618            }
2619            if matches!(self.peek(), Token::Like) {
2620                self.advance();
2621                // Pattern at the same precedence as other comparison RHSes —
2622                // 5 leaves AND/OR alone so `a LIKE 'x%' AND b` parses right.
2623                let pattern = self.parse_expr(5)?;
2624                expr = Expr::Like {
2625                    expr: Box::new(expr),
2626                    pattern: Box::new(pattern),
2627                    negated,
2628                };
2629                continue;
2630            }
2631            return Ok(expr);
2632        }
2633    }
2634
2635    /// `x BETWEEN low AND high`  →  `(x >= low) AND (x <= high)`, wrapped in
2636    /// `NOT` when `negated`. Bounds parse at precedence 5 so the trailing
2637    /// `AND` is not swallowed.
2638    fn parse_between_tail(&mut self, expr: Expr, negated: bool) -> Result<Expr, ParseError> {
2639        self.advance(); // BETWEEN
2640        let low = self.parse_expr(5)?;
2641        if !matches!(self.peek(), Token::And) {
2642            return Err(self.err(format!(
2643                "expected AND after BETWEEN low bound, got {:?}",
2644                self.peek()
2645            )));
2646        }
2647        self.advance();
2648        let high = self.parse_expr(5)?;
2649        let target = Box::new(expr);
2650        let combined = Expr::Binary {
2651            lhs: Box::new(Expr::Binary {
2652                lhs: target.clone(),
2653                op: BinOp::GtEq,
2654                rhs: Box::new(low),
2655            }),
2656            op: BinOp::And,
2657            rhs: Box::new(Expr::Binary {
2658                lhs: target,
2659                op: BinOp::LtEq,
2660                rhs: Box::new(high),
2661            }),
2662        };
2663        Ok(maybe_not(combined, negated))
2664    }
2665
2666    /// `x IN (a, b, c)`  →  chained OR of equalities. Empty list collapses
2667    /// to FALSE (TRUE under NOT IN), matching standard SQL semantics.
2668    /// v4.11: parse `WITH name AS (SELECT ...) [, ...] SELECT ...`.
2669    /// Caller already consumed the leading `WITH` ident.
2670    fn parse_with_cte_then_select(&mut self) -> Result<Statement, ParseError> {
2671        // v4.22: WITH RECURSIVE — optional keyword right after WITH.
2672        // Comes through as an identifier; consume it if present and
2673        // mark every CTE in the clause as recursive (PG semantics —
2674        // the flag is per-WITH, not per-CTE).
2675        let mut recursive = false;
2676        if let Token::Ident(s) | Token::QuotedIdent(s) = self.peek()
2677            && s.eq_ignore_ascii_case("recursive")
2678        {
2679            self.advance();
2680            recursive = true;
2681        }
2682        let mut ctes = Vec::new();
2683        loop {
2684            let name = self.expect_ident_like()?;
2685            // v4.22: optional column-name list — `WITH t(a,b,c) AS ...`.
2686            // PG uses these to rename the body's output columns; we
2687            // do the same below by overriding `columns[i].name`.
2688            let column_overrides: Vec<String> = if matches!(self.peek(), Token::LParen) {
2689                self.advance();
2690                let mut names = Vec::new();
2691                loop {
2692                    names.push(self.expect_ident_like()?);
2693                    if matches!(self.peek(), Token::Comma) {
2694                        self.advance();
2695                        continue;
2696                    }
2697                    break;
2698                }
2699                if !matches!(self.peek(), Token::RParen) {
2700                    return Err(self.err(format!(
2701                        "expected ')' to close CTE column list, got {:?}",
2702                        self.peek()
2703                    )));
2704                }
2705                self.advance();
2706                names
2707            } else {
2708                Vec::new()
2709            };
2710            // AS is a reserved Token::As (used by SELECT-item / FROM
2711            // aliasing) — handle it specially rather than as a bare
2712            // ident.
2713            if !matches!(self.peek(), Token::As) {
2714                return Err(self.err(format!(
2715                    "expected AS after CTE name {name:?}, got {:?}",
2716                    self.peek()
2717                )));
2718            }
2719            self.advance();
2720            if !matches!(self.peek(), Token::LParen) {
2721                return Err(self.err(format!(
2722                    "expected '(' after AS in WITH clause, got {:?}",
2723                    self.peek()
2724                )));
2725            }
2726            self.advance();
2727            if !matches!(self.peek(), Token::Select) {
2728                return Err(self.err(format!("WITH body must be a SELECT, got {:?}", self.peek())));
2729            }
2730            let inner = self.parse_select_stmt()?;
2731            if !matches!(self.peek(), Token::RParen) {
2732                return Err(self.err(format!(
2733                    "expected ')' after CTE body, got {:?}",
2734                    self.peek()
2735                )));
2736            }
2737            self.advance();
2738            let Statement::Select(body) = inner else {
2739                unreachable!("parse_select_stmt returns Select")
2740            };
2741            ctes.push(crate::ast::Cte {
2742                name,
2743                body,
2744                recursive,
2745                column_overrides,
2746            });
2747            if matches!(self.peek(), Token::Comma) {
2748                self.advance();
2749                continue;
2750            }
2751            break;
2752        }
2753        // The body SELECT follows. Must start with SELECT.
2754        if !matches!(self.peek(), Token::Select) {
2755            return Err(self.err(format!(
2756                "expected SELECT after WITH clause, got {:?}",
2757                self.peek()
2758            )));
2759        }
2760        let body_stmt = self.parse_select_stmt()?;
2761        let Statement::Select(mut body) = body_stmt else {
2762            unreachable!()
2763        };
2764        body.ctes = ctes;
2765        Ok(Statement::Select(body))
2766    }
2767
2768    /// v4.10: parse `EXISTS (SELECT ...)`. Caller (`parse_atom`)
2769    /// already consumed the leading `EXISTS` ident via
2770    /// `self.advance()`.
2771    fn parse_exists_atom(&mut self, negated: bool) -> Result<Expr, ParseError> {
2772        if !matches!(self.peek(), Token::LParen) {
2773            return Err(self.err(format!("expected '(' after EXISTS, got {:?}", self.peek())));
2774        }
2775        self.advance();
2776        let inner = self.parse_select_stmt()?;
2777        if !matches!(self.peek(), Token::RParen) {
2778            return Err(self.err(format!(
2779                "expected ')' after EXISTS-subquery, got {:?}",
2780                self.peek()
2781            )));
2782        }
2783        self.advance();
2784        let Statement::Select(s) = inner else {
2785            unreachable!("parse_select_stmt returns Select")
2786        };
2787        Ok(Expr::Exists {
2788            subquery: Box::new(s),
2789            negated,
2790        })
2791    }
2792
2793    fn parse_in_tail(&mut self, expr: Expr, negated: bool) -> Result<Expr, ParseError> {
2794        self.advance(); // IN
2795        if !matches!(self.peek(), Token::LParen) {
2796            return Err(self.err(format!("expected '(' after IN, got {:?}", self.peek())));
2797        }
2798        self.advance();
2799        // v4.10: `IN (SELECT ...)` — subquery branch.
2800        if matches!(self.peek(), Token::Select) {
2801            let inner = self.parse_select_stmt()?;
2802            if !matches!(self.peek(), Token::RParen) {
2803                return Err(self.err(format!(
2804                    "expected ')' after IN-subquery, got {:?}",
2805                    self.peek()
2806                )));
2807            }
2808            self.advance();
2809            let Statement::Select(s) = inner else {
2810                unreachable!("parse_select_stmt always returns Statement::Select")
2811            };
2812            return Ok(Expr::InSubquery {
2813                expr: Box::new(expr),
2814                subquery: Box::new(s),
2815                negated,
2816            });
2817        }
2818        let mut elements = Vec::new();
2819        if !matches!(self.peek(), Token::RParen) {
2820            loop {
2821                elements.push(self.parse_expr(0)?);
2822                match self.peek() {
2823                    Token::Comma => {
2824                        self.advance();
2825                    }
2826                    Token::RParen => break,
2827                    other => {
2828                        return Err(
2829                            self.err(format!("expected ',' or ')' in IN list, got {other:?}"))
2830                        );
2831                    }
2832                }
2833            }
2834        }
2835        self.advance(); // ')'
2836        let target = Box::new(expr);
2837        let combined = if elements.is_empty() {
2838            Expr::Literal(Literal::Bool(false))
2839        } else {
2840            let mut iter = elements.into_iter();
2841            let first = iter.next().unwrap();
2842            let mut acc = Expr::Binary {
2843                lhs: target.clone(),
2844                op: BinOp::Eq,
2845                rhs: Box::new(first),
2846            };
2847            for elt in iter {
2848                acc = Expr::Binary {
2849                    lhs: Box::new(acc),
2850                    op: BinOp::Or,
2851                    rhs: Box::new(Expr::Binary {
2852                        lhs: target.clone(),
2853                        op: BinOp::Eq,
2854                        rhs: Box::new(elt),
2855                    }),
2856                };
2857            }
2858            acc
2859        };
2860        Ok(maybe_not(combined, negated))
2861    }
2862
2863    /// Parse a pgvector array literal `[ x1, x2, ... ]`. The opening `[` is
2864    /// already consumed by the caller. Elements must be numeric literals
2865    /// (with optional unary `-`); any compound expression is rejected at
2866    /// parse time so the runtime never needs to evaluate inside a vector.
2867    /// `EXTRACT(<field> FROM <source>)`. The dispatching `parse_atom`
2868    /// has already consumed the `EXTRACT` token before calling us —
2869    /// we pick up at the opening `(`.
2870    fn parse_extract_atom(&mut self) -> Result<Expr, ParseError> {
2871        if !matches!(self.peek(), Token::LParen) {
2872            return Err(self.err(format!("expected '(' after EXTRACT, got {:?}", self.peek())));
2873        }
2874        self.advance();
2875        let field_name = self.expect_ident_like()?;
2876        let field = match field_name.to_ascii_lowercase().as_str() {
2877            "year" => ExtractField::Year,
2878            "month" => ExtractField::Month,
2879            "day" => ExtractField::Day,
2880            "hour" => ExtractField::Hour,
2881            "minute" => ExtractField::Minute,
2882            "second" => ExtractField::Second,
2883            "microsecond" | "microseconds" => ExtractField::Microsecond,
2884            other => {
2885                return Err(self.err(format!(
2886                    "unknown EXTRACT field {other:?}; \
2887                     supported: YEAR, MONTH, DAY, HOUR, MINUTE, SECOND, MICROSECOND"
2888                )));
2889            }
2890        };
2891        if !matches!(self.peek(), Token::From) {
2892            return Err(self.err(format!(
2893                "expected FROM after EXTRACT field, got {:?}",
2894                self.peek()
2895            )));
2896        }
2897        self.advance();
2898        let source = self.parse_expr(0)?;
2899        if !matches!(self.peek(), Token::RParen) {
2900            return Err(self.err(format!(
2901                "expected ')' to close EXTRACT, got {:?}",
2902                self.peek()
2903            )));
2904        }
2905        self.advance();
2906        Ok(Expr::Extract {
2907            field,
2908            source: Box::new(source),
2909        })
2910    }
2911
2912    /// `INTERVAL '<n> <unit> [<n> <unit> ...]'` — the `INTERVAL` keyword
2913    /// is already consumed; we expect a single string literal next and
2914    /// resolve it into `Literal::Interval` at parse time so the engine
2915    /// never has to re-tokenise inside the string.
2916    fn parse_interval_atom(&mut self) -> Result<Expr, ParseError> {
2917        let tok = self.advance();
2918        let Token::String(text) = tok else {
2919            return Err(self.err(format!(
2920                "expected string literal after INTERVAL, got {tok:?}"
2921            )));
2922        };
2923        let (months, micros) = parse_interval_text(&text).ok_or_else(|| ParseError {
2924            message: format!(
2925                "cannot parse INTERVAL {text:?}; \
2926                     expected `<n> <unit> [<n> <unit> ...]` with units \
2927                     microsecond[s], millisecond[s], second[s], minute[s], \
2928                     hour[s], day[s], week[s], month[s], year[s]"
2929            ),
2930            token_pos: self.pos.saturating_sub(1),
2931        })?;
2932        Ok(Expr::Literal(Literal::Interval {
2933            months,
2934            micros,
2935            text,
2936        }))
2937    }
2938
2939    fn parse_vector_literal_body(&mut self) -> Result<Expr, ParseError> {
2940        let mut elems = Vec::new();
2941        if matches!(self.peek(), Token::RBracket) {
2942            self.advance();
2943            return Ok(Expr::Literal(Literal::Vector(elems)));
2944        }
2945        loop {
2946            let e = self.parse_expr(0)?;
2947            let x = extract_numeric_literal(&e).ok_or_else(|| ParseError {
2948                message: format!("vector element must be a numeric literal, got {e:?}"),
2949                token_pos: self.pos,
2950            })?;
2951            elems.push(x);
2952            match self.peek() {
2953                Token::Comma => {
2954                    self.advance();
2955                }
2956                Token::RBracket => {
2957                    self.advance();
2958                    break;
2959                }
2960                other => {
2961                    return Err(self.err(format!("expected ',' or ']' in vector, got {other:?}")));
2962                }
2963            }
2964        }
2965        Ok(Expr::Literal(Literal::Vector(elems)))
2966    }
2967
2968    /// Atom that started with an identifier: could be `t.col`, `col`, or
2969    /// `func(arg, ...)`. Detect each shape by looking at the next token.
2970    /// v4.12: parse `(PARTITION BY expr, ... ORDER BY expr [DESC]
2971    /// [, ...])`. Caller has already consumed `OVER`. Either clause
2972    /// is optional; an empty `()` is also legal (PG semantics).
2973    /// v6.4.2 — consume an optional `IGNORE NULLS` / `RESPECT NULLS`
2974    /// modifier between `name(args)` and `OVER (...)`. Default is
2975    /// `Respect`. Unrecognised idents leave the stream unchanged.
2976    fn parse_null_treatment_modifier(&mut self) -> NullTreatment {
2977        let Token::Ident(s) = self.peek().clone() else {
2978            return NullTreatment::Respect;
2979        };
2980        let is_ignore = s.eq_ignore_ascii_case("ignore");
2981        let is_respect = s.eq_ignore_ascii_case("respect");
2982        if !is_ignore && !is_respect {
2983            return NullTreatment::Respect;
2984        }
2985        // Lookahead for NULLS — only consume both tokens together.
2986        // pos+1 must hold a "nulls" ident.
2987        if self.pos + 1 < self.tokens.len()
2988            && let Token::Ident(s2) = &self.tokens[self.pos + 1]
2989            && s2.eq_ignore_ascii_case("nulls")
2990        {
2991            self.advance();
2992            self.advance();
2993            return if is_ignore {
2994                NullTreatment::Ignore
2995            } else {
2996                NullTreatment::Respect
2997            };
2998        }
2999        NullTreatment::Respect
3000    }
3001
3002    /// No frame clause is supported.
3003    #[allow(clippy::type_complexity)] // (partitions, ordered-keys-with-desc) is the natural shape
3004    fn parse_over_clause(
3005        &mut self,
3006    ) -> Result<(Vec<Expr>, Vec<(Expr, bool)>, Option<WindowFrame>), ParseError> {
3007        if !matches!(self.peek(), Token::LParen) {
3008            return Err(self.err(format!("expected '(' after OVER, got {:?}", self.peek())));
3009        }
3010        self.advance();
3011        let mut partition_by = Vec::new();
3012        let mut order_by = Vec::new();
3013        // PARTITION BY ?
3014        if let Token::Ident(s) | Token::QuotedIdent(s) = self.peek()
3015            && s.eq_ignore_ascii_case("partition")
3016        {
3017            self.advance();
3018            if !matches!(self.peek(), Token::By) {
3019                return Err(self.err(format!(
3020                    "expected BY after PARTITION, got {:?}",
3021                    self.peek()
3022                )));
3023            }
3024            self.advance();
3025            loop {
3026                partition_by.push(self.parse_expr(0)?);
3027                if matches!(self.peek(), Token::Comma) {
3028                    self.advance();
3029                    continue;
3030                }
3031                break;
3032            }
3033        }
3034        // ORDER BY ?
3035        if matches!(self.peek(), Token::Order) {
3036            self.advance();
3037            if !matches!(self.peek(), Token::By) {
3038                return Err(self.err(format!("expected BY after ORDER, got {:?}", self.peek())));
3039            }
3040            self.advance();
3041            loop {
3042                let e = self.parse_expr(0)?;
3043                let desc = if matches!(self.peek(), Token::Desc) {
3044                    self.advance();
3045                    true
3046                } else if matches!(self.peek(), Token::Asc) {
3047                    self.advance();
3048                    false
3049                } else {
3050                    false
3051                };
3052                order_by.push((e, desc));
3053                if matches!(self.peek(), Token::Comma) {
3054                    self.advance();
3055                    continue;
3056                }
3057                break;
3058            }
3059        }
3060        // v4.20: optional explicit frame, `ROWS ...` / `RANGE ...`.
3061        // Both keywords come through the lexer as identifiers; match
3062        // case-insensitively.
3063        let mut frame: Option<WindowFrame> = None;
3064        if let Token::Ident(s) | Token::QuotedIdent(s) = self.peek() {
3065            let kind = if s.eq_ignore_ascii_case("rows") {
3066                Some(FrameKind::Rows)
3067            } else if s.eq_ignore_ascii_case("range") {
3068                Some(FrameKind::Range)
3069            } else {
3070                None
3071            };
3072            if let Some(kind) = kind {
3073                self.advance();
3074                frame = Some(self.parse_frame_tail(kind)?);
3075            }
3076        }
3077        if !matches!(self.peek(), Token::RParen) {
3078            return Err(self.err(format!(
3079                "expected ')' to close OVER clause, got {:?}",
3080                self.peek()
3081            )));
3082        }
3083        self.advance();
3084        Ok((partition_by, order_by, frame))
3085    }
3086
3087    /// v4.20: parse the tail of an explicit frame, given the `ROWS`
3088    /// or `RANGE` keyword was just consumed. Accepts both
3089    /// `BETWEEN <bound> AND <bound>` and the single-bound shorthand
3090    /// (`ROWS UNBOUNDED PRECEDING`, `ROWS 5 PRECEDING`, etc.) which
3091    /// PG normalises to `BETWEEN <bound> AND CURRENT ROW`.
3092    fn parse_frame_tail(&mut self, kind: FrameKind) -> Result<WindowFrame, ParseError> {
3093        if matches!(self.peek(), Token::Between) {
3094            self.advance();
3095            let start = self.parse_frame_bound()?;
3096            if !matches!(self.peek(), Token::And) {
3097                return Err(self.err(format!("expected AND in frame spec, got {:?}", self.peek())));
3098            }
3099            self.advance();
3100            let end = self.parse_frame_bound()?;
3101            Ok(WindowFrame {
3102                kind,
3103                start,
3104                end: Some(end),
3105            })
3106        } else {
3107            let start = self.parse_frame_bound()?;
3108            Ok(WindowFrame {
3109                kind,
3110                start,
3111                end: None,
3112            })
3113        }
3114    }
3115
3116    /// Parse one frame bound: `UNBOUNDED PRECEDING`, `<n> PRECEDING`,
3117    /// `CURRENT ROW`, `<n> FOLLOWING`, `UNBOUNDED FOLLOWING`.
3118    fn parse_frame_bound(&mut self) -> Result<FrameBound, ParseError> {
3119        // Number-led: "<n> PRECEDING" / "<n> FOLLOWING".
3120        if let Token::Integer(n) = *self.peek() {
3121            self.advance();
3122            let n: u64 = u64::try_from(n).map_err(|_| {
3123                self.err(format!(
3124                    "invalid frame offset {n} — expected non-negative integer"
3125                ))
3126            })?;
3127            let dir = self.expect_ident_like()?;
3128            return if dir.eq_ignore_ascii_case("preceding") {
3129                Ok(FrameBound::OffsetPreceding(n))
3130            } else if dir.eq_ignore_ascii_case("following") {
3131                Ok(FrameBound::OffsetFollowing(n))
3132            } else {
3133                Err(self.err(format!(
3134                    "expected PRECEDING or FOLLOWING after offset, got {dir:?}"
3135                )))
3136            };
3137        }
3138        let first = self.expect_ident_like()?;
3139        if first.eq_ignore_ascii_case("unbounded") {
3140            let dir = self.expect_ident_like()?;
3141            return if dir.eq_ignore_ascii_case("preceding") {
3142                Ok(FrameBound::UnboundedPreceding)
3143            } else if dir.eq_ignore_ascii_case("following") {
3144                Ok(FrameBound::UnboundedFollowing)
3145            } else {
3146                Err(self.err(format!(
3147                    "expected PRECEDING or FOLLOWING after UNBOUNDED, got {dir:?}"
3148                )))
3149            };
3150        }
3151        if first.eq_ignore_ascii_case("current") {
3152            let row = self.expect_ident_like()?;
3153            if !row.eq_ignore_ascii_case("row") {
3154                return Err(self.err(format!("expected ROW after CURRENT, got {row:?}")));
3155            }
3156            return Ok(FrameBound::CurrentRow);
3157        }
3158        Err(self.err(format!(
3159            "expected frame bound (UNBOUNDED/CURRENT/<n>), got {first:?}"
3160        )))
3161    }
3162
3163    fn finish_ident_atom(&mut self, first: String) -> Result<Expr, ParseError> {
3164        if matches!(self.peek(), Token::Dot) {
3165            self.advance();
3166            let name = self.expect_ident_like()?;
3167            return Ok(Expr::Column(ColumnName {
3168                qualifier: Some(first),
3169                name,
3170            }));
3171        }
3172        if matches!(self.peek(), Token::LParen) {
3173            self.advance();
3174            // `COUNT(*)` — special-cased here because `*` isn't a normal
3175            // expression token. Lower-case match on `first` since the lexer
3176            // folds identifiers.
3177            if first.eq_ignore_ascii_case("count") && matches!(self.peek(), Token::Star) {
3178                self.advance();
3179                if !matches!(self.peek(), Token::RParen) {
3180                    return Err(self.err(format!(
3181                        "expected ')' after COUNT(*), got {:?}",
3182                        self.peek()
3183                    )));
3184                }
3185                self.advance();
3186                // v4.12: COUNT(*) OVER (...) — same window tail.
3187                let null_treatment = self.parse_null_treatment_modifier();
3188                if let Token::Ident(s) | Token::QuotedIdent(s) = self.peek()
3189                    && s.eq_ignore_ascii_case("over")
3190                {
3191                    self.advance();
3192                    let (partition_by, order_by, frame) = self.parse_over_clause()?;
3193                    return Ok(Expr::WindowFunction {
3194                        name: "count_star".into(),
3195                        args: Vec::new(),
3196                        partition_by,
3197                        order_by,
3198                        frame,
3199                        null_treatment,
3200                    });
3201                }
3202                return Ok(Expr::FunctionCall {
3203                    name: "count_star".into(),
3204                    args: Vec::new(),
3205                });
3206            }
3207            // Function call. PG-style: zero-or-more comma-separated args.
3208            let mut args = Vec::new();
3209            if !matches!(self.peek(), Token::RParen) {
3210                loop {
3211                    args.push(self.parse_expr(0)?);
3212                    match self.peek() {
3213                        Token::Comma => {
3214                            self.advance();
3215                        }
3216                        Token::RParen => break,
3217                        other => {
3218                            return Err(self.err(format!(
3219                                "expected ',' or ')' in function args, got {other:?}"
3220                            )));
3221                        }
3222                    }
3223                }
3224            }
3225            self.advance(); // consume ')'
3226            // v4.12: window-function tail — `name(args) OVER (...)`.
3227            // Promotes the just-parsed FunctionCall into a
3228            // WindowFunction node carrying partition + order.
3229            // v6.4.2: also accepts `name(args) IGNORE NULLS OVER (...)`
3230            // / `RESPECT NULLS OVER (...)` between the closing paren
3231            // and `OVER`.
3232            let null_treatment = self.parse_null_treatment_modifier();
3233            if let Token::Ident(s) | Token::QuotedIdent(s) = self.peek()
3234                && s.eq_ignore_ascii_case("over")
3235            {
3236                self.advance();
3237                let (partition_by, order_by, frame) = self.parse_over_clause()?;
3238                return Ok(Expr::WindowFunction {
3239                    name: first,
3240                    args,
3241                    partition_by,
3242                    order_by,
3243                    frame,
3244                    null_treatment,
3245                });
3246            }
3247            return Ok(Expr::FunctionCall { name: first, args });
3248        }
3249        // v7.9.20 — SQL-standard parenless keyword expressions
3250        // (PG treats these as functions called without parens).
3251        // Resolve to a synthetic FunctionCall so the engine's
3252        // eval path reuses the existing function-call routing.
3253        // mailrs G3.
3254        let lc = first.to_ascii_lowercase();
3255        if matches!(
3256            lc.as_str(),
3257            "current_date"
3258                | "current_time"
3259                | "current_timestamp"
3260                | "localtimestamp"
3261                | "localtime"
3262        ) {
3263            return Ok(Expr::FunctionCall {
3264                name: lc,
3265                args: Vec::new(),
3266            });
3267        }
3268        Ok(Expr::Column(ColumnName {
3269            qualifier: None,
3270            name: first,
3271        }))
3272    }
3273}
3274
3275/// v6.8.2 — walk an expression tree and return the first column
3276/// reference's bare name. Used by `parse_create_index_stmt_after_create`
3277/// to derive `CreateIndexStatement.column` from an expression
3278/// key (so downstream planner code resolving a primary column
3279/// position keeps working with expression indexes). Returns
3280/// `None` when the expression has no column ref at all — caller
3281/// surfaces that as a parse error.
3282fn extract_first_column(expr: &Expr) -> Option<String> {
3283    match expr {
3284        Expr::Column(cn) => Some(cn.name.clone()),
3285        Expr::FunctionCall { args, .. } => args.iter().find_map(extract_first_column),
3286        Expr::Binary { lhs, rhs, .. } => {
3287            extract_first_column(lhs).or_else(|| extract_first_column(rhs))
3288        }
3289        Expr::Unary { expr: e, .. } => extract_first_column(e),
3290        _ => None,
3291    }
3292}
3293
3294fn maybe_not(expr: Expr, negated: bool) -> Expr {
3295    if negated {
3296        Expr::Unary {
3297            op: UnOp::Not,
3298            expr: Box::new(expr),
3299        }
3300    } else {
3301        expr
3302    }
3303}
3304
3305fn binop_from(tok: &Token) -> Option<(BinOp, u8)> {
3306    let pair = match tok {
3307        Token::Or => (BinOp::Or, 1),
3308        Token::And => (BinOp::And, 2),
3309        Token::Eq => (BinOp::Eq, 4),
3310        Token::NotEq => (BinOp::NotEq, 4),
3311        Token::Lt => (BinOp::Lt, 4),
3312        Token::LtEq => (BinOp::LtEq, 4),
3313        Token::Gt => (BinOp::Gt, 4),
3314        Token::GtEq => (BinOp::GtEq, 4),
3315        // pgvector distance ops all sit on the same rung — tighter than
3316        // comparisons (4) so `col <-> v < threshold` parses correctly.
3317        Token::L2Distance => (BinOp::L2Distance, 5),
3318        Token::InnerProduct => (BinOp::InnerProduct, 5),
3319        Token::CosineDistance => (BinOp::CosineDistance, 5),
3320        Token::Plus => (BinOp::Add, 6),
3321        Token::Minus => (BinOp::Sub, 6),
3322        // `||` sits beside `+`/`-` (matches PG conceptually — concat groups
3323        // by the same level as binary additive arithmetic).
3324        Token::Concat => (BinOp::Concat, 6),
3325        Token::Star => (BinOp::Mul, 7),
3326        Token::Slash => (BinOp::Div, 7),
3327        // v4.14: JSON path ops bind tighter than comparisons (4)
3328        // and additive (6) so `doc->'k' = 'v'` parses correctly.
3329        // Same rung as the multiplicative ops.
3330        Token::JsonGet => (BinOp::JsonGet, 7),
3331        Token::JsonGetText => (BinOp::JsonGetText, 7),
3332        Token::JsonGetPath => (BinOp::JsonGetPath, 7),
3333        Token::JsonGetPathText => (BinOp::JsonGetPathText, 7),
3334        Token::JsonContains => (BinOp::JsonContains, 7),
3335        _ => return None,
3336    };
3337    Some(pair)
3338}
3339
3340#[allow(clippy::cast_possible_truncation, clippy::cast_precision_loss)]
3341// `as f32` here is intentional: vector elements widen / narrow into f32 on
3342// purpose. i64 → f32 loses precision past 2^24, f64 → f32 loses precision
3343// past ~15 decimal digits — both are acceptable for a fixed-precision
3344// pgvector column.
3345fn extract_numeric_literal(e: &Expr) -> Option<f32> {
3346    match e {
3347        Expr::Literal(Literal::Integer(n)) => Some(*n as f32),
3348        Expr::Literal(Literal::Float(x)) => Some(*x as f32),
3349        Expr::Unary {
3350            op: UnOp::Neg,
3351            expr,
3352        } => extract_numeric_literal(expr).map(|x| -x),
3353        _ => None,
3354    }
3355}
3356
/// Parse the text inside `INTERVAL '...'` into `(months, micros)`.
///
/// The text is a whitespace-separated sequence of one or more
/// `<n> <unit>` pairs; `<n>` is a (possibly negative) integer. Month-
/// based units accumulate into the `i32` month total, everything else
/// into the `i64` microsecond total.
///
/// Recognised units (case-insensitive, optional trailing `s`):
/// `microsecond`, `millisecond`, `second`, `minute`, `hour`, `day`, `week`,
/// `month`, `year`. `week` widens to 7 days; `year` widens to 12 months.
///
/// Returns `None` when the text is empty, a count or unit is left
/// without its partner, a count does not parse, a unit is unknown, or
/// either total overflows.
pub fn parse_interval_text(s: &str) -> Option<(i32, i64)> {
    /// How one unit of a recognised kind feeds the running totals.
    enum Scale {
        Micros(i64),
        Months(i32),
    }

    let tokens: Vec<&str> = s.split_whitespace().collect();
    let pairs = tokens.chunks_exact(2);
    // No tokens at all, or a dangling token with no partner.
    if tokens.is_empty() || !pairs.remainder().is_empty() {
        return None;
    }

    let mut total_months: i32 = 0;
    let mut total_micros: i64 = 0;
    for pair in pairs {
        let count: i64 = pair[0].parse().ok()?;
        let unit = pair[1].to_ascii_lowercase();
        let singular = unit.strip_suffix('s').unwrap_or(unit.as_str());
        let scale = match singular {
            "microsecond" => Scale::Micros(1),
            "millisecond" => Scale::Micros(1_000),
            "second" => Scale::Micros(1_000_000),
            "minute" => Scale::Micros(60_000_000),
            "hour" => Scale::Micros(3_600_000_000),
            "day" => Scale::Micros(86_400_000_000),
            "week" => Scale::Micros(604_800_000_000),
            "month" => Scale::Months(1),
            "year" => Scale::Months(12),
            _ => return None,
        };
        match scale {
            Scale::Micros(per_unit) => {
                total_micros = total_micros.checked_add(count.checked_mul(per_unit)?)?;
            }
            Scale::Months(per_unit) => {
                let count = i32::try_from(count).ok()?;
                total_months = total_months.checked_add(count.checked_mul(per_unit)?)?;
            }
        }
    }
    Some((total_months, total_micros))
}
3398
3399#[cfg(test)]
3400mod tests {
3401    use super::*;
3402    use alloc::string::ToString;
3403
3404    fn parse(s: &str) -> Statement {
3405        parse_statement(s).expect("parse ok")
3406    }
3407
3408    fn lit_int(n: i64) -> Expr {
3409        Expr::Literal(Literal::Integer(n))
3410    }
3411
3412    fn col(name: &str) -> Expr {
3413        Expr::Column(ColumnName {
3414            qualifier: None,
3415            name: name.into(),
3416        })
3417    }
3418
3419    #[test]
3420    fn select_single_integer() {
3421        let s = parse("SELECT 1");
3422        let Statement::Select(s) = s else {
3423            panic!("expected SELECT")
3424        };
3425        assert_eq!(s.items.len(), 1);
3426        assert!(s.from.is_none());
3427        assert!(s.where_.is_none());
3428    }
3429
3430    #[test]
3431    fn select_multiple_literal_kinds() {
3432        let s = parse("SELECT 1, 'hi', NULL, TRUE, 1.5");
3433        let Statement::Select(s) = s else {
3434            panic!("expected SELECT")
3435        };
3436        assert_eq!(s.items.len(), 5);
3437    }
3438
3439    #[test]
3440    fn select_wildcard_from_table() {
3441        let s = parse("SELECT * FROM users");
3442        let Statement::Select(s) = s else {
3443            panic!("expected SELECT")
3444        };
3445        assert!(matches!(s.items[..], [SelectItem::Wildcard]));
3446        assert_eq!(s.from.as_ref().unwrap().primary.name, "users");
3447    }
3448
3449    #[test]
3450    fn select_with_table_alias() {
3451        let s = parse("SELECT * FROM users AS u");
3452        let Statement::Select(s) = s else {
3453            panic!("expected SELECT")
3454        };
3455        let t = &s.from.as_ref().unwrap().primary;
3456        assert_eq!(t.name, "users");
3457        assert_eq!(t.alias.as_deref(), Some("u"));
3458    }
3459
3460    #[test]
3461    fn select_with_where_eq() {
3462        let s = parse("SELECT a FROM t WHERE a = 1");
3463        let Statement::Select(s) = s else {
3464            panic!("expected SELECT")
3465        };
3466        let w = s.where_.unwrap();
3467        assert_eq!(
3468            w,
3469            Expr::Binary {
3470                lhs: Box::new(col("a")),
3471                op: BinOp::Eq,
3472                rhs: Box::new(lit_int(1)),
3473            }
3474        );
3475    }
3476
3477    #[test]
3478    fn arithmetic_precedence() {
3479        let s = parse("SELECT 1 + 2 * 3");
3480        let Statement::Select(s) = s else {
3481            panic!("expected SELECT")
3482        };
3483        let SelectItem::Expr { expr, .. } = &s.items[0] else {
3484            panic!("wildcard?")
3485        };
3486        assert_eq!(
3487            expr,
3488            &Expr::Binary {
3489                lhs: Box::new(lit_int(1)),
3490                op: BinOp::Add,
3491                rhs: Box::new(Expr::Binary {
3492                    lhs: Box::new(lit_int(2)),
3493                    op: BinOp::Mul,
3494                    rhs: Box::new(lit_int(3)),
3495                }),
3496            }
3497        );
3498    }
3499
3500    #[test]
3501    fn parentheses_override_precedence() {
3502        let s = parse("SELECT (1 + 2) * 3");
3503        let Statement::Select(s) = s else {
3504            panic!("expected SELECT")
3505        };
3506        let SelectItem::Expr { expr, .. } = &s.items[0] else {
3507            panic!()
3508        };
3509        assert_eq!(
3510            expr,
3511            &Expr::Binary {
3512                lhs: Box::new(Expr::Binary {
3513                    lhs: Box::new(lit_int(1)),
3514                    op: BinOp::Add,
3515                    rhs: Box::new(lit_int(2)),
3516                }),
3517                op: BinOp::Mul,
3518                rhs: Box::new(lit_int(3)),
3519            }
3520        );
3521    }
3522
3523    #[test]
3524    fn not_binds_below_comparison() {
3525        // `NOT a = 1` should parse as `NOT (a = 1)`.
3526        let s = parse("SELECT NOT a = 1 FROM t");
3527        let Statement::Select(s) = s else {
3528            panic!("expected SELECT")
3529        };
3530        let SelectItem::Expr { expr, .. } = &s.items[0] else {
3531            panic!()
3532        };
3533        assert_eq!(
3534            expr,
3535            &Expr::Unary {
3536                op: UnOp::Not,
3537                expr: Box::new(Expr::Binary {
3538                    lhs: Box::new(col("a")),
3539                    op: BinOp::Eq,
3540                    rhs: Box::new(lit_int(1)),
3541                }),
3542            }
3543        );
3544    }
3545
3546    #[test]
3547    fn unary_minus_binds_above_multiplication() {
3548        // `-a * 2` should be `(-a) * 2`.
3549        let s = parse("SELECT -a * 2 FROM t");
3550        let Statement::Select(s) = s else {
3551            panic!("expected SELECT")
3552        };
3553        let SelectItem::Expr { expr, .. } = &s.items[0] else {
3554            panic!()
3555        };
3556        assert_eq!(
3557            expr,
3558            &Expr::Binary {
3559                lhs: Box::new(Expr::Unary {
3560                    op: UnOp::Neg,
3561                    expr: Box::new(col("a")),
3562                }),
3563                op: BinOp::Mul,
3564                rhs: Box::new(lit_int(2)),
3565            }
3566        );
3567    }
3568
3569    #[test]
3570    fn qualified_column() {
3571        let s = parse("SELECT t.col FROM t");
3572        let Statement::Select(s) = s else {
3573            panic!("expected SELECT")
3574        };
3575        let SelectItem::Expr { expr, .. } = &s.items[0] else {
3576            panic!()
3577        };
3578        assert_eq!(
3579            expr,
3580            &Expr::Column(ColumnName {
3581                qualifier: Some("t".into()),
3582                name: "col".into()
3583            })
3584        );
3585    }
3586
3587    #[test]
3588    fn select_item_alias_with_as() {
3589        let s = parse("SELECT a AS y FROM t");
3590        let Statement::Select(s) = s else {
3591            panic!("expected SELECT")
3592        };
3593        let SelectItem::Expr { alias, .. } = &s.items[0] else {
3594            panic!()
3595        };
3596        assert_eq!(alias.as_deref(), Some("y"));
3597    }
3598
3599    #[test]
3600    fn trailing_semicolon_accepted() {
3601        let s = parse("SELECT 1;");
3602        let Statement::Select(s) = s else {
3603            panic!("expected SELECT")
3604        };
3605        assert_eq!(s.items.len(), 1);
3606    }
3607
3608    #[test]
3609    fn boolean_chain_with_and_or_not() {
3610        // (NOT a) OR (b AND (NOT c))
3611        let s = parse("SELECT NOT a OR b AND NOT c FROM t");
3612        let Statement::Select(s) = s else {
3613            panic!("expected SELECT")
3614        };
3615        let SelectItem::Expr { expr, .. } = &s.items[0] else {
3616            panic!()
3617        };
3618        let expected = Expr::Binary {
3619            lhs: Box::new(Expr::Unary {
3620                op: UnOp::Not,
3621                expr: Box::new(col("a")),
3622            }),
3623            op: BinOp::Or,
3624            rhs: Box::new(Expr::Binary {
3625                lhs: Box::new(col("b")),
3626                op: BinOp::And,
3627                rhs: Box::new(Expr::Unary {
3628                    op: UnOp::Not,
3629                    expr: Box::new(col("c")),
3630                }),
3631            }),
3632        };
3633        assert_eq!(expr, &expected);
3634    }
3635
3636    #[test]
3637    fn empty_input_errors() {
3638        let err = parse_statement("").unwrap_err();
3639        assert!(err.message.contains("SELECT"));
3640    }
3641
3642    #[test]
3643    fn unmatched_paren_errors() {
3644        assert!(parse_statement("SELECT (1 + 2").is_err());
3645    }
3646
3647    #[test]
3648    fn display_round_trip_simple_select() {
3649        let original = parse("SELECT a + 1 FROM t WHERE a > 0");
3650        let text = original.to_string();
3651        let again = parse_statement(&text).expect("re-parse");
3652        assert_eq!(original, again);
3653    }
3654
3655    // --- CREATE TABLE & INSERT (v0.3) ---------------------------------------
3656
3657    #[test]
3658    fn create_table_single_column() {
3659        let s = parse("CREATE TABLE foo (a INT)");
3660        let Statement::CreateTable(c) = s else {
3661            panic!("expected CreateTable")
3662        };
3663        assert_eq!(c.name, "foo");
3664        assert_eq!(c.columns.len(), 1);
3665        assert_eq!(c.columns[0].name, "a");
3666        assert_eq!(c.columns[0].ty, ColumnTypeName::Int);
3667        assert!(c.columns[0].nullable);
3668    }
3669
3670    #[test]
3671    fn create_table_multi_column_with_not_null_mix() {
3672        let s = parse("CREATE TABLE u (id INT NOT NULL, name TEXT, score FLOAT NOT NULL, ok BOOL)");
3673        let Statement::CreateTable(c) = s else {
3674            panic!()
3675        };
3676        assert_eq!(c.columns.len(), 4);
3677        assert_eq!(c.columns[0].ty, ColumnTypeName::Int);
3678        assert!(!c.columns[0].nullable);
3679        assert_eq!(c.columns[1].ty, ColumnTypeName::Text);
3680        assert!(c.columns[1].nullable);
3681        assert_eq!(c.columns[2].ty, ColumnTypeName::Float);
3682        assert!(!c.columns[2].nullable);
3683        assert_eq!(c.columns[3].ty, ColumnTypeName::Bool);
3684    }
3685
3686    #[test]
3687    fn create_table_bigint_supported() {
3688        let s = parse("CREATE TABLE accounts (id BIGINT NOT NULL)");
3689        let Statement::CreateTable(c) = s else {
3690            panic!()
3691        };
3692        assert_eq!(c.columns[0].ty, ColumnTypeName::BigInt);
3693    }
3694
3695    #[test]
3696    fn create_table_vector_default_is_f32() {
3697        let s = parse("CREATE TABLE t (v VECTOR(128))");
3698        let Statement::CreateTable(c) = s else {
3699            panic!()
3700        };
3701        assert_eq!(
3702            c.columns[0].ty,
3703            ColumnTypeName::Vector {
3704                dim: 128,
3705                encoding: VecEncoding::F32,
3706            },
3707        );
3708    }
3709
3710    #[test]
3711    fn create_table_vector_using_sq8() {
3712        // v6.0.1: `USING SQ8` selects scalar-quantised encoding.
3713        // Case-insensitive on both `USING` and the encoding name.
3714        for sql in [
3715            "CREATE TABLE t (v VECTOR(128) USING SQ8)",
3716            "CREATE TABLE t (v VECTOR(128) using sq8)",
3717        ] {
3718            let s = parse(sql);
3719            let Statement::CreateTable(c) = s else {
3720                panic!()
3721            };
3722            assert_eq!(
3723                c.columns[0].ty,
3724                ColumnTypeName::Vector {
3725                    dim: 128,
3726                    encoding: VecEncoding::Sq8,
3727                },
3728                "{sql}",
3729            );
3730        }
3731    }
3732
3733    #[test]
3734    fn create_table_vector_using_unknown_errors() {
3735        let err = parse_statement("CREATE TABLE t (v VECTOR(8) USING PQ8)").unwrap_err();
3736        assert!(
3737            err.message.contains("unknown vector encoding"),
3738            "got: {}",
3739            err.message
3740        );
3741    }
3742
3743    #[test]
3744    fn vector_using_sq8_display_roundtrips() {
3745        // The Display impl must produce text that re-parses to the
3746        // same AST. Guard for the v6.0.1 `USING SQ8` suffix.
3747        let s = parse("CREATE TABLE t (v VECTOR(64) USING SQ8)");
3748        let Statement::CreateTable(c) = s else {
3749            panic!()
3750        };
3751        assert_eq!(c.columns[0].ty.to_string(), "VECTOR(64) USING SQ8");
3752    }
3753
3754    #[test]
3755    fn parser_recognises_placeholders() {
3756        use crate::ast::{Expr, SelectItem, Statement};
3757        // $N in expression position parses as Expr::Placeholder(N).
3758        let s = parse("SELECT $1, $2 + 1 FROM t WHERE x = $3");
3759        let Statement::Select(sel) = s else { panic!() };
3760        assert!(matches!(
3761            sel.items[0],
3762            SelectItem::Expr {
3763                expr: Expr::Placeholder(1),
3764                alias: None
3765            }
3766        ));
3767        // $2 + 1
3768        let SelectItem::Expr {
3769            expr: Expr::Binary { lhs, rhs, .. },
3770            ..
3771        } = &sel.items[1]
3772        else {
3773            panic!()
3774        };
3775        assert!(matches!(**lhs, Expr::Placeholder(2)));
3776        assert!(matches!(**rhs, Expr::Literal(Literal::Integer(1))));
3777        // WHERE x = $3
3778        let Some(Expr::Binary { rhs, .. }) = sel.where_.as_ref() else {
3779            panic!()
3780        };
3781        assert!(matches!(**rhs, Expr::Placeholder(3)));
3782    }
3783
3784    #[test]
3785    fn parser_rejects_dollar_zero() {
3786        // $0 is not valid in PG; the lexer rejects it.
3787        assert!(parse_statement("SELECT $0").is_err());
3788    }
3789
3790    #[test]
3791    fn placeholder_display_roundtrips() {
3792        // The Display impl must produce text that re-lexes to the
3793        // same Placeholder token.
3794        let s = parse("SELECT $42 FROM t");
3795        let printed = s.to_string();
3796        assert!(printed.contains("$42"));
3797        let again = parse(&printed);
3798        assert_eq!(s, again);
3799    }
3800
3801    #[test]
3802    fn alter_index_rebuild_bare() {
3803        use crate::ast::{AlterIndexTarget, Statement};
3804        let s = parse("ALTER INDEX my_idx REBUILD");
3805        let Statement::AlterIndex(a) = s else {
3806            panic!("expected AlterIndex, got {s:?}")
3807        };
3808        assert_eq!(a.name, "my_idx");
3809        assert_eq!(a.target, AlterIndexTarget::Rebuild { encoding: None });
3810    }
3811
3812    #[test]
3813    fn alter_index_rebuild_with_encoding() {
3814        use crate::ast::{AlterIndexTarget, Statement};
3815        for (sql, want) in [
3816            (
3817                "ALTER INDEX my_idx REBUILD WITH (encoding = F32)",
3818                VecEncoding::F32,
3819            ),
3820            (
3821                "ALTER INDEX my_idx REBUILD WITH (encoding = sq8)",
3822                VecEncoding::Sq8,
3823            ),
3824            (
3825                "ALTER INDEX my_idx REBUILD WITH (encoding = HALF)",
3826                VecEncoding::F16,
3827            ),
3828        ] {
3829            let s = parse(sql);
3830            let Statement::AlterIndex(a) = s else {
3831                panic!("{sql}: expected AlterIndex")
3832            };
3833            assert_eq!(a.name, "my_idx");
3834            assert_eq!(
3835                a.target,
3836                AlterIndexTarget::Rebuild {
3837                    encoding: Some(want)
3838                },
3839                "{sql}"
3840            );
3841        }
3842    }
3843
3844    #[test]
3845    fn alter_index_rebuild_unknown_encoding_errors() {
3846        let err = parse_statement("ALTER INDEX my_idx REBUILD WITH (encoding = PQ8)").unwrap_err();
3847        assert!(
3848            err.message.contains("unknown vector encoding"),
3849            "got: {}",
3850            err.message
3851        );
3852    }
3853
3854    #[test]
3855    fn alter_index_rebuild_display_roundtrips() {
3856        for (input, want) in [
3857            ("ALTER INDEX my_idx REBUILD", "ALTER INDEX my_idx REBUILD"),
3858            (
3859                "ALTER INDEX my_idx REBUILD WITH (encoding = SQ8)",
3860                "ALTER INDEX my_idx REBUILD WITH (encoding = SQ8)",
3861            ),
3862            (
3863                "ALTER INDEX my_idx REBUILD WITH (encoding = HALF)",
3864                "ALTER INDEX my_idx REBUILD WITH (encoding = HALF)",
3865            ),
3866        ] {
3867            let s = parse(input);
3868            assert_eq!(s.to_string(), want);
3869        }
3870    }
3871
3872    #[test]
3873    fn create_table_unknown_type_errors() {
3874        // v4.9: JSON is now real; pick an actually unsupported keyword
3875        // (XML never landed and isn't planned).
3876        let err = parse_statement("CREATE TABLE x (a xml)").unwrap_err();
3877        assert!(err.message.contains("unsupported column type"));
3878    }
3879
3880    #[test]
3881    fn create_table_missing_table_keyword_errors() {
3882        assert!(parse_statement("CREATE x (a INT)").is_err());
3883    }
3884
3885    #[test]
3886    fn insert_single_value() {
3887        let s = parse("INSERT INTO foo VALUES (42)");
3888        let Statement::Insert(i) = s else {
3889            panic!("expected Insert")
3890        };
3891        assert_eq!(i.table, "foo");
3892        assert_eq!(i.rows.len(), 1);
3893        assert_eq!(i.rows[0].len(), 1);
3894        assert!(matches!(i.rows[0][0], Expr::Literal(Literal::Integer(42))));
3895    }
3896
3897    #[test]
3898    fn insert_multi_value_with_mixed_literals() {
3899        let s = parse("INSERT INTO foo VALUES (1, 'hi', 3.14, TRUE, NULL)");
3900        let Statement::Insert(i) = s else { panic!() };
3901        assert_eq!(i.rows.len(), 1);
3902        assert_eq!(i.rows[0].len(), 5);
3903    }
3904
3905    #[test]
3906    fn insert_missing_into_errors() {
3907        assert!(parse_statement("INSERT foo VALUES (1)").is_err());
3908    }
3909
3910    #[test]
3911    fn create_table_round_trip() {
3912        let original =
3913            parse("CREATE TABLE foo (id BIGINT NOT NULL, label TEXT, score FLOAT NOT NULL)");
3914        let text = original.to_string();
3915        let again = parse_statement(&text).expect("re-parse");
3916        assert_eq!(original, again);
3917    }
3918
3919    #[test]
3920    fn insert_round_trip_with_negation_and_string() {
3921        let original = parse("INSERT INTO t VALUES (-1, 'it''s', NULL)");
3922        let text = original.to_string();
3923        let again = parse_statement(&text).expect("re-parse");
3924        assert_eq!(original, again);
3925    }
3926
3927    #[test]
3928    fn unknown_keyword_at_statement_start_errors() {
3929        // v4.4: UPDATE is real SQL now. Use a fabricated keyword so
3930        // the top-level dispatch still has no branch to take.
3931        let err = parse_statement("FROBNICATE foo SET x = 1").unwrap_err();
3932        assert!(err.message.contains("expected SELECT"));
3933    }
3934
3935    // --- v0.8 CREATE INDEX --------------------------------------------------
3936
3937    #[test]
3938    fn create_index_basic() {
3939        let s = parse("CREATE INDEX idx_id ON users (id)");
3940        let Statement::CreateIndex(c) = s else {
3941            panic!("expected CreateIndex")
3942        };
3943        assert_eq!(c.name, "idx_id");
3944        assert_eq!(c.table, "users");
3945        assert_eq!(c.column, "id");
3946    }
3947
3948    #[test]
3949    fn create_index_missing_on_errors() {
3950        assert!(parse_statement("CREATE INDEX foo users (id)").is_err());
3951    }
3952
3953    #[test]
3954    fn create_index_missing_paren_errors() {
3955        assert!(parse_statement("CREATE INDEX foo ON users id").is_err());
3956    }
3957
3958    #[test]
3959    fn create_index_round_trip() {
3960        let original = parse("CREATE INDEX by_name ON users (name)");
3961        let again = parse_statement(&original.to_string()).unwrap();
3962        assert_eq!(original, again);
3963    }
3964
3965    // --- v0.9 transactions -------------------------------------------------
3966
3967    #[test]
3968    fn begin_commit_rollback_parse_as_unit_variants() {
3969        assert_eq!(parse("BEGIN"), Statement::Begin);
3970        assert_eq!(parse("COMMIT"), Statement::Commit);
3971        assert_eq!(parse("ROLLBACK"), Statement::Rollback);
3972        // Trailing semicolons accepted too.
3973        assert_eq!(parse("BEGIN;"), Statement::Begin);
3974    }
3975
3976    // --- v1.2: pgvector distance ops + ::vector cast --------------------
3977
3978    #[test]
3979    fn inner_product_binop_parses() {
3980        let s = parse("SELECT v <#> [1.0, 2.0] FROM t");
3981        let Statement::Select(s) = s else { panic!() };
3982        let SelectItem::Expr { expr, .. } = &s.items[0] else {
3983            panic!()
3984        };
3985        assert!(matches!(
3986            expr,
3987            Expr::Binary {
3988                op: BinOp::InnerProduct,
3989                ..
3990            }
3991        ));
3992    }
3993
3994    #[test]
3995    fn cosine_distance_binop_parses() {
3996        let s = parse("SELECT v <=> [1.0, 2.0] FROM t");
3997        let Statement::Select(s) = s else { panic!() };
3998        let SelectItem::Expr { expr, .. } = &s.items[0] else {
3999            panic!()
4000        };
4001        assert!(matches!(
4002            expr,
4003            Expr::Binary {
4004                op: BinOp::CosineDistance,
4005                ..
4006            }
4007        ));
4008    }
4009
4010    #[test]
4011    fn vector_cast_postfix_wraps_string_literal() {
4012        let s = parse("SELECT '[1,2,3]'::vector FROM t");
4013        let Statement::Select(s) = s else { panic!() };
4014        let SelectItem::Expr { expr, .. } = &s.items[0] else {
4015            panic!()
4016        };
4017        assert!(matches!(
4018            expr,
4019            Expr::Cast {
4020                target: CastTarget::Vector,
4021                ..
4022            }
4023        ));
4024    }
4025
4026    #[test]
4027    fn unsupported_cast_target_errors() {
4028        // `::numeric` isn't in the v1.3 cast target set.
4029        let err = parse_statement("SELECT 1::numeric FROM t").unwrap_err();
4030        assert!(err.message.contains("unsupported cast target"));
4031    }
4032
4033    #[test]
4034    fn tx_statements_round_trip() {
4035        for q in ["BEGIN", "COMMIT", "ROLLBACK"] {
4036            let original = parse(q);
4037            let again = parse_statement(&original.to_string()).unwrap();
4038            assert_eq!(original, again);
4039        }
4040    }
4041
4042    #[test]
4043    fn interval_text_parsing_units() {
4044        // Single unit.
4045        assert_eq!(parse_interval_text("1 day"), Some((0, 86_400_000_000)));
4046        assert_eq!(parse_interval_text("1 second"), Some((0, 1_000_000)));
4047        assert_eq!(parse_interval_text("1 month"), Some((1, 0)));
4048        assert_eq!(parse_interval_text("2 years"), Some((24, 0)));
4049        // Compound spans accumulate.
4050        assert_eq!(parse_interval_text("1 year 6 months"), Some((18, 0)));
4051        assert_eq!(
4052            parse_interval_text("1 day 2 hours"),
4053            Some((0, 86_400_000_000 + 7_200_000_000))
4054        );
4055        // Negative numbers carry through.
4056        assert_eq!(parse_interval_text("-1 day"), Some((0, -86_400_000_000)));
4057        // Bad shapes return None.
4058        assert_eq!(parse_interval_text(""), None);
4059        assert_eq!(parse_interval_text("garbage"), None);
4060        assert_eq!(parse_interval_text("1 fortnight"), None);
4061        assert_eq!(parse_interval_text("1"), None);
4062    }
4063
4064    #[test]
4065    fn interval_literal_roundtrips_via_display() {
4066        let parsed = parse("SELECT INTERVAL '1 day 2 hours'");
4067        let s = parsed.to_string();
4068        // Display preserves the original text verbatim.
4069        assert!(s.contains("INTERVAL '1 day 2 hours'"), "got: {s}");
4070        // And re-parsing yields a structurally equal statement.
4071        let again = parse_statement(&s).unwrap();
4072        assert_eq!(parsed, again);
4073    }
4074
4075    // ── v6.1.2: CREATE / DROP PUBLICATION ────────────────────
4076
4077    #[test]
4078    fn parser_recognises_create_publication_bare() {
4079        let s = parse("CREATE PUBLICATION pub_a");
4080        let Statement::CreatePublication(p) = s else {
4081            panic!("expected CreatePublication, got {s:?}")
4082        };
4083        assert_eq!(p.name, "pub_a");
4084        assert_eq!(p.scope, PublicationScope::AllTables);
4085    }
4086
4087    #[test]
4088    fn parser_recognises_create_publication_for_all_tables() {
4089        let s = parse("CREATE PUBLICATION pub_a FOR ALL TABLES");
4090        let Statement::CreatePublication(p) = s else {
4091            panic!("expected CreatePublication, got {s:?}")
4092        };
4093        assert_eq!(p.name, "pub_a");
4094        assert_eq!(p.scope, PublicationScope::AllTables);
4095    }
4096
4097    #[test]
4098    fn parser_recognises_drop_publication() {
4099        let s = parse("DROP PUBLICATION pub_a");
4100        let Statement::DropPublication(name) = s else {
4101            panic!("expected DropPublication, got {s:?}")
4102        };
4103        assert_eq!(name, "pub_a");
4104    }
4105
4106    #[test]
4107    fn parser_recognises_for_table_list() {
4108        let s = parse("CREATE PUBLICATION pub_a FOR TABLE t1, t2, t3");
4109        let Statement::CreatePublication(p) = s else {
4110            panic!("expected CreatePublication, got {s:?}")
4111        };
4112        assert_eq!(p.name, "pub_a");
4113        let PublicationScope::ForTables(ts) = p.scope else {
4114            panic!("expected ForTables scope")
4115        };
4116        assert_eq!(ts, alloc::vec!["t1", "t2", "t3"]);
4117    }
4118
4119    #[test]
4120    fn parser_recognises_for_tables_plural() {
4121        // PG 19 accepts both `FOR TABLE` and `FOR TABLES` — match.
4122        let s = parse("CREATE PUBLICATION pub_a FOR TABLES t1, t2");
4123        let Statement::CreatePublication(p) = s else {
4124            panic!("expected CreatePublication, got {s:?}")
4125        };
4126        let PublicationScope::ForTables(ts) = p.scope else {
4127            panic!("expected ForTables")
4128        };
4129        assert_eq!(ts, alloc::vec!["t1", "t2"]);
4130    }
4131
4132    #[test]
4133    fn parser_recognises_for_all_tables_except_list() {
4134        let s = parse("CREATE PUBLICATION p FOR ALL TABLES EXCEPT t1, t2");
4135        let Statement::CreatePublication(p) = s else {
4136            panic!()
4137        };
4138        let PublicationScope::AllTablesExcept(ts) = p.scope else {
4139            panic!("expected AllTablesExcept")
4140        };
4141        assert_eq!(ts, alloc::vec!["t1", "t2"]);
4142    }
4143
4144    #[test]
4145    fn parser_rejects_for_table_with_empty_list() {
4146        // `FOR TABLE` with nothing after is a parse error.
4147        let err = parse_statement("CREATE PUBLICATION p FOR TABLE")
4148            .expect_err("must error on empty list");
4149        // No specific message asserted — the call falls through to
4150        // expect_ident_like which yields "expected identifier, got …".
4151        assert!(!err.message.is_empty());
4152    }
4153
4154    #[test]
4155    fn parser_recognises_show_publications() {
4156        // v6.1.3 — SHOW PUBLICATIONS lands here. PUBLICATIONS is a
4157        // bare ident in this position, NOT a reserved keyword.
4158        let s = parse("SHOW PUBLICATIONS");
4159        assert!(matches!(s, Statement::ShowPublications));
4160    }
4161
4162    // ── v6.1.4: CREATE / DROP SUBSCRIPTION + SHOW SUBSCRIPTIONS ─
4163
4164    #[test]
4165    fn parser_recognises_create_subscription_single_publication() {
4166        let s = parse("CREATE SUBSCRIPTION sub_a CONNECTION 'host=127.0.0.1 port=20002' PUBLICATION pub_a");
4167        let Statement::CreateSubscription(c) = s else {
4168            panic!("expected CreateSubscription, got {s:?}")
4169        };
4170        assert_eq!(c.name, "sub_a");
4171        assert_eq!(c.conn_str, "host=127.0.0.1 port=20002");
4172        assert_eq!(c.publications, alloc::vec!["pub_a"]);
4173    }
4174
4175    #[test]
4176    fn parser_recognises_create_subscription_multi_publication() {
4177        let s = parse(
4178            "CREATE SUBSCRIPTION sub_a CONNECTION 'host=h' PUBLICATION p1, p2, p3",
4179        );
4180        let Statement::CreateSubscription(c) = s else {
4181            panic!()
4182        };
4183        assert_eq!(c.publications, alloc::vec!["p1", "p2", "p3"]);
4184    }
4185
4186    #[test]
4187    fn parser_rejects_create_subscription_missing_connection() {
4188        let err = parse_statement("CREATE SUBSCRIPTION s PUBLICATION p")
4189            .expect_err("must error on missing CONNECTION");
4190        assert!(err.message.contains("CONNECTION"), "got: {}", err.message);
4191    }
4192
4193    #[test]
4194    fn parser_rejects_create_subscription_missing_publication() {
4195        let err = parse_statement("CREATE SUBSCRIPTION s CONNECTION 'host=x'")
4196            .expect_err("must error on missing PUBLICATION");
4197        assert!(err.message.contains("PUBLICATION"), "got: {}", err.message);
4198    }
4199
4200    #[test]
4201    fn parser_recognises_drop_subscription() {
4202        let s = parse("DROP SUBSCRIPTION sub_a");
4203        let Statement::DropSubscription(name) = s else {
4204            panic!("expected DropSubscription, got {s:?}")
4205        };
4206        assert_eq!(name, "sub_a");
4207    }
4208
4209    #[test]
4210    fn parser_recognises_show_subscriptions() {
4211        let s = parse("SHOW SUBSCRIPTIONS");
4212        assert!(matches!(s, Statement::ShowSubscriptions));
4213    }
4214
4215    #[test]
4216    fn parser_recognises_wait_for_wal_position_no_timeout() {
4217        let s = parse("WAIT FOR WAL POSITION 12345");
4218        let Statement::WaitForWalPosition { pos, timeout_ms } = s else {
4219            panic!("expected WaitForWalPosition, got {s:?}")
4220        };
4221        assert_eq!(pos, 12345);
4222        assert!(timeout_ms.is_none());
4223    }
4224
4225    #[test]
4226    fn parser_recognises_wait_for_wal_position_with_timeout() {
4227        let s = parse("WAIT FOR WAL POSITION 67890 WITH TIMEOUT 5000");
4228        let Statement::WaitForWalPosition { pos, timeout_ms } = s else {
4229            panic!()
4230        };
4231        assert_eq!(pos, 67890);
4232        assert_eq!(timeout_ms, Some(5000));
4233    }
4234
4235    #[test]
4236    fn parser_rejects_wait_with_negative_position() {
4237        // The lexer treats `-` as a token; `expect_u64_literal`
4238        // only sees the Integer that follows, so the negative
4239        // arrives as a unary-minus expression at higher levels.
4240        // Bare `WAIT FOR WAL POSITION -1` thus surfaces as a
4241        // parse error one way or another.
4242        let err = parse_statement("WAIT FOR WAL POSITION -1").unwrap_err();
4243        assert!(!err.message.is_empty());
4244    }
4245
4246    #[test]
4247    fn parser_recognises_bare_analyze() {
4248        let s = parse("ANALYZE");
4249        assert!(matches!(s, Statement::Analyze(None)));
4250    }
4251
4252    #[test]
4253    fn parser_recognises_analyze_with_table() {
4254        let s = parse("ANALYZE users");
4255        let Statement::Analyze(Some(name)) = s else {
4256            panic!("expected Analyze, got {s:?}")
4257        };
4258        assert_eq!(name, "users");
4259    }
4260
4261    #[test]
4262    fn parser_recognises_analyze_with_quoted_table() {
4263        let s = parse("ANALYZE \"Mixed Case\"");
4264        let Statement::Analyze(Some(name)) = s else {
4265            panic!()
4266        };
4267        assert_eq!(name, "Mixed Case");
4268    }
4269
4270    #[test]
4271    fn parser_rejects_analyze_with_garbage_token() {
4272        let err = parse_statement("ANALYZE 42").expect_err("must error");
4273        assert!(!err.message.is_empty());
4274    }
4275
4276    #[test]
4277    fn analyze_display_roundtrips() {
4278        for sql in ["ANALYZE", "ANALYZE users"] {
4279            let s = parse(sql);
4280            let printed = s.to_string();
4281            let again = parse_statement(&printed)
4282                .unwrap_or_else(|e| panic!("re-parse failed for {printed:?}: {e}"));
4283            assert_eq!(s, again);
4284        }
4285    }
4286
4287    #[test]
4288    fn wait_for_display_roundtrips() {
4289        for sql in [
4290            "WAIT FOR WAL POSITION 12345",
4291            "WAIT FOR WAL POSITION 67890 WITH TIMEOUT 5000",
4292        ] {
4293            let s = parse(sql);
4294            let printed = s.to_string();
4295            let again = parse_statement(&printed)
4296                .unwrap_or_else(|e| panic!("re-parse failed for {printed:?}: {e}"));
4297            assert_eq!(s, again, "round-trip mismatch for {sql:?}");
4298        }
4299    }
4300
4301    #[test]
4302    fn subscription_ddl_display_roundtrips() {
4303        for sql in [
4304            "CREATE SUBSCRIPTION sub_a CONNECTION 'host=h port=20002' PUBLICATION pub_a",
4305            "CREATE SUBSCRIPTION sub_b CONNECTION 'host=h' PUBLICATION p1, p2",
4306            "DROP SUBSCRIPTION sub_a",
4307            "SHOW SUBSCRIPTIONS",
4308        ] {
4309            let s = parse(sql);
4310            let printed = s.to_string();
4311            let again = parse_statement(&printed)
4312                .unwrap_or_else(|e| panic!("re-parse failed for {printed:?}: {e}"));
4313            assert_eq!(s, again, "round-trip mismatch for {sql:?}");
4314        }
4315    }
4316
4317    #[test]
4318    fn parser_drop_dispatches_user_vs_publication() {
4319        // Pre-v6.1.2 DROP USER took the bare-ident path; v6.1.2
4320        // tokenises DROP. Both targets must still parse.
4321        let s = parse("DROP USER 'alice'");
4322        let Statement::DropUser(name) = s else {
4323            panic!("expected DropUser, got {s:?}")
4324        };
4325        assert_eq!(name, "alice");
4326        // And DROP PUBLICATION lands the new variant.
4327        let s = parse("DROP PUBLICATION p1");
4328        assert!(matches!(s, Statement::DropPublication(_)));
4329    }
4330
4331    #[test]
4332    fn publication_ddl_display_roundtrips() {
4333        // Every CREATE PUBLICATION variant must Display → parse →
4334        // same AST. v6.1.3 covers all three scope shapes.
4335        for sql in [
4336            "CREATE PUBLICATION pub_a",
4337            "CREATE PUBLICATION pub_a FOR ALL TABLES",
4338            "CREATE PUBLICATION pub_a FOR TABLE t1, t2",
4339            "CREATE PUBLICATION pub_a FOR ALL TABLES EXCEPT t1",
4340            "DROP PUBLICATION pub_a",
4341            "SHOW PUBLICATIONS",
4342        ] {
4343            let s = parse(sql);
4344            let printed = s.to_string();
4345            let again = parse_statement(&printed)
4346                .unwrap_or_else(|e| panic!("re-parse failed for {printed:?}: {e}"));
4347            assert_eq!(s, again, "round-trip mismatch for {sql:?}");
4348        }
4349    }
4350}