Skip to main content

spg_sql/
parser.rs

1//! Recursive-descent parser with a Pratt (precedence-climbing) sub-parser for
2//! expressions.
3//!
4//! Precedence (lowest → highest binding):
5//! `OR` (1) `<` `AND` (2) `<` `NOT` unary (3) `<`
6//! comparisons `=` `<>` `<` `<=` `>` `>=` (4) `<`
7//! `+` `-` (5) `<` `*` `/` (6) `<` unary `-` (7) `<` parens / atom.
8//!
9//! This matches PG's behaviour for the operators we support — e.g. `NOT a = b`
10//! parses as `NOT (a = b)` and `-a * b` as `(-a) * b`.
11
12use alloc::boxed::Box;
13use alloc::format;
14use alloc::string::{String, ToString};
15use alloc::vec;
16use alloc::vec::Vec;
17use core::fmt;
18use core::mem;
19
20use crate::ast::{
21    BinOp, CastTarget, ColumnDef, ColumnName, ColumnTypeName, CreateIndexStatement,
22    CreatePublicationStatement, CreateSubscriptionStatement, CreateTableStatement, Expr,
23    ExtractField, FkAction, ForeignKeyConstraint, FrameBound, FrameKind, FromClause, FromJoin,
24    IndexMethod, InsertStatement, JoinKind, Literal, NullTreatment, OrderBy, PublicationScope,
25    SelectItem, SelectStatement, Statement, TableRef, UnOp, UnionKind, VecEncoding, WindowFrame,
26};
27use crate::lexer::{self, LexError, Token};
28
/// v7.9.22 — tells whether `name` is one of the pgvector / SPG
/// vector-index operator-class names accepted in `CREATE INDEX`.
///
/// The comparison ignores ASCII case. SPG's HNSW already routes by
/// query operator, so the opclass is only recognised for `pg_dump`
/// compatibility (mailrs migration follow-up G5).
fn is_vector_opclass_name(name: &str) -> bool {
    // Every accepted spelling, stored in lower case.
    const KNOWN_OPCLASSES: [&str; 9] = [
        "vector_cosine_ops",
        "vector_l2_ops",
        "vector_ip_ops",
        "halfvec_cosine_ops",
        "halfvec_l2_ops",
        "halfvec_ip_ops",
        "sq8_cosine_ops",
        "sq8_l2_ops",
        "sq8_ip_ops",
    ];
    KNOWN_OPCLASSES
        .iter()
        .any(|known| known.eq_ignore_ascii_case(name))
}
48
/// Error returned when input cannot be parsed.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ParseError {
    /// Human-readable description of what was expected and what was found.
    pub message: String,
    /// Index into the token stream where parsing tripped. Not a byte offset.
    pub token_pos: usize,
}
55
56impl fmt::Display for ParseError {
57    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
58        write!(
59            f,
60            "parse error at token #{}: {}",
61            self.token_pos, self.message
62        )
63    }
64}
65
66impl From<LexError> for ParseError {
67    fn from(e: LexError) -> Self {
68        Self {
69            message: format!("lex: {e}"),
70            token_pos: 0,
71        }
72    }
73}
74
75/// v7.9.30 — parse a single expression (no trailing junk). Used by
76/// the engine to re-hydrate stored partial-index / unique-index
77/// predicates from their canonical Display form. The same Pratt
78/// parser the statement path uses; this entry point just skips the
79/// statement dispatch.
80pub fn parse_expression(input: &str) -> Result<Expr, ParseError> {
81    let tokens = lexer::tokenize(input)?;
82    let mut p = Parser::new(tokens);
83    let expr = p.parse_expr(0)?;
84    p.expect_eof()?;
85    Ok(expr)
86}
87
88/// Parse exactly one statement, swallow an optional trailing `;`, and require
89/// the token stream to end there.
90pub fn parse_statement(input: &str) -> Result<Statement, ParseError> {
91    let tokens = lexer::tokenize(input)?;
92    let mut p = Parser::new(tokens);
93    let stmt = p.parse_one_statement()?;
94    if matches!(p.peek(), Token::Semicolon) {
95        p.advance();
96    }
97    p.expect_eof()?;
98    Ok(stmt)
99}
100
/// Cursor over the token vector being parsed.
struct Parser {
    /// Tokens under the cursor; `advance()` overwrites each slot it
    /// consumes with `Token::Eof`.
    tokens: Vec<Token>,
    /// Index of the token that `peek()` returns next.
    pos: usize,
}
105
106impl Parser {
107    fn new(tokens: Vec<Token>) -> Self {
108        Self { tokens, pos: 0 }
109    }
110
111    fn peek(&self) -> &Token {
112        // tokens always ends with Eof; pos is clamped in advance().
113        &self.tokens[self.pos]
114    }
115
116    fn advance(&mut self) -> Token {
117        let t = mem::replace(&mut self.tokens[self.pos], Token::Eof);
118        if self.pos + 1 < self.tokens.len() {
119            self.pos += 1;
120        }
121        t
122    }
123
124    fn err(&self, message: String) -> ParseError {
125        ParseError {
126            message,
127            token_pos: self.pos,
128        }
129    }
130
131    fn expect_eof(&self) -> Result<(), ParseError> {
132        if matches!(self.peek(), Token::Eof) {
133            Ok(())
134        } else {
135            Err(self.err(format!("expected end of input, got {:?}", self.peek())))
136        }
137    }
138
139    fn expect_ident_like(&mut self) -> Result<String, ParseError> {
140        match self.advance() {
141            Token::Ident(s) | Token::QuotedIdent(s) => Ok(s),
142            other => Err(ParseError {
143                message: format!("expected identifier, got {other:?}"),
144                token_pos: self.pos.saturating_sub(1),
145            }),
146        }
147    }
148
149    #[allow(clippy::too_many_lines)]
150    fn parse_one_statement(&mut self) -> Result<Statement, ParseError> {
151        match self.peek() {
152            Token::Select => self.parse_select_stmt(),
153            // v7.9.27 — `DO $$ … $$ [LANGUAGE plpgsql]`. PG-only;
154            // SPG has no PL/pgSQL so the body is consumed (lexer
155            // already turned it into a Token::String) and the whole
156            // DO statement returns CommandOk no-op. mailrs H1 +
157            // pg_dump compat.
158            Token::Ident(s) | Token::QuotedIdent(s) if s.eq_ignore_ascii_case("do") => {
159                self.advance();
160                // Body — single string token (dollar-quoted or
161                // ordinary).
162                match self.advance() {
163                    Token::String(_) => {}
164                    other => {
165                        return Err(self.err(alloc::format!(
166                            "expected dollar-quoted body after DO, got {other:?}"
167                        )));
168                    }
169                }
170                // Optional `LANGUAGE <name>` trailer (idents only).
171                if matches!(self.peek(), Token::Ident(s) if s.eq_ignore_ascii_case("language")) {
172                    self.advance();
173                    let _ = self.expect_ident_like()?;
174                }
175                Ok(Statement::DoBlock)
176            }
177            // v4.11: `WITH name AS (SELECT ...) [, ...] SELECT ...`.
178            // WITH isn't a reserved token in our lexer — comes through
179            // as `Token::Ident("with")` (case-insensitive).
180            Token::Ident(s) | Token::QuotedIdent(s) if s.eq_ignore_ascii_case("with") => {
181                self.advance();
182                self.parse_with_cte_then_select()
183            }
184            // v4.26: `EXPLAIN [ANALYZE] <select>`. Comes through as
185            // an identifier — not a reserved keyword.
186            Token::Ident(s) | Token::QuotedIdent(s) if s.eq_ignore_ascii_case("explain") => {
187                self.advance();
188                let mut analyze = false;
189                let mut suggest = false;
190                // v6.8.3 — `EXPLAIN (SUGGEST)` opt-in.
191                if matches!(self.peek(), Token::LParen) {
192                    self.advance();
193                    let opt = match self.peek().clone() {
194                        Token::Ident(s) | Token::QuotedIdent(s) => s,
195                        other => {
196                            return Err(self.err(format!(
197                                "expected option keyword inside EXPLAIN (…), got {other:?}"
198                            )));
199                        }
200                    };
201                    if !opt.eq_ignore_ascii_case("suggest") {
202                        return Err(self.err(format!(
203                            "unknown EXPLAIN option {opt:?}; v6.8.3 supports SUGGEST"
204                        )));
205                    }
206                    self.advance();
207                    if !matches!(self.peek(), Token::RParen) {
208                        return Err(self.err(format!(
209                            "expected ')' after EXPLAIN option, got {:?}",
210                            self.peek()
211                        )));
212                    }
213                    self.advance();
214                    suggest = true;
215                } else if let Token::Ident(s) | Token::QuotedIdent(s) = self.peek()
216                    && (s.eq_ignore_ascii_case("analyze") || s.eq_ignore_ascii_case("analyse"))
217                {
218                    self.advance();
219                    analyze = true;
220                }
221                let inner = self.parse_select_stmt()?;
222                let Statement::Select(s) = inner else {
223                    return Err(self.err(format!("EXPLAIN body must be a SELECT, got {inner:?}")));
224                };
225                Ok(Statement::Explain(crate::ast::ExplainStatement {
226                    analyze,
227                    inner: Box::new(s),
228                    suggest,
229                }))
230            }
231            Token::Create => self.parse_create_stmt(),
232            Token::Insert => self.parse_insert_stmt(),
233            Token::Begin => {
234                self.advance();
235                Ok(Statement::Begin)
236            }
237            Token::Commit => {
238                self.advance();
239                Ok(Statement::Commit)
240            }
241            Token::Rollback => {
242                self.advance();
243                // `ROLLBACK TO [SAVEPOINT] <name>` returns to that
244                // savepoint without ending the transaction. Bare
245                // `ROLLBACK` drops the whole TX.
246                if matches!(self.peek(), Token::To) {
247                    self.advance();
248                    if matches!(self.peek(), Token::Savepoint) {
249                        self.advance();
250                    }
251                    let name = self.expect_ident_like()?;
252                    Ok(Statement::RollbackToSavepoint(name))
253                } else {
254                    Ok(Statement::Rollback)
255                }
256            }
257            Token::Savepoint => {
258                self.advance();
259                let name = self.expect_ident_like()?;
260                Ok(Statement::Savepoint(name))
261            }
262            Token::Release => {
263                self.advance();
264                // `RELEASE [SAVEPOINT] <name>` — the `SAVEPOINT` keyword
265                // is optional in standard SQL.
266                if matches!(self.peek(), Token::Savepoint) {
267                    self.advance();
268                }
269                let name = self.expect_ident_like()?;
270                Ok(Statement::ReleaseSavepoint(name))
271            }
272            Token::Show => {
273                self.advance();
274                // `SHOW TABLES` / `SHOW USERS` / `SHOW COLUMNS FROM <table>`.
275                // v6.1.2 promoted TABLES to a reserved keyword (for
276                // `CREATE PUBLICATION … FOR ALL TABLES`), so it now
277                // arrives as `Token::Tables` rather than a bare ident.
278                // USERS / COLUMNS remain bare idents.
279                let target = match self.advance() {
280                    Token::Tables => "tables".to_string(),
281                    Token::Ident(s) | Token::QuotedIdent(s) => s.to_ascii_lowercase(),
282                    other => {
283                        return Err(self.err(format!(
284                            "expected SHOW target, got {other:?}"
285                        )));
286                    }
287                };
288                match target.as_str() {
289                    "tables" => Ok(Statement::ShowTables),
290                    "users" => Ok(Statement::ShowUsers),
291                    // v6.1.3 — PUBLICATIONS plural is NOT a reserved
292                    // keyword on its own; it lands here as a bare
293                    // ident. Returning all publications + their
294                    // scope summary.
295                    "publications" => Ok(Statement::ShowPublications),
296                    // v6.1.4 — same shape for SUBSCRIPTIONS plural.
297                    "subscriptions" => Ok(Statement::ShowSubscriptions),
298                    "columns" => {
299                        if !matches!(self.peek(), Token::From) {
300                            return Err(self.err(format!(
301                                "expected FROM after SHOW COLUMNS, got {:?}",
302                                self.peek()
303                            )));
304                        }
305                        self.advance();
306                        let table = self.expect_ident_like()?;
307                        Ok(Statement::ShowColumns(table))
308                    }
309                    other => Err(self.err(format!(
310                        "unknown SHOW target {other:?}; supported: TABLES, COLUMNS, USERS, PUBLICATIONS"
311                    ))),
312                }
313            }
314            // v6.1.2: `DROP` is now a reserved keyword (it dispatches
315            // to DROP USER and DROP PUBLICATION today; DROP TABLE /
316            // DROP INDEX are still SHOW-shaped admin ops). Pre-6.1.2
317            // arrived as a bare ident; tokenising it dedicatedly
318            // keeps the dispatch tree small.
319            Token::Drop => {
320                self.advance();
321                match self.peek() {
322                    Token::Publication => {
323                        self.advance();
324                        let name = self.expect_ident_or_string()?;
325                        Ok(Statement::DropPublication(name))
326                    }
327                    Token::Subscription => {
328                        self.advance();
329                        let name = self.expect_ident_or_string()?;
330                        Ok(Statement::DropSubscription(name))
331                    }
332                    Token::Ident(s) | Token::QuotedIdent(s) if s.eq_ignore_ascii_case("user") => {
333                        self.advance();
334                        let name = self.expect_ident_or_string()?;
335                        Ok(Statement::DropUser(name))
336                    }
337                    other => Err(self.err(format!(
338                        "expected USER / PUBLICATION / SUBSCRIPTION after DROP, got {other:?}"
339                    ))),
340                }
341            }
342            Token::Ident(s) | Token::QuotedIdent(s) if s.eq_ignore_ascii_case("update") => {
343                self.advance();
344                self.parse_update_after_keyword()
345            }
346            Token::Ident(s) | Token::QuotedIdent(s) if s.eq_ignore_ascii_case("delete") => {
347                self.advance();
348                self.parse_delete_after_keyword()
349            }
350            // v6.0.4: ALTER INDEX <name> REBUILD [WITH (encoding = ...)].
351            // ALTER is not a reserved keyword in the lexer — handled
352            // as a bare ident here.
353            Token::Ident(s) | Token::QuotedIdent(s) if s.eq_ignore_ascii_case("alter") => {
354                self.advance();
355                self.parse_alter_after_keyword()
356            }
357            // v6.1.7: WAIT FOR WAL POSITION <pos> [WITH TIMEOUT <ms>].
358            // WAIT / POSITION / TIMEOUT are bare idents — no lexer
359            // additions needed.
360            Token::Ident(s) | Token::QuotedIdent(s) if s.eq_ignore_ascii_case("wait") => {
361                self.advance();
362                self.parse_wait_after_keyword()
363            }
364            // v6.2.0: ANALYZE [<table>]. ANALYZE is a bare ident.
365            // Bare ANALYZE → analyse every user table; ANALYZE
366            // <name> → re-stats one. The argument is an optional
367            // ident (or quoted ident); anything else is a parse
368            // error.
369            // v6.7.3 — `COMPACT COLD SEGMENTS`. No arguments, no
370            // `WHERE` filter (carved out per V6_7_DESIGN.md
371            // STABILITY). Lex order: identifier "compact" → "cold"
372            // → "segments". Anything else after `COMPACT` is a
373            // parse error.
374            Token::Ident(s) | Token::QuotedIdent(s) if s.eq_ignore_ascii_case("compact") => {
375                self.advance();
376                let next = self.peek().clone();
377                let cold = match next {
378                    Token::Ident(s) | Token::QuotedIdent(s) => s,
379                    _ => {
380                        return Err(
381                            self.err(format!("expected COLD after COMPACT, got {:?}", self.peek()))
382                        );
383                    }
384                };
385                if !cold.eq_ignore_ascii_case("cold") {
386                    return Err(self.err(format!("expected COLD after COMPACT, got {cold:?}")));
387                }
388                self.advance();
389                let next = self.peek().clone();
390                let segments = match next {
391                    Token::Ident(s) | Token::QuotedIdent(s) => s,
392                    _ => {
393                        return Err(self.err(format!(
394                            "expected SEGMENTS after COMPACT COLD, got {:?}",
395                            self.peek()
396                        )));
397                    }
398                };
399                if !segments.eq_ignore_ascii_case("segments") {
400                    return Err(self.err(format!(
401                        "expected SEGMENTS after COMPACT COLD, got {segments:?}"
402                    )));
403                }
404                self.advance();
405                Ok(Statement::CompactColdSegments)
406            }
407            Token::Ident(s) | Token::QuotedIdent(s) if s.eq_ignore_ascii_case("analyze") => {
408                self.advance();
409                let target = match self.peek() {
410                    Token::Eof | Token::Semicolon => None,
411                    Token::Ident(_) | Token::QuotedIdent(_) => {
412                        Some(self.expect_ident_like()?)
413                    }
414                    other => {
415                        return Err(self.err(format!(
416                            "expected table name or end of statement after ANALYZE, got {other:?}"
417                        )));
418                    }
419                };
420                Ok(Statement::Analyze(target))
421            }
422            other => Err(self.err(format!(
423                "expected SELECT / CREATE / DROP / INSERT / UPDATE / DELETE / ALTER / BEGIN / COMMIT / \
424                 ROLLBACK / SAVEPOINT / RELEASE / SHOW at start of statement, got {other:?}"
425            ))),
426        }
427    }
428
429    fn parse_create_stmt(&mut self) -> Result<Statement, ParseError> {
430        debug_assert!(matches!(self.peek(), Token::Create));
431        self.advance();
432        match self.peek() {
433            Token::Table => self.parse_create_table_stmt_after_create(),
434            Token::Index => self.parse_create_index_stmt_after_create(false),
435            // v7.9.29 — `CREATE UNIQUE INDEX … [WHERE pred]`.
436            // The `UNIQUE` modifier turns a partial index into a
437            // partial-uniqueness invariant (only rows matching the
438            // WHERE predicate are checked for duplicates). mailrs
439            // K1 (3 hits: email_templates default, calendar_events
440            // master, calendar_events instance).
441            Token::Ident(s) | Token::QuotedIdent(s) if s.eq_ignore_ascii_case("unique") => {
442                self.advance();
443                if !matches!(self.peek(), Token::Index) {
444                    return Err(self.err(alloc::format!(
445                        "expected INDEX after CREATE UNIQUE, got {:?}",
446                        self.peek()
447                    )));
448                }
449                self.parse_create_index_stmt_after_create(true)
450            }
451            Token::Publication => {
452                self.advance();
453                self.parse_create_publication_after_keyword()
454            }
455            Token::Subscription => {
456                self.advance();
457                self.parse_create_subscription_after_keyword()
458            }
459            // v4.1: CREATE USER 'name' WITH PASSWORD 'pw' [ROLE 'role'].
460            // USER isn't a reserved keyword — we look for the bare
461            // identifier so the lexer doesn't have to grow a token.
462            Token::Ident(s) | Token::QuotedIdent(s) if s.eq_ignore_ascii_case("user") => {
463                self.advance();
464                self.parse_create_user_after_keyword()
465            }
466            // v7.9.15 — `CREATE EXTENSION [IF NOT EXISTS] <name>
467            // [WITH SCHEMA …] [VERSION '…'] [CASCADE]` as a
468            // no-op. mailrs follow-up F3.
469            Token::Ident(s) | Token::QuotedIdent(s) if s.eq_ignore_ascii_case("extension") => {
470                self.advance();
471                self.parse_create_extension_after_keyword()
472            }
473            other => Err(self.err(format!(
474                "expected TABLE / INDEX / USER / EXTENSION / PUBLICATION / SUBSCRIPTION after CREATE, got {other:?}"
475            ))),
476        }
477    }
478
479    /// v7.9.15 — accept and discard `CREATE EXTENSION` DDL.
480    /// SPG doesn't have a registry; pgvector / similar are
481    /// either builtin (VECTOR(N) ↔ pgvector) or n/a. Parsing
482    /// the syntax lets dual-target schemas keep the line.
483    fn parse_create_extension_after_keyword(&mut self) -> Result<Statement, ParseError> {
484        // Optional `IF NOT EXISTS`.
485        self.consume_if_not_exists();
486        let name = self.expect_ident_like()?;
487        // Drain optional WITH SCHEMA <ident> / VERSION '<v>' /
488        // CASCADE / FROM '<v>' clauses; we don't model them.
489        loop {
490            match self.peek() {
491                Token::Ident(s) if s.eq_ignore_ascii_case("with") => {
492                    self.advance();
493                    continue;
494                }
495                Token::Ident(s) if s.eq_ignore_ascii_case("schema") => {
496                    self.advance();
497                    let _ = self.expect_ident_like()?;
498                    continue;
499                }
500                Token::Ident(s) if s.eq_ignore_ascii_case("version") => {
501                    self.advance();
502                    // String or ident literal.
503                    let _ = self.advance();
504                    continue;
505                }
506                Token::Ident(s) if s.eq_ignore_ascii_case("from") => {
507                    self.advance();
508                    let _ = self.advance();
509                    continue;
510                }
511                Token::Ident(s) if s.eq_ignore_ascii_case("cascade") => {
512                    self.advance();
513                    continue;
514                }
515                _ => break,
516            }
517        }
518        Ok(Statement::CreateExtension(name))
519    }
520
521    /// v6.1.2 → v6.1.3 — `CREATE PUBLICATION <name>` body. Accepts:
522    ///   - (no clause) → implicit `FOR ALL TABLES`
523    ///   - `FOR ALL TABLES`
524    ///   - `FOR ALL TABLES EXCEPT t1, t2, …` (v6.1.3)
525    ///   - `FOR TABLE t1, t2, …` (v6.1.3) — `FOR TABLES …` also
526    ///     accepted (PG accepts both forms in PG 19).
527    fn parse_create_publication_after_keyword(&mut self) -> Result<Statement, ParseError> {
528        let name = self.expect_ident_or_string()?;
529        // Bare DDL maps to FOR ALL TABLES — matches the v6.1.2
530        // shape so existing publications keep parsing identically.
531        let scope = if matches!(self.peek(), Token::For) {
532            self.advance();
533            if matches!(self.peek(), Token::All) {
534                self.advance();
535                if !matches!(self.peek(), Token::Tables) {
536                    return Err(self.err(format!(
537                        "expected TABLES after FOR ALL, got {:?}",
538                        self.peek()
539                    )));
540                }
541                self.advance();
542                if matches!(self.peek(), Token::Except) {
543                    self.advance();
544                    let tables = self.parse_publication_table_list()?;
545                    PublicationScope::AllTablesExcept(tables)
546                } else {
547                    PublicationScope::AllTables
548                }
549            } else if matches!(self.peek(), Token::Table | Token::Tables) {
550                // PG 19 accepts both `FOR TABLE …` (singular) and
551                // `FOR TABLES …` (plural); SPG matches.
552                self.advance();
553                let tables = self.parse_publication_table_list()?;
554                PublicationScope::ForTables(tables)
555            } else {
556                return Err(self.err(format!(
557                    "expected ALL TABLES or TABLE <list> after FOR, got {:?}",
558                    self.peek()
559                )));
560            }
561        } else {
562            PublicationScope::AllTables
563        };
564        Ok(Statement::CreatePublication(CreatePublicationStatement {
565            name,
566            scope,
567        }))
568    }
569
570    /// v6.1.3 — Comma-separated identifier list for the publication
571    /// FOR-clause. Requires at least one entry; empty list is a
572    /// parse error (PG behaviour). Quoted idents are accepted; the
573    /// names round-trip through `Display` as `quote_ident(name)`.
574    fn parse_publication_table_list(&mut self) -> Result<Vec<String>, ParseError> {
575        let first = self.expect_ident_like()?;
576        let mut out = alloc::vec![first];
577        while matches!(self.peek(), Token::Comma) {
578            self.advance();
579            out.push(self.expect_ident_like()?);
580        }
581        Ok(out)
582    }
583
584    /// v6.1.4 — `CREATE SUBSCRIPTION <name>
585    ///                 CONNECTION '<conn>'
586    ///                 PUBLICATION <pub> [, <pub> ...]`.
587    ///
588    /// The clause order is fixed (CONNECTION first, then
589    /// PUBLICATION) to match PG. No WITH-options accepted in
590    /// v6.1.4 — `enabled` defaults to true, no other knobs ship.
591    fn parse_create_subscription_after_keyword(&mut self) -> Result<Statement, ParseError> {
592        let name = self.expect_ident_or_string()?;
593        if !matches!(self.peek(), Token::Connection) {
594            return Err(self.err(format!(
595                "expected CONNECTION after CREATE SUBSCRIPTION <name>, got {:?}",
596                self.peek()
597            )));
598        }
599        self.advance();
600        let conn_str = self.expect_string_literal()?;
601        if !matches!(self.peek(), Token::Publication) {
602            return Err(self.err(format!(
603                "expected PUBLICATION after CONNECTION '<conn>', got {:?}",
604                self.peek()
605            )));
606        }
607        self.advance();
608        // Reuse the publication FOR-list parser shape: at least one
609        // identifier, comma-separated.
610        let first = self.expect_ident_like()?;
611        let mut publications = alloc::vec![first];
612        while matches!(self.peek(), Token::Comma) {
613            self.advance();
614            publications.push(self.expect_ident_like()?);
615        }
616        Ok(Statement::CreateSubscription(CreateSubscriptionStatement {
617            name,
618            conn_str,
619            publications,
620        }))
621    }
622
623    /// v6.1.7 — `WAIT FOR WAL POSITION <pos> [WITH TIMEOUT <ms>]`.
624    /// All keywords after `WAIT` are bare idents in v6.1.x; no
625    /// lexer churn. Both `<pos>` and `<ms>` are positive integers
626    /// that fit `u64`.
627    fn parse_wait_after_keyword(&mut self) -> Result<Statement, ParseError> {
628        // FOR is a v6.1.2-reserved keyword (Token::For). The
629        // other two are bare idents — they've never needed lexer
630        // support and we keep it that way.
631        if !matches!(self.peek(), Token::For) {
632            return Err(self.err(format!("expected FOR after WAIT, got {:?}", self.peek())));
633        }
634        self.advance();
635        self.expect_keyword_ident("wal")?;
636        self.expect_keyword_ident("position")?;
637        let pos = self.expect_u64_literal()?;
638        let timeout_ms = if matches!(self.peek(), Token::Ident(s) | Token::QuotedIdent(s) if s.eq_ignore_ascii_case("with"))
639        {
640            self.advance();
641            self.expect_keyword_ident("timeout")?;
642            Some(self.expect_u64_literal()?)
643        } else {
644            None
645        };
646        Ok(Statement::WaitForWalPosition { pos, timeout_ms })
647    }
648
649    /// v6.1.7 helper — consume a `Token::Integer` and check it
650    /// fits `u64`. WAL positions and millisecond timeouts are
651    /// non-negative.
652    fn expect_u64_literal(&mut self) -> Result<u64, ParseError> {
653        match self.advance() {
654            Token::Integer(n) if n >= 0 => Ok(n as u64),
655            Token::Integer(n) => Err(ParseError {
656                message: format!("expected non-negative integer, got {n}"),
657                token_pos: self.pos.saturating_sub(1),
658            }),
659            other => Err(ParseError {
660                message: format!("expected integer literal, got {other:?}"),
661                token_pos: self.pos.saturating_sub(1),
662            }),
663        }
664    }
665
666    /// `CREATE USER` body — name + WITH PASSWORD '<pw>' + optional
667    /// ROLE '<role>' (defaults to readonly). All string slots accept
668    /// either a quoted ident or a quoted string literal.
669    fn parse_create_user_after_keyword(&mut self) -> Result<Statement, ParseError> {
670        let name = self.expect_ident_or_string()?;
671        self.expect_keyword_ident("with")?;
672        self.expect_keyword_ident("password")?;
673        let password = self.expect_string_literal()?;
674        let role = if let Token::Ident(s) | Token::QuotedIdent(s) = self.peek()
675            && s.eq_ignore_ascii_case("role")
676        {
677            self.advance();
678            self.expect_string_literal()?
679        } else {
680            "readonly".to_string()
681        };
682        Ok(Statement::CreateUser(crate::ast::CreateUserStatement {
683            name,
684            password,
685            role,
686        }))
687    }
688
689    /// v4.4 `UPDATE <table> SET col = expr [, col = expr]* [WHERE cond]`.
690    /// Caller already consumed the leading `UPDATE` ident.
691    fn parse_update_after_keyword(&mut self) -> Result<Statement, ParseError> {
692        let table = self.expect_ident_like()?;
693        self.expect_keyword_ident("set")?;
694        let mut assignments = Vec::new();
695        loop {
696            let col = self.expect_ident_like()?;
697            if !matches!(self.peek(), Token::Eq) {
698                return Err(self.err(format!(
699                    "expected `=` after column name in UPDATE SET, got {:?}",
700                    self.peek()
701                )));
702            }
703            self.advance();
704            let value = self.parse_expr(0)?;
705            assignments.push((col, value));
706            if matches!(self.peek(), Token::Comma) {
707                self.advance();
708                continue;
709            }
710            break;
711        }
712        let where_ = if matches!(self.peek(), Token::Where) {
713            self.advance();
714            Some(self.parse_expr(0)?)
715        } else {
716            None
717        };
718        let returning = self.parse_optional_returning()?;
719        Ok(Statement::Update(crate::ast::UpdateStatement {
720            table,
721            assignments,
722            where_,
723            returning,
724        }))
725    }
726
727    /// v4.4 `DELETE FROM <table> [WHERE cond]`. Caller already consumed
728    /// the leading `DELETE` ident.
729    fn parse_delete_after_keyword(&mut self) -> Result<Statement, ParseError> {
730        if !matches!(self.peek(), Token::From) {
731            return Err(self.err(format!("expected FROM after DELETE, got {:?}", self.peek())));
732        }
733        self.advance();
734        let table = self.expect_ident_like()?;
735        let where_ = if matches!(self.peek(), Token::Where) {
736            self.advance();
737            Some(self.parse_expr(0)?)
738        } else {
739            None
740        };
741        let returning = self.parse_optional_returning()?;
742        Ok(Statement::Delete(crate::ast::DeleteStatement {
743            table,
744            where_,
745            returning,
746        }))
747    }
748
749    /// v7.9.4 — parse the optional trailing `RETURNING <projection>`
750    /// clause on INSERT / UPDATE / DELETE. Same projection grammar
751    /// as SELECT, so `RETURNING *`, `RETURNING col`,
752    /// `RETURNING expr AS alias`, and `RETURNING a, b, c` all work.
753    fn parse_optional_returning(
754        &mut self,
755    ) -> Result<Option<Vec<crate::ast::SelectItem>>, ParseError> {
756        let is_returning_kw = matches!(
757            self.peek(),
758            Token::Ident(s) if s.eq_ignore_ascii_case("returning")
759        );
760        if !is_returning_kw {
761            return Ok(None);
762        }
763        self.advance();
764        let mut items = Vec::new();
765        loop {
766            items.push(self.parse_select_item()?);
767            if matches!(self.peek(), Token::Comma) {
768                self.advance();
769                continue;
770            }
771            break;
772        }
773        Ok(Some(items))
774    }
775
776    /// v6.0.4 — parse the tail of an ALTER statement after the
777    /// leading `ALTER` keyword has been consumed. Only one form is
778    /// supported in v6.0.4:
779    ///
780    /// ```text
781    /// ALTER INDEX <name> REBUILD [WITH (encoding = <enc>)]
782    /// ```
783    fn parse_alter_after_keyword(&mut self) -> Result<Statement, ParseError> {
784        // ALTER INDEX <name> ... | ALTER TABLE <name> SET hot_tier_bytes = <n>
785        match self.advance() {
786            Token::Index => {}
787            Token::Ident(s) | Token::QuotedIdent(s) if s.eq_ignore_ascii_case("index") => {}
788            // v6.7.2 — ALTER TABLE t SET hot_tier_bytes = X
789            Token::Table => return self.parse_alter_table_after_keyword(),
790            Token::Ident(s) | Token::QuotedIdent(s) if s.eq_ignore_ascii_case("table") => {
791                return self.parse_alter_table_after_keyword();
792            }
793            other => {
794                return Err(self.err(format!(
795                    "expected INDEX or TABLE after ALTER, got {other:?}"
796                )));
797            }
798        }
799        let name = self.expect_ident_like()?;
800        // REBUILD
801        self.expect_keyword_ident("rebuild")?;
802        // Optional: WITH (encoding = <enc>)
803        let encoding = if matches!(self.peek(), Token::Ident(s) if s.eq_ignore_ascii_case("with")) {
804            self.advance();
805            if !matches!(self.peek(), Token::LParen) {
806                return Err(self.err(format!(
807                    "expected '(' after WITH in ALTER INDEX REBUILD, got {:?}",
808                    self.peek()
809                )));
810            }
811            self.advance();
812            self.expect_keyword_ident("encoding")?;
813            if !matches!(self.peek(), Token::Eq) {
814                return Err(self.err(format!(
815                    "expected '=' after encoding in ALTER INDEX REBUILD, got {:?}",
816                    self.peek()
817                )));
818            }
819            self.advance();
820            let enc_ident = match self.advance() {
821                Token::Ident(s) | Token::QuotedIdent(s) => s,
822                other => {
823                    return Err(self.err(format!("expected encoding name after =, got {other:?}")));
824                }
825            };
826            let enc = match enc_ident.to_ascii_lowercase().as_str() {
827                "f32" => VecEncoding::F32,
828                "sq8" => VecEncoding::Sq8,
829                "half" => VecEncoding::F16,
830                other => {
831                    return Err(self.err(format!(
832                        "unknown vector encoding {other:?} in ALTER INDEX REBUILD; supported: F32, SQ8, HALF"
833                    )));
834                }
835            };
836            if !matches!(self.peek(), Token::RParen) {
837                return Err(self.err(format!(
838                    "expected ')' after encoding value, got {:?}",
839                    self.peek()
840                )));
841            }
842            self.advance();
843            Some(enc)
844        } else {
845            None
846        };
847        Ok(Statement::AlterIndex(crate::ast::AlterIndexStatement {
848            name,
849            target: crate::ast::AlterIndexTarget::Rebuild { encoding },
850        }))
851    }
852
853    /// v6.7.2 — `ALTER TABLE <name> SET hot_tier_bytes = <n>`. The
854    /// only `SET` form currently supported; future v6.7.x can add
855    /// more SET subjects without changing the dispatch shape.
856    fn parse_alter_table_after_keyword(&mut self) -> Result<Statement, ParseError> {
857        let table_name = self.expect_ident_like()?;
858        // v7.6.8 — dispatch on the next keyword: SET / ADD / DROP.
859        // SET kept identical to v6.7.x. ADD / DROP CONSTRAINT routes
860        // to FK installation / removal.
861        match self.peek() {
862            Token::Ident(s) if s.eq_ignore_ascii_case("set") => {
863                self.advance();
864                let setting = self.expect_ident_like()?;
865                if !setting.eq_ignore_ascii_case("hot_tier_bytes") {
866                    return Err(self.err(alloc::format!(
867                        "ALTER TABLE SET: unknown setting {setting:?}; supported: hot_tier_bytes"
868                    )));
869                }
870                if !matches!(self.peek(), Token::Eq) {
871                    return Err(self.err(alloc::format!(
872                        "expected '=' after hot_tier_bytes, got {:?}",
873                        self.peek()
874                    )));
875                }
876                self.advance();
877                let n = self.expect_u64_literal()?;
878                Ok(Statement::AlterTable(crate::ast::AlterTableStatement {
879                    name: table_name,
880                    target: crate::ast::AlterTableTarget::SetHotTierBytes(n),
881                }))
882            }
883            Token::Ident(s) if s.eq_ignore_ascii_case("add") => {
884                self.advance();
885                // Optional `CONSTRAINT <name>` prefix, then the same
886                // FK clause shape as table-level CREATE TABLE FK.
887                let fk = self.parse_table_level_fk()?;
888                Ok(Statement::AlterTable(crate::ast::AlterTableStatement {
889                    name: table_name,
890                    target: crate::ast::AlterTableTarget::AddForeignKey(fk),
891                }))
892            }
893            Token::Drop => {
894                self.advance();
895                match self.advance() {
896                    Token::Ident(s) if s.eq_ignore_ascii_case("constraint") => {}
897                    other => {
898                        return Err(self.err(alloc::format!(
899                            "expected CONSTRAINT after DROP in ALTER TABLE, got {other:?}"
900                        )));
901                    }
902                }
903                let cname = self.expect_ident_like()?;
904                Ok(Statement::AlterTable(crate::ast::AlterTableStatement {
905                    name: table_name,
906                    target: crate::ast::AlterTableTarget::DropForeignKey(cname),
907                }))
908            }
909            other => Err(self.err(alloc::format!(
910                "expected SET / ADD / DROP in ALTER TABLE, got {other:?}"
911            ))),
912        }
913    }
914
915    /// Consume a bare ident if its lowercase matches `kw`, else err.
916    fn expect_keyword_ident(&mut self, kw: &str) -> Result<(), ParseError> {
917        match self.advance() {
918            Token::Ident(s) | Token::QuotedIdent(s) if s.eq_ignore_ascii_case(kw) => Ok(()),
919            other => Err(ParseError {
920                message: format!("expected {kw:?}, got {other:?}"),
921                token_pos: self.pos.saturating_sub(1),
922            }),
923        }
924    }
925
926    /// Accept either a quoted identifier (`"foo"`) or a quoted string
927    /// literal (`'foo'`) — same shape used by CREATE USER for the
928    /// username slot.
929    fn expect_ident_or_string(&mut self) -> Result<String, ParseError> {
930        match self.advance() {
931            Token::Ident(s) | Token::QuotedIdent(s) | Token::String(s) => Ok(s),
932            other => Err(ParseError {
933                message: format!("expected identifier or string, got {other:?}"),
934                token_pos: self.pos.saturating_sub(1),
935            }),
936        }
937    }
938
939    fn expect_string_literal(&mut self) -> Result<String, ParseError> {
940        match self.advance() {
941            Token::String(s) => Ok(s),
942            other => Err(ParseError {
943                message: format!("expected quoted string, got {other:?}"),
944                token_pos: self.pos.saturating_sub(1),
945            }),
946        }
947    }
948
949    fn parse_select_stmt(&mut self) -> Result<Statement, ParseError> {
950        // Caller dispatches on Token::Select; the inner helper handles
951        // the rest. ORDER BY / LIMIT bind at this top level; UNION peers
952        // get a fresh bare-select parse and may not have their own ORDER
953        // BY / LIMIT.
954        let mut head = self.parse_bare_select()?;
955        while matches!(self.peek(), Token::Union) {
956            self.advance();
957            let kind = if matches!(self.peek(), Token::All) {
958                self.advance();
959                UnionKind::All
960            } else {
961                UnionKind::Distinct
962            };
963            let peer = self.parse_bare_select()?;
964            head.unions.push((kind, peer));
965        }
966        head.order_by = if matches!(self.peek(), Token::Order) {
967            self.advance();
968            if !matches!(self.peek(), Token::By) {
969                return Err(self.err(format!("expected BY after ORDER, got {:?}", self.peek())));
970            }
971            self.advance();
972            // v6.4.0 — multi-key ORDER BY. Loop over comma-separated
973            // `<expr> [ASC|DESC]` items.
974            let mut keys = Vec::new();
975            loop {
976                let expr = self.parse_expr(0)?;
977                let desc = if matches!(self.peek(), Token::Desc) {
978                    self.advance();
979                    true
980                } else if matches!(self.peek(), Token::Asc) {
981                    self.advance();
982                    false
983                } else {
984                    false
985                };
986                keys.push(OrderBy { expr, desc });
987                if matches!(self.peek(), Token::Comma) {
988                    self.advance();
989                } else {
990                    break;
991                }
992            }
993            keys
994        } else {
995            Vec::new()
996        };
997        head.limit = if matches!(self.peek(), Token::Limit) {
998            self.advance();
999            Some(self.parse_limit_expr("LIMIT")?)
1000        } else {
1001            None
1002        };
1003        head.offset = if matches!(self.peek(), Token::Offset) {
1004            self.advance();
1005            Some(self.parse_limit_expr("OFFSET")?)
1006        } else {
1007            None
1008        };
1009        Ok(Statement::Select(head))
1010    }
1011
1012    /// v7.9.24 — accept `LIMIT <int>` or `LIMIT $N`. mailrs H2.
1013    /// Bind value gets resolved during prepared-statement Execute;
1014    /// the Pratt expression parser would over-accept here (e.g.
1015    /// `LIMIT 5 + 5`), so we narrowly accept only the two PG forms.
1016    fn parse_limit_expr(&mut self, label: &str) -> Result<crate::ast::LimitExpr, ParseError> {
1017        match self.advance() {
1018            Token::Integer(n) if n >= 0 => u32::try_from(n)
1019                .map(crate::ast::LimitExpr::Literal)
1020                .map_err(|_| ParseError {
1021                    message: alloc::format!("{label} value too large: {n}"),
1022                    token_pos: self.pos.saturating_sub(1),
1023                }),
1024            Token::Placeholder(n) => Ok(crate::ast::LimitExpr::Placeholder(n)),
1025            other => Err(ParseError {
1026                message: alloc::format!(
1027                    "expected non-negative integer or $N placeholder after {label}, got {other:?}"
1028                ),
1029                token_pos: self.pos.saturating_sub(1),
1030            }),
1031        }
1032    }
1033
1034    /// Parse one SELECT block without ORDER BY / LIMIT / UNION chaining —
1035    /// just `[DISTINCT] items [FROM] [WHERE] [GROUP BY]`. Returned with
1036    /// `unions` empty and `order_by` / `limit` `None`; the top-level
1037    /// `parse_select_stmt` is responsible for filling those in.
1038    fn parse_bare_select(&mut self) -> Result<SelectStatement, ParseError> {
1039        if !matches!(self.peek(), Token::Select) {
1040            return Err(self.err(format!(
1041                "expected SELECT to start a query block, got {:?}",
1042                self.peek()
1043            )));
1044        }
1045        self.advance();
1046        let distinct = if matches!(self.peek(), Token::Distinct) {
1047            self.advance();
1048            true
1049        } else {
1050            false
1051        };
1052        let items = self.parse_select_list()?;
1053        let from = if matches!(self.peek(), Token::From) {
1054            self.advance();
1055            Some(self.parse_from_clause()?)
1056        } else {
1057            None
1058        };
1059        let where_ = if matches!(self.peek(), Token::Where) {
1060            self.advance();
1061            Some(self.parse_expr(0)?)
1062        } else {
1063            None
1064        };
1065        let mut group_by_all = false;
1066        let group_by = if matches!(self.peek(), Token::Group) {
1067            self.advance();
1068            if !matches!(self.peek(), Token::By) {
1069                return Err(self.err(format!("expected BY after GROUP, got {:?}", self.peek())));
1070            }
1071            self.advance();
1072            // v6.4.1 — `GROUP BY ALL` shortcut. Planner expands to
1073            // every non-aggregate SELECT-list item later.
1074            if matches!(self.peek(), Token::All) {
1075                self.advance();
1076                group_by_all = true;
1077                None
1078            } else {
1079                let mut groups = Vec::new();
1080                loop {
1081                    groups.push(self.parse_expr(0)?);
1082                    if matches!(self.peek(), Token::Comma) {
1083                        self.advance();
1084                    } else {
1085                        break;
1086                    }
1087                }
1088                Some(groups)
1089            }
1090        } else {
1091            None
1092        };
1093        let having = if matches!(self.peek(), Token::Having) {
1094            self.advance();
1095            Some(self.parse_expr(0)?)
1096        } else {
1097            None
1098        };
1099        Ok(SelectStatement {
1100            ctes: Vec::new(),
1101            distinct,
1102            items,
1103            from,
1104            where_,
1105            group_by,
1106            group_by_all,
1107            having,
1108            unions: Vec::new(),
1109            order_by: Vec::new(),
1110            limit: None,
1111            offset: None,
1112        })
1113    }
1114
1115    fn parse_create_table_stmt_after_create(&mut self) -> Result<Statement, ParseError> {
1116        // Caller already consumed CREATE; we're sitting on TABLE.
1117        debug_assert!(matches!(self.peek(), Token::Table));
1118        self.advance();
1119        let if_not_exists = self.consume_if_not_exists();
1120        let name = self.expect_ident_like()?;
1121        if !matches!(self.peek(), Token::LParen) {
1122            return Err(self.err(format!(
1123                "expected '(' after table name, got {:?}",
1124                self.peek()
1125            )));
1126        }
1127        self.advance();
1128        let mut columns = Vec::new();
1129        let mut foreign_keys: Vec<ForeignKeyConstraint> = Vec::new();
1130        let mut table_constraints: Vec<crate::ast::TableConstraint> = Vec::new();
1131        loop {
1132            // v7.6.0 / v7.9.18 — distinguish table-level constraint
1133            // clauses from column definitions. Constraints start
1134            // with `CONSTRAINT <name> …`, `FOREIGN KEY (…)`,
1135            // `PRIMARY KEY (…)`, or `UNIQUE (…)`. Anything else is
1136            // a column.
1137            if self.peek_table_level_pk_start() {
1138                table_constraints.push(self.parse_table_level_primary_key()?);
1139            } else if self.peek_table_level_unique_start() {
1140                table_constraints.push(self.parse_table_level_unique()?);
1141            } else if self.peek_constraint_or_fk_start() {
1142                foreign_keys.push(self.parse_table_level_fk()?);
1143            } else {
1144                let (col, col_level_fk) = self.parse_column_def_with_fk()?;
1145                columns.push(col);
1146                if let Some(fk) = col_level_fk {
1147                    foreign_keys.push(fk);
1148                }
1149            }
1150            match self.peek() {
1151                Token::Comma => {
1152                    self.advance();
1153                }
1154                Token::RParen => {
1155                    self.advance();
1156                    break;
1157                }
1158                other => {
1159                    return Err(
1160                        self.err(format!("expected ',' or ')' in column list, got {other:?}"))
1161                    );
1162                }
1163            }
1164        }
1165        if columns.is_empty() {
1166            return Err(self.err("CREATE TABLE requires at least one column".into()));
1167        }
1168        Ok(Statement::CreateTable(CreateTableStatement {
1169            name,
1170            columns,
1171            if_not_exists,
1172            foreign_keys,
1173            table_constraints,
1174        }))
1175    }
1176
1177    /// v7.9.18 — true when the next tokens are `PRIMARY KEY (…)`.
1178    /// PRIMARY and KEY are bare idents; we look-ahead 2 to be
1179    /// sure (otherwise a column literally named `primary` would
1180    /// be mistaken).
1181    fn peek_table_level_pk_start(&self) -> bool {
1182        let cur = self.peek();
1183        let nxt = self.tokens.get(self.pos + 1);
1184        let nxt2 = self.tokens.get(self.pos + 2);
1185        let is_primary = matches!(cur, Token::Ident(s) if s.eq_ignore_ascii_case("primary"));
1186        let is_key = matches!(nxt, Some(Token::Ident(s)) if s.eq_ignore_ascii_case("key"));
1187        let is_lparen = matches!(nxt2, Some(Token::LParen));
1188        is_primary && is_key && is_lparen
1189    }
1190
1191    /// v7.9.18 — true when the next tokens are `UNIQUE (…)`.
1192    fn peek_table_level_unique_start(&self) -> bool {
1193        let cur = self.peek();
1194        let nxt = self.tokens.get(self.pos + 1);
1195        let is_unique = matches!(cur, Token::Ident(s) if s.eq_ignore_ascii_case("unique"));
1196        let is_lparen = matches!(nxt, Some(Token::LParen));
1197        is_unique && is_lparen
1198    }
1199
1200    fn parse_table_level_primary_key(&mut self) -> Result<crate::ast::TableConstraint, ParseError> {
1201        self.advance(); // PRIMARY
1202        self.advance(); // KEY
1203        let columns = self.parse_paren_ident_list("PRIMARY KEY")?;
1204        Ok(crate::ast::TableConstraint::PrimaryKey {
1205            name: None,
1206            columns,
1207        })
1208    }
1209
1210    fn parse_table_level_unique(&mut self) -> Result<crate::ast::TableConstraint, ParseError> {
1211        self.advance(); // UNIQUE
1212        let columns = self.parse_paren_ident_list("UNIQUE")?;
1213        Ok(crate::ast::TableConstraint::Unique {
1214            name: None,
1215            columns,
1216        })
1217    }
1218
1219    fn parse_paren_ident_list(&mut self, ctx: &str) -> Result<Vec<String>, ParseError> {
1220        if !matches!(self.peek(), Token::LParen) {
1221            return Err(self.err(alloc::format!(
1222                "expected '(' after {ctx}, got {:?}",
1223                self.peek()
1224            )));
1225        }
1226        self.advance();
1227        let mut out = Vec::new();
1228        loop {
1229            out.push(self.expect_ident_like()?);
1230            match self.peek() {
1231                Token::Comma => {
1232                    self.advance();
1233                }
1234                Token::RParen => {
1235                    self.advance();
1236                    break;
1237                }
1238                other => {
1239                    return Err(self.err(alloc::format!(
1240                        "expected ',' or ')' in {ctx} list, got {other:?}"
1241                    )));
1242                }
1243            }
1244        }
1245        if out.is_empty() {
1246            return Err(self.err(alloc::format!("{ctx} requires at least one column")));
1247        }
1248        Ok(out)
1249    }
1250
1251    /// v7.6.0 — true when the next tokens are `CONSTRAINT <name>
1252    /// FOREIGN KEY` or bare `FOREIGN KEY`. Both introduce a
1253    /// table-level FK; a column def never starts with either keyword
1254    /// (column names are not in this reserved set).
1255    fn peek_constraint_or_fk_start(&self) -> bool {
1256        let is_constraint_kw = matches!(
1257            self.peek(),
1258            Token::Ident(s) if s.eq_ignore_ascii_case("constraint")
1259        );
1260        let is_foreign_kw = matches!(
1261            self.peek(),
1262            Token::Ident(s) if s.eq_ignore_ascii_case("foreign")
1263        );
1264        is_constraint_kw || is_foreign_kw
1265    }
1266
1267    /// v7.6.0 — parse a table-level FK clause:
1268    /// `[CONSTRAINT <name>] FOREIGN KEY (<col>[,<col>]*) REFERENCES
1269    /// <tbl> [(<pcol>[,<pcol>]*)] [ON DELETE <action>] [ON UPDATE <action>]`.
1270    fn parse_table_level_fk(&mut self) -> Result<ForeignKeyConstraint, ParseError> {
1271        let mut name: Option<String> = None;
1272        if matches!(self.peek(), Token::Ident(s) if s.eq_ignore_ascii_case("constraint")) {
1273            self.advance();
1274            name = Some(self.expect_ident_like()?);
1275        }
1276        // `FOREIGN`
1277        match self.advance() {
1278            Token::Ident(s) if s.eq_ignore_ascii_case("foreign") => {}
1279            other => return Err(self.err(format!("expected FOREIGN, got {other:?}"))),
1280        }
1281        // `KEY`
1282        match self.advance() {
1283            Token::Ident(s) if s.eq_ignore_ascii_case("key") => {}
1284            other => return Err(self.err(format!("expected KEY after FOREIGN, got {other:?}"))),
1285        }
1286        // `(col, col, ...)`
1287        if !matches!(self.peek(), Token::LParen) {
1288            return Err(self.err(format!(
1289                "expected '(' after FOREIGN KEY, got {:?}",
1290                self.peek()
1291            )));
1292        }
1293        self.advance();
1294        let mut columns = Vec::new();
1295        loop {
1296            columns.push(self.expect_ident_like()?);
1297            match self.peek() {
1298                Token::Comma => {
1299                    self.advance();
1300                }
1301                Token::RParen => {
1302                    self.advance();
1303                    break;
1304                }
1305                other => {
1306                    return Err(self.err(format!(
1307                        "expected ',' or ')' in FK column list, got {other:?}"
1308                    )));
1309                }
1310            }
1311        }
1312        if columns.is_empty() {
1313            return Err(self.err("FOREIGN KEY requires at least one column".into()));
1314        }
1315        let (parent_table, parent_columns, on_delete, on_update) =
1316            self.parse_references_tail(columns.len())?;
1317        Ok(ForeignKeyConstraint {
1318            name,
1319            columns,
1320            parent_table,
1321            parent_columns,
1322            on_delete,
1323            on_update,
1324        })
1325    }
1326
1327    /// v7.6.0 — parse the tail `REFERENCES <tbl> [(<pcol>...)] [ON
1328    /// DELETE <action>] [ON UPDATE <action>]`. `expected_arity` is
1329    /// the local column count, used to default the parent column
1330    /// list when omitted (SQL spec: parent's PK is implied).
1331    fn parse_references_tail(
1332        &mut self,
1333        expected_arity: usize,
1334    ) -> Result<(String, Vec<String>, FkAction, FkAction), ParseError> {
1335        match self.advance() {
1336            Token::Ident(s) if s.eq_ignore_ascii_case("references") => {}
1337            other => return Err(self.err(format!("expected REFERENCES, got {other:?}"))),
1338        }
1339        let parent_table = self.expect_ident_like()?;
1340        let mut parent_columns: Vec<String> = Vec::new();
1341        if matches!(self.peek(), Token::LParen) {
1342            self.advance();
1343            loop {
1344                parent_columns.push(self.expect_ident_like()?);
1345                match self.peek() {
1346                    Token::Comma => {
1347                        self.advance();
1348                    }
1349                    Token::RParen => {
1350                        self.advance();
1351                        break;
1352                    }
1353                    other => {
1354                        return Err(self.err(format!(
1355                            "expected ',' or ')' in REFERENCES column list, got {other:?}"
1356                        )));
1357                    }
1358                }
1359            }
1360        }
1361        if !parent_columns.is_empty() && parent_columns.len() != expected_arity {
1362            return Err(self.err(format!(
1363                "FK arity mismatch: {} local column(s) vs {} parent column(s)",
1364                expected_arity,
1365                parent_columns.len()
1366            )));
1367        }
1368        // v7.6.7 — accept and reject `[NOT] DEFERRABLE [INITIALLY
1369        // {DEFERRED | IMMEDIATE}]` so existing PG dumps don't fail
1370        // at parse time. SPG's single-writer model has no deferred
1371        // constraint window, so we surface this as a clean
1372        // unsupported-feature error rather than a syntax error.
1373        loop {
1374            if matches!(self.peek(), Token::Ident(s) if s.eq_ignore_ascii_case("deferrable")) {
1375                return Err(self.err(
1376                    "DEFERRABLE constraints are not supported (SPG is single-writer; \
1377                     constraints are always evaluated immediately at commit)"
1378                        .into(),
1379                ));
1380            }
1381            if matches!(self.peek(), Token::Not) {
1382                let look = self.tokens.get(self.pos + 1);
1383                if matches!(look, Some(Token::Ident(s)) if s.eq_ignore_ascii_case("deferrable")) {
1384                    // NOT DEFERRABLE — accept as the SPG default
1385                    // and consume both tokens silently.
1386                    self.advance();
1387                    self.advance();
1388                    // Optional `INITIALLY IMMEDIATE` clause.
1389                    if matches!(self.peek(), Token::Ident(s) if s.eq_ignore_ascii_case("initially"))
1390                    {
1391                        self.advance();
1392                        match self.advance() {
1393                            Token::Ident(s) if s.eq_ignore_ascii_case("immediate") => {}
1394                            other => {
1395                                return Err(self.err(format!(
1396                                    "expected IMMEDIATE after INITIALLY for NOT DEFERRABLE, \
1397                                     got {other:?}"
1398                                )));
1399                            }
1400                        }
1401                    }
1402                    continue;
1403                }
1404                break;
1405            }
1406            break;
1407        }
1408        // Optional `ON DELETE <action>` and `ON UPDATE <action>` in
1409        // either order, each at most once.
1410        let mut on_delete = FkAction::Restrict;
1411        let mut on_update = FkAction::Restrict;
1412        let mut seen_on_delete = false;
1413        let mut seen_on_update = false;
1414        loop {
1415            if !matches!(self.peek(), Token::On) {
1416                break;
1417            }
1418            self.advance();
1419            let which = self.advance();
1420            let action = self.parse_fk_action()?;
1421            match which {
1422                Token::Ident(ref s) if s.eq_ignore_ascii_case("delete") => {
1423                    if seen_on_delete {
1424                        return Err(self.err("ON DELETE specified twice".into()));
1425                    }
1426                    seen_on_delete = true;
1427                    on_delete = action;
1428                }
1429                Token::Ident(ref s) if s.eq_ignore_ascii_case("update") => {
1430                    if seen_on_update {
1431                        return Err(self.err("ON UPDATE specified twice".into()));
1432                    }
1433                    seen_on_update = true;
1434                    on_update = action;
1435                }
1436                other => {
1437                    return Err(
1438                        self.err(format!("expected DELETE or UPDATE after ON, got {other:?}"))
1439                    );
1440                }
1441            }
1442        }
1443        Ok((parent_table, parent_columns, on_delete, on_update))
1444    }
1445
1446    /// v7.6.0 — parse `CASCADE | RESTRICT | SET NULL | SET DEFAULT |
1447    /// NO ACTION`.
1448    fn parse_fk_action(&mut self) -> Result<FkAction, ParseError> {
1449        match self.advance() {
1450            Token::Ident(s) if s.eq_ignore_ascii_case("cascade") => Ok(FkAction::Cascade),
1451            Token::Ident(s) if s.eq_ignore_ascii_case("restrict") => Ok(FkAction::Restrict),
1452            Token::Ident(s) if s.eq_ignore_ascii_case("set") => match self.advance() {
1453                Token::Null => Ok(FkAction::SetNull),
1454                Token::Default => Ok(FkAction::SetDefault),
1455                other => Err(self.err(format!(
1456                    "expected NULL or DEFAULT after SET in FK action, got {other:?}"
1457                ))),
1458            },
1459            Token::Ident(s) if s.eq_ignore_ascii_case("no") => match self.advance() {
1460                Token::Ident(s) if s.eq_ignore_ascii_case("action") => Ok(FkAction::NoAction),
1461                other => Err(self.err(format!(
1462                    "expected ACTION after NO in FK action, got {other:?}"
1463                ))),
1464            },
1465            other => Err(self.err(format!(
1466                "expected CASCADE | RESTRICT | SET NULL | SET DEFAULT | NO ACTION, got {other:?}"
1467            ))),
1468        }
1469    }
1470
1471    /// Recognise the optional `IF NOT EXISTS` prefix shared by `CREATE
1472    /// TABLE` and `CREATE INDEX`. Returns `true` if consumed.
1473    fn consume_if_not_exists(&mut self) -> bool {
1474        // `IF` arrives as a bare Ident (we don't reserve it because it
1475        // also appears mid-expression in PG, though we don't support
1476        // those forms yet).
1477        let looks_like_if = matches!(self.peek(), Token::Ident(s) if s.eq_ignore_ascii_case("if"));
1478        if !looks_like_if {
1479            return false;
1480        }
1481        // Peek one ahead before committing: only consume IF when it's
1482        // actually `IF NOT EXISTS`.
1483        if !matches!(self.tokens.get(self.pos + 1), Some(Token::Not)) {
1484            return false;
1485        }
1486        if !matches!(
1487            self.tokens.get(self.pos + 2),
1488            Some(Token::Ident(s)) if s.eq_ignore_ascii_case("exists")
1489        ) {
1490            return false;
1491        }
1492        self.advance(); // IF
1493        self.advance(); // NOT
1494        self.advance(); // EXISTS
1495        true
1496    }
1497
1498    /// v7.9.14 — consume `ASC | DESC | NULLS FIRST | NULLS LAST`
1499    /// qualifiers after an index column ref. ASC / DESC are
1500    /// reserved tokens; NULLS / FIRST / LAST are bare idents.
1501    /// We accept and discard them since single-column BTree
1502    /// stores rows in natural key order today.
1503    fn consume_optional_index_column_qualifiers(&mut self) {
1504        loop {
1505            match self.peek() {
1506                Token::Asc | Token::Desc => {
1507                    self.advance();
1508                }
1509                Token::Ident(s) if s.eq_ignore_ascii_case("nulls") => {
1510                    let look = self.tokens.get(self.pos + 1);
1511                    if matches!(
1512                        look,
1513                        Some(Token::Ident(k)) if k.eq_ignore_ascii_case("first")
1514                            || k.eq_ignore_ascii_case("last")
1515                    ) {
1516                        self.advance();
1517                        self.advance();
1518                    } else {
1519                        break;
1520                    }
1521                }
1522                _ => break,
1523            }
1524        }
1525    }
1526
1527    fn parse_create_index_stmt_after_create(
1528        &mut self,
1529        is_unique: bool,
1530    ) -> Result<Statement, ParseError> {
1531        // Caller consumed CREATE (and the optional UNIQUE); we're on INDEX.
1532        debug_assert!(matches!(self.peek(), Token::Index));
1533        self.advance();
1534        let if_not_exists = self.consume_if_not_exists();
1535        let name = self.expect_ident_like()?;
1536        if !matches!(self.peek(), Token::On) {
1537            return Err(self.err(format!(
1538                "expected ON after CREATE INDEX <name>, got {:?}",
1539                self.peek()
1540            )));
1541        }
1542        self.advance();
1543        let table = self.expect_ident_like()?;
1544        // Optional `USING <method>` — only recognised method in v2.0 is
1545        // `hnsw` (a single-layer NSW graph for kNN). `USING` is the bare
1546        // ident `using` (we don't promote it to a reserved keyword
1547        // because it isn't reserved anywhere else in our SQL surface).
1548        let method = if matches!(self.peek(), Token::Ident(s) if s.eq_ignore_ascii_case("using")) {
1549            self.advance();
1550            let m = self.expect_ident_like()?;
1551            match m.to_ascii_lowercase().as_str() {
1552                "hnsw" => IndexMethod::Hnsw,
1553                "btree" => IndexMethod::BTree,
1554                "brin" => IndexMethod::Brin,
1555                // v7.9.26b — PG `pg_dump` emits `USING gin` /
1556                // `USING gist` / `USING spgist` / `USING hash` for
1557                // their built-in index AMs. SPG doesn't have a
1558                // matching implementation; degrade to BTree on the
1559                // leading column so the schema loads + the index
1560                // catalogue stays consistent. Operator pays the
1561                // planner cost only for the queries that would have
1562                // used the specialised AM.
1563                "gin" | "gist" | "spgist" | "hash" => IndexMethod::BTree,
1564                // v7.11.3 — pgvector ships both `ivfflat` and
1565                // `hnsw`. Customers shouldn't have to choose
1566                // their on-disk index method based on what SPG
1567                // implements; accept `ivfflat` as a synonym for
1568                // `hnsw` so PG schemas using either method drop
1569                // in. The vector distance op (`<->` / `<#>` /
1570                // `<=>`) at query time still picks the metric.
1571                "ivfflat" => IndexMethod::Hnsw,
1572                other => {
1573                    return Err(self.err(alloc::format!(
1574                        "unknown index method {other:?}; supported: hnsw, btree, brin (gin/gist/spgist/hash accepted as BTree fallback)"
1575                    )));
1576                }
1577            }
1578        } else {
1579            IndexMethod::BTree
1580        };
1581        if !matches!(self.peek(), Token::LParen) {
1582            return Err(self.err(format!(
1583                "expected '(' before indexed column, got {:?}",
1584                self.peek()
1585            )));
1586        }
1587        self.advance();
1588        // v6.8.2 — accept either a bare column ident (legacy) or
1589        // an expression `fn(col, …)` for expression indexes.
1590        // Distinguish by peeking the token *after* the current
1591        // ident: `ident )` is the legacy column-only path;
1592        // anything else triggers the Pratt expression parser.
1593        // (`advance()` uses `mem::replace` to nil out the current
1594        // slot, so we can't save+rewind cleanly — peek-ahead via
1595        // direct index avoids the mutation.)
1596        let (column, expression): (String, Option<Expr>) = match self.peek().clone() {
1597            // Single column with `)` immediately after — fast path.
1598            // v7.9.29 — also: bare column followed by `,` (the
1599            // multi-column form `(a, b, c)`). Without this branch
1600            // the leading ident gets pulled into `parse_expr`
1601            // which then sets `expression = Some(Column(a))` and
1602            // breaks Display round-trip on the multi-column shape.
1603            Token::Ident(s) | Token::QuotedIdent(s)
1604                if matches!(
1605                    self.tokens.get(self.pos + 1),
1606                    Some(Token::RParen | Token::Comma)
1607                ) =>
1608            {
1609                self.advance();
1610                (s, None)
1611            }
1612            // v7.9.22 — single column followed by a pgvector
1613            // opclass ident: `(col vector_cosine_ops)`. mailrs G5.
1614            // SPG's HNSW currently picks its distance metric from
1615            // the query's operator (`<->` / `<#>` / `<=>`), so the
1616            // opclass is informational — accepted and discarded.
1617            // Recognised opclasses: vector_cosine_ops, vector_l2_ops,
1618            // vector_ip_ops, halfvec_*_ops, sq8_*_ops.
1619            Token::Ident(s) | Token::QuotedIdent(s)
1620                if matches!(
1621                    self.tokens.get(self.pos + 1),
1622                    Some(Token::Ident(op) | Token::QuotedIdent(op))
1623                        if is_vector_opclass_name(op)
1624                ) =>
1625            {
1626                self.advance(); // column name
1627                self.advance(); // opclass ident — drop
1628                (s, None)
1629            }
1630            Token::Ident(_) | Token::QuotedIdent(_) => {
1631                let key_expr = self.parse_expr(0)?;
1632                let primary = extract_first_column(&key_expr).ok_or_else(|| {
1633                    self.err("expression index key must reference at least one column".into())
1634                })?;
1635                (primary, Some(key_expr))
1636            }
1637            other => {
1638                return Err(self.err(format!(
1639                    "expected column ident or expression, got {other:?}"
1640                )));
1641            }
1642        };
1643        // v7.9.14 — accept extra comma-separated columns inside
1644        // the index key parens (`CREATE INDEX … (a, b, c)`).
1645        // mailrs F2. Each extra column may carry an optional
1646        // `ASC` / `DESC` / `NULLS FIRST` / `NULLS LAST` clause
1647        // — parsed and discarded; SPG doesn't honour direction
1648        // on a BTree index today (column ordering is intrinsic
1649        // to the storage). v7.10 will widen to genuine composite
1650        // index keys.
1651        let mut extra_columns: Vec<String> = Vec::new();
1652        // The leading column may also have ASC/DESC after it.
1653        self.consume_optional_index_column_qualifiers();
1654        while matches!(self.peek(), Token::Comma) {
1655            self.advance();
1656            let extra = self.expect_ident_like()?;
1657            self.consume_optional_index_column_qualifiers();
1658            extra_columns.push(extra);
1659        }
1660        if !matches!(self.peek(), Token::RParen) {
1661            return Err(self.err(format!(
1662                "expected ')' after indexed column / expression, got {:?}",
1663                self.peek()
1664            )));
1665        }
1666        self.advance();
1667        // v6.8.0 — optional `INCLUDE (col1, col2, …)` clause for
1668        // index-only-scan annotation. Bare ident (not a reserved
1669        // keyword) so we test by case-insensitive string match.
1670        let included_columns = if matches!(self.peek(), Token::Ident(s) if s.eq_ignore_ascii_case("include"))
1671        {
1672            self.advance();
1673            if !matches!(self.peek(), Token::LParen) {
1674                return Err(self.err(format!("expected '(' after INCLUDE, got {:?}", self.peek())));
1675            }
1676            self.advance();
1677            let mut cols = Vec::new();
1678            loop {
1679                cols.push(self.expect_ident_like()?);
1680                match self.peek() {
1681                    Token::Comma => {
1682                        self.advance();
1683                    }
1684                    Token::RParen => {
1685                        self.advance();
1686                        break;
1687                    }
1688                    other => {
1689                        return Err(self.err(format!(
1690                            "expected ',' or ')' in INCLUDE list, got {other:?}"
1691                        )));
1692                    }
1693                }
1694            }
1695            cols
1696        } else {
1697            Vec::new()
1698        };
1699        // v7.11.3 — accept and discard PG `WITH (k = v, ...)` index
1700        // storage parameters. pgvector emits `WITH (lists = N)` for
1701        // ivfflat and `WITH (m = N, ef_construction = M)` for hnsw;
1702        // SPG's HNSW picks its own parameters today (tunable via
1703        // env vars), so the WITH clause is informational and dropped.
1704        if matches!(self.peek(), Token::Ident(s) if s.eq_ignore_ascii_case("with")) {
1705            self.advance();
1706            if !matches!(self.peek(), Token::LParen) {
1707                return Err(self.err(format!(
1708                    "expected '(' after WITH in CREATE INDEX, got {:?}",
1709                    self.peek()
1710                )));
1711            }
1712            self.advance();
1713            loop {
1714                if matches!(self.peek(), Token::RParen) {
1715                    self.advance();
1716                    break;
1717                }
1718                // Drain `key = value` or bare `key` tokens.
1719                let _ = self.advance(); // key
1720                if matches!(self.peek(), Token::Eq) {
1721                    self.advance();
1722                    let _ = self.advance(); // value (int / string / ident)
1723                }
1724                match self.peek() {
1725                    Token::Comma => {
1726                        self.advance();
1727                    }
1728                    Token::RParen => {
1729                        self.advance();
1730                        break;
1731                    }
1732                    other => {
1733                        return Err(self.err(format!(
1734                            "expected ',' or ')' in WITH (…) clause, got {other:?}"
1735                        )));
1736                    }
1737                }
1738            }
1739        }
1740        // v6.8.1 — optional `WHERE <expr>` partial-index predicate.
1741        let partial_predicate = if matches!(self.peek(), Token::Where) {
1742            self.advance();
1743            Some(self.parse_expr(0)?)
1744        } else {
1745            None
1746        };
1747        // v7.9.29 — UNIQUE on a vector index (HNSW) makes no
1748        // sense: uniqueness over an ANN structure has no clean
1749        // semantics. Reject early. (BRIN UNIQUE is similarly
1750        // meaningless — block both.)
1751        if is_unique && !matches!(method, IndexMethod::BTree) {
1752            return Err(self.err(alloc::format!(
1753                "UNIQUE is only supported on BTree indexes, got USING {:?}",
1754                method
1755            )));
1756        }
1757        Ok(Statement::CreateIndex(CreateIndexStatement {
1758            name,
1759            table,
1760            column,
1761            method,
1762            if_not_exists,
1763            included_columns,
1764            partial_predicate,
1765            extra_columns: extra_columns.clone(),
1766            expression,
1767            is_unique,
1768        }))
1769    }
1770
1771    /// v7.6.0 — wraps `parse_column_def` and consumes an optional
1772    /// column-level `REFERENCES ...` clause. The trailing FK is
1773    /// normalised into table-level shape (single-element columns +
1774    /// parent_columns) so the engine sees one uniform constraint list.
1775    fn parse_column_def_with_fk(
1776        &mut self,
1777    ) -> Result<(ColumnDef, Option<ForeignKeyConstraint>), ParseError> {
1778        let col = self.parse_column_def()?;
1779        // Inline form: `col INT REFERENCES tbl(pcol) [ON DELETE ...] [ON UPDATE ...]`.
1780        let inline_references = matches!(
1781            self.peek(),
1782            Token::Ident(s) if s.eq_ignore_ascii_case("references")
1783        );
1784        if !inline_references {
1785            return Ok((col, None));
1786        }
1787        let (parent_table, parent_columns, on_delete, on_update) = self.parse_references_tail(1)?;
1788        let fk = ForeignKeyConstraint {
1789            name: None,
1790            columns: vec![col.name.clone()],
1791            parent_table,
1792            parent_columns,
1793            on_delete,
1794            on_update,
1795        };
1796        Ok((col, Some(fk)))
1797    }
1798
1799    fn parse_column_def(&mut self) -> Result<ColumnDef, ParseError> {
1800        let name = self.expect_ident_like()?;
1801        // Type keyword arrives as a bare Ident (we did not promote type names
1802        // to keyword tokens — see lexer rationale).
1803        let ty_ident = match self.advance() {
1804            Token::Ident(s) => s,
1805            other => {
1806                return Err(ParseError {
1807                    message: format!("expected column type, got {other:?}"),
1808                    token_pos: self.pos.saturating_sub(1),
1809                });
1810            }
1811        };
1812        // v7.9.6 — PG `SERIAL` / `BIGSERIAL` shorthand for
1813        // `INT/BIGINT NOT NULL AUTO_INCREMENT`. PG also defines
1814        // SMALLSERIAL → SMALLINT; we accept that too. The implicit
1815        // NOT NULL + AUTO_INCREMENT flags get baked in after the
1816        // type tag so the rest of the constraint-loop parser sees
1817        // them as if user-supplied (rejecting duplicates).
1818        let mut implied_auto_increment = false;
1819        let mut implied_not_null = false;
1820        let mut ty = match ty_ident.as_str() {
1821            // PG SERIAL family. Implies NOT NULL + AUTO_INCREMENT.
1822            "smallserial" | "serial2" => {
1823                implied_auto_increment = true;
1824                implied_not_null = true;
1825                ColumnTypeName::SmallInt
1826            }
1827            "serial" | "serial4" => {
1828                implied_auto_increment = true;
1829                implied_not_null = true;
1830                ColumnTypeName::Int
1831            }
1832            "bigserial" | "serial8" => {
1833                implied_auto_increment = true;
1834                implied_not_null = true;
1835                ColumnTypeName::BigInt
1836            }
1837            // MySQL flavours we accept by aliasing to the closest SPG
1838            // type. TINYINT covers MySQL's i8 — held inside SMALLINT
1839            // since SPG doesn't have a dedicated i8. MEDIUMINT (MySQL
1840            // 24-bit) → INT. UNSIGNED modifiers are consumed below
1841            // without semantic effect.
1842            "smallint" | "tinyint" => ColumnTypeName::SmallInt,
1843            // INTEGER is MySQL's spelling for INT; MEDIUMINT widens up.
1844            "int" | "integer" | "mediumint" => ColumnTypeName::Int,
1845            "bigint" => ColumnTypeName::BigInt,
1846            // DOUBLE / REAL are 64-bit IEEE — same as our FLOAT.
1847            "float" | "double" | "real" => ColumnTypeName::Float,
1848            "text" => ColumnTypeName::Text,
1849            "bool" | "boolean" => ColumnTypeName::Bool,
1850            "varchar" => ColumnTypeName::Varchar(self.parse_paren_size("VARCHAR")?),
1851            "char" => ColumnTypeName::Char(self.parse_paren_size("CHAR")?),
1852            "vector" => {
1853                let dim = self.parse_paren_size("VECTOR")?;
1854                let encoding = self.parse_optional_vector_encoding()?;
1855                ColumnTypeName::Vector { dim, encoding }
1856            }
1857            "numeric" => {
1858                let (precision, scale) = self.parse_optional_numeric_params()?;
1859                ColumnTypeName::Numeric(precision, scale)
1860            }
1861            "date" => ColumnTypeName::Date,
1862            // MySQL's `DATETIME` is the same domain as standard
1863            // `TIMESTAMP` — accept both spellings.
1864            "timestamp" | "datetime" => ColumnTypeName::Timestamp,
1865            // v7.9.2 — `TIMESTAMPTZ` and full PG spelling
1866            // `TIMESTAMP WITH TIME ZONE`. Same storage as TIMESTAMP;
1867            // only PG-wire OID differs.
1868            "timestamptz" => ColumnTypeName::Timestamptz,
1869            // v4.9: JSON / JSONB. Stored as raw text — no parse-time
1870            // validation. We accept the JSONB spelling too because
1871            // most PG clients default to it; SPG doesn't distinguish
1872            // the two (no path-operator perf advantage to model).
1873            "json" => ColumnTypeName::Json,
1874            "jsonb" => ColumnTypeName::Jsonb,
1875            // v7.10.4 — PG `BYTEA` and the SPG `BYTES` alias both
1876            // surface here. Same storage shape; mapping happens at
1877            // the engine side via the ColumnTypeName → DataType
1878            // resolver. Literal forms are handled at coerce_value
1879            // time so the lexer stays untouched.
1880            "bytea" | "bytes" => ColumnTypeName::Bytes,
1881            other => {
1882                return Err(ParseError {
1883                    message: format!("unsupported column type {other:?}"),
1884                    token_pos: self.pos.saturating_sub(1),
1885                });
1886            }
1887        };
1888        // MySQL's `UNSIGNED` modifier sits right after the type
1889        // keyword. SPG doesn't carry a separate unsigned variant —
1890        // accepting the keyword keeps existing schemas compatible
1891        // without changing semantics. Drop it silently.
1892        if matches!(self.peek(), Token::Ident(s) if s.eq_ignore_ascii_case("unsigned")) {
1893            self.advance();
1894        }
1895        // v7.10.10 — postfix `[]` widens TEXT → TEXT[]. PG accepts
1896        // `TYPE[]` after any base type; v7.10 only models TEXT[]
1897        // so we reject other base types here. mailrs uses TEXT[]
1898        // for labels / addresses / message-on-thread.
1899        if matches!(self.peek(), Token::LBracket) {
1900            self.advance();
1901            if !matches!(self.peek(), Token::RBracket) {
1902                return Err(self.err(alloc::format!(
1903                    "TEXT[] takes no dimension; got {:?}",
1904                    self.peek()
1905                )));
1906            }
1907            self.advance();
1908            // v7.11.13 — widened to INT[] and BIGINT[] in addition
1909            // to TEXT[]. Other base types (BOOL[], NUMERIC[], etc.)
1910            // still error here.
1911            ty = match ty {
1912                ColumnTypeName::Text => ColumnTypeName::TextArray,
1913                ColumnTypeName::Int => ColumnTypeName::IntArray,
1914                ColumnTypeName::BigInt => ColumnTypeName::BigIntArray,
1915                other => {
1916                    return Err(self.err(alloc::format!(
1917                        "v7.11 supports TEXT[] / INT[] / BIGINT[] only; got {other:?}[]"
1918                    )));
1919                }
1920            };
1921        }
1922        // Column constraints: `DEFAULT <expr>`, `NOT NULL`, and the
1923        // MySQL-flavoured `AUTO_INCREMENT` may appear in any order;
1924        // each at most once.
1925        let mut default: Option<Expr> = None;
1926        let mut nullable = !implied_not_null;
1927        let mut nullability_seen = implied_not_null;
1928        let mut auto_increment = implied_auto_increment;
1929        let mut is_primary_key = false;
1930        loop {
1931            if matches!(self.peek(), Token::Default) {
1932                if default.is_some() {
1933                    return Err(self.err("DEFAULT specified twice".into()));
1934                }
1935                self.advance();
1936                default = Some(self.parse_expr(0)?);
1937                continue;
1938            }
1939            if matches!(self.peek(), Token::Not) {
1940                if nullability_seen {
1941                    return Err(self.err("NOT NULL specified twice".into()));
1942                }
1943                self.advance();
1944                if !matches!(self.peek(), Token::Null) {
1945                    return Err(self.err(format!(
1946                        "expected NULL after NOT in column def, got {:?}",
1947                        self.peek()
1948                    )));
1949                }
1950                self.advance();
1951                nullable = false;
1952                nullability_seen = true;
1953                continue;
1954            }
1955            // `AUTO_INCREMENT` or its abbreviated form `AUTOINCREMENT`
1956            // arrives as a bare Ident. Match either, case-insensitive.
1957            if let Token::Ident(s) = self.peek()
1958                && (s.eq_ignore_ascii_case("auto_increment")
1959                    || s.eq_ignore_ascii_case("autoincrement"))
1960            {
1961                if auto_increment {
1962                    return Err(self.err("AUTO_INCREMENT specified twice".into()));
1963                }
1964                self.advance();
1965                auto_increment = true;
1966                continue;
1967            }
1968            // v7.9.13 — inline `PRIMARY KEY` column constraint
1969            // (mailrs F1). Implies `NOT NULL`. The engine creates
1970            // a BTree index for the PK column at CREATE TABLE time
1971            // so FK parent-side index lookups resolve.
1972            if let Token::Ident(s) = self.peek()
1973                && s.eq_ignore_ascii_case("primary")
1974            {
1975                if is_primary_key {
1976                    return Err(self.err("PRIMARY KEY specified twice".into()));
1977                }
1978                // Peek-ahead for the required `KEY` token.
1979                let next = self.tokens.get(self.pos + 1);
1980                let next_is_key = matches!(
1981                    next,
1982                    Some(Token::Ident(k)) if k.eq_ignore_ascii_case("key")
1983                );
1984                if !next_is_key {
1985                    return Err(self.err(format!(
1986                        "expected KEY after PRIMARY in column def, got {:?}",
1987                        next
1988                    )));
1989                }
1990                self.advance(); // PRIMARY
1991                self.advance(); // KEY
1992                is_primary_key = true;
1993                if nullability_seen && nullable {
1994                    return Err(self.err(
1995                        "column declared NULL but inline PRIMARY KEY implies NOT NULL".into(),
1996                    ));
1997                }
1998                nullable = false;
1999                nullability_seen = true;
2000                continue;
2001            }
2002            break;
2003        }
2004        Ok(ColumnDef {
2005            name,
2006            ty,
2007            nullable,
2008            default,
2009            auto_increment,
2010            is_primary_key,
2011        })
2012    }
2013
2014    /// `NUMERIC` may appear without parameters, with one (precision
2015    /// only, scale=0), or with both. Returns `(precision, scale)` with
2016    /// 0 = unspecified for the bare form.
2017    fn parse_optional_numeric_params(&mut self) -> Result<(u8, u8), ParseError> {
2018        if !matches!(self.peek(), Token::LParen) {
2019            // Bare `NUMERIC` — PG treats this as "unlimited precision";
2020            // we surface it as precision=0 to mean "unconstrained" so
2021            // the engine doesn't need a separate variant.
2022            return Ok((0, 0));
2023        }
2024        self.advance();
2025        let precision = match self.advance() {
2026            Token::Integer(n) if (1..=38).contains(&n) => u8::try_from(n).expect("range-checked"),
2027            other => {
2028                return Err(ParseError {
2029                    message: format!(
2030                        "NUMERIC precision must be an integer in 1..=38, got {other:?}"
2031                    ),
2032                    token_pos: self.pos.saturating_sub(1),
2033                });
2034            }
2035        };
2036        let scale = if matches!(self.peek(), Token::Comma) {
2037            self.advance();
2038            match self.advance() {
2039                Token::Integer(n) if (0..=i64::from(precision)).contains(&n) => {
2040                    u8::try_from(n).expect("range-checked")
2041                }
2042                other => {
2043                    return Err(ParseError {
2044                        message: format!(
2045                            "NUMERIC scale must be a non-negative integer ≤ precision, got {other:?}"
2046                        ),
2047                        token_pos: self.pos.saturating_sub(1),
2048                    });
2049                }
2050            }
2051        } else {
2052            0
2053        };
2054        if !matches!(self.peek(), Token::RParen) {
2055            return Err(self.err(format!(
2056                "expected ')' to close NUMERIC params, got {:?}",
2057                self.peek()
2058            )));
2059        }
2060        self.advance();
2061        Ok((precision, scale))
2062    }
2063
2064    /// Parse `(N)` where `N` is a positive integer literal — used by the
2065    /// `VARCHAR`/`CHAR`/`VECTOR` column types. `label` is the type name
2066    /// for the error message.
2067    /// v6.0.1: parse the optional `USING <encoding>` clause that
2068    /// follows `VECTOR(N)` in a column definition. Missing clause
2069    /// → `VecEncoding::F32` (pre-v6 default). Unknown encoding
2070    /// ident → `ParseError` listing the encodings recognised today.
2071    fn parse_optional_vector_encoding(&mut self) -> Result<VecEncoding, ParseError> {
2072        if !matches!(self.peek(), Token::Ident(s) if s.eq_ignore_ascii_case("using")) {
2073            return Ok(VecEncoding::F32);
2074        }
2075        self.advance();
2076        let enc_ident = match self.advance() {
2077            Token::Ident(s) => s,
2078            other => {
2079                return Err(self.err(format!(
2080                    "expected vector encoding after USING, got {other:?}"
2081                )));
2082            }
2083        };
2084        match enc_ident.to_ascii_lowercase().as_str() {
2085            "sq8" => Ok(VecEncoding::Sq8),
2086            // v6.0.3: `HALF` (pgvector convention) selects IEEE-754
2087            // binary16 per-element storage.
2088            "half" => Ok(VecEncoding::F16),
2089            other => Err(self.err(format!(
2090                "unknown vector encoding {other:?}; supported: SQ8, HALF"
2091            ))),
2092        }
2093    }
2094
2095    fn parse_paren_size(&mut self, label: &str) -> Result<u32, ParseError> {
2096        if !matches!(self.peek(), Token::LParen) {
2097            return Err(self.err(format!("{label} type requires (N), got {:?}", self.peek())));
2098        }
2099        self.advance();
2100        let n = match self.advance() {
2101            Token::Integer(n) if n > 0 => u32::try_from(n).map_err(|_| ParseError {
2102                message: format!("{label} size too large: {n}"),
2103                token_pos: self.pos.saturating_sub(1),
2104            })?,
2105            other => {
2106                return Err(ParseError {
2107                    message: format!("expected positive integer {label} size, got {other:?}"),
2108                    token_pos: self.pos.saturating_sub(1),
2109                });
2110            }
2111        };
2112        if !matches!(self.peek(), Token::RParen) {
2113            return Err(self.err(format!(
2114                "expected ')' after {label} size, got {:?}",
2115                self.peek()
2116            )));
2117        }
2118        self.advance();
2119        Ok(n)
2120    }
2121
2122    fn parse_insert_stmt(&mut self) -> Result<Statement, ParseError> {
2123        debug_assert!(matches!(self.peek(), Token::Insert));
2124        self.advance();
2125        if !matches!(self.peek(), Token::Into) {
2126            return Err(self.err(format!("expected INTO after INSERT, got {:?}", self.peek())));
2127        }
2128        self.advance();
2129        let table = self.expect_ident_like()?;
2130        // Optional column list — `INSERT INTO t (a, b) VALUES ...`.
2131        let columns = if matches!(self.peek(), Token::LParen) {
2132            self.advance();
2133            let mut names = Vec::new();
2134            loop {
2135                names.push(self.expect_ident_like()?);
2136                match self.peek() {
2137                    Token::Comma => {
2138                        self.advance();
2139                    }
2140                    Token::RParen => {
2141                        self.advance();
2142                        break;
2143                    }
2144                    other => {
2145                        return Err(self.err(format!(
2146                            "expected ',' or ')' in INSERT column list, got {other:?}"
2147                        )));
2148                    }
2149                }
2150            }
2151            Some(names)
2152        } else {
2153            None
2154        };
2155        if !matches!(self.peek(), Token::Values) {
2156            return Err(self.err(format!(
2157                "expected VALUES after table name, got {:?}",
2158                self.peek()
2159            )));
2160        }
2161        self.advance();
2162        if !matches!(self.peek(), Token::LParen) {
2163            return Err(self.err(format!("expected '(' after VALUES, got {:?}", self.peek())));
2164        }
2165        let mut rows = Vec::new();
2166        loop {
2167            // Each iteration consumes one `(expr, expr, …)` tuple.
2168            if !matches!(self.peek(), Token::LParen) {
2169                return Err(self.err(format!(
2170                    "expected '(' for next VALUES tuple, got {:?}",
2171                    self.peek()
2172                )));
2173            }
2174            self.advance();
2175            let mut tuple = Vec::new();
2176            loop {
2177                tuple.push(self.parse_expr(0)?);
2178                match self.peek() {
2179                    Token::Comma => {
2180                        self.advance();
2181                    }
2182                    Token::RParen => {
2183                        self.advance();
2184                        break;
2185                    }
2186                    other => {
2187                        return Err(self.err(format!(
2188                            "expected ',' or ')' in VALUES tuple, got {other:?}"
2189                        )));
2190                    }
2191                }
2192            }
2193            if tuple.is_empty() {
2194                return Err(self.err("INSERT VALUES tuple requires at least one value".into()));
2195            }
2196            rows.push(tuple);
2197            // Continue with comma-separated tuples.
2198            if matches!(self.peek(), Token::Comma) {
2199                self.advance();
2200            } else {
2201                break;
2202            }
2203        }
2204        let on_conflict = self.parse_optional_on_conflict()?;
2205        let returning = self.parse_optional_returning()?;
2206        Ok(Statement::Insert(InsertStatement {
2207            table,
2208            columns,
2209            rows,
2210            on_conflict,
2211            returning,
2212        }))
2213    }
2214
2215    /// v7.9.7 — parse the optional `ON CONFLICT (cols) DO …`
2216    /// clause sitting between the INSERT body and the trailing
2217    /// RETURNING. All keywords come in as bare idents; `ON` is
2218    /// a reserved Token though.
2219    fn parse_optional_on_conflict(
2220        &mut self,
2221    ) -> Result<Option<crate::ast::OnConflictClause>, ParseError> {
2222        if !matches!(self.peek(), Token::On) {
2223            return Ok(None);
2224        }
2225        // Peek further: we want exactly "ON CONFLICT ...". If the
2226        // next ident isn't "conflict", let some other parser handle.
2227        let next_is_conflict = matches!(
2228            self.tokens.get(self.pos + 1),
2229            Some(Token::Ident(s) | Token::QuotedIdent(s)) if s.eq_ignore_ascii_case("conflict")
2230        );
2231        if !next_is_conflict {
2232            return Ok(None);
2233        }
2234        self.advance(); // ON
2235        self.advance(); // CONFLICT
2236        // Optional `(col [, col]*)` target list.
2237        let mut target_columns: Vec<String> = Vec::new();
2238        if matches!(self.peek(), Token::LParen) {
2239            self.advance();
2240            loop {
2241                target_columns.push(self.expect_ident_like()?);
2242                match self.peek() {
2243                    Token::Comma => {
2244                        self.advance();
2245                    }
2246                    Token::RParen => {
2247                        self.advance();
2248                        break;
2249                    }
2250                    other => {
2251                        return Err(self.err(alloc::format!(
2252                            "expected ',' or ')' in ON CONFLICT target list, got {other:?}"
2253                        )));
2254                    }
2255                }
2256            }
2257        }
2258        // Required `DO`.
2259        match self.advance() {
2260            Token::Ident(s) | Token::QuotedIdent(s) if s.eq_ignore_ascii_case("do") => {}
2261            other => {
2262                return Err(self.err(alloc::format!(
2263                    "expected DO after ON CONFLICT [(…)], got {other:?}"
2264                )));
2265            }
2266        }
2267        // Action: NOTHING | UPDATE SET …
2268        let action = match self.advance() {
2269            Token::Ident(s) | Token::QuotedIdent(s) if s.eq_ignore_ascii_case("nothing") => {
2270                crate::ast::OnConflictAction::Nothing
2271            }
2272            Token::Ident(s) | Token::QuotedIdent(s) if s.eq_ignore_ascii_case("update") => {
2273                self.parse_on_conflict_update_action()?
2274            }
2275            other => {
2276                return Err(self.err(alloc::format!(
2277                    "expected NOTHING or UPDATE after ON CONFLICT DO, got {other:?}"
2278                )));
2279            }
2280        };
2281        Ok(Some(crate::ast::OnConflictClause {
2282            target_columns,
2283            action,
2284        }))
2285    }
2286
2287    /// v7.9.7 — tail of `ON CONFLICT … DO UPDATE`: parse
2288    /// `SET col = expr [, …] [WHERE cond]`. Caller already
2289    /// consumed `UPDATE`.
2290    fn parse_on_conflict_update_action(
2291        &mut self,
2292    ) -> Result<crate::ast::OnConflictAction, ParseError> {
2293        // `SET`
2294        match self.advance() {
2295            Token::Ident(s) | Token::QuotedIdent(s) if s.eq_ignore_ascii_case("set") => {}
2296            other => {
2297                return Err(self.err(alloc::format!(
2298                    "expected SET after ON CONFLICT DO UPDATE, got {other:?}"
2299                )));
2300            }
2301        }
2302        let mut assignments: Vec<(String, Expr)> = Vec::new();
2303        loop {
2304            let col = self.expect_ident_like()?;
2305            if !matches!(self.peek(), Token::Eq) {
2306                return Err(self.err(alloc::format!(
2307                    "expected `=` after column in ON CONFLICT DO UPDATE SET, got {:?}",
2308                    self.peek()
2309                )));
2310            }
2311            self.advance();
2312            let value = self.parse_expr(0)?;
2313            assignments.push((col, value));
2314            if matches!(self.peek(), Token::Comma) {
2315                self.advance();
2316                continue;
2317            }
2318            break;
2319        }
2320        let where_ = if matches!(self.peek(), Token::Where) {
2321            self.advance();
2322            Some(self.parse_expr(0)?)
2323        } else {
2324            None
2325        };
2326        Ok(crate::ast::OnConflictAction::Update {
2327            assignments,
2328            where_,
2329        })
2330    }
2331
2332    fn parse_select_list(&mut self) -> Result<Vec<SelectItem>, ParseError> {
2333        let mut items = Vec::new();
2334        loop {
2335            items.push(self.parse_select_item()?);
2336            if matches!(self.peek(), Token::Comma) {
2337                self.advance();
2338            } else {
2339                break;
2340            }
2341        }
2342        Ok(items)
2343    }
2344
2345    fn parse_select_item(&mut self) -> Result<SelectItem, ParseError> {
2346        if matches!(self.peek(), Token::Star) {
2347            self.advance();
2348            return Ok(SelectItem::Wildcard);
2349        }
2350        let expr = self.parse_expr(0)?;
2351        let alias = self.parse_optional_alias();
2352        Ok(SelectItem::Expr { expr, alias })
2353    }
2354
2355    fn parse_table_ref(&mut self) -> Result<TableRef, ParseError> {
2356        // v7.11.7 — `FROM unnest(<expr>) [AS] <alias>` set-returning
2357        // source. Detect at the head before the bare-ident fallback;
2358        // unnest is not a reserved token.
2359        if matches!(self.peek(), Token::Ident(s) | Token::QuotedIdent(s) if s.eq_ignore_ascii_case("unnest"))
2360            && matches!(self.tokens.get(self.pos + 1), Some(Token::LParen))
2361        {
2362            self.advance(); // unnest
2363            self.advance(); // (
2364            let expr = self.parse_expr(0)?;
2365            if !matches!(self.peek(), Token::RParen) {
2366                return Err(self.err(alloc::format!(
2367                    "expected ')' after unnest() argument, got {:?}",
2368                    self.peek()
2369                )));
2370            }
2371            self.advance();
2372            let alias_ident = self.parse_optional_alias();
2373            let name = alias_ident.clone().unwrap_or_else(|| "unnest".to_string());
2374            return Ok(TableRef {
2375                name,
2376                alias: alias_ident,
2377                as_of_segment: None,
2378                unnest_expr: Some(Box::new(expr)),
2379            });
2380        }
2381        let name = self.expect_ident_like()?;
2382        // v6.10.2 — optional `AS OF SEGMENT '<id>'` cold-tier
2383        // time-travel clause. Parse BEFORE the alias so the
2384        // alias can still ride at the tail (`tbl AS OF SEGMENT
2385        // '5' alias`). `AS` is a reserved keyword token, while
2386        // `OF` and `SEGMENT` are bare idents.
2387        let as_of_segment = if matches!(self.peek(), Token::As)
2388            && matches!(self.tokens.get(self.pos + 1), Some(Token::Ident(s) | Token::QuotedIdent(s)) if s.eq_ignore_ascii_case("of"))
2389        {
2390            self.advance(); // AS
2391            self.advance(); // OF
2392            let kw = match self.peek().clone() {
2393                Token::Ident(s) | Token::QuotedIdent(s) => s,
2394                other => {
2395                    return Err(self.err(format!("expected SEGMENT after AS OF, got {other:?}")));
2396                }
2397            };
2398            if !kw.eq_ignore_ascii_case("segment") {
2399                return Err(self.err(format!(
2400                    "expected SEGMENT after AS OF, got {kw:?}; v6.10.2 supports SEGMENT only"
2401                )));
2402            }
2403            self.advance();
2404            // Segment id literal — accept either a string or
2405            // integer for operator ergonomics.
2406            let id = match self.advance() {
2407                Token::String(s) => s
2408                    .parse::<u32>()
2409                    .map_err(|e| self.err(format!("AS OF SEGMENT id parse: {e}")))?,
2410                Token::Integer(n) => u32::try_from(n)
2411                    .map_err(|e| self.err(format!("AS OF SEGMENT id parse: {e}")))?,
2412                other => {
2413                    return Err(self.err(format!(
2414                        "expected segment id literal after AS OF SEGMENT, got {other:?}"
2415                    )));
2416                }
2417            };
2418            Some(id)
2419        } else {
2420            None
2421        };
2422        let alias = self.parse_optional_alias();
2423        Ok(TableRef {
2424            name,
2425            alias,
2426            as_of_segment,
2427            unnest_expr: None,
2428        })
2429    }
2430
2431    /// FROM-clause: a primary table reference plus zero-or-more joined
2432    /// peers expressed via either `, <table>` (cross-product, no ON) or
2433    /// `[INNER|LEFT [OUTER]|CROSS] JOIN <table> [ON expr]`. v1.10 keeps
2434    /// the join list flat (left-associative nested-loop semantics).
2435    fn parse_from_clause(&mut self) -> Result<FromClause, ParseError> {
2436        let primary = self.parse_table_ref()?;
2437        let mut joins = Vec::new();
2438        loop {
2439            // `, <table>` — cross-product with no ON.
2440            if matches!(self.peek(), Token::Comma) {
2441                self.advance();
2442                let table = self.parse_table_ref()?;
2443                joins.push(FromJoin {
2444                    kind: JoinKind::Cross,
2445                    table,
2446                    on: None,
2447                });
2448                continue;
2449            }
2450            // Explicit JOIN syntax. Accept INNER JOIN, LEFT [OUTER] JOIN,
2451            // CROSS JOIN, and bare JOIN (defaults to INNER).
2452            let kind =
2453                match self.peek() {
2454                    Token::Inner => {
2455                        self.advance();
2456                        if !matches!(self.peek(), Token::Join) {
2457                            return Err(self
2458                                .err(format!("expected JOIN after INNER, got {:?}", self.peek())));
2459                        }
2460                        self.advance();
2461                        JoinKind::Inner
2462                    }
2463                    Token::Left => {
2464                        self.advance();
2465                        if matches!(self.peek(), Token::Outer) {
2466                            self.advance();
2467                        }
2468                        if !matches!(self.peek(), Token::Join) {
2469                            return Err(self.err(format!(
2470                                "expected JOIN after LEFT [OUTER], got {:?}",
2471                                self.peek()
2472                            )));
2473                        }
2474                        self.advance();
2475                        JoinKind::Left
2476                    }
2477                    Token::Cross => {
2478                        self.advance();
2479                        if !matches!(self.peek(), Token::Join) {
2480                            return Err(self
2481                                .err(format!("expected JOIN after CROSS, got {:?}", self.peek())));
2482                        }
2483                        self.advance();
2484                        JoinKind::Cross
2485                    }
2486                    Token::Join => {
2487                        self.advance();
2488                        JoinKind::Inner
2489                    }
2490                    _ => break,
2491                };
2492            let table = self.parse_table_ref()?;
2493            let on = if matches!(self.peek(), Token::On) {
2494                self.advance();
2495                Some(self.parse_expr(0)?)
2496            } else if kind == JoinKind::Cross {
2497                None
2498            } else {
2499                return Err(self.err(format!(
2500                    "expected ON after {:?} JOIN, got {:?}",
2501                    kind,
2502                    self.peek()
2503                )));
2504            };
2505            joins.push(FromJoin { kind, table, on });
2506        }
2507        Ok(FromClause { primary, joins })
2508    }
2509
2510    /// Optional alias after an expression or table:
2511    /// `AS <ident>` is unambiguous; a bare `<ident>` directly after is also
2512    /// accepted (PG-style implicit alias). Returns `None` if the next token
2513    /// is not alias-shaped (e.g. comma, FROM, WHERE, semicolon, EOF, operator).
2514    fn parse_optional_alias(&mut self) -> Option<String> {
2515        if matches!(self.peek(), Token::As) {
2516            self.advance();
2517            // After AS, the next token MUST be an identifier-like — if not,
2518            // we still return None and let the caller surface the error on the
2519            // next expectation. v0.2 keeps the alias path forgiving; the
2520            // corpus tests don't exercise the malformed case.
2521            if let Token::Ident(_) | Token::QuotedIdent(_) = self.peek() {
2522                return self.expect_ident_like().ok();
2523            }
2524            return None;
2525        }
2526        if let Token::Ident(_) | Token::QuotedIdent(_) = self.peek() {
2527            return self.expect_ident_like().ok();
2528        }
2529        None
2530    }
2531
2532    /// Pratt loop. `min_prec` is the minimum binary-op precedence we'll accept.
2533    fn parse_expr(&mut self, min_prec: u8) -> Result<Expr, ParseError> {
2534        let mut lhs = self.parse_unary()?;
2535        while let Some((op, prec)) = binop_from(self.peek()) {
2536            if prec < min_prec {
2537                break;
2538            }
2539            self.advance();
2540            // v7.10.12 — `x <op> ANY(arr)` / `x <op> ALL(arr)`.
2541            // ANY is a bare ident; ALL is a reserved Token. Both
2542            // require an immediate `(` to disambiguate from
2543            // identifier columns named `any` / `all`.
2544            let any_kind = match self.peek() {
2545                Token::All if matches!(self.tokens.get(self.pos + 1), Some(Token::LParen)) => {
2546                    Some(false)
2547                }
2548                Token::Ident(s) | Token::QuotedIdent(s)
2549                    if (s.eq_ignore_ascii_case("any") || s.eq_ignore_ascii_case("all"))
2550                        && matches!(self.tokens.get(self.pos + 1), Some(Token::LParen)) =>
2551                {
2552                    Some(s.eq_ignore_ascii_case("any"))
2553                }
2554                _ => None,
2555            };
2556            if let Some(is_any) = any_kind {
2557                self.advance(); // ident
2558                self.advance(); // (
2559                let arr = self.parse_expr(0)?;
2560                if !matches!(self.peek(), Token::RParen) {
2561                    return Err(self.err(alloc::format!(
2562                        "expected ')' after ANY/ALL argument, got {:?}",
2563                        self.peek()
2564                    )));
2565                }
2566                self.advance();
2567                lhs = Expr::AnyAll {
2568                    expr: Box::new(lhs),
2569                    op,
2570                    array: Box::new(arr),
2571                    is_any,
2572                };
2573                continue;
2574            }
2575            let rhs = self.parse_expr(prec + 1)?;
2576            lhs = Expr::Binary {
2577                lhs: Box::new(lhs),
2578                op,
2579                rhs: Box::new(rhs),
2580            };
2581        }
2582        Ok(lhs)
2583    }
2584
2585    fn parse_unary(&mut self) -> Result<Expr, ParseError> {
2586        match self.peek() {
2587            Token::Not => {
2588                self.advance();
2589                // NOT sits between AND (2) and comparisons (4) — bind everything
2590                // ≥3, which leaves AND/OR outside.
2591                let e = self.parse_expr(3)?;
2592                Ok(Expr::Unary {
2593                    op: UnOp::Not,
2594                    expr: Box::new(e),
2595                })
2596            }
2597            Token::Minus => {
2598                self.advance();
2599                // Unary minus binds tighter than `*`/`/` (now at prec 7 after
2600                // `<->` slotted into 5 and arithmetic shifted up).
2601                let e = self.parse_expr(8)?;
2602                Ok(Expr::Unary {
2603                    op: UnOp::Neg,
2604                    expr: Box::new(e),
2605                })
2606            }
2607            _ => self.parse_atom(),
2608        }
2609    }
2610
2611    fn parse_atom(&mut self) -> Result<Expr, ParseError> {
2612        let tok_pos = self.pos;
2613        match self.advance() {
2614            Token::Integer(n) => Ok(Expr::Literal(Literal::Integer(n))),
2615            Token::Float(x) => Ok(Expr::Literal(Literal::Float(x))),
2616            Token::String(s) => Ok(Expr::Literal(Literal::String(s))),
2617            Token::True => Ok(Expr::Literal(Literal::Bool(true))),
2618            Token::False => Ok(Expr::Literal(Literal::Bool(false))),
2619            Token::Null => Ok(Expr::Literal(Literal::Null)),
2620            // v6.1.1 — `$N` placeholder. The actual Value lookup
2621            // happens in the engine eval path against the prepared-
2622            // statement bind buffer.
2623            Token::Placeholder(n) => Ok(Expr::Placeholder(n)),
2624            Token::LParen => {
2625                // v4.10: `(SELECT ...)` in expression position is a
2626                // scalar subquery; otherwise it's a parenthesised
2627                // expression. Peek for SELECT keyword to dispatch.
2628                if matches!(self.peek(), Token::Select) {
2629                    let inner = self.parse_select_stmt()?;
2630                    match self.advance() {
2631                        Token::RParen => {
2632                            let Statement::Select(s) = inner else {
2633                                unreachable!("parse_select_stmt returns Select")
2634                            };
2635                            Ok(Expr::ScalarSubquery(Box::new(s)))
2636                        }
2637                        other => Err(ParseError {
2638                            message: format!("expected ')' after scalar subquery, got {other:?}"),
2639                            token_pos: self.pos.saturating_sub(1),
2640                        }),
2641                    }
2642                } else {
2643                    let e = self.parse_expr(0)?;
2644                    match self.advance() {
2645                        Token::RParen => Ok(e),
2646                        other => Err(ParseError {
2647                            message: format!("expected ')', got {other:?}"),
2648                            token_pos: self.pos.saturating_sub(1),
2649                        }),
2650                    }
2651                }
2652            }
2653            Token::LBracket => self.parse_vector_literal_body(),
2654            Token::Extract => self.parse_extract_atom(),
2655            Token::Interval => self.parse_interval_atom(),
2656            // v4.10: EXISTS / NOT EXISTS. EXISTS isn't a reserved
2657            // token; we match on the bare ident. NOT is a token
2658            // (consumed in the comparison rung), but `EXISTS (...)`
2659            // at the top of an expression starts here.
2660            Token::Ident(s) | Token::QuotedIdent(s) if s.eq_ignore_ascii_case("exists") => {
2661                self.parse_exists_atom(false)
2662            }
2663            // v7.10.10 — `ARRAY[expr, expr, …]` constructor. ARRAY
2664            // is not a reserved token; we match by case-insensitive
2665            // ident. The opening `[` must follow immediately.
2666            Token::Ident(s) | Token::QuotedIdent(s)
2667                if s.eq_ignore_ascii_case("array") && matches!(self.peek(), Token::LBracket) =>
2668            {
2669                self.advance(); // consume `[`
2670                let mut items: Vec<Expr> = Vec::new();
2671                if !matches!(self.peek(), Token::RBracket) {
2672                    loop {
2673                        items.push(self.parse_expr(0)?);
2674                        match self.peek() {
2675                            Token::Comma => {
2676                                self.advance();
2677                            }
2678                            Token::RBracket => break,
2679                            other => {
2680                                return Err(self.err(alloc::format!(
2681                                    "expected ',' or ']' in ARRAY literal, got {other:?}"
2682                                )));
2683                            }
2684                        }
2685                    }
2686                }
2687                self.advance(); // consume `]`
2688                Ok(Expr::Array(items))
2689            }
2690            Token::Ident(s) | Token::QuotedIdent(s) => self.finish_ident_atom(s),
2691            other => Err(ParseError {
2692                message: format!("unexpected token {other:?} in expression"),
2693                token_pos: tok_pos,
2694            }),
2695        }
2696        // After parsing the atom, fold any postfix `::vector` casts.
2697        .and_then(|atom| self.finish_postfix_casts(atom))
2698    }
2699
2700    /// Postfix operators on an atom: `::TYPE` cast and `IS [NOT] NULL`.
2701    /// Both bind tighter than any binary op.
2702    fn finish_postfix_casts(&mut self, mut expr: Expr) -> Result<Expr, ParseError> {
2703        loop {
2704            if matches!(self.peek(), Token::DoubleColon) {
2705                self.advance();
2706                // v7.9.25 / v7.9.26 — broaden the postfix `::` cast
2707                // target set to include INTERVAL (reserved Token),
2708                // TIMESTAMPTZ, and PG catalog regtype / regclass.
2709                // mailrs follow-up H3a + H3b.
2710                let target = match self.advance() {
2711                    Token::Ident(s) => match s.to_ascii_lowercase().as_str() {
2712                        "int" | "integer" | "int4" => {
2713                            if matches!(self.peek(), Token::LBracket)
2714                                && matches!(self.tokens.get(self.pos + 1), Some(Token::RBracket))
2715                            {
2716                                self.advance();
2717                                self.advance();
2718                                CastTarget::IntArray
2719                            } else {
2720                                CastTarget::Int
2721                            }
2722                        }
2723                        "bigint" | "int8" => {
2724                            if matches!(self.peek(), Token::LBracket)
2725                                && matches!(self.tokens.get(self.pos + 1), Some(Token::RBracket))
2726                            {
2727                                self.advance();
2728                                self.advance();
2729                                CastTarget::BigIntArray
2730                            } else {
2731                                CastTarget::BigInt
2732                            }
2733                        }
2734                        "float" | "double" | "real" => CastTarget::Float,
2735                        "text" => {
2736                            // v7.10.11 — `::TEXT[]` widens to TextArray.
2737                            if matches!(self.peek(), Token::LBracket)
2738                                && matches!(self.tokens.get(self.pos + 1), Some(Token::RBracket))
2739                            {
2740                                self.advance();
2741                                self.advance();
2742                                CastTarget::TextArray
2743                            } else {
2744                                CastTarget::Text
2745                            }
2746                        }
2747                        "bool" | "boolean" => CastTarget::Bool,
2748                        "vector" => CastTarget::Vector,
2749                        "date" => CastTarget::Date,
2750                        "timestamp" | "datetime" => CastTarget::Timestamp,
2751                        "timestamptz" => CastTarget::Timestamptz,
2752                        "interval" => CastTarget::Interval,
2753                        "json" => CastTarget::Json,
2754                        "jsonb" => CastTarget::Jsonb,
2755                        "regtype" => CastTarget::RegType,
2756                        "regclass" => CastTarget::RegClass,
2757                        other => {
2758                            return Err(ParseError {
2759                                message: format!("unsupported cast target `::{other}`"),
2760                                token_pos: self.pos.saturating_sub(1),
2761                            });
2762                        }
2763                    },
2764                    Token::Interval => CastTarget::Interval,
2765                    other => {
2766                        return Err(ParseError {
2767                            message: format!("expected type ident after `::`, got {other:?}"),
2768                            token_pos: self.pos.saturating_sub(1),
2769                        });
2770                    }
2771                };
2772                expr = Expr::Cast {
2773                    expr: Box::new(expr),
2774                    target,
2775                };
2776                continue;
2777            }
2778            if matches!(self.peek(), Token::Is) {
2779                self.advance();
2780                let negated = if matches!(self.peek(), Token::Not) {
2781                    self.advance();
2782                    true
2783                } else {
2784                    false
2785                };
2786                // v7.9.27b — `IS [NOT] DISTINCT FROM <rhs>`.
2787                // mailrs pg_dump.
2788                if matches!(self.peek(), Token::Distinct) {
2789                    self.advance();
2790                    if !matches!(self.peek(), Token::From) {
2791                        return Err(self.err(format!(
2792                            "expected FROM after IS{} DISTINCT, got {:?}",
2793                            if negated { " NOT" } else { "" },
2794                            self.peek()
2795                        )));
2796                    }
2797                    self.advance();
2798                    // Right-hand side: parse at the same precedence
2799                    // tier as comparison so `x IS DISTINCT FROM a + b`
2800                    // groups as `x IS DISTINCT FROM (a + b)`.
2801                    let rhs = self.parse_expr(20)?;
2802                    let op = if negated {
2803                        BinOp::IsNotDistinctFrom
2804                    } else {
2805                        BinOp::IsDistinctFrom
2806                    };
2807                    expr = Expr::Binary {
2808                        op,
2809                        lhs: Box::new(expr),
2810                        rhs: Box::new(rhs),
2811                    };
2812                    continue;
2813                }
2814                if !matches!(self.peek(), Token::Null) {
2815                    return Err(self.err(format!(
2816                        "expected NULL or DISTINCT after IS{}, got {:?}",
2817                        if negated { " NOT" } else { "" },
2818                        self.peek()
2819                    )));
2820                }
2821                self.advance();
2822                expr = Expr::IsNull {
2823                    expr: Box::new(expr),
2824                    negated,
2825                };
2826                continue;
2827            }
2828            // `x [NOT] BETWEEN a AND b`, `x [NOT] IN (...)`, `x [NOT] LIKE p`.
2829            // Look one token ahead so a stray `NOT` not followed by any of
2830            // these flows through to the early return below untouched.
2831            let negated = if matches!(self.peek(), Token::Not) {
2832                let next = self.tokens.get(self.pos + 1);
2833                matches!(next, Some(Token::Between | Token::In | Token::Like))
2834            } else {
2835                false
2836            };
2837            if negated {
2838                self.advance();
2839            }
2840            if matches!(self.peek(), Token::Between) {
2841                expr = self.parse_between_tail(expr, negated)?;
2842                continue;
2843            }
2844            if matches!(self.peek(), Token::In) {
2845                expr = self.parse_in_tail(expr, negated)?;
2846                continue;
2847            }
2848            if matches!(self.peek(), Token::Like) {
2849                self.advance();
2850                // Pattern at the same precedence as other comparison RHSes —
2851                // 5 leaves AND/OR alone so `a LIKE 'x%' AND b` parses right.
2852                let pattern = self.parse_expr(5)?;
2853                expr = Expr::Like {
2854                    expr: Box::new(expr),
2855                    pattern: Box::new(pattern),
2856                    negated,
2857                };
2858                continue;
2859            }
2860            // v7.10.12 — `arr[i]` subscript. PG 1-based; engine
2861            // returns NULL for out-of-range. Multiple subscripts
2862            // chain: `a[i][j]` parses left-to-right.
2863            if matches!(self.peek(), Token::LBracket) {
2864                self.advance();
2865                let index = self.parse_expr(0)?;
2866                if !matches!(self.peek(), Token::RBracket) {
2867                    return Err(self.err(alloc::format!(
2868                        "expected ']' after array index, got {:?}",
2869                        self.peek()
2870                    )));
2871                }
2872                self.advance();
2873                expr = Expr::ArraySubscript {
2874                    target: Box::new(expr),
2875                    index: Box::new(index),
2876                };
2877                continue;
2878            }
2879            return Ok(expr);
2880        }
2881    }
2882
2883    /// `x BETWEEN low AND high`  →  `(x >= low) AND (x <= high)`, wrapped in
2884    /// `NOT` when `negated`. Bounds parse at precedence 5 so the trailing
2885    /// `AND` is not swallowed.
2886    fn parse_between_tail(&mut self, expr: Expr, negated: bool) -> Result<Expr, ParseError> {
2887        self.advance(); // BETWEEN
2888        let low = self.parse_expr(5)?;
2889        if !matches!(self.peek(), Token::And) {
2890            return Err(self.err(format!(
2891                "expected AND after BETWEEN low bound, got {:?}",
2892                self.peek()
2893            )));
2894        }
2895        self.advance();
2896        let high = self.parse_expr(5)?;
2897        let target = Box::new(expr);
2898        let combined = Expr::Binary {
2899            lhs: Box::new(Expr::Binary {
2900                lhs: target.clone(),
2901                op: BinOp::GtEq,
2902                rhs: Box::new(low),
2903            }),
2904            op: BinOp::And,
2905            rhs: Box::new(Expr::Binary {
2906                lhs: target,
2907                op: BinOp::LtEq,
2908                rhs: Box::new(high),
2909            }),
2910        };
2911        Ok(maybe_not(combined, negated))
2912    }
2913
2914    /// `x IN (a, b, c)`  →  chained OR of equalities. Empty list collapses
2915    /// to FALSE (TRUE under NOT IN), matching standard SQL semantics.
2916    /// v4.11: parse `WITH name AS (SELECT ...) [, ...] SELECT ...`.
2917    /// Caller already consumed the leading `WITH` ident.
2918    fn parse_with_cte_then_select(&mut self) -> Result<Statement, ParseError> {
2919        // v4.22: WITH RECURSIVE — optional keyword right after WITH.
2920        // Comes through as an identifier; consume it if present and
2921        // mark every CTE in the clause as recursive (PG semantics —
2922        // the flag is per-WITH, not per-CTE).
2923        let mut recursive = false;
2924        if let Token::Ident(s) | Token::QuotedIdent(s) = self.peek()
2925            && s.eq_ignore_ascii_case("recursive")
2926        {
2927            self.advance();
2928            recursive = true;
2929        }
2930        let mut ctes = Vec::new();
2931        loop {
2932            let name = self.expect_ident_like()?;
2933            // v4.22: optional column-name list — `WITH t(a,b,c) AS ...`.
2934            // PG uses these to rename the body's output columns; we
2935            // do the same below by overriding `columns[i].name`.
2936            let column_overrides: Vec<String> = if matches!(self.peek(), Token::LParen) {
2937                self.advance();
2938                let mut names = Vec::new();
2939                loop {
2940                    names.push(self.expect_ident_like()?);
2941                    if matches!(self.peek(), Token::Comma) {
2942                        self.advance();
2943                        continue;
2944                    }
2945                    break;
2946                }
2947                if !matches!(self.peek(), Token::RParen) {
2948                    return Err(self.err(format!(
2949                        "expected ')' to close CTE column list, got {:?}",
2950                        self.peek()
2951                    )));
2952                }
2953                self.advance();
2954                names
2955            } else {
2956                Vec::new()
2957            };
2958            // AS is a reserved Token::As (used by SELECT-item / FROM
2959            // aliasing) — handle it specially rather than as a bare
2960            // ident.
2961            if !matches!(self.peek(), Token::As) {
2962                return Err(self.err(format!(
2963                    "expected AS after CTE name {name:?}, got {:?}",
2964                    self.peek()
2965                )));
2966            }
2967            self.advance();
2968            if !matches!(self.peek(), Token::LParen) {
2969                return Err(self.err(format!(
2970                    "expected '(' after AS in WITH clause, got {:?}",
2971                    self.peek()
2972                )));
2973            }
2974            self.advance();
2975            if !matches!(self.peek(), Token::Select) {
2976                return Err(self.err(format!("WITH body must be a SELECT, got {:?}", self.peek())));
2977            }
2978            let inner = self.parse_select_stmt()?;
2979            if !matches!(self.peek(), Token::RParen) {
2980                return Err(self.err(format!(
2981                    "expected ')' after CTE body, got {:?}",
2982                    self.peek()
2983                )));
2984            }
2985            self.advance();
2986            let Statement::Select(body) = inner else {
2987                unreachable!("parse_select_stmt returns Select")
2988            };
2989            ctes.push(crate::ast::Cte {
2990                name,
2991                body,
2992                recursive,
2993                column_overrides,
2994            });
2995            if matches!(self.peek(), Token::Comma) {
2996                self.advance();
2997                continue;
2998            }
2999            break;
3000        }
3001        // The body SELECT follows. Must start with SELECT.
3002        if !matches!(self.peek(), Token::Select) {
3003            return Err(self.err(format!(
3004                "expected SELECT after WITH clause, got {:?}",
3005                self.peek()
3006            )));
3007        }
3008        let body_stmt = self.parse_select_stmt()?;
3009        let Statement::Select(mut body) = body_stmt else {
3010            unreachable!()
3011        };
3012        body.ctes = ctes;
3013        Ok(Statement::Select(body))
3014    }
3015
3016    /// v4.10: parse `EXISTS (SELECT ...)`. Caller (`parse_atom`)
3017    /// already consumed the leading `EXISTS` ident via
3018    /// `self.advance()`.
3019    fn parse_exists_atom(&mut self, negated: bool) -> Result<Expr, ParseError> {
3020        if !matches!(self.peek(), Token::LParen) {
3021            return Err(self.err(format!("expected '(' after EXISTS, got {:?}", self.peek())));
3022        }
3023        self.advance();
3024        let inner = self.parse_select_stmt()?;
3025        if !matches!(self.peek(), Token::RParen) {
3026            return Err(self.err(format!(
3027                "expected ')' after EXISTS-subquery, got {:?}",
3028                self.peek()
3029            )));
3030        }
3031        self.advance();
3032        let Statement::Select(s) = inner else {
3033            unreachable!("parse_select_stmt returns Select")
3034        };
3035        Ok(Expr::Exists {
3036            subquery: Box::new(s),
3037            negated,
3038        })
3039    }
3040
3041    fn parse_in_tail(&mut self, expr: Expr, negated: bool) -> Result<Expr, ParseError> {
3042        self.advance(); // IN
3043        if !matches!(self.peek(), Token::LParen) {
3044            return Err(self.err(format!("expected '(' after IN, got {:?}", self.peek())));
3045        }
3046        self.advance();
3047        // v4.10: `IN (SELECT ...)` — subquery branch.
3048        if matches!(self.peek(), Token::Select) {
3049            let inner = self.parse_select_stmt()?;
3050            if !matches!(self.peek(), Token::RParen) {
3051                return Err(self.err(format!(
3052                    "expected ')' after IN-subquery, got {:?}",
3053                    self.peek()
3054                )));
3055            }
3056            self.advance();
3057            let Statement::Select(s) = inner else {
3058                unreachable!("parse_select_stmt always returns Statement::Select")
3059            };
3060            return Ok(Expr::InSubquery {
3061                expr: Box::new(expr),
3062                subquery: Box::new(s),
3063                negated,
3064            });
3065        }
3066        let mut elements = Vec::new();
3067        if !matches!(self.peek(), Token::RParen) {
3068            loop {
3069                elements.push(self.parse_expr(0)?);
3070                match self.peek() {
3071                    Token::Comma => {
3072                        self.advance();
3073                    }
3074                    Token::RParen => break,
3075                    other => {
3076                        return Err(
3077                            self.err(format!("expected ',' or ')' in IN list, got {other:?}"))
3078                        );
3079                    }
3080                }
3081            }
3082        }
3083        self.advance(); // ')'
3084        let target = Box::new(expr);
3085        let combined = if elements.is_empty() {
3086            Expr::Literal(Literal::Bool(false))
3087        } else {
3088            let mut iter = elements.into_iter();
3089            let first = iter.next().unwrap();
3090            let mut acc = Expr::Binary {
3091                lhs: target.clone(),
3092                op: BinOp::Eq,
3093                rhs: Box::new(first),
3094            };
3095            for elt in iter {
3096                acc = Expr::Binary {
3097                    lhs: Box::new(acc),
3098                    op: BinOp::Or,
3099                    rhs: Box::new(Expr::Binary {
3100                        lhs: target.clone(),
3101                        op: BinOp::Eq,
3102                        rhs: Box::new(elt),
3103                    }),
3104                };
3105            }
3106            acc
3107        };
3108        Ok(maybe_not(combined, negated))
3109    }
3110
3111    /// Parse a pgvector array literal `[ x1, x2, ... ]`. The opening `[` is
3112    /// already consumed by the caller. Elements must be numeric literals
3113    /// (with optional unary `-`); any compound expression is rejected at
3114    /// parse time so the runtime never needs to evaluate inside a vector.
3115    /// `EXTRACT(<field> FROM <source>)`. The dispatching `parse_atom`
3116    /// has already consumed the `EXTRACT` token before calling us —
3117    /// we pick up at the opening `(`.
3118    fn parse_extract_atom(&mut self) -> Result<Expr, ParseError> {
3119        if !matches!(self.peek(), Token::LParen) {
3120            return Err(self.err(format!("expected '(' after EXTRACT, got {:?}", self.peek())));
3121        }
3122        self.advance();
3123        let field_name = self.expect_ident_like()?;
3124        let field = match field_name.to_ascii_lowercase().as_str() {
3125            "year" => ExtractField::Year,
3126            "month" => ExtractField::Month,
3127            "day" => ExtractField::Day,
3128            "hour" => ExtractField::Hour,
3129            "minute" => ExtractField::Minute,
3130            "second" => ExtractField::Second,
3131            "microsecond" | "microseconds" => ExtractField::Microsecond,
3132            other => {
3133                return Err(self.err(format!(
3134                    "unknown EXTRACT field {other:?}; \
3135                     supported: YEAR, MONTH, DAY, HOUR, MINUTE, SECOND, MICROSECOND"
3136                )));
3137            }
3138        };
3139        if !matches!(self.peek(), Token::From) {
3140            return Err(self.err(format!(
3141                "expected FROM after EXTRACT field, got {:?}",
3142                self.peek()
3143            )));
3144        }
3145        self.advance();
3146        let source = self.parse_expr(0)?;
3147        if !matches!(self.peek(), Token::RParen) {
3148            return Err(self.err(format!(
3149                "expected ')' to close EXTRACT, got {:?}",
3150                self.peek()
3151            )));
3152        }
3153        self.advance();
3154        Ok(Expr::Extract {
3155            field,
3156            source: Box::new(source),
3157        })
3158    }
3159
3160    /// `INTERVAL '<n> <unit> [<n> <unit> ...]'` — the `INTERVAL` keyword
3161    /// is already consumed; we expect a single string literal next and
3162    /// resolve it into `Literal::Interval` at parse time so the engine
3163    /// never has to re-tokenise inside the string.
3164    fn parse_interval_atom(&mut self) -> Result<Expr, ParseError> {
3165        let tok = self.advance();
3166        let Token::String(text) = tok else {
3167            return Err(self.err(format!(
3168                "expected string literal after INTERVAL, got {tok:?}"
3169            )));
3170        };
3171        let (months, micros) = parse_interval_text(&text).ok_or_else(|| ParseError {
3172            message: format!(
3173                "cannot parse INTERVAL {text:?}; \
3174                     expected `<n> <unit> [<n> <unit> ...]` with units \
3175                     microsecond[s], millisecond[s], second[s], minute[s], \
3176                     hour[s], day[s], week[s], month[s], year[s]"
3177            ),
3178            token_pos: self.pos.saturating_sub(1),
3179        })?;
3180        Ok(Expr::Literal(Literal::Interval {
3181            months,
3182            micros,
3183            text,
3184        }))
3185    }
3186
3187    fn parse_vector_literal_body(&mut self) -> Result<Expr, ParseError> {
3188        let mut elems = Vec::new();
3189        if matches!(self.peek(), Token::RBracket) {
3190            self.advance();
3191            return Ok(Expr::Literal(Literal::Vector(elems)));
3192        }
3193        loop {
3194            let e = self.parse_expr(0)?;
3195            let x = extract_numeric_literal(&e).ok_or_else(|| ParseError {
3196                message: format!("vector element must be a numeric literal, got {e:?}"),
3197                token_pos: self.pos,
3198            })?;
3199            elems.push(x);
3200            match self.peek() {
3201                Token::Comma => {
3202                    self.advance();
3203                }
3204                Token::RBracket => {
3205                    self.advance();
3206                    break;
3207                }
3208                other => {
3209                    return Err(self.err(format!("expected ',' or ']' in vector, got {other:?}")));
3210                }
3211            }
3212        }
3213        Ok(Expr::Literal(Literal::Vector(elems)))
3214    }
3215
3216    /// Atom that started with an identifier: could be `t.col`, `col`, or
3217    /// `func(arg, ...)`. Detect each shape by looking at the next token.
3218    /// v4.12: parse `(PARTITION BY expr, ... ORDER BY expr [DESC]
3219    /// [, ...])`. Caller has already consumed `OVER`. Either clause
3220    /// is optional; an empty `()` is also legal (PG semantics).
3221    /// v6.4.2 — consume an optional `IGNORE NULLS` / `RESPECT NULLS`
3222    /// modifier between `name(args)` and `OVER (...)`. Default is
3223    /// `Respect`. Unrecognised idents leave the stream unchanged.
3224    fn parse_null_treatment_modifier(&mut self) -> NullTreatment {
3225        let Token::Ident(s) = self.peek().clone() else {
3226            return NullTreatment::Respect;
3227        };
3228        let is_ignore = s.eq_ignore_ascii_case("ignore");
3229        let is_respect = s.eq_ignore_ascii_case("respect");
3230        if !is_ignore && !is_respect {
3231            return NullTreatment::Respect;
3232        }
3233        // Lookahead for NULLS — only consume both tokens together.
3234        // pos+1 must hold a "nulls" ident.
3235        if self.pos + 1 < self.tokens.len()
3236            && let Token::Ident(s2) = &self.tokens[self.pos + 1]
3237            && s2.eq_ignore_ascii_case("nulls")
3238        {
3239            self.advance();
3240            self.advance();
3241            return if is_ignore {
3242                NullTreatment::Ignore
3243            } else {
3244                NullTreatment::Respect
3245            };
3246        }
3247        NullTreatment::Respect
3248    }
3249
3250    /// No frame clause is supported.
3251    #[allow(clippy::type_complexity)] // (partitions, ordered-keys-with-desc) is the natural shape
3252    fn parse_over_clause(
3253        &mut self,
3254    ) -> Result<(Vec<Expr>, Vec<(Expr, bool)>, Option<WindowFrame>), ParseError> {
3255        if !matches!(self.peek(), Token::LParen) {
3256            return Err(self.err(format!("expected '(' after OVER, got {:?}", self.peek())));
3257        }
3258        self.advance();
3259        let mut partition_by = Vec::new();
3260        let mut order_by = Vec::new();
3261        // PARTITION BY ?
3262        if let Token::Ident(s) | Token::QuotedIdent(s) = self.peek()
3263            && s.eq_ignore_ascii_case("partition")
3264        {
3265            self.advance();
3266            if !matches!(self.peek(), Token::By) {
3267                return Err(self.err(format!(
3268                    "expected BY after PARTITION, got {:?}",
3269                    self.peek()
3270                )));
3271            }
3272            self.advance();
3273            loop {
3274                partition_by.push(self.parse_expr(0)?);
3275                if matches!(self.peek(), Token::Comma) {
3276                    self.advance();
3277                    continue;
3278                }
3279                break;
3280            }
3281        }
3282        // ORDER BY ?
3283        if matches!(self.peek(), Token::Order) {
3284            self.advance();
3285            if !matches!(self.peek(), Token::By) {
3286                return Err(self.err(format!("expected BY after ORDER, got {:?}", self.peek())));
3287            }
3288            self.advance();
3289            loop {
3290                let e = self.parse_expr(0)?;
3291                let desc = if matches!(self.peek(), Token::Desc) {
3292                    self.advance();
3293                    true
3294                } else if matches!(self.peek(), Token::Asc) {
3295                    self.advance();
3296                    false
3297                } else {
3298                    false
3299                };
3300                order_by.push((e, desc));
3301                if matches!(self.peek(), Token::Comma) {
3302                    self.advance();
3303                    continue;
3304                }
3305                break;
3306            }
3307        }
3308        // v4.20: optional explicit frame, `ROWS ...` / `RANGE ...`.
3309        // Both keywords come through the lexer as identifiers; match
3310        // case-insensitively.
3311        let mut frame: Option<WindowFrame> = None;
3312        if let Token::Ident(s) | Token::QuotedIdent(s) = self.peek() {
3313            let kind = if s.eq_ignore_ascii_case("rows") {
3314                Some(FrameKind::Rows)
3315            } else if s.eq_ignore_ascii_case("range") {
3316                Some(FrameKind::Range)
3317            } else {
3318                None
3319            };
3320            if let Some(kind) = kind {
3321                self.advance();
3322                frame = Some(self.parse_frame_tail(kind)?);
3323            }
3324        }
3325        if !matches!(self.peek(), Token::RParen) {
3326            return Err(self.err(format!(
3327                "expected ')' to close OVER clause, got {:?}",
3328                self.peek()
3329            )));
3330        }
3331        self.advance();
3332        Ok((partition_by, order_by, frame))
3333    }
3334
3335    /// v4.20: parse the tail of an explicit frame, given the `ROWS`
3336    /// or `RANGE` keyword was just consumed. Accepts both
3337    /// `BETWEEN <bound> AND <bound>` and the single-bound shorthand
3338    /// (`ROWS UNBOUNDED PRECEDING`, `ROWS 5 PRECEDING`, etc.) which
3339    /// PG normalises to `BETWEEN <bound> AND CURRENT ROW`.
3340    fn parse_frame_tail(&mut self, kind: FrameKind) -> Result<WindowFrame, ParseError> {
3341        if matches!(self.peek(), Token::Between) {
3342            self.advance();
3343            let start = self.parse_frame_bound()?;
3344            if !matches!(self.peek(), Token::And) {
3345                return Err(self.err(format!("expected AND in frame spec, got {:?}", self.peek())));
3346            }
3347            self.advance();
3348            let end = self.parse_frame_bound()?;
3349            Ok(WindowFrame {
3350                kind,
3351                start,
3352                end: Some(end),
3353            })
3354        } else {
3355            let start = self.parse_frame_bound()?;
3356            Ok(WindowFrame {
3357                kind,
3358                start,
3359                end: None,
3360            })
3361        }
3362    }
3363
3364    /// Parse one frame bound: `UNBOUNDED PRECEDING`, `<n> PRECEDING`,
3365    /// `CURRENT ROW`, `<n> FOLLOWING`, `UNBOUNDED FOLLOWING`.
3366    fn parse_frame_bound(&mut self) -> Result<FrameBound, ParseError> {
3367        // Number-led: "<n> PRECEDING" / "<n> FOLLOWING".
3368        if let Token::Integer(n) = *self.peek() {
3369            self.advance();
3370            let n: u64 = u64::try_from(n).map_err(|_| {
3371                self.err(format!(
3372                    "invalid frame offset {n} — expected non-negative integer"
3373                ))
3374            })?;
3375            let dir = self.expect_ident_like()?;
3376            return if dir.eq_ignore_ascii_case("preceding") {
3377                Ok(FrameBound::OffsetPreceding(n))
3378            } else if dir.eq_ignore_ascii_case("following") {
3379                Ok(FrameBound::OffsetFollowing(n))
3380            } else {
3381                Err(self.err(format!(
3382                    "expected PRECEDING or FOLLOWING after offset, got {dir:?}"
3383                )))
3384            };
3385        }
3386        let first = self.expect_ident_like()?;
3387        if first.eq_ignore_ascii_case("unbounded") {
3388            let dir = self.expect_ident_like()?;
3389            return if dir.eq_ignore_ascii_case("preceding") {
3390                Ok(FrameBound::UnboundedPreceding)
3391            } else if dir.eq_ignore_ascii_case("following") {
3392                Ok(FrameBound::UnboundedFollowing)
3393            } else {
3394                Err(self.err(format!(
3395                    "expected PRECEDING or FOLLOWING after UNBOUNDED, got {dir:?}"
3396                )))
3397            };
3398        }
3399        if first.eq_ignore_ascii_case("current") {
3400            let row = self.expect_ident_like()?;
3401            if !row.eq_ignore_ascii_case("row") {
3402                return Err(self.err(format!("expected ROW after CURRENT, got {row:?}")));
3403            }
3404            return Ok(FrameBound::CurrentRow);
3405        }
3406        Err(self.err(format!(
3407            "expected frame bound (UNBOUNDED/CURRENT/<n>), got {first:?}"
3408        )))
3409    }
3410
3411    fn finish_ident_atom(&mut self, first: String) -> Result<Expr, ParseError> {
3412        if matches!(self.peek(), Token::Dot) {
3413            self.advance();
3414            let name = self.expect_ident_like()?;
3415            return Ok(Expr::Column(ColumnName {
3416                qualifier: Some(first),
3417                name,
3418            }));
3419        }
3420        if matches!(self.peek(), Token::LParen) {
3421            self.advance();
3422            // `COUNT(*)` — special-cased here because `*` isn't a normal
3423            // expression token. Lower-case match on `first` since the lexer
3424            // folds identifiers.
3425            if first.eq_ignore_ascii_case("count") && matches!(self.peek(), Token::Star) {
3426                self.advance();
3427                if !matches!(self.peek(), Token::RParen) {
3428                    return Err(self.err(format!(
3429                        "expected ')' after COUNT(*), got {:?}",
3430                        self.peek()
3431                    )));
3432                }
3433                self.advance();
3434                // v4.12: COUNT(*) OVER (...) — same window tail.
3435                let null_treatment = self.parse_null_treatment_modifier();
3436                if let Token::Ident(s) | Token::QuotedIdent(s) = self.peek()
3437                    && s.eq_ignore_ascii_case("over")
3438                {
3439                    self.advance();
3440                    let (partition_by, order_by, frame) = self.parse_over_clause()?;
3441                    return Ok(Expr::WindowFunction {
3442                        name: "count_star".into(),
3443                        args: Vec::new(),
3444                        partition_by,
3445                        order_by,
3446                        frame,
3447                        null_treatment,
3448                    });
3449                }
3450                return Ok(Expr::FunctionCall {
3451                    name: "count_star".into(),
3452                    args: Vec::new(),
3453                });
3454            }
3455            // Function call. PG-style: zero-or-more comma-separated args.
3456            let mut args = Vec::new();
3457            if !matches!(self.peek(), Token::RParen) {
3458                loop {
3459                    args.push(self.parse_expr(0)?);
3460                    match self.peek() {
3461                        Token::Comma => {
3462                            self.advance();
3463                        }
3464                        Token::RParen => break,
3465                        other => {
3466                            return Err(self.err(format!(
3467                                "expected ',' or ')' in function args, got {other:?}"
3468                            )));
3469                        }
3470                    }
3471                }
3472            }
3473            self.advance(); // consume ')'
3474            // v4.12: window-function tail — `name(args) OVER (...)`.
3475            // Promotes the just-parsed FunctionCall into a
3476            // WindowFunction node carrying partition + order.
3477            // v6.4.2: also accepts `name(args) IGNORE NULLS OVER (...)`
3478            // / `RESPECT NULLS OVER (...)` between the closing paren
3479            // and `OVER`.
3480            let null_treatment = self.parse_null_treatment_modifier();
3481            if let Token::Ident(s) | Token::QuotedIdent(s) = self.peek()
3482                && s.eq_ignore_ascii_case("over")
3483            {
3484                self.advance();
3485                let (partition_by, order_by, frame) = self.parse_over_clause()?;
3486                return Ok(Expr::WindowFunction {
3487                    name: first,
3488                    args,
3489                    partition_by,
3490                    order_by,
3491                    frame,
3492                    null_treatment,
3493                });
3494            }
3495            return Ok(Expr::FunctionCall { name: first, args });
3496        }
3497        // v7.9.20 — SQL-standard parenless keyword expressions
3498        // (PG treats these as functions called without parens).
3499        // Resolve to a synthetic FunctionCall so the engine's
3500        // eval path reuses the existing function-call routing.
3501        // mailrs G3.
3502        let lc = first.to_ascii_lowercase();
3503        if matches!(
3504            lc.as_str(),
3505            "current_date" | "current_time" | "current_timestamp" | "localtimestamp" | "localtime"
3506        ) {
3507            return Ok(Expr::FunctionCall {
3508                name: lc,
3509                args: Vec::new(),
3510            });
3511        }
3512        Ok(Expr::Column(ColumnName {
3513            qualifier: None,
3514            name: first,
3515        }))
3516    }
3517}
3518
3519/// v6.8.2 — walk an expression tree and return the first column
3520/// reference's bare name. Used by `parse_create_index_stmt_after_create`
3521/// to derive `CreateIndexStatement.column` from an expression
3522/// key (so downstream planner code resolving a primary column
3523/// position keeps working with expression indexes). Returns
3524/// `None` when the expression has no column ref at all — caller
3525/// surfaces that as a parse error.
3526fn extract_first_column(expr: &Expr) -> Option<String> {
3527    match expr {
3528        Expr::Column(cn) => Some(cn.name.clone()),
3529        Expr::FunctionCall { args, .. } => args.iter().find_map(extract_first_column),
3530        Expr::Binary { lhs, rhs, .. } => {
3531            extract_first_column(lhs).or_else(|| extract_first_column(rhs))
3532        }
3533        Expr::Unary { expr: e, .. } => extract_first_column(e),
3534        _ => None,
3535    }
3536}
3537
3538fn maybe_not(expr: Expr, negated: bool) -> Expr {
3539    if negated {
3540        Expr::Unary {
3541            op: UnOp::Not,
3542            expr: Box::new(expr),
3543        }
3544    } else {
3545        expr
3546    }
3547}
3548
3549fn binop_from(tok: &Token) -> Option<(BinOp, u8)> {
3550    let pair = match tok {
3551        Token::Or => (BinOp::Or, 1),
3552        Token::And => (BinOp::And, 2),
3553        Token::Eq => (BinOp::Eq, 4),
3554        Token::NotEq => (BinOp::NotEq, 4),
3555        Token::Lt => (BinOp::Lt, 4),
3556        Token::LtEq => (BinOp::LtEq, 4),
3557        Token::Gt => (BinOp::Gt, 4),
3558        Token::GtEq => (BinOp::GtEq, 4),
3559        // pgvector distance ops all sit on the same rung — tighter than
3560        // comparisons (4) so `col <-> v < threshold` parses correctly.
3561        Token::L2Distance => (BinOp::L2Distance, 5),
3562        Token::InnerProduct => (BinOp::InnerProduct, 5),
3563        Token::CosineDistance => (BinOp::CosineDistance, 5),
3564        Token::Plus => (BinOp::Add, 6),
3565        Token::Minus => (BinOp::Sub, 6),
3566        // `||` sits beside `+`/`-` (matches PG conceptually — concat groups
3567        // by the same level as binary additive arithmetic).
3568        Token::Concat => (BinOp::Concat, 6),
3569        Token::Star => (BinOp::Mul, 7),
3570        Token::Slash => (BinOp::Div, 7),
3571        // v4.14: JSON path ops bind tighter than comparisons (4)
3572        // and additive (6) so `doc->'k' = 'v'` parses correctly.
3573        // Same rung as the multiplicative ops.
3574        Token::JsonGet => (BinOp::JsonGet, 7),
3575        Token::JsonGetText => (BinOp::JsonGetText, 7),
3576        Token::JsonGetPath => (BinOp::JsonGetPath, 7),
3577        Token::JsonGetPathText => (BinOp::JsonGetPathText, 7),
3578        Token::JsonContains => (BinOp::JsonContains, 7),
3579        _ => return None,
3580    };
3581    Some(pair)
3582}
3583
3584#[allow(clippy::cast_possible_truncation, clippy::cast_precision_loss)]
3585// `as f32` here is intentional: vector elements widen / narrow into f32 on
3586// purpose. i64 → f32 loses precision past 2^24, f64 → f32 loses precision
3587// past ~15 decimal digits — both are acceptable for a fixed-precision
3588// pgvector column.
3589fn extract_numeric_literal(e: &Expr) -> Option<f32> {
3590    match e {
3591        Expr::Literal(Literal::Integer(n)) => Some(*n as f32),
3592        Expr::Literal(Literal::Float(x)) => Some(*x as f32),
3593        Expr::Unary {
3594            op: UnOp::Neg,
3595            expr,
3596        } => extract_numeric_literal(expr).map(|x| -x),
3597        _ => None,
3598    }
3599}
3600
/// Parse the text inside `INTERVAL '...'` into `(months, micros)`.
///
/// The text must consist of one or more whitespace-separated `<n> <unit>`
/// pairs, where `<n>` is an integer that may be negative. `month` and `year`
/// accumulate into the first tuple element; every other unit accumulates into
/// the second as microseconds.
///
/// Recognised units (case-insensitive, optional trailing `s`):
/// `microsecond`, `millisecond`, `second`, `minute`, `hour`, `day`, `week`,
/// `month`, `year`. `week` widens to 7 days; `year` widens to 12 months.
///
/// Returns `None` when the text is empty, has a dangling token (a count
/// without a unit), contains a count that does not parse as `i64`, names an
/// unknown unit, or when either total would overflow (`i32` months, `i64`
/// microseconds).
pub fn parse_interval_text(s: &str) -> Option<(i32, i64)> {
    let tokens: Vec<&str> = s.split_whitespace().collect();
    let pairs = tokens.chunks_exact(2);
    // No tokens means no pair at all; a leftover token means the text does
    // not split cleanly into `<n> <unit>` pairs. Both are malformed.
    if tokens.is_empty() || !pairs.remainder().is_empty() {
        return None;
    }

    let mut months: i32 = 0;
    let mut micros: i64 = 0;
    for pair in pairs {
        let n: i64 = pair[0].parse().ok()?;
        let unit = pair[1].to_ascii_lowercase();
        // Accept the plural spelling by dropping a single trailing `s`.
        let singular = unit.strip_suffix('s').unwrap_or(&unit);
        match singular {
            "microsecond" => micros = micros.checked_add(n)?,
            "millisecond" => micros = micros.checked_add(n.checked_mul(1_000)?)?,
            "second" => micros = micros.checked_add(n.checked_mul(1_000_000)?)?,
            "minute" => micros = micros.checked_add(n.checked_mul(60_000_000)?)?,
            "hour" => micros = micros.checked_add(n.checked_mul(3_600_000_000)?)?,
            "day" => micros = micros.checked_add(n.checked_mul(86_400_000_000)?)?,
            "week" => micros = micros.checked_add(n.checked_mul(604_800_000_000)?)?,
            "month" => {
                // Month counts live in an i32; reject anything wider.
                let n32 = i32::try_from(n).ok()?;
                months = months.checked_add(n32)?;
            }
            "year" => {
                let n32 = i32::try_from(n).ok()?;
                months = months.checked_add(n32.checked_mul(12)?)?;
            }
            _ => return None,
        }
    }
    Some((months, micros))
}
3642
3643#[cfg(test)]
3644mod tests {
3645    use super::*;
3646    use alloc::string::ToString;
3647
3648    fn parse(s: &str) -> Statement {
3649        parse_statement(s).expect("parse ok")
3650    }
3651
3652    fn lit_int(n: i64) -> Expr {
3653        Expr::Literal(Literal::Integer(n))
3654    }
3655
3656    fn col(name: &str) -> Expr {
3657        Expr::Column(ColumnName {
3658            qualifier: None,
3659            name: name.into(),
3660        })
3661    }
3662
3663    #[test]
3664    fn select_single_integer() {
3665        let s = parse("SELECT 1");
3666        let Statement::Select(s) = s else {
3667            panic!("expected SELECT")
3668        };
3669        assert_eq!(s.items.len(), 1);
3670        assert!(s.from.is_none());
3671        assert!(s.where_.is_none());
3672    }
3673
3674    #[test]
3675    fn select_multiple_literal_kinds() {
3676        let s = parse("SELECT 1, 'hi', NULL, TRUE, 1.5");
3677        let Statement::Select(s) = s else {
3678            panic!("expected SELECT")
3679        };
3680        assert_eq!(s.items.len(), 5);
3681    }
3682
3683    #[test]
3684    fn select_wildcard_from_table() {
3685        let s = parse("SELECT * FROM users");
3686        let Statement::Select(s) = s else {
3687            panic!("expected SELECT")
3688        };
3689        assert!(matches!(s.items[..], [SelectItem::Wildcard]));
3690        assert_eq!(s.from.as_ref().unwrap().primary.name, "users");
3691    }
3692
3693    #[test]
3694    fn select_with_table_alias() {
3695        let s = parse("SELECT * FROM users AS u");
3696        let Statement::Select(s) = s else {
3697            panic!("expected SELECT")
3698        };
3699        let t = &s.from.as_ref().unwrap().primary;
3700        assert_eq!(t.name, "users");
3701        assert_eq!(t.alias.as_deref(), Some("u"));
3702    }
3703
3704    #[test]
3705    fn select_with_where_eq() {
3706        let s = parse("SELECT a FROM t WHERE a = 1");
3707        let Statement::Select(s) = s else {
3708            panic!("expected SELECT")
3709        };
3710        let w = s.where_.unwrap();
3711        assert_eq!(
3712            w,
3713            Expr::Binary {
3714                lhs: Box::new(col("a")),
3715                op: BinOp::Eq,
3716                rhs: Box::new(lit_int(1)),
3717            }
3718        );
3719    }
3720
3721    #[test]
3722    fn arithmetic_precedence() {
3723        let s = parse("SELECT 1 + 2 * 3");
3724        let Statement::Select(s) = s else {
3725            panic!("expected SELECT")
3726        };
3727        let SelectItem::Expr { expr, .. } = &s.items[0] else {
3728            panic!("wildcard?")
3729        };
3730        assert_eq!(
3731            expr,
3732            &Expr::Binary {
3733                lhs: Box::new(lit_int(1)),
3734                op: BinOp::Add,
3735                rhs: Box::new(Expr::Binary {
3736                    lhs: Box::new(lit_int(2)),
3737                    op: BinOp::Mul,
3738                    rhs: Box::new(lit_int(3)),
3739                }),
3740            }
3741        );
3742    }
3743
3744    #[test]
3745    fn parentheses_override_precedence() {
3746        let s = parse("SELECT (1 + 2) * 3");
3747        let Statement::Select(s) = s else {
3748            panic!("expected SELECT")
3749        };
3750        let SelectItem::Expr { expr, .. } = &s.items[0] else {
3751            panic!()
3752        };
3753        assert_eq!(
3754            expr,
3755            &Expr::Binary {
3756                lhs: Box::new(Expr::Binary {
3757                    lhs: Box::new(lit_int(1)),
3758                    op: BinOp::Add,
3759                    rhs: Box::new(lit_int(2)),
3760                }),
3761                op: BinOp::Mul,
3762                rhs: Box::new(lit_int(3)),
3763            }
3764        );
3765    }
3766
3767    #[test]
3768    fn not_binds_below_comparison() {
3769        // `NOT a = 1` should parse as `NOT (a = 1)`.
3770        let s = parse("SELECT NOT a = 1 FROM t");
3771        let Statement::Select(s) = s else {
3772            panic!("expected SELECT")
3773        };
3774        let SelectItem::Expr { expr, .. } = &s.items[0] else {
3775            panic!()
3776        };
3777        assert_eq!(
3778            expr,
3779            &Expr::Unary {
3780                op: UnOp::Not,
3781                expr: Box::new(Expr::Binary {
3782                    lhs: Box::new(col("a")),
3783                    op: BinOp::Eq,
3784                    rhs: Box::new(lit_int(1)),
3785                }),
3786            }
3787        );
3788    }
3789
3790    #[test]
3791    fn unary_minus_binds_above_multiplication() {
3792        // `-a * 2` should be `(-a) * 2`.
3793        let s = parse("SELECT -a * 2 FROM t");
3794        let Statement::Select(s) = s else {
3795            panic!("expected SELECT")
3796        };
3797        let SelectItem::Expr { expr, .. } = &s.items[0] else {
3798            panic!()
3799        };
3800        assert_eq!(
3801            expr,
3802            &Expr::Binary {
3803                lhs: Box::new(Expr::Unary {
3804                    op: UnOp::Neg,
3805                    expr: Box::new(col("a")),
3806                }),
3807                op: BinOp::Mul,
3808                rhs: Box::new(lit_int(2)),
3809            }
3810        );
3811    }
3812
3813    #[test]
3814    fn qualified_column() {
3815        let s = parse("SELECT t.col FROM t");
3816        let Statement::Select(s) = s else {
3817            panic!("expected SELECT")
3818        };
3819        let SelectItem::Expr { expr, .. } = &s.items[0] else {
3820            panic!()
3821        };
3822        assert_eq!(
3823            expr,
3824            &Expr::Column(ColumnName {
3825                qualifier: Some("t".into()),
3826                name: "col".into()
3827            })
3828        );
3829    }
3830
3831    #[test]
3832    fn select_item_alias_with_as() {
3833        let s = parse("SELECT a AS y FROM t");
3834        let Statement::Select(s) = s else {
3835            panic!("expected SELECT")
3836        };
3837        let SelectItem::Expr { alias, .. } = &s.items[0] else {
3838            panic!()
3839        };
3840        assert_eq!(alias.as_deref(), Some("y"));
3841    }
3842
3843    #[test]
3844    fn trailing_semicolon_accepted() {
3845        let s = parse("SELECT 1;");
3846        let Statement::Select(s) = s else {
3847            panic!("expected SELECT")
3848        };
3849        assert_eq!(s.items.len(), 1);
3850    }
3851
3852    #[test]
3853    fn boolean_chain_with_and_or_not() {
3854        // (NOT a) OR (b AND (NOT c))
3855        let s = parse("SELECT NOT a OR b AND NOT c FROM t");
3856        let Statement::Select(s) = s else {
3857            panic!("expected SELECT")
3858        };
3859        let SelectItem::Expr { expr, .. } = &s.items[0] else {
3860            panic!()
3861        };
3862        let expected = Expr::Binary {
3863            lhs: Box::new(Expr::Unary {
3864                op: UnOp::Not,
3865                expr: Box::new(col("a")),
3866            }),
3867            op: BinOp::Or,
3868            rhs: Box::new(Expr::Binary {
3869                lhs: Box::new(col("b")),
3870                op: BinOp::And,
3871                rhs: Box::new(Expr::Unary {
3872                    op: UnOp::Not,
3873                    expr: Box::new(col("c")),
3874                }),
3875            }),
3876        };
3877        assert_eq!(expr, &expected);
3878    }
3879
3880    #[test]
3881    fn empty_input_errors() {
3882        let err = parse_statement("").unwrap_err();
3883        assert!(err.message.contains("SELECT"));
3884    }
3885
3886    #[test]
3887    fn unmatched_paren_errors() {
3888        assert!(parse_statement("SELECT (1 + 2").is_err());
3889    }
3890
3891    #[test]
3892    fn display_round_trip_simple_select() {
3893        let original = parse("SELECT a + 1 FROM t WHERE a > 0");
3894        let text = original.to_string();
3895        let again = parse_statement(&text).expect("re-parse");
3896        assert_eq!(original, again);
3897    }
3898
3899    // --- CREATE TABLE & INSERT (v0.3) ---------------------------------------
3900
3901    #[test]
3902    fn create_table_single_column() {
3903        let s = parse("CREATE TABLE foo (a INT)");
3904        let Statement::CreateTable(c) = s else {
3905            panic!("expected CreateTable")
3906        };
3907        assert_eq!(c.name, "foo");
3908        assert_eq!(c.columns.len(), 1);
3909        assert_eq!(c.columns[0].name, "a");
3910        assert_eq!(c.columns[0].ty, ColumnTypeName::Int);
3911        assert!(c.columns[0].nullable);
3912    }
3913
3914    #[test]
3915    fn create_table_multi_column_with_not_null_mix() {
3916        let s = parse("CREATE TABLE u (id INT NOT NULL, name TEXT, score FLOAT NOT NULL, ok BOOL)");
3917        let Statement::CreateTable(c) = s else {
3918            panic!()
3919        };
3920        assert_eq!(c.columns.len(), 4);
3921        assert_eq!(c.columns[0].ty, ColumnTypeName::Int);
3922        assert!(!c.columns[0].nullable);
3923        assert_eq!(c.columns[1].ty, ColumnTypeName::Text);
3924        assert!(c.columns[1].nullable);
3925        assert_eq!(c.columns[2].ty, ColumnTypeName::Float);
3926        assert!(!c.columns[2].nullable);
3927        assert_eq!(c.columns[3].ty, ColumnTypeName::Bool);
3928    }
3929
3930    #[test]
3931    fn create_table_bigint_supported() {
3932        let s = parse("CREATE TABLE accounts (id BIGINT NOT NULL)");
3933        let Statement::CreateTable(c) = s else {
3934            panic!()
3935        };
3936        assert_eq!(c.columns[0].ty, ColumnTypeName::BigInt);
3937    }
3938
3939    #[test]
3940    fn create_table_vector_default_is_f32() {
3941        let s = parse("CREATE TABLE t (v VECTOR(128))");
3942        let Statement::CreateTable(c) = s else {
3943            panic!()
3944        };
3945        assert_eq!(
3946            c.columns[0].ty,
3947            ColumnTypeName::Vector {
3948                dim: 128,
3949                encoding: VecEncoding::F32,
3950            },
3951        );
3952    }
3953
3954    #[test]
3955    fn create_table_vector_using_sq8() {
3956        // v6.0.1: `USING SQ8` selects scalar-quantised encoding.
3957        // Case-insensitive on both `USING` and the encoding name.
3958        for sql in [
3959            "CREATE TABLE t (v VECTOR(128) USING SQ8)",
3960            "CREATE TABLE t (v VECTOR(128) using sq8)",
3961        ] {
3962            let s = parse(sql);
3963            let Statement::CreateTable(c) = s else {
3964                panic!()
3965            };
3966            assert_eq!(
3967                c.columns[0].ty,
3968                ColumnTypeName::Vector {
3969                    dim: 128,
3970                    encoding: VecEncoding::Sq8,
3971                },
3972                "{sql}",
3973            );
3974        }
3975    }
3976
3977    #[test]
3978    fn create_table_vector_using_unknown_errors() {
3979        let err = parse_statement("CREATE TABLE t (v VECTOR(8) USING PQ8)").unwrap_err();
3980        assert!(
3981            err.message.contains("unknown vector encoding"),
3982            "got: {}",
3983            err.message
3984        );
3985    }
3986
3987    #[test]
3988    fn vector_using_sq8_display_roundtrips() {
3989        // The Display impl must produce text that re-parses to the
3990        // same AST. Guard for the v6.0.1 `USING SQ8` suffix.
3991        let s = parse("CREATE TABLE t (v VECTOR(64) USING SQ8)");
3992        let Statement::CreateTable(c) = s else {
3993            panic!()
3994        };
3995        assert_eq!(c.columns[0].ty.to_string(), "VECTOR(64) USING SQ8");
3996    }
3997
3998    #[test]
3999    fn parser_recognises_placeholders() {
4000        use crate::ast::{Expr, SelectItem, Statement};
4001        // $N in expression position parses as Expr::Placeholder(N).
4002        let s = parse("SELECT $1, $2 + 1 FROM t WHERE x = $3");
4003        let Statement::Select(sel) = s else { panic!() };
4004        assert!(matches!(
4005            sel.items[0],
4006            SelectItem::Expr {
4007                expr: Expr::Placeholder(1),
4008                alias: None
4009            }
4010        ));
4011        // $2 + 1
4012        let SelectItem::Expr {
4013            expr: Expr::Binary { lhs, rhs, .. },
4014            ..
4015        } = &sel.items[1]
4016        else {
4017            panic!()
4018        };
4019        assert!(matches!(**lhs, Expr::Placeholder(2)));
4020        assert!(matches!(**rhs, Expr::Literal(Literal::Integer(1))));
4021        // WHERE x = $3
4022        let Some(Expr::Binary { rhs, .. }) = sel.where_.as_ref() else {
4023            panic!()
4024        };
4025        assert!(matches!(**rhs, Expr::Placeholder(3)));
4026    }
4027
4028    #[test]
4029    fn parser_rejects_dollar_zero() {
4030        // $0 is not valid in PG; the lexer rejects it.
4031        assert!(parse_statement("SELECT $0").is_err());
4032    }
4033
4034    #[test]
4035    fn placeholder_display_roundtrips() {
4036        // The Display impl must produce text that re-lexes to the
4037        // same Placeholder token.
4038        let s = parse("SELECT $42 FROM t");
4039        let printed = s.to_string();
4040        assert!(printed.contains("$42"));
4041        let again = parse(&printed);
4042        assert_eq!(s, again);
4043    }
4044
4045    #[test]
4046    fn alter_index_rebuild_bare() {
4047        use crate::ast::{AlterIndexTarget, Statement};
4048        let s = parse("ALTER INDEX my_idx REBUILD");
4049        let Statement::AlterIndex(a) = s else {
4050            panic!("expected AlterIndex, got {s:?}")
4051        };
4052        assert_eq!(a.name, "my_idx");
4053        assert_eq!(a.target, AlterIndexTarget::Rebuild { encoding: None });
4054    }
4055
4056    #[test]
4057    fn alter_index_rebuild_with_encoding() {
4058        use crate::ast::{AlterIndexTarget, Statement};
4059        for (sql, want) in [
4060            (
4061                "ALTER INDEX my_idx REBUILD WITH (encoding = F32)",
4062                VecEncoding::F32,
4063            ),
4064            (
4065                "ALTER INDEX my_idx REBUILD WITH (encoding = sq8)",
4066                VecEncoding::Sq8,
4067            ),
4068            (
4069                "ALTER INDEX my_idx REBUILD WITH (encoding = HALF)",
4070                VecEncoding::F16,
4071            ),
4072        ] {
4073            let s = parse(sql);
4074            let Statement::AlterIndex(a) = s else {
4075                panic!("{sql}: expected AlterIndex")
4076            };
4077            assert_eq!(a.name, "my_idx");
4078            assert_eq!(
4079                a.target,
4080                AlterIndexTarget::Rebuild {
4081                    encoding: Some(want)
4082                },
4083                "{sql}"
4084            );
4085        }
4086    }
4087
4088    #[test]
4089    fn alter_index_rebuild_unknown_encoding_errors() {
4090        let err = parse_statement("ALTER INDEX my_idx REBUILD WITH (encoding = PQ8)").unwrap_err();
4091        assert!(
4092            err.message.contains("unknown vector encoding"),
4093            "got: {}",
4094            err.message
4095        );
4096    }
4097
4098    #[test]
4099    fn alter_index_rebuild_display_roundtrips() {
4100        for (input, want) in [
4101            ("ALTER INDEX my_idx REBUILD", "ALTER INDEX my_idx REBUILD"),
4102            (
4103                "ALTER INDEX my_idx REBUILD WITH (encoding = SQ8)",
4104                "ALTER INDEX my_idx REBUILD WITH (encoding = SQ8)",
4105            ),
4106            (
4107                "ALTER INDEX my_idx REBUILD WITH (encoding = HALF)",
4108                "ALTER INDEX my_idx REBUILD WITH (encoding = HALF)",
4109            ),
4110        ] {
4111            let s = parse(input);
4112            assert_eq!(s.to_string(), want);
4113        }
4114    }
4115
4116    #[test]
4117    fn create_table_unknown_type_errors() {
4118        // v4.9: JSON is now real; pick an actually unsupported keyword
4119        // (XML never landed and isn't planned).
4120        let err = parse_statement("CREATE TABLE x (a xml)").unwrap_err();
4121        assert!(err.message.contains("unsupported column type"));
4122    }
4123
4124    #[test]
4125    fn create_table_missing_table_keyword_errors() {
4126        assert!(parse_statement("CREATE x (a INT)").is_err());
4127    }
4128
4129    #[test]
4130    fn insert_single_value() {
4131        let s = parse("INSERT INTO foo VALUES (42)");
4132        let Statement::Insert(i) = s else {
4133            panic!("expected Insert")
4134        };
4135        assert_eq!(i.table, "foo");
4136        assert_eq!(i.rows.len(), 1);
4137        assert_eq!(i.rows[0].len(), 1);
4138        assert!(matches!(i.rows[0][0], Expr::Literal(Literal::Integer(42))));
4139    }
4140
4141    #[test]
4142    fn insert_multi_value_with_mixed_literals() {
4143        let s = parse("INSERT INTO foo VALUES (1, 'hi', 3.14, TRUE, NULL)");
4144        let Statement::Insert(i) = s else { panic!() };
4145        assert_eq!(i.rows.len(), 1);
4146        assert_eq!(i.rows[0].len(), 5);
4147    }
4148
4149    #[test]
4150    fn insert_missing_into_errors() {
4151        assert!(parse_statement("INSERT foo VALUES (1)").is_err());
4152    }
4153
4154    #[test]
4155    fn create_table_round_trip() {
4156        let original =
4157            parse("CREATE TABLE foo (id BIGINT NOT NULL, label TEXT, score FLOAT NOT NULL)");
4158        let text = original.to_string();
4159        let again = parse_statement(&text).expect("re-parse");
4160        assert_eq!(original, again);
4161    }
4162
4163    #[test]
4164    fn insert_round_trip_with_negation_and_string() {
4165        let original = parse("INSERT INTO t VALUES (-1, 'it''s', NULL)");
4166        let text = original.to_string();
4167        let again = parse_statement(&text).expect("re-parse");
4168        assert_eq!(original, again);
4169    }
4170
4171    #[test]
4172    fn unknown_keyword_at_statement_start_errors() {
4173        // v4.4: UPDATE is real SQL now. Use a fabricated keyword so
4174        // the top-level dispatch still has no branch to take.
4175        let err = parse_statement("FROBNICATE foo SET x = 1").unwrap_err();
4176        assert!(err.message.contains("expected SELECT"));
4177    }
4178
4179    // --- v0.8 CREATE INDEX --------------------------------------------------
4180
4181    #[test]
4182    fn create_index_basic() {
4183        let s = parse("CREATE INDEX idx_id ON users (id)");
4184        let Statement::CreateIndex(c) = s else {
4185            panic!("expected CreateIndex")
4186        };
4187        assert_eq!(c.name, "idx_id");
4188        assert_eq!(c.table, "users");
4189        assert_eq!(c.column, "id");
4190    }
4191
4192    #[test]
4193    fn create_index_missing_on_errors() {
4194        assert!(parse_statement("CREATE INDEX foo users (id)").is_err());
4195    }
4196
4197    #[test]
4198    fn create_index_missing_paren_errors() {
4199        assert!(parse_statement("CREATE INDEX foo ON users id").is_err());
4200    }
4201
4202    #[test]
4203    fn create_index_round_trip() {
4204        let original = parse("CREATE INDEX by_name ON users (name)");
4205        let again = parse_statement(&original.to_string()).unwrap();
4206        assert_eq!(original, again);
4207    }
4208
4209    // --- v7.9.29 CREATE UNIQUE INDEX [WHERE pred] (mailrs K1) -------------
4210
4211    #[test]
4212    fn create_unique_index_basic() {
4213        let s = parse("CREATE UNIQUE INDEX uq_x ON t (a)");
4214        let Statement::CreateIndex(c) = s else {
4215            panic!("expected CreateIndex");
4216        };
4217        assert!(c.is_unique);
4218        assert_eq!(c.column, "a");
4219        assert!(c.partial_predicate.is_none());
4220    }
4221
4222    #[test]
4223    fn create_unique_index_partial() {
4224        // mailrs's email_templates "one default per user" shape.
4225        let s = parse(
4226            "CREATE UNIQUE INDEX idx_email_templates_user_default \
4227             ON email_templates (user_address) WHERE is_default = true",
4228        );
4229        let Statement::CreateIndex(c) = s else {
4230            panic!("expected CreateIndex");
4231        };
4232        assert!(c.is_unique);
4233        assert_eq!(c.table, "email_templates");
4234        assert_eq!(c.column, "user_address");
4235        assert!(c.partial_predicate.is_some());
4236    }
4237
4238    #[test]
4239    fn create_unique_index_composite_with_predicate() {
4240        // mailrs's calendar_events instance: composite columns.
4241        let s = parse(
4242            "CREATE UNIQUE INDEX uq_calendar_events_instance \
4243             ON calendar_events (calendar_id, uid, recurrence_id) \
4244             WHERE recurrence_id IS NOT NULL",
4245        );
4246        let Statement::CreateIndex(c) = s else {
4247            panic!("expected CreateIndex");
4248        };
4249        assert!(c.is_unique);
4250        assert_eq!(c.column, "calendar_id");
4251        assert_eq!(
4252            c.extra_columns,
4253            vec!["uid".to_string(), "recurrence_id".to_string()]
4254        );
4255        assert!(c.partial_predicate.is_some());
4256    }
4257
4258    #[test]
4259    fn create_unique_index_using_btree_ok() {
4260        let s = parse("CREATE UNIQUE INDEX uq_x ON t USING btree (a)");
4261        assert!(matches!(s, Statement::CreateIndex(ref c) if c.is_unique));
4262    }
4263
4264    #[test]
4265    fn create_unique_index_using_hnsw_rejected() {
4266        let err =
4267            parse_statement("CREATE UNIQUE INDEX uq_v ON t USING hnsw (embedding)").unwrap_err();
4268        assert!(err.message.contains("UNIQUE"), "{}", err.message);
4269    }
4270
4271    #[test]
4272    fn create_unique_index_round_trip() {
4273        let original = parse(
4274            "CREATE UNIQUE INDEX uq_calendar_events_master \
4275             ON calendar_events (calendar_id, uid) WHERE recurrence_id IS NULL",
4276        );
4277        let again = parse_statement(&original.to_string()).unwrap();
4278        assert_eq!(original, again);
4279    }
4280
4281    #[test]
4282    fn create_unique_without_index_errors() {
4283        let err = parse_statement("CREATE UNIQUE TABLE t (a INT)").unwrap_err();
4284        assert!(err.message.contains("INDEX"), "{}", err.message);
4285    }
4286
4287    // --- v7.10.4 BYTES / BYTEA column type (Epic 1) ----------------------
4288
4289    #[test]
4290    fn create_table_bytea_column() {
4291        let s = parse("CREATE TABLE t (id INT NOT NULL, payload BYTEA NOT NULL)");
4292        let Statement::CreateTable(c) = s else {
4293            panic!("expected CreateTable");
4294        };
4295        assert_eq!(c.columns.len(), 2);
4296        assert_eq!(c.columns[1].ty, ColumnTypeName::Bytes);
4297        assert!(!c.columns[1].nullable);
4298    }
4299
4300    #[test]
4301    fn create_table_bytes_alias_column() {
4302        let s = parse("CREATE TABLE t (blob BYTES)");
4303        let Statement::CreateTable(c) = s else {
4304            panic!("expected CreateTable");
4305        };
4306        assert_eq!(c.columns[0].ty, ColumnTypeName::Bytes);
4307    }
4308
4309    #[test]
4310    fn bytea_round_trip_display() {
4311        let original = parse("CREATE TABLE t (a BYTEA NOT NULL)");
4312        let again = parse_statement(&original.to_string()).unwrap();
4313        assert_eq!(original, again);
4314    }
4315
4316    // --- v0.9 transactions -------------------------------------------------
4317
4318    #[test]
4319    fn begin_commit_rollback_parse_as_unit_variants() {
4320        assert_eq!(parse("BEGIN"), Statement::Begin);
4321        assert_eq!(parse("COMMIT"), Statement::Commit);
4322        assert_eq!(parse("ROLLBACK"), Statement::Rollback);
4323        // Trailing semicolons accepted too.
4324        assert_eq!(parse("BEGIN;"), Statement::Begin);
4325    }
4326
4327    // --- v1.2: pgvector distance ops + ::vector cast --------------------
4328
4329    #[test]
4330    fn inner_product_binop_parses() {
4331        let s = parse("SELECT v <#> [1.0, 2.0] FROM t");
4332        let Statement::Select(s) = s else { panic!() };
4333        let SelectItem::Expr { expr, .. } = &s.items[0] else {
4334            panic!()
4335        };
4336        assert!(matches!(
4337            expr,
4338            Expr::Binary {
4339                op: BinOp::InnerProduct,
4340                ..
4341            }
4342        ));
4343    }
4344
4345    #[test]
4346    fn cosine_distance_binop_parses() {
4347        let s = parse("SELECT v <=> [1.0, 2.0] FROM t");
4348        let Statement::Select(s) = s else { panic!() };
4349        let SelectItem::Expr { expr, .. } = &s.items[0] else {
4350            panic!()
4351        };
4352        assert!(matches!(
4353            expr,
4354            Expr::Binary {
4355                op: BinOp::CosineDistance,
4356                ..
4357            }
4358        ));
4359    }
4360
4361    #[test]
4362    fn vector_cast_postfix_wraps_string_literal() {
4363        let s = parse("SELECT '[1,2,3]'::vector FROM t");
4364        let Statement::Select(s) = s else { panic!() };
4365        let SelectItem::Expr { expr, .. } = &s.items[0] else {
4366            panic!()
4367        };
4368        assert!(matches!(
4369            expr,
4370            Expr::Cast {
4371                target: CastTarget::Vector,
4372                ..
4373            }
4374        ));
4375    }
4376
4377    #[test]
4378    fn unsupported_cast_target_errors() {
4379        // `::numeric` isn't in the v1.3 cast target set.
4380        let err = parse_statement("SELECT 1::numeric FROM t").unwrap_err();
4381        assert!(err.message.contains("unsupported cast target"));
4382    }
4383
4384    #[test]
4385    fn tx_statements_round_trip() {
4386        for q in ["BEGIN", "COMMIT", "ROLLBACK"] {
4387            let original = parse(q);
4388            let again = parse_statement(&original.to_string()).unwrap();
4389            assert_eq!(original, again);
4390        }
4391    }
4392
4393    #[test]
4394    fn interval_text_parsing_units() {
4395        // Single unit.
4396        assert_eq!(parse_interval_text("1 day"), Some((0, 86_400_000_000)));
4397        assert_eq!(parse_interval_text("1 second"), Some((0, 1_000_000)));
4398        assert_eq!(parse_interval_text("1 month"), Some((1, 0)));
4399        assert_eq!(parse_interval_text("2 years"), Some((24, 0)));
4400        // Compound spans accumulate.
4401        assert_eq!(parse_interval_text("1 year 6 months"), Some((18, 0)));
4402        assert_eq!(
4403            parse_interval_text("1 day 2 hours"),
4404            Some((0, 86_400_000_000 + 7_200_000_000))
4405        );
4406        // Negative numbers carry through.
4407        assert_eq!(parse_interval_text("-1 day"), Some((0, -86_400_000_000)));
4408        // Bad shapes return None.
4409        assert_eq!(parse_interval_text(""), None);
4410        assert_eq!(parse_interval_text("garbage"), None);
4411        assert_eq!(parse_interval_text("1 fortnight"), None);
4412        assert_eq!(parse_interval_text("1"), None);
4413    }
4414
4415    #[test]
4416    fn interval_literal_roundtrips_via_display() {
4417        let parsed = parse("SELECT INTERVAL '1 day 2 hours'");
4418        let s = parsed.to_string();
4419        // Display preserves the original text verbatim.
4420        assert!(s.contains("INTERVAL '1 day 2 hours'"), "got: {s}");
4421        // And re-parsing yields a structurally equal statement.
4422        let again = parse_statement(&s).unwrap();
4423        assert_eq!(parsed, again);
4424    }
4425
4426    // ── v6.1.2: CREATE / DROP PUBLICATION ────────────────────
4427
4428    #[test]
4429    fn parser_recognises_create_publication_bare() {
4430        let s = parse("CREATE PUBLICATION pub_a");
4431        let Statement::CreatePublication(p) = s else {
4432            panic!("expected CreatePublication, got {s:?}")
4433        };
4434        assert_eq!(p.name, "pub_a");
4435        assert_eq!(p.scope, PublicationScope::AllTables);
4436    }
4437
4438    #[test]
4439    fn parser_recognises_create_publication_for_all_tables() {
4440        let s = parse("CREATE PUBLICATION pub_a FOR ALL TABLES");
4441        let Statement::CreatePublication(p) = s else {
4442            panic!("expected CreatePublication, got {s:?}")
4443        };
4444        assert_eq!(p.name, "pub_a");
4445        assert_eq!(p.scope, PublicationScope::AllTables);
4446    }
4447
4448    #[test]
4449    fn parser_recognises_drop_publication() {
4450        let s = parse("DROP PUBLICATION pub_a");
4451        let Statement::DropPublication(name) = s else {
4452            panic!("expected DropPublication, got {s:?}")
4453        };
4454        assert_eq!(name, "pub_a");
4455    }
4456
4457    #[test]
4458    fn parser_recognises_for_table_list() {
4459        let s = parse("CREATE PUBLICATION pub_a FOR TABLE t1, t2, t3");
4460        let Statement::CreatePublication(p) = s else {
4461            panic!("expected CreatePublication, got {s:?}")
4462        };
4463        assert_eq!(p.name, "pub_a");
4464        let PublicationScope::ForTables(ts) = p.scope else {
4465            panic!("expected ForTables scope")
4466        };
4467        assert_eq!(ts, alloc::vec!["t1", "t2", "t3"]);
4468    }
4469
4470    #[test]
4471    fn parser_recognises_for_tables_plural() {
4472        // PG 19 accepts both `FOR TABLE` and `FOR TABLES` — match.
4473        let s = parse("CREATE PUBLICATION pub_a FOR TABLES t1, t2");
4474        let Statement::CreatePublication(p) = s else {
4475            panic!("expected CreatePublication, got {s:?}")
4476        };
4477        let PublicationScope::ForTables(ts) = p.scope else {
4478            panic!("expected ForTables")
4479        };
4480        assert_eq!(ts, alloc::vec!["t1", "t2"]);
4481    }
4482
4483    #[test]
4484    fn parser_recognises_for_all_tables_except_list() {
4485        let s = parse("CREATE PUBLICATION p FOR ALL TABLES EXCEPT t1, t2");
4486        let Statement::CreatePublication(p) = s else {
4487            panic!()
4488        };
4489        let PublicationScope::AllTablesExcept(ts) = p.scope else {
4490            panic!("expected AllTablesExcept")
4491        };
4492        assert_eq!(ts, alloc::vec!["t1", "t2"]);
4493    }
4494
4495    #[test]
4496    fn parser_rejects_for_table_with_empty_list() {
4497        // `FOR TABLE` with nothing after is a parse error.
4498        let err = parse_statement("CREATE PUBLICATION p FOR TABLE")
4499            .expect_err("must error on empty list");
4500        // No specific message asserted — the call falls through to
4501        // expect_ident_like which yields "expected identifier, got …".
4502        assert!(!err.message.is_empty());
4503    }
4504
4505    #[test]
4506    fn parser_recognises_show_publications() {
4507        // v6.1.3 — SHOW PUBLICATIONS lands here. PUBLICATIONS is a
4508        // bare ident in this position, NOT a reserved keyword.
4509        let s = parse("SHOW PUBLICATIONS");
4510        assert!(matches!(s, Statement::ShowPublications));
4511    }
4512
4513    // ── v6.1.4: CREATE / DROP SUBSCRIPTION + SHOW SUBSCRIPTIONS ─
4514
4515    #[test]
4516    fn parser_recognises_create_subscription_single_publication() {
4517        let s = parse(
4518            "CREATE SUBSCRIPTION sub_a CONNECTION 'host=127.0.0.1 port=20002' PUBLICATION pub_a",
4519        );
4520        let Statement::CreateSubscription(c) = s else {
4521            panic!("expected CreateSubscription, got {s:?}")
4522        };
4523        assert_eq!(c.name, "sub_a");
4524        assert_eq!(c.conn_str, "host=127.0.0.1 port=20002");
4525        assert_eq!(c.publications, alloc::vec!["pub_a"]);
4526    }
4527
4528    #[test]
4529    fn parser_recognises_create_subscription_multi_publication() {
4530        let s = parse("CREATE SUBSCRIPTION sub_a CONNECTION 'host=h' PUBLICATION p1, p2, p3");
4531        let Statement::CreateSubscription(c) = s else {
4532            panic!()
4533        };
4534        assert_eq!(c.publications, alloc::vec!["p1", "p2", "p3"]);
4535    }
4536
4537    #[test]
4538    fn parser_rejects_create_subscription_missing_connection() {
4539        let err = parse_statement("CREATE SUBSCRIPTION s PUBLICATION p")
4540            .expect_err("must error on missing CONNECTION");
4541        assert!(err.message.contains("CONNECTION"), "got: {}", err.message);
4542    }
4543
4544    #[test]
4545    fn parser_rejects_create_subscription_missing_publication() {
4546        let err = parse_statement("CREATE SUBSCRIPTION s CONNECTION 'host=x'")
4547            .expect_err("must error on missing PUBLICATION");
4548        assert!(err.message.contains("PUBLICATION"), "got: {}", err.message);
4549    }
4550
4551    #[test]
4552    fn parser_recognises_drop_subscription() {
4553        let s = parse("DROP SUBSCRIPTION sub_a");
4554        let Statement::DropSubscription(name) = s else {
4555            panic!("expected DropSubscription, got {s:?}")
4556        };
4557        assert_eq!(name, "sub_a");
4558    }
4559
4560    #[test]
4561    fn parser_recognises_show_subscriptions() {
4562        let s = parse("SHOW SUBSCRIPTIONS");
4563        assert!(matches!(s, Statement::ShowSubscriptions));
4564    }
4565
4566    #[test]
4567    fn parser_recognises_wait_for_wal_position_no_timeout() {
4568        let s = parse("WAIT FOR WAL POSITION 12345");
4569        let Statement::WaitForWalPosition { pos, timeout_ms } = s else {
4570            panic!("expected WaitForWalPosition, got {s:?}")
4571        };
4572        assert_eq!(pos, 12345);
4573        assert!(timeout_ms.is_none());
4574    }
4575
4576    #[test]
4577    fn parser_recognises_wait_for_wal_position_with_timeout() {
4578        let s = parse("WAIT FOR WAL POSITION 67890 WITH TIMEOUT 5000");
4579        let Statement::WaitForWalPosition { pos, timeout_ms } = s else {
4580            panic!()
4581        };
4582        assert_eq!(pos, 67890);
4583        assert_eq!(timeout_ms, Some(5000));
4584    }
4585
4586    #[test]
4587    fn parser_rejects_wait_with_negative_position() {
4588        // The lexer treats `-` as a token; `expect_u64_literal`
4589        // only sees the Integer that follows, so the negative
4590        // arrives as a unary-minus expression at higher levels.
4591        // Bare `WAIT FOR WAL POSITION -1` thus surfaces as a
4592        // parse error one way or another.
4593        let err = parse_statement("WAIT FOR WAL POSITION -1").unwrap_err();
4594        assert!(!err.message.is_empty());
4595    }
4596
4597    #[test]
4598    fn parser_recognises_bare_analyze() {
4599        let s = parse("ANALYZE");
4600        assert!(matches!(s, Statement::Analyze(None)));
4601    }
4602
4603    #[test]
4604    fn parser_recognises_analyze_with_table() {
4605        let s = parse("ANALYZE users");
4606        let Statement::Analyze(Some(name)) = s else {
4607            panic!("expected Analyze, got {s:?}")
4608        };
4609        assert_eq!(name, "users");
4610    }
4611
4612    #[test]
4613    fn parser_recognises_analyze_with_quoted_table() {
4614        let s = parse("ANALYZE \"Mixed Case\"");
4615        let Statement::Analyze(Some(name)) = s else {
4616            panic!()
4617        };
4618        assert_eq!(name, "Mixed Case");
4619    }
4620
4621    #[test]
4622    fn parser_rejects_analyze_with_garbage_token() {
4623        let err = parse_statement("ANALYZE 42").expect_err("must error");
4624        assert!(!err.message.is_empty());
4625    }
4626
4627    #[test]
4628    fn analyze_display_roundtrips() {
4629        for sql in ["ANALYZE", "ANALYZE users"] {
4630            let s = parse(sql);
4631            let printed = s.to_string();
4632            let again = parse_statement(&printed)
4633                .unwrap_or_else(|e| panic!("re-parse failed for {printed:?}: {e}"));
4634            assert_eq!(s, again);
4635        }
4636    }
4637
4638    #[test]
4639    fn wait_for_display_roundtrips() {
4640        for sql in [
4641            "WAIT FOR WAL POSITION 12345",
4642            "WAIT FOR WAL POSITION 67890 WITH TIMEOUT 5000",
4643        ] {
4644            let s = parse(sql);
4645            let printed = s.to_string();
4646            let again = parse_statement(&printed)
4647                .unwrap_or_else(|e| panic!("re-parse failed for {printed:?}: {e}"));
4648            assert_eq!(s, again, "round-trip mismatch for {sql:?}");
4649        }
4650    }
4651
4652    #[test]
4653    fn subscription_ddl_display_roundtrips() {
4654        for sql in [
4655            "CREATE SUBSCRIPTION sub_a CONNECTION 'host=h port=20002' PUBLICATION pub_a",
4656            "CREATE SUBSCRIPTION sub_b CONNECTION 'host=h' PUBLICATION p1, p2",
4657            "DROP SUBSCRIPTION sub_a",
4658            "SHOW SUBSCRIPTIONS",
4659        ] {
4660            let s = parse(sql);
4661            let printed = s.to_string();
4662            let again = parse_statement(&printed)
4663                .unwrap_or_else(|e| panic!("re-parse failed for {printed:?}: {e}"));
4664            assert_eq!(s, again, "round-trip mismatch for {sql:?}");
4665        }
4666    }
4667
4668    #[test]
4669    fn parser_drop_dispatches_user_vs_publication() {
4670        // Pre-v6.1.2 DROP USER took the bare-ident path; v6.1.2
4671        // tokenises DROP. Both targets must still parse.
4672        let s = parse("DROP USER 'alice'");
4673        let Statement::DropUser(name) = s else {
4674            panic!("expected DropUser, got {s:?}")
4675        };
4676        assert_eq!(name, "alice");
4677        // And DROP PUBLICATION lands the new variant.
4678        let s = parse("DROP PUBLICATION p1");
4679        assert!(matches!(s, Statement::DropPublication(_)));
4680    }
4681
4682    #[test]
4683    fn publication_ddl_display_roundtrips() {
4684        // Every CREATE PUBLICATION variant must Display → parse →
4685        // same AST. v6.1.3 covers all three scope shapes.
4686        for sql in [
4687            "CREATE PUBLICATION pub_a",
4688            "CREATE PUBLICATION pub_a FOR ALL TABLES",
4689            "CREATE PUBLICATION pub_a FOR TABLE t1, t2",
4690            "CREATE PUBLICATION pub_a FOR ALL TABLES EXCEPT t1",
4691            "DROP PUBLICATION pub_a",
4692            "SHOW PUBLICATIONS",
4693        ] {
4694            let s = parse(sql);
4695            let printed = s.to_string();
4696            let again = parse_statement(&printed)
4697                .unwrap_or_else(|e| panic!("re-parse failed for {printed:?}: {e}"));
4698            assert_eq!(s, again, "round-trip mismatch for {sql:?}");
4699        }
4700    }
4701}