Skip to main content

spg_sql/
parser.rs

1//! Recursive-descent parser with a Pratt (precedence-climbing) sub-parser for
2//! expressions.
3//!
4//! Precedence (lowest → highest binding):
5//! `OR` (1) `<` `AND` (2) `<` `NOT` unary (3) `<`
6//! comparisons `=` `<>` `<` `<=` `>` `>=` (4) `<`
7//! `+` `-` (5) `<` `*` `/` (6) `<` unary `-` (7) `<` parens / atom.
8//!
9//! This matches PG's behaviour for the operators we support — e.g. `NOT a = b`
10//! parses as `NOT (a = b)` and `-a * b` as `(-a) * b`.
11
12use alloc::boxed::Box;
13use alloc::format;
14use alloc::string::{String, ToString};
15use alloc::vec;
16use alloc::vec::Vec;
17use core::fmt;
18use core::mem;
19
20use crate::ast::{
21    AssignTarget, BinOp, CastTarget, ColumnDef, ColumnName, ColumnTypeName,
22    CreateFunctionStatement, CreateIndexStatement, CreatePublicationStatement,
23    CreateSubscriptionStatement, CreateTableStatement, CreateTriggerStatement, Expr, ExtractField,
24    FkAction, ForeignKeyConstraint, FrameBound, FrameKind, FromClause, FromJoin, FunctionArg,
25    FunctionArgMode, FunctionArgType, FunctionBody, FunctionReturn, IndexMethod, InsertStatement,
26    JoinKind, Literal, NullTreatment, OrderBy, PlPgSqlBlock, PlPgSqlDeclare, PlPgSqlStmt,
27    PublicationScope, RaiseLevel, ReturnTarget, SelectItem, SelectStatement, Statement, TableRef,
28    TriggerEvent, TriggerForEach, TriggerTiming, UnOp, UnionKind, VecEncoding, WindowFrame,
29};
30use crate::lexer::{self, LexError, Token};
31
/// v7.14.0 — reports whether `lc` is the leading keyword of one of the
/// dump-emitted statement forms that SPG accepts as a no-op (they have
/// no behavioural effect on the single-schema / single-database model).
///
/// The statement dispatcher consumes such a statement up to the next
/// `;` / EOF and returns `Statement::Empty` for it.
///
/// The comparison is exact, so `lc` must already be lowercased by the
/// caller.
fn is_dump_noise_statement(lc: &str) -> bool {
    const NOISE_KEYWORDS: &[&str] = &[
        // Object comments / privileges / ownership — none of these
        // change schema semantics on SPG.
        "comment",
        "grant",
        "revoke",
        // MySQL bulk-load brackets.
        "lock",
        "unlock",
        // MySQL OPTIMIZE / CHECK TABLE diagnostics and `USE <db>`,
        // which dump tools also emit around a restore.
        "optimize",
        "check",
        "use",
        // PG psql backslash meta-commands that newer pg_dump versions
        // emit unescaped. Real psql intercepts these; SPG's PG-wire
        // sees them as raw text.
        "\\restrict",
        "\\unrestrict",
    ];
    NOISE_KEYWORDS.contains(&lc)
}
62
/// Reports whether `name` is an index operator-class name that
/// `CREATE INDEX` accepts. Matching is ASCII case-insensitive.
///
/// * v7.9.22 — pgvector / SPG vector-index opclasses. SPG's HNSW already
///   routes by query operator; the opclass is accepted for `pg_dump`
///   compatibility (mailrs migration follow-up G5).
/// * v7.13.0 — PG built-in / pg_trgm opclasses (mailrs round-5 G5).
///
/// Acceptance is token-level only: SPG does not change index behaviour
/// based on the opclass.
fn is_vector_opclass_name(name: &str) -> bool {
    const ACCEPTED_OPCLASSES: &[&str] = &[
        // pgvector / SPG vector encodings.
        "vector_cosine_ops",
        "vector_l2_ops",
        "vector_ip_ops",
        "halfvec_cosine_ops",
        "halfvec_l2_ops",
        "halfvec_ip_ops",
        "sq8_cosine_ops",
        "sq8_l2_ops",
        "sq8_ip_ops",
        // pg_trgm — trigram operator classes. SPG's GIN index already
        // uses tsvector tokens; trigram-style LIKE pattern matching
        // still routes through a sequential scan, but the name is
        // accepted so PG schemas load.
        "gin_trgm_ops",
        "gist_trgm_ops",
        // PG built-in btree opclasses that occasionally appear in
        // pg_dump output for column types with multiple sort orders.
        "text_pattern_ops",
        "varchar_pattern_ops",
        "bpchar_pattern_ops",
        "int4_ops",
        "int8_ops",
        "text_ops",
    ];
    ACCEPTED_OPCLASSES
        .iter()
        .any(|&known| name.eq_ignore_ascii_case(known))
}
102
/// Error reported when input cannot be parsed (lexer failures are
/// converted into this type as well).
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct ParseError {
    /// Human-readable description of the failure.
    pub message: String,
    /// Position in the token stream where parsing tripped. This is a
    /// token index, not a byte offset into the input text.
    pub token_pos: usize,
}
109
110impl fmt::Display for ParseError {
111    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
112        write!(
113            f,
114            "parse error at token #{}: {}",
115            self.token_pos, self.message
116        )
117    }
118}
119
120impl From<LexError> for ParseError {
121    fn from(e: LexError) -> Self {
122        Self {
123            message: format!("lex: {e}"),
124            token_pos: 0,
125        }
126    }
127}
128
129/// v7.9.30 — parse a single expression (no trailing junk). Used by
130/// the engine to re-hydrate stored partial-index / unique-index
131/// predicates from their canonical Display form. The same Pratt
132/// parser the statement path uses; this entry point just skips the
133/// statement dispatch.
134pub fn parse_expression(input: &str) -> Result<Expr, ParseError> {
135    let tokens = lexer::tokenize(input)?;
136    let mut p = Parser::new(tokens);
137    let expr = p.parse_expr(0)?;
138    p.expect_eof()?;
139    Ok(expr)
140}
141
142/// Parse exactly one statement, swallow an optional trailing `;`, and require
143/// the token stream to end there.
144pub fn parse_statement(input: &str) -> Result<Statement, ParseError> {
145    let tokens = lexer::tokenize(input)?;
146    let mut p = Parser::new(tokens);
147    let stmt = p.parse_one_statement()?;
148    if matches!(p.peek(), Token::Semicolon) {
149        p.advance();
150    }
151    p.expect_eof()?;
152    Ok(stmt)
153}
154
/// Parsing cursor over a token vector.
struct Parser {
    /// Tokens being parsed. The accessors index this vector directly and
    /// rely on it ending with `Token::Eof`; `advance()` overwrites each
    /// consumed slot with `Token::Eof` as it moves the token out.
    tokens: Vec<Token>,
    /// Index into `tokens` of the current (not yet consumed) token.
    pos: usize,
}
159
160impl Parser {
161    fn new(tokens: Vec<Token>) -> Self {
162        Self { tokens, pos: 0 }
163    }
164
165    fn peek(&self) -> &Token {
166        // tokens always ends with Eof; pos is clamped in advance().
167        &self.tokens[self.pos]
168    }
169
170    fn advance(&mut self) -> Token {
171        let t = mem::replace(&mut self.tokens[self.pos], Token::Eof);
172        if self.pos + 1 < self.tokens.len() {
173            self.pos += 1;
174        }
175        t
176    }
177
178    fn err(&self, message: String) -> ParseError {
179        ParseError {
180            message,
181            token_pos: self.pos,
182        }
183    }
184
185    fn expect_eof(&self) -> Result<(), ParseError> {
186        if matches!(self.peek(), Token::Eof) {
187            Ok(())
188        } else {
189            Err(self.err(format!("expected end of input, got {:?}", self.peek())))
190        }
191    }
192
193    /// v7.14.0 — swallow every token up to (but not including) the
194    /// next semicolon / EOF. Used by the dump-noise dispatcher
195    /// to consume `COMMENT ON …`, `GRANT …`, `LOCK TABLES …`,
196    /// etc. without modeling each grammar.
197    fn consume_until_statement_boundary(&mut self) {
198        loop {
199            match self.peek() {
200                Token::Semicolon | Token::Eof => return,
201                _ => self.advance(),
202            };
203        }
204    }
205
206    fn expect_ident_like(&mut self) -> Result<String, ParseError> {
207        let first = match self.advance() {
208            Token::Ident(s) | Token::QuotedIdent(s) => s,
209            other => {
210                return Err(ParseError {
211                    message: format!("expected identifier, got {other:?}"),
212                    token_pos: self.pos.saturating_sub(1),
213                });
214            }
215        };
216        // v7.14.0 — strip optional `<schema>.` prefix. PG dumps
217        // qualify every name with `public.` (and pg_catalog.* for
218        // functions); SPG is single-schema so we discard the
219        // prefix and return only the trailing ident. Same shape
220        // also handles MySQL `db.tbl` cross-database refs (SPG
221        // ignores the db part).
222        if matches!(self.peek(), Token::Dot) {
223            self.advance();
224            match self.advance() {
225                Token::Ident(s) | Token::QuotedIdent(s) => return Ok(s),
226                other => {
227                    return Err(ParseError {
228                        message: format!(
229                            "expected identifier after '{first}.', got {other:?}"
230                        ),
231                        token_pos: self.pos.saturating_sub(1),
232                    });
233                }
234            }
235        }
236        Ok(first)
237    }
238
239    #[allow(clippy::too_many_lines)]
240    fn parse_one_statement(&mut self) -> Result<Statement, ParseError> {
241        // v7.14.0 — empty / comment-only / semicolon-only input
242        // (after the lexer strips line + block + MySQL
243        // conditional comments) lands as Statement::Empty.
244        // pg_dump and mysqldump emit several wrappers that
245        // collapse to nothing after stripping (`/*!40101 SET …
246        // */;`, blank lines between statements); the engine
247        // returns CommandOk no-op so the dump loads cleanly.
248        if matches!(self.peek(), Token::Eof | Token::Semicolon) {
249            return Ok(Statement::Empty);
250        }
251        // v7.14.0 — pg_dump / mysqldump "noise" statements:
252        // catalog / metadata DDL that has no behavioural effect
253        // on SPG's single-schema, single-database, single-user
254        // model. Consume the whole statement up to the next
255        // semicolon / EOF and return Empty. This is broader than
256        // the per-keyword DROP / SET / COMMENT arms but lets the
257        // long tail of `LOCK TABLES`, `UNLOCK TABLES`, `GRANT`,
258        // `REVOKE`, `ALTER OWNER TO`, `\restrict`, `\unrestrict`,
259        // `BEGIN; COMMIT;` wrappers, etc. all pass through.
260        if let Token::Ident(s) | Token::QuotedIdent(s) = self.peek() {
261            let lc = s.to_ascii_lowercase();
262            if is_dump_noise_statement(&lc) {
263                self.consume_until_statement_boundary();
264                return Ok(Statement::Empty);
265            }
266        }
267        match self.peek() {
268            Token::Select => self.parse_select_stmt(),
269            // v7.9.27 — `DO $$ … $$ [LANGUAGE plpgsql]`. PG-only;
270            // SPG has no PL/pgSQL so the body is consumed (lexer
271            // already turned it into a Token::String) and the whole
272            // DO statement returns CommandOk no-op. mailrs H1 +
273            // pg_dump compat.
274            Token::Ident(s) | Token::QuotedIdent(s) if s.eq_ignore_ascii_case("do") => {
275                self.advance();
276                // Body — single string token (dollar-quoted or
277                // ordinary).
278                match self.advance() {
279                    Token::String(_) => {}
280                    other => {
281                        return Err(self.err(alloc::format!(
282                            "expected dollar-quoted body after DO, got {other:?}"
283                        )));
284                    }
285                }
286                // Optional `LANGUAGE <name>` trailer (idents only).
287                if matches!(self.peek(), Token::Ident(s) if s.eq_ignore_ascii_case("language")) {
288                    self.advance();
289                    let _ = self.expect_ident_like()?;
290                }
291                Ok(Statement::DoBlock)
292            }
293            // v4.11: `WITH name AS (SELECT ...) [, ...] SELECT ...`.
294            // WITH isn't a reserved token in our lexer — comes through
295            // as `Token::Ident("with")` (case-insensitive).
296            Token::Ident(s) | Token::QuotedIdent(s) if s.eq_ignore_ascii_case("with") => {
297                self.advance();
298                self.parse_with_cte_then_select()
299            }
300            // v4.26: `EXPLAIN [ANALYZE] <select>`. Comes through as
301            // an identifier — not a reserved keyword.
302            Token::Ident(s) | Token::QuotedIdent(s) if s.eq_ignore_ascii_case("explain") => {
303                self.advance();
304                let mut analyze = false;
305                let mut suggest = false;
306                // v6.8.3 — `EXPLAIN (SUGGEST)` opt-in.
307                if matches!(self.peek(), Token::LParen) {
308                    self.advance();
309                    let opt = match self.peek().clone() {
310                        Token::Ident(s) | Token::QuotedIdent(s) => s,
311                        other => {
312                            return Err(self.err(format!(
313                                "expected option keyword inside EXPLAIN (…), got {other:?}"
314                            )));
315                        }
316                    };
317                    if !opt.eq_ignore_ascii_case("suggest") {
318                        return Err(self.err(format!(
319                            "unknown EXPLAIN option {opt:?}; v6.8.3 supports SUGGEST"
320                        )));
321                    }
322                    self.advance();
323                    if !matches!(self.peek(), Token::RParen) {
324                        return Err(self.err(format!(
325                            "expected ')' after EXPLAIN option, got {:?}",
326                            self.peek()
327                        )));
328                    }
329                    self.advance();
330                    suggest = true;
331                } else if let Token::Ident(s) | Token::QuotedIdent(s) = self.peek()
332                    && (s.eq_ignore_ascii_case("analyze") || s.eq_ignore_ascii_case("analyse"))
333                {
334                    self.advance();
335                    analyze = true;
336                }
337                let inner = self.parse_select_stmt()?;
338                let Statement::Select(s) = inner else {
339                    return Err(self.err(format!("EXPLAIN body must be a SELECT, got {inner:?}")));
340                };
341                Ok(Statement::Explain(crate::ast::ExplainStatement {
342                    analyze,
343                    inner: Box::new(s),
344                    suggest,
345                }))
346            }
347            Token::Create => self.parse_create_stmt(),
348            Token::Insert => self.parse_insert_stmt(),
349            Token::Begin => {
350                self.advance();
351                Ok(Statement::Begin)
352            }
353            Token::Commit => {
354                self.advance();
355                Ok(Statement::Commit)
356            }
357            Token::Rollback => {
358                self.advance();
359                // `ROLLBACK TO [SAVEPOINT] <name>` returns to that
360                // savepoint without ending the transaction. Bare
361                // `ROLLBACK` drops the whole TX.
362                if matches!(self.peek(), Token::To) {
363                    self.advance();
364                    if matches!(self.peek(), Token::Savepoint) {
365                        self.advance();
366                    }
367                    let name = self.expect_ident_like()?;
368                    Ok(Statement::RollbackToSavepoint(name))
369                } else {
370                    Ok(Statement::Rollback)
371                }
372            }
373            Token::Savepoint => {
374                self.advance();
375                let name = self.expect_ident_like()?;
376                Ok(Statement::Savepoint(name))
377            }
378            Token::Release => {
379                self.advance();
380                // `RELEASE [SAVEPOINT] <name>` — the `SAVEPOINT` keyword
381                // is optional in standard SQL.
382                if matches!(self.peek(), Token::Savepoint) {
383                    self.advance();
384                }
385                let name = self.expect_ident_like()?;
386                Ok(Statement::ReleaseSavepoint(name))
387            }
388            Token::Show => {
389                self.advance();
390                // `SHOW TABLES` / `SHOW USERS` / `SHOW COLUMNS FROM <table>`.
391                // v6.1.2 promoted TABLES to a reserved keyword (for
392                // `CREATE PUBLICATION … FOR ALL TABLES`), so it now
393                // arrives as `Token::Tables` rather than a bare ident.
394                // USERS / COLUMNS remain bare idents.
395                let target = match self.advance() {
396                    Token::Tables => "tables".to_string(),
397                    Token::Ident(s) | Token::QuotedIdent(s) => s.to_ascii_lowercase(),
398                    other => {
399                        return Err(self.err(format!(
400                            "expected SHOW target, got {other:?}"
401                        )));
402                    }
403                };
404                match target.as_str() {
405                    "tables" => Ok(Statement::ShowTables),
406                    "users" => Ok(Statement::ShowUsers),
407                    // v6.1.3 — PUBLICATIONS plural is NOT a reserved
408                    // keyword on its own; it lands here as a bare
409                    // ident. Returning all publications + their
410                    // scope summary.
411                    "publications" => Ok(Statement::ShowPublications),
412                    // v6.1.4 — same shape for SUBSCRIPTIONS plural.
413                    "subscriptions" => Ok(Statement::ShowSubscriptions),
414                    "columns" => {
415                        if !matches!(self.peek(), Token::From) {
416                            return Err(self.err(format!(
417                                "expected FROM after SHOW COLUMNS, got {:?}",
418                                self.peek()
419                            )));
420                        }
421                        self.advance();
422                        let table = self.expect_ident_like()?;
423                        Ok(Statement::ShowColumns(table))
424                    }
425                    other => Err(self.err(format!(
426                        "unknown SHOW target {other:?}; supported: TABLES, COLUMNS, USERS, PUBLICATIONS"
427                    ))),
428                }
429            }
430            // v6.1.2: `DROP` is now a reserved keyword (it dispatches
431            // to DROP USER and DROP PUBLICATION today; DROP TABLE /
432            // DROP INDEX are still SHOW-shaped admin ops). Pre-6.1.2
433            // arrived as a bare ident; tokenising it dedicatedly
434            // keeps the dispatch tree small.
435            Token::Drop => {
436                self.advance();
437                match self.peek() {
438                    Token::Publication => {
439                        self.advance();
440                        let name = self.expect_ident_or_string()?;
441                        Ok(Statement::DropPublication(name))
442                    }
443                    Token::Subscription => {
444                        self.advance();
445                        let name = self.expect_ident_or_string()?;
446                        Ok(Statement::DropSubscription(name))
447                    }
448                    Token::Ident(s) | Token::QuotedIdent(s) if s.eq_ignore_ascii_case("user") => {
449                        self.advance();
450                        let name = self.expect_ident_or_string()?;
451                        Ok(Statement::DropUser(name))
452                    }
453                    // v7.12.4 — DROP TRIGGER [IF EXISTS] name ON table.
454                    Token::Ident(s) | Token::QuotedIdent(s) if s.eq_ignore_ascii_case("trigger") => {
455                        self.advance();
456                        let if_exists = self.consume_if_exists();
457                        let name = self.expect_ident_like()?;
458                        // ON <table>
459                        if !matches!(self.peek(), Token::On) {
460                            return Err(self.err(alloc::format!(
461                                "expected ON <table> after DROP TRIGGER {name:?}, got {:?}",
462                                self.peek()
463                            )));
464                        }
465                        self.advance();
466                        let table = self.expect_ident_like()?;
467                        Ok(Statement::DropTrigger {
468                            name,
469                            table,
470                            if_exists,
471                        })
472                    }
473                    // v7.12.4 — DROP FUNCTION [IF EXISTS] name [(args)].
474                    // v7.12.4 ignores any optional arg-list (signature-
475                    // based overload disambiguation lands in v7.12.5+).
476                    Token::Ident(s) | Token::QuotedIdent(s) if s.eq_ignore_ascii_case("function") => {
477                        self.advance();
478                        let if_exists = self.consume_if_exists();
479                        let name = self.expect_ident_like()?;
480                        // Optional `()` — consume + discard.
481                        if matches!(self.peek(), Token::LParen) {
482                            self.advance();
483                            // Skip until matching RParen, accepting any tokens (typed args we don't model yet).
484                            let mut depth = 1usize;
485                            while depth > 0 {
486                                match self.peek() {
487                                    Token::LParen => depth += 1,
488                                    Token::RParen => depth -= 1,
489                                    Token::Eof => {
490                                        return Err(self.err(alloc::format!(
491                                            "unterminated arg list in DROP FUNCTION {name:?}"
492                                        )));
493                                    }
494                                    _ => {}
495                                }
496                                self.advance();
497                            }
498                        }
499                        Ok(Statement::DropFunction { name, if_exists })
500                    }
501                    // v7.14.0 — DROP TABLE [IF EXISTS] name [, name…]
502                    // [CASCADE|RESTRICT]. pg_dump and mysqldump both
503                    // emit DROP TABLE IF EXISTS at the head of every
504                    // CREATE TABLE block so re-importing a dump
505                    // overwrites prior state. SPG accepts and removes
506                    // matching tables; CASCADE/RESTRICT trailers
507                    // accepted silently.
508                    Token::Table => {
509                        self.advance();
510                        let if_exists = self.consume_if_exists();
511                        let mut names: Vec<String> = Vec::new();
512                        loop {
513                            names.push(self.expect_ident_like()?);
514                            if matches!(self.peek(), Token::Comma) {
515                                self.advance();
516                                continue;
517                            }
518                            break;
519                        }
520                        if matches!(
521                            self.peek(),
522                            Token::Ident(s) if s.eq_ignore_ascii_case("cascade")
523                                || s.eq_ignore_ascii_case("restrict")
524                        ) {
525                            self.advance();
526                        }
527                        Ok(Statement::DropTable { names, if_exists })
528                    }
529                    // v7.14.0 — DROP INDEX [IF EXISTS] name
530                    // [CASCADE|RESTRICT]. PG / mysqldump emit this
531                    // for partial-index renames and pgvector
532                    // migrations. SPG removes the matching index;
533                    // IF EXISTS makes the drop idempotent.
534                    Token::Index => {
535                        self.advance();
536                        let if_exists = self.consume_if_exists();
537                        let name = self.expect_ident_like()?;
538                        if matches!(
539                            self.peek(),
540                            Token::Ident(s) if s.eq_ignore_ascii_case("cascade")
541                                || s.eq_ignore_ascii_case("restrict")
542                        ) {
543                            self.advance();
544                        }
545                        Ok(Statement::DropIndex { name, if_exists })
546                    }
547                    // v7.14.0 — DROP SCHEMA [IF EXISTS] name
548                    // [CASCADE|RESTRICT]. SPG is single-database;
549                    // schemas are accepted as no-ops (any name
550                    // resolves to the single catalog).
551                    Token::Ident(s) | Token::QuotedIdent(s) if s.eq_ignore_ascii_case("schema") => {
552                        self.advance();
553                        let _ = self.consume_if_exists();
554                        let _ = self.expect_ident_like()?;
555                        if matches!(
556                            self.peek(),
557                            Token::Ident(s) if s.eq_ignore_ascii_case("cascade")
558                                || s.eq_ignore_ascii_case("restrict")
559                        ) {
560                            self.advance();
561                        }
562                        Ok(Statement::Empty)
563                    }
564                    // v7.14.0 — DROP SEQUENCE [IF EXISTS] name
565                    // [CASCADE|RESTRICT]. SPG has no separate
566                    // sequence object — SERIAL/BIGSERIAL is column-
567                    // local AUTO_INCREMENT — so DROP SEQUENCE
568                    // resolves as a no-op.
569                    Token::Ident(s) | Token::QuotedIdent(s) if s.eq_ignore_ascii_case("sequence") => {
570                        self.advance();
571                        let _ = self.consume_if_exists();
572                        let _ = self.expect_ident_like()?;
573                        if matches!(
574                            self.peek(),
575                            Token::Ident(s) if s.eq_ignore_ascii_case("cascade")
576                                || s.eq_ignore_ascii_case("restrict")
577                        ) {
578                            self.advance();
579                        }
580                        Ok(Statement::Empty)
581                    }
582                    other => Err(self.err(format!(
583                        "expected TABLE / INDEX / SCHEMA / SEQUENCE / USER / PUBLICATION / \
584                         SUBSCRIPTION / TRIGGER / FUNCTION after DROP, got {other:?}"
585                    ))),
586                }
587            }
588            Token::Ident(s) | Token::QuotedIdent(s) if s.eq_ignore_ascii_case("update") => {
589                self.advance();
590                self.parse_update_after_keyword()
591            }
592            Token::Ident(s) | Token::QuotedIdent(s) if s.eq_ignore_ascii_case("delete") => {
593                self.advance();
594                self.parse_delete_after_keyword()
595            }
596            // v6.0.4: ALTER INDEX <name> REBUILD [WITH (encoding = ...)].
597            // ALTER is not a reserved keyword in the lexer — handled
598            // as a bare ident here.
599            Token::Ident(s) | Token::QuotedIdent(s) if s.eq_ignore_ascii_case("alter") => {
600                self.advance();
601                self.parse_alter_after_keyword()
602            }
603            // v6.1.7: WAIT FOR WAL POSITION <pos> [WITH TIMEOUT <ms>].
604            // WAIT / POSITION / TIMEOUT are bare idents — no lexer
605            // additions needed.
606            Token::Ident(s) | Token::QuotedIdent(s) if s.eq_ignore_ascii_case("wait") => {
607                self.advance();
608                self.parse_wait_after_keyword()
609            }
610            // v6.2.0: ANALYZE [<table>]. ANALYZE is a bare ident.
611            // Bare ANALYZE → analyse every user table; ANALYZE
612            // <name> → re-stats one. The argument is an optional
613            // ident (or quoted ident); anything else is a parse
614            // error.
615            // v6.7.3 — `COMPACT COLD SEGMENTS`. No arguments, no
616            // `WHERE` filter (carved out per V6_7_DESIGN.md
617            // STABILITY). Lex order: identifier "compact" → "cold"
618            // → "segments". Anything else after `COMPACT` is a
619            // parse error.
620            Token::Ident(s) | Token::QuotedIdent(s) if s.eq_ignore_ascii_case("compact") => {
621                self.advance();
622                let next = self.peek().clone();
623                let cold = match next {
624                    Token::Ident(s) | Token::QuotedIdent(s) => s,
625                    _ => {
626                        return Err(
627                            self.err(format!("expected COLD after COMPACT, got {:?}", self.peek()))
628                        );
629                    }
630                };
631                if !cold.eq_ignore_ascii_case("cold") {
632                    return Err(self.err(format!("expected COLD after COMPACT, got {cold:?}")));
633                }
634                self.advance();
635                let next = self.peek().clone();
636                let segments = match next {
637                    Token::Ident(s) | Token::QuotedIdent(s) => s,
638                    _ => {
639                        return Err(self.err(format!(
640                            "expected SEGMENTS after COMPACT COLD, got {:?}",
641                            self.peek()
642                        )));
643                    }
644                };
645                if !segments.eq_ignore_ascii_case("segments") {
646                    return Err(self.err(format!(
647                        "expected SEGMENTS after COMPACT COLD, got {segments:?}"
648                    )));
649                }
650                self.advance();
651                Ok(Statement::CompactColdSegments)
652            }
653            Token::Ident(s) | Token::QuotedIdent(s) if s.eq_ignore_ascii_case("analyze") => {
654                self.advance();
655                let target = match self.peek() {
656                    Token::Eof | Token::Semicolon => None,
657                    Token::Ident(_) | Token::QuotedIdent(_) => {
658                        Some(self.expect_ident_like()?)
659                    }
660                    other => {
661                        return Err(self.err(format!(
662                            "expected table name or end of statement after ANALYZE, got {other:?}"
663                        )));
664                    }
665                };
666                Ok(Statement::Analyze(target))
667            }
668            // v7.12.1 — `SET <name> [TO|=] <value>`. The
669            // `default_text_search_config` parameter is consumed
670            // by the FTS function dispatcher; other parameter
671            // names are recorded but treated as a no-op so PG
672            // dump output loads.
673            Token::Ident(s) | Token::QuotedIdent(s) if s.eq_ignore_ascii_case("set") => {
674                self.advance();
675                // PG allows `SET LOCAL` / `SET SESSION` qualifiers
676                // — accept and ignore. MySQL adds `SET GLOBAL` too
677                // (and the alias `SET @@global.name = …` which the
678                // SessionVar path handles).
679                if matches!(self.peek(), Token::Ident(s) | Token::QuotedIdent(s) if s.eq_ignore_ascii_case("local") || s.eq_ignore_ascii_case("session") || s.eq_ignore_ascii_case("global"))
680                {
681                    self.advance();
682                }
683                // v7.14.0 — MySQL `SET NAMES <charset> [COLLATE
684                // <collation>]` — change the connection client
685                // charset. SPG stores UTF-8 always and orders
686                // bytewise; accept as a no-op.
687                if matches!(self.peek(), Token::Ident(s) | Token::QuotedIdent(s) if s.eq_ignore_ascii_case("names"))
688                {
689                    self.advance();
690                    // Charset ident-or-string.
691                    if matches!(
692                        self.peek(),
693                        Token::Ident(_) | Token::QuotedIdent(_) | Token::String(_)
694                    ) {
695                        self.advance();
696                    }
697                    // Optional `COLLATE <name>`.
698                    if matches!(self.peek(), Token::Ident(s) if s.eq_ignore_ascii_case("collate"))
699                    {
700                        self.advance();
701                        if matches!(
702                            self.peek(),
703                            Token::Ident(_) | Token::QuotedIdent(_) | Token::String(_)
704                        ) {
705                            self.advance();
706                        }
707                    }
708                    return Ok(Statement::Empty);
709                }
710                // v7.14.0 — MySQL `SET CHARACTER SET <charset>`
711                // alias — same accept-as-no-op as SET NAMES.
712                if matches!(self.peek(), Token::Ident(s) if s.eq_ignore_ascii_case("character"))
713                    && matches!(self.tokens.get(self.pos + 1), Some(Token::Ident(s)) if s.eq_ignore_ascii_case("set"))
714                {
715                    self.advance(); // CHARACTER
716                    self.advance(); // SET
717                    if matches!(
718                        self.peek(),
719                        Token::Ident(_) | Token::QuotedIdent(_) | Token::String(_)
720                    ) {
721                        self.advance();
722                    }
723                    return Ok(Statement::Empty);
724                }
725                // v7.14.0 — multi-assignment form
726                // `SET a = 1, b = 2, …`. Single-assignment is the
727                // 1-element case. Each LHS may be a regular ident
728                // or a SessionVar (`@VAR` / `@@VAR`).
729                let mut pairs: Vec<(String, crate::ast::SetValue)> = Vec::new();
730                loop {
731                    let lhs = match self.peek().clone() {
732                        Token::SessionVar(s) => {
733                            self.advance();
734                            s
735                        }
736                        Token::Ident(_) | Token::QuotedIdent(_) => self.parse_set_param_name()?,
737                        other => {
738                            return Err(self.err(format!(
739                                "expected parameter name after SET, got {other:?}"
740                            )));
741                        }
742                    };
743                    // Accept either `=` or the bare `TO` keyword.
744                    match self.peek() {
745                        Token::Eq => {
746                            self.advance();
747                        }
748                        Token::To => {
749                            self.advance();
750                        }
751                        other => {
752                            return Err(self.err(format!(
753                                "expected `=` or TO after SET {lhs}, got {other:?}"
754                            )));
755                        }
756                    }
757                    let value = self.parse_set_value()?;
758                    pairs.push((lhs, value));
759                    if matches!(self.peek(), Token::Comma) {
760                        self.advance();
761                        continue;
762                    }
763                    break;
764                }
765                if pairs.len() == 1 {
766                    let (name, value) = pairs.into_iter().next().unwrap();
767                    Ok(Statement::SetParameter { name, value })
768                } else {
769                    Ok(Statement::SetParameterList(pairs))
770                }
771            }
772            // v7.12.1 — `RESET <name>` / `RESET ALL`.
773            Token::Ident(s) | Token::QuotedIdent(s) if s.eq_ignore_ascii_case("reset") => {
774                self.advance();
775                match self.peek().clone() {
776                    Token::All => {
777                        self.advance();
778                        Ok(Statement::ResetParameter(None))
779                    }
780                    Token::Ident(s) | Token::QuotedIdent(s) if s.eq_ignore_ascii_case("all") => {
781                        self.advance();
782                        Ok(Statement::ResetParameter(None))
783                    }
784                    _ => {
785                        let name = self.parse_set_param_name()?;
786                        Ok(Statement::ResetParameter(Some(name)))
787                    }
788                }
789            }
790            other => Err(self.err(format!(
791                "expected SELECT / CREATE / DROP / INSERT / UPDATE / DELETE / ALTER / BEGIN / COMMIT / \
792                 ROLLBACK / SAVEPOINT / RELEASE / SHOW at start of statement, got {other:?}"
793            ))),
794        }
795    }
796
797    fn parse_create_stmt(&mut self) -> Result<Statement, ParseError> {
798        debug_assert!(matches!(self.peek(), Token::Create));
799        self.advance();
800        match self.peek() {
801            Token::Table => self.parse_create_table_stmt_after_create(),
802            Token::Index => self.parse_create_index_stmt_after_create(false),
803            // v7.9.29 — `CREATE UNIQUE INDEX … [WHERE pred]`.
804            // The `UNIQUE` modifier turns a partial index into a
805            // partial-uniqueness invariant (only rows matching the
806            // WHERE predicate are checked for duplicates). mailrs
807            // K1 (3 hits: email_templates default, calendar_events
808            // master, calendar_events instance).
809            Token::Ident(s) | Token::QuotedIdent(s) if s.eq_ignore_ascii_case("unique") => {
810                self.advance();
811                if !matches!(self.peek(), Token::Index) {
812                    return Err(self.err(alloc::format!(
813                        "expected INDEX after CREATE UNIQUE, got {:?}",
814                        self.peek()
815                    )));
816                }
817                self.parse_create_index_stmt_after_create(true)
818            }
819            Token::Publication => {
820                self.advance();
821                self.parse_create_publication_after_keyword()
822            }
823            Token::Subscription => {
824                self.advance();
825                self.parse_create_subscription_after_keyword()
826            }
827            // v4.1: CREATE USER 'name' WITH PASSWORD 'pw' [ROLE 'role'].
828            // USER isn't a reserved keyword — we look for the bare
829            // identifier so the lexer doesn't have to grow a token.
830            Token::Ident(s) | Token::QuotedIdent(s) if s.eq_ignore_ascii_case("user") => {
831                self.advance();
832                self.parse_create_user_after_keyword()
833            }
834            // v7.9.15 — `CREATE EXTENSION [IF NOT EXISTS] <name>
835            // [WITH SCHEMA …] [VERSION '…'] [CASCADE]` as a
836            // no-op. mailrs follow-up F3.
837            Token::Ident(s) | Token::QuotedIdent(s) if s.eq_ignore_ascii_case("extension") => {
838                self.advance();
839                self.parse_create_extension_after_keyword()
840            }
841            // v7.12.4 — `CREATE [OR REPLACE] FUNCTION …` and
842            // `CREATE [OR REPLACE] TRIGGER …`. `OR REPLACE` is
843            // optional; absorb it here and forward to the
844            // per-kind parsers with the flag. OR is a reserved
845            // keyword token.
846            Token::Or => {
847                self.advance();
848                let next = self.peek();
849                let (Token::Ident(s2) | Token::QuotedIdent(s2)) = next else {
850                    return Err(self.err(alloc::format!(
851                        "expected REPLACE after CREATE OR, got {next:?}"
852                    )));
853                };
854                if !s2.eq_ignore_ascii_case("replace") {
855                    return Err(self.err(alloc::format!(
856                        "expected REPLACE after CREATE OR, got {s2:?}"
857                    )));
858                }
859                self.advance();
860                self.parse_create_function_or_trigger_after_or_replace(true)
861            }
862            Token::Ident(s) | Token::QuotedIdent(s) if s.eq_ignore_ascii_case("function") => {
863                self.advance();
864                self.parse_create_function_after_keyword(false)
865            }
866            Token::Ident(s) | Token::QuotedIdent(s) if s.eq_ignore_ascii_case("trigger") => {
867                self.advance();
868                self.parse_create_trigger_after_keyword(false)
869            }
870            // v7.14.0 — pg_dump / mysqldump emit
871            // `CREATE SEQUENCE / SCHEMA / VIEW / MATERIALIZED VIEW
872            // / TYPE / DOMAIN / DATABASE / ROLE / POLICY / OPERATOR`.
873            // SPG is single-schema / single-database; these have
874            // no behavioural effect, so consume + return Empty.
875            Token::Ident(s) | Token::QuotedIdent(s)
876                if matches!(
877                    s.to_ascii_lowercase().as_str(),
878                    "sequence"
879                        | "schema"
880                        | "view"
881                        | "materialized"
882                        | "type"
883                        | "domain"
884                        | "database"
885                        | "role"
886                        | "policy"
887                        | "operator"
888                        | "cast"
889                        | "rule"
890                        | "aggregate"
891                        | "language"
892                        | "collation"
893                        | "conversion"
894                ) =>
895            {
896                self.consume_until_statement_boundary();
897                return Ok(Statement::Empty);
898            }
899            other => Err(self.err(format!(
900                "expected TABLE / INDEX / USER / EXTENSION / PUBLICATION / SUBSCRIPTION / FUNCTION / TRIGGER / SEQUENCE / SCHEMA / VIEW / TYPE / DOMAIN [OR REPLACE …] after CREATE, got {other:?}"
901            ))),
902        }
903    }
904
905    /// v7.12.4 — `CREATE OR REPLACE` already consumed; the next
906    /// keyword decides whether we parse a function or trigger
907    /// body. PG accepts other `OR REPLACE`-able objects (VIEW,
908    /// PROCEDURE) — those land in later releases.
909    fn parse_create_function_or_trigger_after_or_replace(
910        &mut self,
911        or_replace: bool,
912    ) -> Result<Statement, ParseError> {
913        let tok = self.peek();
914        let (Token::Ident(s) | Token::QuotedIdent(s)) = tok else {
915            return Err(self.err(alloc::format!(
916                "expected FUNCTION / TRIGGER after CREATE OR REPLACE, got {tok:?}"
917            )));
918        };
919        if s.eq_ignore_ascii_case("function") {
920            self.advance();
921            self.parse_create_function_after_keyword(or_replace)
922        } else if s.eq_ignore_ascii_case("trigger") {
923            self.advance();
924            self.parse_create_trigger_after_keyword(or_replace)
925        } else {
926            Err(self.err(alloc::format!(
927                "expected FUNCTION / TRIGGER after CREATE OR REPLACE, got {s:?}"
928            )))
929        }
930    }
931
932    /// v7.9.15 — accept and discard `CREATE EXTENSION` DDL.
933    /// SPG doesn't have a registry; pgvector / similar are
934    /// either builtin (VECTOR(N) ↔ pgvector) or n/a. Parsing
935    /// the syntax lets dual-target schemas keep the line.
936    fn parse_create_extension_after_keyword(&mut self) -> Result<Statement, ParseError> {
937        // Optional `IF NOT EXISTS`.
938        self.consume_if_not_exists();
939        let name = self.expect_ident_like()?;
940        // Drain optional WITH SCHEMA <ident> / VERSION '<v>' /
941        // CASCADE / FROM '<v>' clauses; we don't model them.
942        loop {
943            match self.peek() {
944                Token::Ident(s) if s.eq_ignore_ascii_case("with") => {
945                    self.advance();
946                    continue;
947                }
948                Token::Ident(s) if s.eq_ignore_ascii_case("schema") => {
949                    self.advance();
950                    let _ = self.expect_ident_like()?;
951                    continue;
952                }
953                Token::Ident(s) if s.eq_ignore_ascii_case("version") => {
954                    self.advance();
955                    // String or ident literal.
956                    let _ = self.advance();
957                    continue;
958                }
959                Token::Ident(s) if s.eq_ignore_ascii_case("from") => {
960                    self.advance();
961                    let _ = self.advance();
962                    continue;
963                }
964                Token::Ident(s) if s.eq_ignore_ascii_case("cascade") => {
965                    self.advance();
966                    continue;
967                }
968                _ => break,
969            }
970        }
971        Ok(Statement::CreateExtension(name))
972    }
973
974    /// v7.12.4 — body of `CREATE [OR REPLACE] FUNCTION`. The
975    /// `[OR REPLACE]` flag (and the `FUNCTION` keyword) have
976    /// already been consumed by the caller. Grammar accepted:
977    ///
978    ///   name `(` arg-list `)`
979    ///   `RETURNS` return-type
980    ///   [ `LANGUAGE` ident ]
981    ///   `AS` $$ body $$
982    ///   [ `LANGUAGE` ident ]
983    ///
984    /// Either `LANGUAGE` position is allowed; PG accepts both.
985    fn parse_create_function_after_keyword(
986        &mut self,
987        or_replace: bool,
988    ) -> Result<Statement, ParseError> {
989        let name = self.expect_ident_like()?;
990        // Argument list. v7.12.4 commonly sees the empty `()`
991        // (trigger functions); typed args parse and round-trip
992        // but the executor only invokes nullary functions.
993        if !matches!(self.peek(), Token::LParen) {
994            return Err(self.err(alloc::format!(
995                "expected '(' after function name {name:?}, got {:?}",
996                self.peek()
997            )));
998        }
999        self.advance();
1000        let args = self.parse_function_arg_list()?;
1001        // RETURNS clause.
1002        let tok = self.peek();
1003        let (Token::Ident(s) | Token::QuotedIdent(s)) = tok else {
1004            return Err(self.err(alloc::format!(
1005                "expected RETURNS after function arg list, got {tok:?}"
1006            )));
1007        };
1008        if !s.eq_ignore_ascii_case("returns") {
1009            return Err(self.err(alloc::format!(
1010                "expected RETURNS after function arg list, got {s:?}"
1011            )));
1012        }
1013        self.advance();
1014        let returns = self.parse_function_return()?;
1015        // Optional LANGUAGE clause (PG also accepts after AS — we'll
1016        // re-check after the body too).
1017        let mut language: Option<String> = self.parse_optional_language()?;
1018        // `AS` followed by a $$-quoted body (lexer already
1019        // collapses both `$$…$$` and `$tag$…$tag$` to a single
1020        // Token::String). AS is a reserved keyword (Token::As).
1021        if !matches!(self.peek(), Token::As) {
1022            return Err(self.err(alloc::format!(
1023                "expected AS before function body, got {:?}",
1024                self.peek()
1025            )));
1026        }
1027        self.advance();
1028        let body_text = match self.peek() {
1029            Token::String(s) => {
1030                let body = s.clone();
1031                self.advance();
1032                body
1033            }
1034            other => {
1035                return Err(self.err(alloc::format!(
1036                    "expected $$-quoted function body after AS, got {other:?}"
1037                )));
1038            }
1039        };
1040        // Trailing optional LANGUAGE clause (the other PG position).
1041        if language.is_none() {
1042            language = self.parse_optional_language()?;
1043        }
1044        let language = language.unwrap_or_else(|| String::from("sql"));
1045        // PL/pgSQL bodies get structure-parsed. Other languages
1046        // (or PL/pgSQL bodies the v7.12.4 parser doesn't yet
1047        // recognise) round-trip as Raw text — the executor errors
1048        // when invoked with a clear unsupported message.
1049        let body = if language.eq_ignore_ascii_case("plpgsql") {
1050            match parse_plpgsql_body(&body_text) {
1051                Ok(block) => FunctionBody::PlPgSql(block),
1052                // Best-effort: if the body parser doesn't yet
1053                // support a construct used inside, fall back to
1054                // raw — keeps `CREATE FUNCTION` itself working
1055                // (catalogue accepts), executor errors on
1056                // invocation only.
1057                Err(_) => FunctionBody::Raw(body_text),
1058            }
1059        } else {
1060            FunctionBody::Raw(body_text)
1061        };
1062        Ok(Statement::CreateFunction(CreateFunctionStatement {
1063            name,
1064            or_replace,
1065            args,
1066            returns,
1067            language,
1068            body,
1069        }))
1070    }
1071
1072    /// Closing `)`-terminated argument list. v7.12.4 commonly
1073    /// sees the empty `()`; typed args round-trip but the
1074    /// executor (yet) doesn't invoke them.
1075    fn parse_function_arg_list(&mut self) -> Result<Vec<FunctionArg>, ParseError> {
1076        let mut args: Vec<FunctionArg> = Vec::new();
1077        if matches!(self.peek(), Token::RParen) {
1078            self.advance();
1079            return Ok(args);
1080        }
1081        loop {
1082            // Optional `IN` / `OUT` / `INOUT` mode keyword. IN is
1083            // a reserved token; OUT / INOUT are bare idents.
1084            let mode = if matches!(self.peek(), Token::In) {
1085                self.advance();
1086                FunctionArgMode::In
1087            } else if matches!(self.peek(), Token::Ident(s) | Token::QuotedIdent(s) if s.eq_ignore_ascii_case("out"))
1088            {
1089                self.advance();
1090                FunctionArgMode::Out
1091            } else if matches!(self.peek(), Token::Ident(s) | Token::QuotedIdent(s) if s.eq_ignore_ascii_case("inout"))
1092            {
1093                self.advance();
1094                FunctionArgMode::InOut
1095            } else {
1096                FunctionArgMode::In
1097            };
1098            // Optional name. The next token is either a name
1099            // (followed by a type ident) or the type itself.
1100            // Disambiguate by peeking ahead: if the token after
1101            // the next ident is also an ident, we treat the
1102            // first as the name.
1103            let (name, ty_token) = {
1104                let first = self.expect_ident_like()?;
1105                // Peek next: if it's an ident (i.e. a type
1106                // name) the `first` was the arg name.
1107                match self.peek() {
1108                    Token::Ident(_) | Token::QuotedIdent(_) => {
1109                        let ty = self.expect_ident_like()?;
1110                        (Some(first), ty)
1111                    }
1112                    _ => (None, first),
1113                }
1114            };
1115            // Type — try to map to ColumnTypeName, else Raw.
1116            let ty = match map_type_ident_to_column_type_name(&ty_token) {
1117                Some(t) => FunctionArgType::Typed(t),
1118                None => FunctionArgType::Raw(ty_token),
1119            };
1120            args.push(FunctionArg { mode, name, ty });
1121            match self.peek() {
1122                Token::Comma => {
1123                    self.advance();
1124                    continue;
1125                }
1126                Token::RParen => {
1127                    self.advance();
1128                    return Ok(args);
1129                }
1130                other => {
1131                    return Err(self.err(alloc::format!(
1132                        "expected , or ) in function arg list, got {other:?}"
1133                    )));
1134                }
1135            }
1136        }
1137    }
1138
1139    fn parse_function_return(&mut self) -> Result<FunctionReturn, ParseError> {
1140        let ident = self.expect_ident_like()?;
1141        if ident.eq_ignore_ascii_case("trigger") {
1142            return Ok(FunctionReturn::Trigger);
1143        }
1144        if ident.eq_ignore_ascii_case("void") {
1145            return Ok(FunctionReturn::Void);
1146        }
1147        match map_type_ident_to_column_type_name(&ident) {
1148            Some(t) => Ok(FunctionReturn::Type(t)),
1149            None => Ok(FunctionReturn::Other(ident)),
1150        }
1151    }
1152
1153    fn parse_optional_language(&mut self) -> Result<Option<String>, ParseError> {
1154        match self.peek() {
1155            Token::Ident(s) | Token::QuotedIdent(s) if s.eq_ignore_ascii_case("language") => {
1156                self.advance();
1157                let lang = self.expect_ident_like()?;
1158                Ok(Some(lang.to_ascii_lowercase()))
1159            }
1160            _ => Ok(None),
1161        }
1162    }
1163
1164    /// v7.12.4 — body of `CREATE [OR REPLACE] TRIGGER`. The
1165    /// `[OR REPLACE]` flag and the `TRIGGER` keyword have already
1166    /// been consumed.
1167    fn parse_create_trigger_after_keyword(
1168        &mut self,
1169        or_replace: bool,
1170    ) -> Result<Statement, ParseError> {
1171        let name = self.expect_ident_like()?;
1172        let timing = {
1173            let ident = self.expect_ident_like()?;
1174            if ident.eq_ignore_ascii_case("before") {
1175                TriggerTiming::Before
1176            } else if ident.eq_ignore_ascii_case("after") {
1177                TriggerTiming::After
1178            } else if ident.eq_ignore_ascii_case("instead") {
1179                let next = self.expect_ident_like()?;
1180                if !next.eq_ignore_ascii_case("of") {
1181                    return Err(self.err(alloc::format!(
1182                        "expected OF after INSTEAD in trigger timing, got {next:?}"
1183                    )));
1184                }
1185                TriggerTiming::InsteadOf
1186            } else {
1187                return Err(self.err(alloc::format!(
1188                    "expected BEFORE / AFTER / INSTEAD OF in trigger timing, got {ident:?}"
1189                )));
1190            }
1191        };
1192        // Events: INSERT [ OR UPDATE [ OR DELETE [ OR TRUNCATE ] ] ].
1193        // OR is a reserved keyword token (Token::Or), not an Ident.
1194        // v7.13.0 — after an UPDATE event we may optionally see
1195        // `OF col, col, …` (mailrs round-5 G7). Columns are
1196        // captured into `update_columns` once across the whole
1197        // events list; multiple `UPDATE OF` clauses are rejected.
1198        let mut events: Vec<TriggerEvent> = Vec::new();
1199        let mut update_columns: Vec<String> = Vec::new();
1200        let (first_ev, first_cols) = self.parse_trigger_event_with_optional_of()?;
1201        events.push(first_ev);
1202        if !first_cols.is_empty() {
1203            update_columns = first_cols;
1204        }
1205        while matches!(self.peek(), Token::Or) {
1206            self.advance();
1207            let (ev, cols) = self.parse_trigger_event_with_optional_of()?;
1208            events.push(ev);
1209            if !cols.is_empty() {
1210                if !update_columns.is_empty() {
1211                    return Err(self.err(
1212                        "CREATE TRIGGER: `UPDATE OF cols` may appear at most once".into(),
1213                    ));
1214                }
1215                update_columns = cols;
1216            }
1217        }
1218        // ON <table>
1219        let tok = self.peek();
1220        let Token::On = tok else {
1221            return Err(self.err(alloc::format!(
1222                "expected ON after trigger events, got {tok:?}"
1223            )));
1224        };
1225        self.advance();
1226        let table = self.expect_ident_like()?;
1227        // FOR EACH ROW / FOR EACH STATEMENT. FOR is a reserved
1228        // keyword (Token::For); EACH / ROW / STATEMENT are bare
1229        // idents.
1230        if !matches!(self.peek(), Token::For) {
1231            return Err(self.err(alloc::format!(
1232                "expected FOR EACH ROW / STATEMENT, got {:?}",
1233                self.peek()
1234            )));
1235        }
1236        self.advance();
1237        let for_each = {
1238            let e = self.expect_ident_like()?;
1239            if !e.eq_ignore_ascii_case("each") {
1240                return Err(self.err(alloc::format!("expected EACH after FOR, got {e:?}")));
1241            }
1242            let unit = self.expect_ident_like()?;
1243            if unit.eq_ignore_ascii_case("row") {
1244                TriggerForEach::Row
1245            } else if unit.eq_ignore_ascii_case("statement") {
1246                TriggerForEach::Statement
1247            } else {
1248                return Err(self.err(alloc::format!(
1249                    "expected ROW / STATEMENT after FOR EACH, got {unit:?}"
1250                )));
1251            }
1252        };
1253        // EXECUTE FUNCTION/PROCEDURE name(...)
1254        let exec = self.expect_ident_like()?;
1255        if !exec.eq_ignore_ascii_case("execute") {
1256            return Err(self.err(alloc::format!(
1257                "expected EXECUTE FUNCTION/PROCEDURE in CREATE TRIGGER, got {exec:?}"
1258            )));
1259        }
1260        let fn_or_proc = self.expect_ident_like()?;
1261        if !(fn_or_proc.eq_ignore_ascii_case("function")
1262            || fn_or_proc.eq_ignore_ascii_case("procedure"))
1263        {
1264            return Err(self.err(alloc::format!(
1265                "expected FUNCTION / PROCEDURE after EXECUTE, got {fn_or_proc:?}"
1266            )));
1267        }
1268        let function = self.expect_ident_like()?;
1269        // Optional empty arg list `()`.
1270        if matches!(self.peek(), Token::LParen) {
1271            self.advance();
1272            if !matches!(self.peek(), Token::RParen) {
1273                return Err(self.err(alloc::format!(
1274                    "v7.12.4 trigger function calls take no args; got {:?}",
1275                    self.peek()
1276                )));
1277            }
1278            self.advance();
1279        }
1280        Ok(Statement::CreateTrigger(CreateTriggerStatement {
1281            name,
1282            or_replace,
1283            timing,
1284            events,
1285            table,
1286            for_each,
1287            function,
1288            update_columns,
1289        }))
1290    }
1291
1292    /// v7.13.0 — parse one trigger event, then optionally consume
1293    /// `OF col, col, …` after `UPDATE` (mailrs round-5 G7). Other
1294    /// events (INSERT/DELETE/TRUNCATE) don't accept the OF tail.
1295    fn parse_trigger_event_with_optional_of(
1296        &mut self,
1297    ) -> Result<(TriggerEvent, Vec<String>), ParseError> {
1298        let ev = self.parse_trigger_event()?;
1299        if !matches!(ev, TriggerEvent::Update) {
1300            return Ok((ev, Vec::new()));
1301        }
1302        // `OF` is a bare ident.
1303        if !matches!(self.peek(), Token::Ident(s) if s.eq_ignore_ascii_case("of")) {
1304            return Ok((ev, Vec::new()));
1305        }
1306        self.advance(); // OF
1307        let mut cols: Vec<String> = Vec::new();
1308        loop {
1309            cols.push(self.expect_ident_like()?);
1310            if matches!(self.peek(), Token::Comma) {
1311                self.advance();
1312                continue;
1313            }
1314            break;
1315        }
1316        if cols.is_empty() {
1317            return Err(self.err(
1318                "CREATE TRIGGER: `UPDATE OF` requires at least one column name".into(),
1319            ));
1320        }
1321        Ok((ev, cols))
1322    }
1323
1324    /// v7.12.4 — `BEGIN stmt; stmt; … END[;]` PL/pgSQL block.
1325    /// v7.12.6 — optional `DECLARE var TYPE [:= init];` prelude
1326    /// before `BEGIN`, and IF / RAISE / embedded SQL statements
1327    /// inside the body.
1328    /// Called by [`parse_plpgsql_body`] after the body's tokens
1329    /// have been lexed into this temporary parser.
1330    pub(crate) fn parse_plpgsql_block(&mut self) -> Result<PlPgSqlBlock, ParseError> {
1331        // v7.12.6 — optional DECLARE prelude.
1332        let declarations = if matches!(
1333            self.peek(),
1334            Token::Ident(s) | Token::QuotedIdent(s) if s.eq_ignore_ascii_case("declare")
1335        ) {
1336            self.advance();
1337            self.parse_plpgsql_declare_block()?
1338        } else {
1339            Vec::new()
1340        };
1341        // BEGIN keyword (PL/pgSQL — distinct from the SQL
1342        // `BEGIN` transaction-start, but we can reuse the
1343        // reserved Token::Begin since the body is a separate
1344        // lex/parse context).
1345        if !matches!(self.peek(), Token::Begin) {
1346            return Err(self.err(alloc::format!(
1347                "expected BEGIN at start of plpgsql block, got {:?}",
1348                self.peek()
1349            )));
1350        }
1351        self.advance();
1352        let statements = self.parse_plpgsql_stmt_list_until_end()?;
1353        Ok(PlPgSqlBlock {
1354            declarations,
1355            statements,
1356        })
1357    }
1358
1359    /// v7.12.6 — parse the `DECLARE ... [var TYPE [:= init];]+`
1360    /// prelude. Caller has already consumed `DECLARE`. We stop
1361    /// reading entries when we hit `BEGIN`.
1362    fn parse_plpgsql_declare_block(&mut self) -> Result<Vec<PlPgSqlDeclare>, ParseError> {
1363        let mut out: Vec<PlPgSqlDeclare> = Vec::new();
1364        loop {
1365            if matches!(self.peek(), Token::Begin) {
1366                return Ok(out);
1367            }
1368            let name = self.expect_ident_like()?;
1369            let ty_token = self.expect_ident_like()?;
1370            let ty = match map_type_ident_to_column_type_name(&ty_token) {
1371                Some(t) => FunctionArgType::Typed(t),
1372                None => FunctionArgType::Raw(ty_token),
1373            };
1374            let default = match self.peek() {
1375                Token::ColonEq => {
1376                    self.advance();
1377                    Some(self.parse_expr(0)?)
1378                }
1379                Token::Eq => {
1380                    // PL/pgSQL also accepts `=` for the
1381                    // DECLARE default (PG treats them the same
1382                    // in this position).
1383                    self.advance();
1384                    Some(self.parse_expr(0)?)
1385                }
1386                _ => None,
1387            };
1388            // Mandatory `;` between declarations.
1389            if !matches!(self.peek(), Token::Semicolon) {
1390                return Err(self.err(alloc::format!(
1391                    "expected ; after DECLARE entry for {name:?}, got {:?}",
1392                    self.peek()
1393                )));
1394            }
1395            self.advance();
1396            out.push(PlPgSqlDeclare { name, ty, default });
1397        }
1398    }
1399
1400    /// v7.12.6 — parse PL/pgSQL statements up to (and consuming)
1401    /// the terminating `END;` (or `END IF;` etc — handled by the
1402    /// per-construct sub-parsers). Used by both the outer block
1403    /// and the IF/ELSE branch bodies.
1404    fn parse_plpgsql_stmt_list_until_end(&mut self) -> Result<Vec<PlPgSqlStmt>, ParseError> {
1405        let mut statements: Vec<PlPgSqlStmt> = Vec::new();
1406        loop {
1407            // Allow trailing semicolons + END.
1408            while matches!(self.peek(), Token::Semicolon) {
1409                self.advance();
1410            }
1411            // END / ELSE / ELSIF — handled by the caller.
1412            if matches!(
1413                self.peek(),
1414                Token::Ident(s) | Token::QuotedIdent(s)
1415                    if s.eq_ignore_ascii_case("end")
1416                        || s.eq_ignore_ascii_case("else")
1417                        || s.eq_ignore_ascii_case("elsif")
1418                        || s.eq_ignore_ascii_case("elseif")
1419            ) {
1420                return Ok(statements);
1421            }
1422            // Otherwise: one statement, then expect `;` or
1423            // a block-terminator keyword.
1424            let stmt = self.parse_plpgsql_stmt()?;
1425            statements.push(stmt);
1426            match self.peek() {
1427                Token::Semicolon => {
1428                    self.advance();
1429                }
1430                Token::Ident(s) | Token::QuotedIdent(s)
1431                    if s.eq_ignore_ascii_case("end")
1432                        || s.eq_ignore_ascii_case("else")
1433                        || s.eq_ignore_ascii_case("elsif")
1434                        || s.eq_ignore_ascii_case("elseif") =>
1435                {
1436                    // Final statement of the block without `;`.
1437                }
1438                other => {
1439                    return Err(self.err(alloc::format!(
1440                        "expected ; or END/ELSE/ELSIF after plpgsql statement, got {other:?}"
1441                    )));
1442                }
1443            }
1444        }
1445    }
1446
1447    fn parse_plpgsql_stmt(&mut self) -> Result<PlPgSqlStmt, ParseError> {
1448        // RETURN keyword?
1449        if matches!(self.peek(), Token::Ident(s) | Token::QuotedIdent(s) if s.eq_ignore_ascii_case("return"))
1450        {
1451            self.advance();
1452            return self.parse_plpgsql_return();
1453        }
1454        // v7.12.6 — IF block.
1455        if matches!(self.peek(), Token::Ident(s) | Token::QuotedIdent(s) if s.eq_ignore_ascii_case("if"))
1456        {
1457            self.advance();
1458            return self.parse_plpgsql_if();
1459        }
1460        // v7.12.6 — RAISE.
1461        if matches!(self.peek(), Token::Ident(s) | Token::QuotedIdent(s) if s.eq_ignore_ascii_case("raise"))
1462        {
1463            self.advance();
1464            return self.parse_plpgsql_raise();
1465        }
1466        // v7.12.6 — embedded SQL statements. INSERT/UPDATE/DELETE/
1467        // SELECT can appear directly inside a trigger body; we
1468        // recurse into the regular Statement parser, which will
1469        // stop at the trailing `;` (which our caller then
1470        // consumes).
1471        if matches!(self.peek(), Token::Insert)
1472            || matches!(self.peek(), Token::Select)
1473            || matches!(self.peek(), Token::Ident(s) | Token::QuotedIdent(s) if s.eq_ignore_ascii_case("update") || s.eq_ignore_ascii_case("delete"))
1474        {
1475            let stmt = self.parse_one_statement()?;
1476            return Ok(PlPgSqlStmt::EmbeddedSql(Box::new(stmt)));
1477        }
1478        // Otherwise: assignment. `NEW.col` / `OLD.col` / `var`
1479        // followed by `:=` and an expression.
1480        let target = self.parse_plpgsql_assign_target()?;
1481        // PL/pgSQL assignment uses `:=`. The lexer represents
1482        // this as a colon followed by `=`; check both shapes.
1483        match self.peek() {
1484            Token::ColonEq => {
1485                self.advance();
1486            }
1487            Token::Colon => {
1488                self.advance();
1489                if !matches!(self.peek(), Token::Eq) {
1490                    return Err(self.err(alloc::format!(
1491                        "expected := after plpgsql assign target, got `:` then {:?}",
1492                        self.peek()
1493                    )));
1494                }
1495                self.advance();
1496            }
1497            other => {
1498                return Err(self.err(alloc::format!(
1499                    "expected := after plpgsql assign target, got {other:?}"
1500                )));
1501            }
1502        }
1503        let value = self.parse_expr(0)?;
1504        Ok(PlPgSqlStmt::Assign { target, value })
1505    }
1506
1507    /// v7.12.6 — `IF cond THEN body [ELSIF cond THEN body]*
1508    /// [ELSE body] END IF`. `IF` keyword already consumed.
1509    fn parse_plpgsql_if(&mut self) -> Result<PlPgSqlStmt, ParseError> {
1510        let mut branches: Vec<(Expr, Vec<PlPgSqlStmt>)> = Vec::new();
1511        let mut else_branch: Vec<PlPgSqlStmt> = Vec::new();
1512        loop {
1513            // <expr> THEN
1514            let cond = self.parse_expr(0)?;
1515            let then_kw = self.expect_ident_like()?;
1516            if !then_kw.eq_ignore_ascii_case("then") {
1517                return Err(self.err(alloc::format!(
1518                    "expected THEN after IF/ELSIF condition, got {then_kw:?}"
1519                )));
1520            }
1521            let body = self.parse_plpgsql_stmt_list_until_end()?;
1522            branches.push((cond, body));
1523            // Look at terminator: ELSIF/ELSEIF, ELSE, or END IF.
1524            match self.peek() {
1525                Token::Ident(s) | Token::QuotedIdent(s)
1526                    if s.eq_ignore_ascii_case("elsif") || s.eq_ignore_ascii_case("elseif") =>
1527                {
1528                    self.advance();
1529                    continue;
1530                }
1531                Token::Ident(s) | Token::QuotedIdent(s) if s.eq_ignore_ascii_case("else") => {
1532                    self.advance();
1533                    else_branch = self.parse_plpgsql_stmt_list_until_end()?;
1534                    break;
1535                }
1536                Token::Ident(s) | Token::QuotedIdent(s) if s.eq_ignore_ascii_case("end") => {
1537                    break;
1538                }
1539                other => {
1540                    return Err(self.err(alloc::format!(
1541                        "expected ELSIF / ELSE / END after IF branch body, got {other:?}"
1542                    )));
1543                }
1544            }
1545        }
1546        // Expect `END IF` (the END keyword is the one we're
1547        // looking at right now).
1548        let end_kw = self.expect_ident_like()?;
1549        if !end_kw.eq_ignore_ascii_case("end") {
1550            return Err(self.err(alloc::format!("expected END IF, got {end_kw:?}")));
1551        }
1552        let if_kw = self.expect_ident_like()?;
1553        if !if_kw.eq_ignore_ascii_case("if") {
1554            return Err(self.err(alloc::format!("expected END IF, got END {if_kw:?}")));
1555        }
1556        Ok(PlPgSqlStmt::If {
1557            branches,
1558            else_branch,
1559        })
1560    }
1561
1562    /// v7.12.6 — `RAISE { NOTICE | WARNING | INFO | LOG | DEBUG
1563    /// | EXCEPTION } '<message>' [, args]*`. The `RAISE` keyword
1564    /// is already consumed.
1565    fn parse_plpgsql_raise(&mut self) -> Result<PlPgSqlStmt, ParseError> {
1566        let lvl_ident = self.expect_ident_like()?;
1567        let level = match lvl_ident.to_ascii_lowercase().as_str() {
1568            "notice" => RaiseLevel::Notice,
1569            "warning" => RaiseLevel::Warning,
1570            "info" => RaiseLevel::Info,
1571            "log" => RaiseLevel::Log,
1572            "debug" => RaiseLevel::Debug,
1573            "exception" => RaiseLevel::Exception,
1574            other => {
1575                return Err(self.err(alloc::format!(
1576                    "expected RAISE level (NOTICE/WARNING/INFO/LOG/DEBUG/EXCEPTION), got {other:?}"
1577                )));
1578            }
1579        };
1580        // Message: required for v7.12.6. PG accepts a bare
1581        // RAISE-rethrow form (no message), reserved for future
1582        // RAISE-no-args support.
1583        let Token::String(msg) = self.peek() else {
1584            return Err(self.err(alloc::format!(
1585                "expected RAISE message string, got {:?}",
1586                self.peek()
1587            )));
1588        };
1589        let message = msg.clone();
1590        self.advance();
1591        // Optional comma-separated args (PG `%` format substitution).
1592        let mut args: Vec<Expr> = Vec::new();
1593        while matches!(self.peek(), Token::Comma) {
1594            self.advance();
1595            args.push(self.parse_expr(0)?);
1596        }
1597        Ok(PlPgSqlStmt::Raise {
1598            level,
1599            message,
1600            args,
1601        })
1602    }
1603
1604    fn parse_plpgsql_assign_target(&mut self) -> Result<AssignTarget, ParseError> {
1605        let head = self.expect_ident_like()?;
1606        if matches!(self.peek(), Token::Dot) {
1607            self.advance();
1608            let col = self.expect_ident_like()?;
1609            if head.eq_ignore_ascii_case("new") {
1610                return Ok(AssignTarget::NewColumn(col));
1611            }
1612            if head.eq_ignore_ascii_case("old") {
1613                return Ok(AssignTarget::OldColumn(col));
1614            }
1615            return Err(self.err(alloc::format!(
1616                "v7.12.4 plpgsql assign target must be NEW.<col> / OLD.<col> / <local_var>; \
1617                 got {head:?}.<col>"
1618            )));
1619        }
1620        Ok(AssignTarget::Local(head))
1621    }
1622
1623    fn parse_plpgsql_return(&mut self) -> Result<PlPgSqlStmt, ParseError> {
1624        // RETURN NEW / OLD / NULL — bare-ident forms.
1625        match self.peek() {
1626            Token::Ident(s) | Token::QuotedIdent(s) if s.eq_ignore_ascii_case("new") => {
1627                self.advance();
1628                return Ok(PlPgSqlStmt::Return(ReturnTarget::New));
1629            }
1630            Token::Ident(s) | Token::QuotedIdent(s) if s.eq_ignore_ascii_case("old") => {
1631                self.advance();
1632                return Ok(PlPgSqlStmt::Return(ReturnTarget::Old));
1633            }
1634            Token::Null => {
1635                self.advance();
1636                return Ok(PlPgSqlStmt::Return(ReturnTarget::Null));
1637            }
1638            // Bare `RETURN;` (no value) — treated as `RETURN NULL`
1639            // per PL/pgSQL convention.
1640            Token::Semicolon => {
1641                return Ok(PlPgSqlStmt::Return(ReturnTarget::Null));
1642            }
1643            _ => {}
1644        }
1645        // Fall through: parse a full expression.
1646        let e = self.parse_expr(0)?;
1647        Ok(PlPgSqlStmt::Return(ReturnTarget::Expr(e)))
1648    }
1649
1650    fn parse_trigger_event(&mut self) -> Result<TriggerEvent, ParseError> {
1651        // INSERT is a reserved Token; UPDATE / DELETE / TRUNCATE
1652        // are ident-shaped (the parser keys off case-insensitive
1653        // match — same shape used by the top-level Update / Delete
1654        // dispatchers at parse_one_statement).
1655        if matches!(self.peek(), Token::Insert) {
1656            self.advance();
1657            return Ok(TriggerEvent::Insert);
1658        }
1659        match self.peek() {
1660            Token::Ident(s) | Token::QuotedIdent(s) if s.eq_ignore_ascii_case("update") => {
1661                self.advance();
1662                Ok(TriggerEvent::Update)
1663            }
1664            Token::Ident(s) | Token::QuotedIdent(s) if s.eq_ignore_ascii_case("delete") => {
1665                self.advance();
1666                Ok(TriggerEvent::Delete)
1667            }
1668            Token::Ident(s) | Token::QuotedIdent(s) if s.eq_ignore_ascii_case("truncate") => {
1669                self.advance();
1670                Ok(TriggerEvent::Truncate)
1671            }
1672            other => Err(self.err(alloc::format!(
1673                "expected INSERT / UPDATE / DELETE / TRUNCATE in trigger event list, got {other:?}"
1674            ))),
1675        }
1676    }
1677
1678    /// v6.1.2 → v6.1.3 — `CREATE PUBLICATION <name>` body. Accepts:
1679    ///   - (no clause) → implicit `FOR ALL TABLES`
1680    ///   - `FOR ALL TABLES`
1681    ///   - `FOR ALL TABLES EXCEPT t1, t2, …` (v6.1.3)
1682    ///   - `FOR TABLE t1, t2, …` (v6.1.3) — `FOR TABLES …` also
1683    ///     accepted (PG accepts both forms in PG 19).
1684    fn parse_create_publication_after_keyword(&mut self) -> Result<Statement, ParseError> {
1685        let name = self.expect_ident_or_string()?;
1686        // Bare DDL maps to FOR ALL TABLES — matches the v6.1.2
1687        // shape so existing publications keep parsing identically.
1688        let scope = if matches!(self.peek(), Token::For) {
1689            self.advance();
1690            if matches!(self.peek(), Token::All) {
1691                self.advance();
1692                if !matches!(self.peek(), Token::Tables) {
1693                    return Err(self.err(format!(
1694                        "expected TABLES after FOR ALL, got {:?}",
1695                        self.peek()
1696                    )));
1697                }
1698                self.advance();
1699                if matches!(self.peek(), Token::Except) {
1700                    self.advance();
1701                    let tables = self.parse_publication_table_list()?;
1702                    PublicationScope::AllTablesExcept(tables)
1703                } else {
1704                    PublicationScope::AllTables
1705                }
1706            } else if matches!(self.peek(), Token::Table | Token::Tables) {
1707                // PG 19 accepts both `FOR TABLE …` (singular) and
1708                // `FOR TABLES …` (plural); SPG matches.
1709                self.advance();
1710                let tables = self.parse_publication_table_list()?;
1711                PublicationScope::ForTables(tables)
1712            } else {
1713                return Err(self.err(format!(
1714                    "expected ALL TABLES or TABLE <list> after FOR, got {:?}",
1715                    self.peek()
1716                )));
1717            }
1718        } else {
1719            PublicationScope::AllTables
1720        };
1721        Ok(Statement::CreatePublication(CreatePublicationStatement {
1722            name,
1723            scope,
1724        }))
1725    }
1726
1727    /// v6.1.3 — Comma-separated identifier list for the publication
1728    /// FOR-clause. Requires at least one entry; empty list is a
1729    /// parse error (PG behaviour). Quoted idents are accepted; the
1730    /// names round-trip through `Display` as `quote_ident(name)`.
1731    fn parse_publication_table_list(&mut self) -> Result<Vec<String>, ParseError> {
1732        let first = self.expect_ident_like()?;
1733        let mut out = alloc::vec![first];
1734        while matches!(self.peek(), Token::Comma) {
1735            self.advance();
1736            out.push(self.expect_ident_like()?);
1737        }
1738        Ok(out)
1739    }
1740
1741    /// v6.1.4 — `CREATE SUBSCRIPTION <name>
1742    ///                 CONNECTION '<conn>'
1743    ///                 PUBLICATION <pub> [, <pub> ...]`.
1744    ///
1745    /// The clause order is fixed (CONNECTION first, then
1746    /// PUBLICATION) to match PG. No WITH-options accepted in
1747    /// v6.1.4 — `enabled` defaults to true, no other knobs ship.
1748    fn parse_create_subscription_after_keyword(&mut self) -> Result<Statement, ParseError> {
1749        let name = self.expect_ident_or_string()?;
1750        if !matches!(self.peek(), Token::Connection) {
1751            return Err(self.err(format!(
1752                "expected CONNECTION after CREATE SUBSCRIPTION <name>, got {:?}",
1753                self.peek()
1754            )));
1755        }
1756        self.advance();
1757        let conn_str = self.expect_string_literal()?;
1758        if !matches!(self.peek(), Token::Publication) {
1759            return Err(self.err(format!(
1760                "expected PUBLICATION after CONNECTION '<conn>', got {:?}",
1761                self.peek()
1762            )));
1763        }
1764        self.advance();
1765        // Reuse the publication FOR-list parser shape: at least one
1766        // identifier, comma-separated.
1767        let first = self.expect_ident_like()?;
1768        let mut publications = alloc::vec![first];
1769        while matches!(self.peek(), Token::Comma) {
1770            self.advance();
1771            publications.push(self.expect_ident_like()?);
1772        }
1773        Ok(Statement::CreateSubscription(CreateSubscriptionStatement {
1774            name,
1775            conn_str,
1776            publications,
1777        }))
1778    }
1779
1780    /// v6.1.7 — `WAIT FOR WAL POSITION <pos> [WITH TIMEOUT <ms>]`.
1781    /// All keywords after `WAIT` are bare idents in v6.1.x; no
1782    /// lexer churn. Both `<pos>` and `<ms>` are positive integers
1783    /// that fit `u64`.
1784    /// v7.12.1 — parameter name in `SET <name>` may be dotted
1785    /// (`pg_catalog.default_text_search_config` etc).
1786    fn parse_set_param_name(&mut self) -> Result<String, ParseError> {
1787        let mut name = self.expect_ident_like()?;
1788        while matches!(self.peek(), Token::Dot) {
1789            self.advance();
1790            let next = self.expect_ident_like()?;
1791            name.push('.');
1792            name.push_str(&next);
1793        }
1794        Ok(name.to_ascii_lowercase())
1795    }
1796
1797    fn parse_set_value(&mut self) -> Result<crate::ast::SetValue, ParseError> {
1798        match self.advance() {
1799            Token::String(s) => Ok(crate::ast::SetValue::String(s)),
1800            Token::Ident(s) | Token::QuotedIdent(s) if s.eq_ignore_ascii_case("default") => {
1801                Ok(crate::ast::SetValue::Default)
1802            }
1803            Token::Ident(s) | Token::QuotedIdent(s) => {
1804                let mut accum = s;
1805                while matches!(self.peek(), Token::Dot) {
1806                    self.advance();
1807                    let next = self.expect_ident_like()?;
1808                    accum.push('.');
1809                    accum.push_str(&next);
1810                }
1811                Ok(crate::ast::SetValue::Ident(accum))
1812            }
1813            Token::Integer(n) => Ok(crate::ast::SetValue::Number(n.to_string())),
1814            Token::Float(f) => Ok(crate::ast::SetValue::Number(f.to_string())),
1815            // v7.14.0 — MySQL session/user variable RHS
1816            // (e.g. `SET OLD_FOREIGN_KEY_CHECKS = @@FOREIGN_KEY_CHECKS`).
1817            // Wrap as Ident so the SET handler can record it; the
1818            // engine treats `@VAR` / `@@VAR` values as opaque
1819            // strings.
1820            Token::SessionVar(s) => Ok(crate::ast::SetValue::Ident(s)),
1821            // v7.14.0 — `SET sql_mode = 'NO_AUTO_VALUE_ON_ZERO,STRICT_TRANS_TABLES'`
1822            // is the common MySQL preamble shape. Allow a `+` or
1823            // `-` prefix on negative numerics for parity with PG
1824            // (some param defaults are negative).
1825            Token::Minus => match self.advance() {
1826                Token::Integer(n) => Ok(crate::ast::SetValue::Number(alloc::format!("-{n}"))),
1827                Token::Float(f) => Ok(crate::ast::SetValue::Number(alloc::format!("-{f}"))),
1828                other => Err(self.err(format!(
1829                    "expected numeric after `-` in SET value, got {other:?}"
1830                ))),
1831            },
1832            other => Err(self.err(format!(
1833                "expected literal, identifier, or DEFAULT after `=` in SET, got {other:?}"
1834            ))),
1835        }
1836    }
1837
1838    fn parse_wait_after_keyword(&mut self) -> Result<Statement, ParseError> {
1839        // FOR is a v6.1.2-reserved keyword (Token::For). The
1840        // other two are bare idents — they've never needed lexer
1841        // support and we keep it that way.
1842        if !matches!(self.peek(), Token::For) {
1843            return Err(self.err(format!("expected FOR after WAIT, got {:?}", self.peek())));
1844        }
1845        self.advance();
1846        self.expect_keyword_ident("wal")?;
1847        self.expect_keyword_ident("position")?;
1848        let pos = self.expect_u64_literal()?;
1849        let timeout_ms = if matches!(self.peek(), Token::Ident(s) | Token::QuotedIdent(s) if s.eq_ignore_ascii_case("with"))
1850        {
1851            self.advance();
1852            self.expect_keyword_ident("timeout")?;
1853            Some(self.expect_u64_literal()?)
1854        } else {
1855            None
1856        };
1857        Ok(Statement::WaitForWalPosition { pos, timeout_ms })
1858    }
1859
1860    /// v6.1.7 helper — consume a `Token::Integer` and check it
1861    /// fits `u64`. WAL positions and millisecond timeouts are
1862    /// non-negative.
1863    fn expect_u64_literal(&mut self) -> Result<u64, ParseError> {
1864        match self.advance() {
1865            Token::Integer(n) if n >= 0 => Ok(n as u64),
1866            Token::Integer(n) => Err(ParseError {
1867                message: format!("expected non-negative integer, got {n}"),
1868                token_pos: self.pos.saturating_sub(1),
1869            }),
1870            other => Err(ParseError {
1871                message: format!("expected integer literal, got {other:?}"),
1872                token_pos: self.pos.saturating_sub(1),
1873            }),
1874        }
1875    }
1876
1877    /// `CREATE USER` body — name + WITH PASSWORD '<pw>' + optional
1878    /// ROLE '<role>' (defaults to readonly). All string slots accept
1879    /// either a quoted ident or a quoted string literal.
1880    fn parse_create_user_after_keyword(&mut self) -> Result<Statement, ParseError> {
1881        let name = self.expect_ident_or_string()?;
1882        self.expect_keyword_ident("with")?;
1883        self.expect_keyword_ident("password")?;
1884        let password = self.expect_string_literal()?;
1885        let role = if let Token::Ident(s) | Token::QuotedIdent(s) = self.peek()
1886            && s.eq_ignore_ascii_case("role")
1887        {
1888            self.advance();
1889            self.expect_string_literal()?
1890        } else {
1891            "readonly".to_string()
1892        };
1893        Ok(Statement::CreateUser(crate::ast::CreateUserStatement {
1894            name,
1895            password,
1896            role,
1897        }))
1898    }
1899
1900    /// v4.4 `UPDATE <table> SET col = expr [, col = expr]* [WHERE cond]`.
1901    /// Caller already consumed the leading `UPDATE` ident.
1902    fn parse_update_after_keyword(&mut self) -> Result<Statement, ParseError> {
1903        let table = self.expect_ident_like()?;
1904        self.expect_keyword_ident("set")?;
1905        let mut assignments = Vec::new();
1906        loop {
1907            let col = self.expect_ident_like()?;
1908            if !matches!(self.peek(), Token::Eq) {
1909                return Err(self.err(format!(
1910                    "expected `=` after column name in UPDATE SET, got {:?}",
1911                    self.peek()
1912                )));
1913            }
1914            self.advance();
1915            let value = self.parse_expr(0)?;
1916            assignments.push((col, value));
1917            if matches!(self.peek(), Token::Comma) {
1918                self.advance();
1919                continue;
1920            }
1921            break;
1922        }
1923        let where_ = if matches!(self.peek(), Token::Where) {
1924            self.advance();
1925            Some(self.parse_expr(0)?)
1926        } else {
1927            None
1928        };
1929        let returning = self.parse_optional_returning()?;
1930        Ok(Statement::Update(crate::ast::UpdateStatement {
1931            table,
1932            assignments,
1933            where_,
1934            returning,
1935        }))
1936    }
1937
1938    /// v4.4 `DELETE FROM <table> [WHERE cond]`. Caller already consumed
1939    /// the leading `DELETE` ident.
1940    fn parse_delete_after_keyword(&mut self) -> Result<Statement, ParseError> {
1941        if !matches!(self.peek(), Token::From) {
1942            return Err(self.err(format!("expected FROM after DELETE, got {:?}", self.peek())));
1943        }
1944        self.advance();
1945        let table = self.expect_ident_like()?;
1946        let where_ = if matches!(self.peek(), Token::Where) {
1947            self.advance();
1948            Some(self.parse_expr(0)?)
1949        } else {
1950            None
1951        };
1952        let returning = self.parse_optional_returning()?;
1953        Ok(Statement::Delete(crate::ast::DeleteStatement {
1954            table,
1955            where_,
1956            returning,
1957        }))
1958    }
1959
1960    /// v7.9.4 — parse the optional trailing `RETURNING <projection>`
1961    /// clause on INSERT / UPDATE / DELETE. Same projection grammar
1962    /// as SELECT, so `RETURNING *`, `RETURNING col`,
1963    /// `RETURNING expr AS alias`, and `RETURNING a, b, c` all work.
1964    fn parse_optional_returning(
1965        &mut self,
1966    ) -> Result<Option<Vec<crate::ast::SelectItem>>, ParseError> {
1967        let is_returning_kw = matches!(
1968            self.peek(),
1969            Token::Ident(s) if s.eq_ignore_ascii_case("returning")
1970        );
1971        if !is_returning_kw {
1972            return Ok(None);
1973        }
1974        self.advance();
1975        let mut items = Vec::new();
1976        loop {
1977            items.push(self.parse_select_item()?);
1978            if matches!(self.peek(), Token::Comma) {
1979                self.advance();
1980                continue;
1981            }
1982            break;
1983        }
1984        Ok(Some(items))
1985    }
1986
1987    /// v6.0.4 — parse the tail of an ALTER statement after the
1988    /// leading `ALTER` keyword has been consumed. Only one form is
1989    /// supported in v6.0.4:
1990    ///
1991    /// ```text
1992    /// ALTER INDEX <name> REBUILD [WITH (encoding = <enc>)]
1993    /// ```
1994    fn parse_alter_after_keyword(&mut self) -> Result<Statement, ParseError> {
1995        // ALTER INDEX <name> ... | ALTER TABLE <name> SET hot_tier_bytes = <n>
1996        // v7.14.0 — `ALTER TABLE ONLY` modifier (PG partition-
1997        // exclusion) is accepted by stripping the `ONLY` keyword
1998        // before the table parse.
1999        // v7.14.0 — `ALTER SEQUENCE / ALTER VIEW / ALTER OWNER`
2000        // and the long PG-dump tail are accepted as no-ops.
2001        match self.advance() {
2002            Token::Index => {}
2003            Token::Ident(s) | Token::QuotedIdent(s) if s.eq_ignore_ascii_case("index") => {}
2004            // v6.7.2 — ALTER TABLE t SET hot_tier_bytes = X
2005            // v7.14.0 — ALTER TABLE ONLY t … strip the `ONLY`.
2006            Token::Table => {
2007                if matches!(self.peek(), Token::Ident(s) if s.eq_ignore_ascii_case("only")) {
2008                    self.advance();
2009                }
2010                return self.parse_alter_table_after_keyword();
2011            }
2012            Token::Ident(s) | Token::QuotedIdent(s) if s.eq_ignore_ascii_case("table") => {
2013                if matches!(self.peek(), Token::Ident(s) if s.eq_ignore_ascii_case("only")) {
2014                    self.advance();
2015                }
2016                return self.parse_alter_table_after_keyword();
2017            }
2018            // v7.14.0 — ALTER SEQUENCE / ALTER VIEW / ALTER
2019            // FUNCTION / ALTER TYPE / ALTER DOMAIN / ALTER
2020            // DATABASE / ALTER USER / ALTER ROLE / ALTER SCHEMA
2021            // / ALTER OWNER / ALTER DEFAULT PRIVILEGES — accept
2022            // as no-op so pg_dump's tail loads.
2023            Token::Ident(s) | Token::QuotedIdent(s)
2024                if matches!(
2025                    s.to_ascii_lowercase().as_str(),
2026                    "sequence"
2027                        | "view"
2028                        | "function"
2029                        | "type"
2030                        | "domain"
2031                        | "database"
2032                        | "role"
2033                        | "schema"
2034                        | "owner"
2035                        | "default"
2036                        | "extension"
2037                        | "materialized"
2038                        | "policy"
2039                        | "publication"
2040                        | "subscription"
2041                ) =>
2042            {
2043                self.consume_until_statement_boundary();
2044                return Ok(Statement::Empty);
2045            }
2046            other => {
2047                return Err(self.err(format!(
2048                    "expected INDEX / TABLE / SEQUENCE / VIEW / FUNCTION / TYPE / OWNER / etc \
2049                     after ALTER, got {other:?}"
2050                )));
2051            }
2052        }
2053        let name = self.expect_ident_like()?;
2054        // REBUILD
2055        self.expect_keyword_ident("rebuild")?;
2056        // Optional: WITH (encoding = <enc>)
2057        let encoding = if matches!(self.peek(), Token::Ident(s) if s.eq_ignore_ascii_case("with")) {
2058            self.advance();
2059            if !matches!(self.peek(), Token::LParen) {
2060                return Err(self.err(format!(
2061                    "expected '(' after WITH in ALTER INDEX REBUILD, got {:?}",
2062                    self.peek()
2063                )));
2064            }
2065            self.advance();
2066            self.expect_keyword_ident("encoding")?;
2067            if !matches!(self.peek(), Token::Eq) {
2068                return Err(self.err(format!(
2069                    "expected '=' after encoding in ALTER INDEX REBUILD, got {:?}",
2070                    self.peek()
2071                )));
2072            }
2073            self.advance();
2074            let enc_ident = match self.advance() {
2075                Token::Ident(s) | Token::QuotedIdent(s) => s,
2076                other => {
2077                    return Err(self.err(format!("expected encoding name after =, got {other:?}")));
2078                }
2079            };
2080            let enc = match enc_ident.to_ascii_lowercase().as_str() {
2081                "f32" => VecEncoding::F32,
2082                "sq8" => VecEncoding::Sq8,
2083                "half" => VecEncoding::F16,
2084                other => {
2085                    return Err(self.err(format!(
2086                        "unknown vector encoding {other:?} in ALTER INDEX REBUILD; supported: F32, SQ8, HALF"
2087                    )));
2088                }
2089            };
2090            if !matches!(self.peek(), Token::RParen) {
2091                return Err(self.err(format!(
2092                    "expected ')' after encoding value, got {:?}",
2093                    self.peek()
2094                )));
2095            }
2096            self.advance();
2097            Some(enc)
2098        } else {
2099            None
2100        };
2101        Ok(Statement::AlterIndex(crate::ast::AlterIndexStatement {
2102            name,
2103            target: crate::ast::AlterIndexTarget::Rebuild { encoding },
2104        }))
2105    }
2106
2107    /// v6.7.2 — `ALTER TABLE <name> SET hot_tier_bytes = <n>`. The
2108    /// only `SET` form currently supported; future v6.7.x can add
2109    /// more SET subjects without changing the dispatch shape.
2110    /// v7.13.2 — mailrs round-6 S1: accepts comma-separated
2111    /// subactions. Single-subaction shape stays a 1-element vec.
2112    fn parse_alter_table_after_keyword(&mut self) -> Result<Statement, ParseError> {
2113        let table_name = self.expect_ident_like()?;
2114        let mut targets: Vec<crate::ast::AlterTableTarget> = Vec::new();
2115        loop {
2116            let subaction = self.parse_alter_table_subaction()?;
2117            // ADD COLUMN with inline REFERENCES emits both an
2118            // AddColumn and an AddForeignKey subaction; the
2119            // helper returns 1 or 2 items.
2120            targets.extend(subaction);
2121            if matches!(self.peek(), Token::Comma) {
2122                self.advance();
2123                continue;
2124            }
2125            break;
2126        }
2127        Ok(Statement::AlterTable(crate::ast::AlterTableStatement {
2128            name: table_name,
2129            targets,
2130        }))
2131    }
2132
2133    /// Parse one ALTER TABLE subaction. Returns a Vec because
2134    /// inline `REFERENCES` on `ADD COLUMN` produces both an
2135    /// AddColumn and an AddForeignKey entry (mailrs round-6 S3).
2136    fn parse_alter_table_subaction(
2137        &mut self,
2138    ) -> Result<Vec<crate::ast::AlterTableTarget>, ParseError> {
2139        match self.peek() {
2140            Token::Ident(s) if s.eq_ignore_ascii_case("set") => {
2141                self.advance();
2142                let setting = self.expect_ident_like()?;
2143                if !setting.eq_ignore_ascii_case("hot_tier_bytes") {
2144                    return Err(self.err(alloc::format!(
2145                        "ALTER TABLE SET: unknown setting {setting:?}; supported: hot_tier_bytes"
2146                    )));
2147                }
2148                if !matches!(self.peek(), Token::Eq) {
2149                    return Err(self.err(alloc::format!(
2150                        "expected '=' after hot_tier_bytes, got {:?}",
2151                        self.peek()
2152                    )));
2153                }
2154                self.advance();
2155                let n = self.expect_u64_literal()?;
2156                Ok(alloc::vec![crate::ast::AlterTableTarget::SetHotTierBytes(n)])
2157            }
2158            Token::Ident(s) if s.eq_ignore_ascii_case("add") => {
2159                self.advance();
2160                // v7.14.0 — ADD CONSTRAINT <name> { FOREIGN KEY |
2161                // PRIMARY KEY | UNIQUE | CHECK }. pg_dump emits
2162                // PRIMARY KEY this way; mysqldump emits both.
2163                // Peek-only dispatch (no advance) — `advance()`
2164                // destructively replaces consumed tokens with Eof,
2165                // so saved-pos restore would land on Eofs.
2166                if matches!(self.peek(), Token::Ident(s) if s.eq_ignore_ascii_case("constraint"))
2167                {
2168                    // The next-but-one ident is the constraint
2169                    // name; the one after THAT is the kind.
2170                    let kind_pos = self.pos + 2;
2171                    let kind = self.tokens.get(kind_pos).cloned();
2172                    if matches!(&kind, Some(Token::Ident(s)) if s.eq_ignore_ascii_case("foreign"))
2173                    {
2174                        let fk = self.parse_table_level_fk()?;
2175                        return Ok(alloc::vec![
2176                            crate::ast::AlterTableTarget::AddForeignKey(fk)
2177                        ]);
2178                    }
2179                    if matches!(&kind, Some(Token::Ident(s)) if s.eq_ignore_ascii_case("primary"))
2180                    {
2181                        self.advance(); // CONSTRAINT
2182                        let _name = self.expect_ident_like()?;
2183                        self.advance(); // PRIMARY
2184                        self.expect_keyword_ident("key")?;
2185                        let cols = self.parse_paren_ident_list("PRIMARY KEY")?;
2186                        return Ok(alloc::vec![
2187                            crate::ast::AlterTableTarget::AddTableConstraint(
2188                                crate::ast::TableConstraint::PrimaryKey {
2189                                    name: None,
2190                                    columns: cols,
2191                                }
2192                            )
2193                        ]);
2194                    }
2195                    if matches!(&kind, Some(Token::Ident(s)) if s.eq_ignore_ascii_case("unique"))
2196                    {
2197                        self.advance(); // CONSTRAINT
2198                        let _name = self.expect_ident_like()?;
2199                        self.advance(); // UNIQUE
2200                        let cols = self.parse_paren_ident_list("UNIQUE")?;
2201                        return Ok(alloc::vec![
2202                            crate::ast::AlterTableTarget::AddTableConstraint(
2203                                crate::ast::TableConstraint::Unique {
2204                                    name: None,
2205                                    columns: cols,
2206                                    nulls_not_distinct: false,
2207                                }
2208                            )
2209                        ]);
2210                    }
2211                    if matches!(&kind, Some(Token::Ident(s)) if s.eq_ignore_ascii_case("check"))
2212                    {
2213                        self.advance(); // CONSTRAINT
2214                        let _name = self.expect_ident_like()?;
2215                        self.advance(); // CHECK
2216                        if !matches!(self.peek(), Token::LParen) {
2217                            return Err(self.err(alloc::format!(
2218                                "expected '(' after CHECK, got {:?}", self.peek()
2219                            )));
2220                        }
2221                        self.advance();
2222                        let expr = self.parse_expr(0)?;
2223                        if matches!(self.peek(), Token::RParen) {
2224                            self.advance();
2225                        }
2226                        return Ok(alloc::vec![
2227                            crate::ast::AlterTableTarget::AddTableConstraint(
2228                                crate::ast::TableConstraint::Check { name: None, expr }
2229                            )
2230                        ]);
2231                    }
2232                    // Unknown kind — fall through to FK path which
2233                    // produces a descriptive parse error.
2234                }
2235                let is_fk = matches!(
2236                    self.peek(),
2237                    Token::Ident(s) if s.eq_ignore_ascii_case("constraint")
2238                        || s.eq_ignore_ascii_case("foreign")
2239                );
2240                if is_fk {
2241                    let fk = self.parse_table_level_fk()?;
2242                    return Ok(alloc::vec![crate::ast::AlterTableTarget::AddForeignKey(fk)]);
2243                }
2244                // v7.14.0 — bare ADD PRIMARY KEY / UNIQUE / CHECK
2245                // (no CONSTRAINT prefix) — same dispatch.
2246                match self.peek().clone() {
2247                    Token::Ident(s) if s.eq_ignore_ascii_case("primary") => {
2248                        self.advance();
2249                        self.expect_keyword_ident("key")?;
2250                        let cols = self.parse_paren_ident_list("PRIMARY KEY")?;
2251                        return Ok(alloc::vec![
2252                            crate::ast::AlterTableTarget::AddTableConstraint(
2253                                crate::ast::TableConstraint::PrimaryKey {
2254                                    name: None,
2255                                    columns: cols,
2256                                }
2257                            )
2258                        ]);
2259                    }
2260                    Token::Ident(s) if s.eq_ignore_ascii_case("unique") => {
2261                        self.advance();
2262                        let cols = self.parse_paren_ident_list("UNIQUE")?;
2263                        return Ok(alloc::vec![
2264                            crate::ast::AlterTableTarget::AddTableConstraint(
2265                                crate::ast::TableConstraint::Unique {
2266                                    name: None,
2267                                    columns: cols,
2268                                    nulls_not_distinct: false,
2269                                }
2270                            )
2271                        ]);
2272                    }
2273                    _ => {}
2274                }
2275                if matches!(self.peek(), Token::Ident(s) if s.eq_ignore_ascii_case("column")) {
2276                    self.advance();
2277                }
2278                let mut if_not_exists = false;
2279                if matches!(self.peek(), Token::Ident(s) if s.eq_ignore_ascii_case("if")) {
2280                    self.advance();
2281                    if !matches!(self.peek(), Token::Not) {
2282                        return Err(self.err(alloc::format!(
2283                            "expected NOT after IF in ALTER TABLE ADD COLUMN, got {:?}",
2284                            self.peek()
2285                        )));
2286                    }
2287                    self.advance();
2288                    if !matches!(self.peek(), Token::Ident(s) if s.eq_ignore_ascii_case("exists")) {
2289                        return Err(self.err(alloc::format!(
2290                            "expected EXISTS after IF NOT in ALTER TABLE ADD COLUMN, got {:?}",
2291                            self.peek()
2292                        )));
2293                    }
2294                    self.advance();
2295                    if_not_exists = true;
2296                }
2297                // v7.13.2 — mailrs round-6 S3: `ADD COLUMN col TYPE
2298                // REFERENCES other(col) [ON DELETE …]`. parse_column_def
2299                // returns ColumnDef + an optional inline FK.
2300                let (column, col_level_fk) = self.parse_column_def_with_fk()?;
2301                let col_name = column.name.clone();
2302                let mut out = alloc::vec![crate::ast::AlterTableTarget::AddColumn {
2303                    column,
2304                    if_not_exists,
2305                }];
2306                if let Some(mut fk) = col_level_fk {
2307                    if fk.columns.is_empty() {
2308                        fk.columns.push(col_name);
2309                    }
2310                    out.push(crate::ast::AlterTableTarget::AddForeignKey(fk));
2311                }
2312                Ok(out)
2313            }
2314            Token::Drop => {
2315                self.advance();
2316                // v7.13.3 — dispatch on the next token. mailrs round-7
2317                // S8 closed DROP COLUMN; round-6 S7 closed
2318                // DROP CONSTRAINT. Both share IF EXISTS / CASCADE /
2319                // RESTRICT modifiers.
2320                //   DROP CONSTRAINT [IF EXISTS] <name> [CASCADE|RESTRICT]
2321                //   DROP [COLUMN] [IF EXISTS] <col> [CASCADE|RESTRICT]
2322                let subject = match self.peek() {
2323                    Token::Ident(s) if s.eq_ignore_ascii_case("constraint") => {
2324                        self.advance();
2325                        "constraint"
2326                    }
2327                    Token::Ident(s) if s.eq_ignore_ascii_case("column") => {
2328                        self.advance();
2329                        "column"
2330                    }
2331                    // PG-canonical bare `DROP <col>` without COLUMN
2332                    // keyword is also valid; treat any other ident
2333                    // as the column name.
2334                    Token::Ident(_) | Token::QuotedIdent(_) => "column",
2335                    other => {
2336                        return Err(self.err(alloc::format!(
2337                            "expected COLUMN / CONSTRAINT after DROP in ALTER TABLE, got {other:?}"
2338                        )));
2339                    }
2340                };
2341                let mut if_exists = false;
2342                if matches!(self.peek(), Token::Ident(s) if s.eq_ignore_ascii_case("if")) {
2343                    let n1 = self.tokens.get(self.pos + 1);
2344                    if matches!(n1, Some(Token::Ident(s)) if s.eq_ignore_ascii_case("exists")) {
2345                        self.advance();
2346                        self.advance();
2347                        if_exists = true;
2348                    }
2349                }
2350                let name = self.expect_ident_like()?;
2351                let mut cascade = false;
2352                if matches!(
2353                    self.peek(),
2354                    Token::Ident(s) if s.eq_ignore_ascii_case("cascade")
2355                        || s.eq_ignore_ascii_case("restrict")
2356                ) {
2357                    if matches!(self.peek(), Token::Ident(s) if s.eq_ignore_ascii_case("cascade"))
2358                    {
2359                        cascade = true;
2360                    }
2361                    self.advance();
2362                }
2363                if subject == "constraint" {
2364                    Ok(alloc::vec![crate::ast::AlterTableTarget::DropForeignKey {
2365                        name,
2366                        if_exists,
2367                    }])
2368                } else {
2369                    Ok(alloc::vec![crate::ast::AlterTableTarget::DropColumn {
2370                        column: name,
2371                        if_exists,
2372                        cascade,
2373                    }])
2374                }
2375            }
2376            Token::Ident(s) if s.eq_ignore_ascii_case("alter") => {
2377                self.advance();
2378                if matches!(self.peek(), Token::Ident(s) if s.eq_ignore_ascii_case("column")) {
2379                    self.advance();
2380                }
2381                let col_name = self.expect_ident_like()?;
2382                match self.peek() {
2383                    Token::Ident(s) if s.eq_ignore_ascii_case("type") => {
2384                        self.advance();
2385                    }
2386                    // v7.14.0 — pg_dump emits BIGSERIAL via
2387                    // `ALTER TABLE … ALTER COLUMN id SET DEFAULT
2388                    // nextval('seq')` (the sequence is created
2389                    // separately). SPG's BIGSERIAL already uses
2390                    // AUTO_INCREMENT; accept SET DEFAULT / DROP
2391                    // DEFAULT / SET NOT NULL / DROP NOT NULL as
2392                    // engine no-ops by consuming the tail.
2393                    Token::Ident(s) if s.eq_ignore_ascii_case("set") => {
2394                        // ALTER COLUMN col SET DEFAULT … / SET NOT
2395                        // NULL — accept as a no-op on SPG (BIGSERIAL
2396                        // already auto-increments; nullability change
2397                        // would need row scan — deferred).
2398                        self.consume_until_statement_boundary();
2399                        return Ok(Vec::new());
2400                    }
2401                    Token::Ident(s) if s.eq_ignore_ascii_case("drop") => {
2402                        // ALTER COLUMN col DROP DEFAULT / DROP NOT NULL.
2403                        self.consume_until_statement_boundary();
2404                        return Ok(Vec::new());
2405                    }
2406                    other => {
2407                        return Err(self.err(alloc::format!(
2408                            "expected TYPE / SET / DROP after ALTER COLUMN <name>, got {other:?}"
2409                        )));
2410                    }
2411                }
2412                let new_type = self.parse_column_type_name()?;
2413                let using = if matches!(self.peek(), Token::Ident(s) if s.eq_ignore_ascii_case("using"))
2414                {
2415                    self.advance();
2416                    Some(self.parse_expr(0)?)
2417                } else {
2418                    None
2419                };
2420                Ok(alloc::vec![crate::ast::AlterTableTarget::AlterColumnType {
2421                    column: col_name,
2422                    new_type,
2423                    using,
2424                }])
2425            }
2426            other => Err(self.err(alloc::format!(
2427                "expected SET / ADD / DROP / ALTER in ALTER TABLE, got {other:?}"
2428            ))),
2429        }
2430    }
2431
2432    /// Consume a bare ident if its lowercase matches `kw`, else err.
2433    fn expect_keyword_ident(&mut self, kw: &str) -> Result<(), ParseError> {
2434        match self.advance() {
2435            Token::Ident(s) | Token::QuotedIdent(s) if s.eq_ignore_ascii_case(kw) => Ok(()),
2436            other => Err(ParseError {
2437                message: format!("expected {kw:?}, got {other:?}"),
2438                token_pos: self.pos.saturating_sub(1),
2439            }),
2440        }
2441    }
2442
2443    /// Accept either a quoted identifier (`"foo"`) or a quoted string
2444    /// literal (`'foo'`) — same shape used by CREATE USER for the
2445    /// username slot.
2446    fn expect_ident_or_string(&mut self) -> Result<String, ParseError> {
2447        match self.advance() {
2448            Token::Ident(s) | Token::QuotedIdent(s) | Token::String(s) => Ok(s),
2449            other => Err(ParseError {
2450                message: format!("expected identifier or string, got {other:?}"),
2451                token_pos: self.pos.saturating_sub(1),
2452            }),
2453        }
2454    }
2455
2456    fn expect_string_literal(&mut self) -> Result<String, ParseError> {
2457        match self.advance() {
2458            Token::String(s) => Ok(s),
2459            other => Err(ParseError {
2460                message: format!("expected quoted string, got {other:?}"),
2461                token_pos: self.pos.saturating_sub(1),
2462            }),
2463        }
2464    }
2465
2466    fn parse_select_stmt(&mut self) -> Result<Statement, ParseError> {
2467        // Caller dispatches on Token::Select; the inner helper handles
2468        // the rest. ORDER BY / LIMIT bind at this top level; UNION peers
2469        // get a fresh bare-select parse and may not have their own ORDER
2470        // BY / LIMIT.
2471        let mut head = self.parse_bare_select()?;
2472        while matches!(self.peek(), Token::Union) {
2473            self.advance();
2474            let kind = if matches!(self.peek(), Token::All) {
2475                self.advance();
2476                UnionKind::All
2477            } else {
2478                UnionKind::Distinct
2479            };
2480            let peer = self.parse_bare_select()?;
2481            head.unions.push((kind, peer));
2482        }
2483        head.order_by = if matches!(self.peek(), Token::Order) {
2484            self.advance();
2485            if !matches!(self.peek(), Token::By) {
2486                return Err(self.err(format!("expected BY after ORDER, got {:?}", self.peek())));
2487            }
2488            self.advance();
2489            // v6.4.0 — multi-key ORDER BY. Loop over comma-separated
2490            // `<expr> [ASC|DESC]` items.
2491            let mut keys = Vec::new();
2492            loop {
2493                let expr = self.parse_expr(0)?;
2494                let desc = if matches!(self.peek(), Token::Desc) {
2495                    self.advance();
2496                    true
2497                } else if matches!(self.peek(), Token::Asc) {
2498                    self.advance();
2499                    false
2500                } else {
2501                    false
2502                };
2503                keys.push(OrderBy { expr, desc });
2504                if matches!(self.peek(), Token::Comma) {
2505                    self.advance();
2506                } else {
2507                    break;
2508                }
2509            }
2510            keys
2511        } else {
2512            Vec::new()
2513        };
2514        head.limit = if matches!(self.peek(), Token::Limit) {
2515            self.advance();
2516            Some(self.parse_limit_expr("LIMIT")?)
2517        } else {
2518            None
2519        };
2520        head.offset = if matches!(self.peek(), Token::Offset) {
2521            self.advance();
2522            Some(self.parse_limit_expr("OFFSET")?)
2523        } else {
2524            None
2525        };
2526        Ok(Statement::Select(head))
2527    }
2528
2529    /// v7.9.24 — accept `LIMIT <int>` or `LIMIT $N`. mailrs H2.
2530    /// Bind value gets resolved during prepared-statement Execute;
2531    /// the Pratt expression parser would over-accept here (e.g.
2532    /// `LIMIT 5 + 5`), so we narrowly accept only the two PG forms.
2533    fn parse_limit_expr(&mut self, label: &str) -> Result<crate::ast::LimitExpr, ParseError> {
2534        match self.advance() {
2535            Token::Integer(n) if n >= 0 => u32::try_from(n)
2536                .map(crate::ast::LimitExpr::Literal)
2537                .map_err(|_| ParseError {
2538                    message: alloc::format!("{label} value too large: {n}"),
2539                    token_pos: self.pos.saturating_sub(1),
2540                }),
2541            Token::Placeholder(n) => Ok(crate::ast::LimitExpr::Placeholder(n)),
2542            other => Err(ParseError {
2543                message: alloc::format!(
2544                    "expected non-negative integer or $N placeholder after {label}, got {other:?}"
2545                ),
2546                token_pos: self.pos.saturating_sub(1),
2547            }),
2548        }
2549    }
2550
2551    /// Parse one SELECT block without ORDER BY / LIMIT / UNION chaining —
2552    /// just `[DISTINCT] items [FROM] [WHERE] [GROUP BY]`. Returned with
2553    /// `unions` empty and `order_by` / `limit` `None`; the top-level
2554    /// `parse_select_stmt` is responsible for filling those in.
2555    fn parse_bare_select(&mut self) -> Result<SelectStatement, ParseError> {
2556        if !matches!(self.peek(), Token::Select) {
2557            return Err(self.err(format!(
2558                "expected SELECT to start a query block, got {:?}",
2559                self.peek()
2560            )));
2561        }
2562        self.advance();
2563        let distinct = if matches!(self.peek(), Token::Distinct) {
2564            self.advance();
2565            true
2566        } else {
2567            false
2568        };
2569        let items = self.parse_select_list()?;
2570        let from = if matches!(self.peek(), Token::From) {
2571            self.advance();
2572            Some(self.parse_from_clause()?)
2573        } else {
2574            None
2575        };
2576        let where_ = if matches!(self.peek(), Token::Where) {
2577            self.advance();
2578            Some(self.parse_expr(0)?)
2579        } else {
2580            None
2581        };
2582        let mut group_by_all = false;
2583        let group_by = if matches!(self.peek(), Token::Group) {
2584            self.advance();
2585            if !matches!(self.peek(), Token::By) {
2586                return Err(self.err(format!("expected BY after GROUP, got {:?}", self.peek())));
2587            }
2588            self.advance();
2589            // v6.4.1 — `GROUP BY ALL` shortcut. Planner expands to
2590            // every non-aggregate SELECT-list item later.
2591            if matches!(self.peek(), Token::All) {
2592                self.advance();
2593                group_by_all = true;
2594                None
2595            } else {
2596                let mut groups = Vec::new();
2597                loop {
2598                    groups.push(self.parse_expr(0)?);
2599                    if matches!(self.peek(), Token::Comma) {
2600                        self.advance();
2601                    } else {
2602                        break;
2603                    }
2604                }
2605                Some(groups)
2606            }
2607        } else {
2608            None
2609        };
2610        let having = if matches!(self.peek(), Token::Having) {
2611            self.advance();
2612            Some(self.parse_expr(0)?)
2613        } else {
2614            None
2615        };
2616        Ok(SelectStatement {
2617            ctes: Vec::new(),
2618            distinct,
2619            items,
2620            from,
2621            where_,
2622            group_by,
2623            group_by_all,
2624            having,
2625            unions: Vec::new(),
2626            order_by: Vec::new(),
2627            limit: None,
2628            offset: None,
2629        })
2630    }
2631
2632    fn parse_create_table_stmt_after_create(&mut self) -> Result<Statement, ParseError> {
2633        // Caller already consumed CREATE; we're sitting on TABLE.
2634        debug_assert!(matches!(self.peek(), Token::Table));
2635        self.advance();
2636        let if_not_exists = self.consume_if_not_exists();
2637        let name = self.expect_ident_like()?;
2638        if !matches!(self.peek(), Token::LParen) {
2639            return Err(self.err(format!(
2640                "expected '(' after table name, got {:?}",
2641                self.peek()
2642            )));
2643        }
2644        self.advance();
2645        let mut columns = Vec::new();
2646        let mut foreign_keys: Vec<ForeignKeyConstraint> = Vec::new();
2647        let mut table_constraints: Vec<crate::ast::TableConstraint> = Vec::new();
2648        loop {
2649            // v7.6.0 / v7.9.18 — distinguish table-level constraint
2650            // clauses from column definitions. Constraints start
2651            // with `CONSTRAINT <name> …`, `FOREIGN KEY (…)`,
2652            // `PRIMARY KEY (…)`, or `UNIQUE (…)`. Anything else is
2653            // a column.
2654            if self.peek_table_level_pk_start() {
2655                table_constraints.push(self.parse_table_level_primary_key()?);
2656            } else if self.peek_table_level_unique_start() {
2657                table_constraints.push(self.parse_table_level_unique()?);
2658            } else if self.peek_table_level_check_start() {
2659                // v7.13.0 — table-level CHECK (mailrs round-5 G3).
2660                table_constraints.push(self.parse_table_level_check()?);
2661            } else if self.peek_mysql_inline_key_start() {
2662                // v7.14.0 — mysqldump emits inline `KEY name (cols)`,
2663                // `INDEX name (cols)`, `UNIQUE KEY name (cols)`,
2664                // `FULLTEXT KEY name (cols)`, `SPATIAL KEY name (cols)`
2665                // inside the column list. Skip name + paren list;
2666                // for UNIQUE KEY, register as a UC.
2667                if let Some(uc) = self.parse_mysql_inline_key()? {
2668                    table_constraints.push(uc);
2669                }
2670            } else if self.peek_constraint_or_fk_start() {
2671                foreign_keys.push(self.parse_table_level_fk()?);
2672            } else {
2673                let (col, col_level_fk) = self.parse_column_def_with_fk()?;
2674                // v7.13.0 — fold inline UNIQUE / CHECK column
2675                // constraints into table-level entries so the
2676                // engine path stays uniform.
2677                if col.is_unique {
2678                    table_constraints.push(crate::ast::TableConstraint::Unique {
2679                        name: None,
2680                        columns: alloc::vec![col.name.clone()],
2681                        nulls_not_distinct: false,
2682                    });
2683                }
2684                if let Some(check_expr) = col.check.clone() {
2685                    table_constraints.push(crate::ast::TableConstraint::Check {
2686                        name: None,
2687                        expr: check_expr,
2688                    });
2689                }
2690                columns.push(col);
2691                if let Some(fk) = col_level_fk {
2692                    foreign_keys.push(fk);
2693                }
2694            }
2695            match self.peek() {
2696                Token::Comma => {
2697                    self.advance();
2698                }
2699                Token::RParen => {
2700                    self.advance();
2701                    break;
2702                }
2703                other => {
2704                    return Err(
2705                        self.err(format!("expected ',' or ')' in column list, got {other:?}"))
2706                    );
2707                }
2708            }
2709        }
2710        if columns.is_empty() {
2711            return Err(self.err("CREATE TABLE requires at least one column".into()));
2712        }
2713        // v7.14.0 — consume MySQL/MariaDB table options after the
2714        // closing `)`. mysqldump emits things like
2715        // `ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_unicode_ci
2716        // AUTO_INCREMENT=42 ROW_FORMAT=DYNAMIC COMMENT='blog posts'`.
2717        // SPG accepts all forms as no-ops (each option is
2718        // `<ident> [=] <ident-or-string>` separated by whitespace).
2719        self.consume_mysql_table_options();
2720        Ok(Statement::CreateTable(CreateTableStatement {
2721            name,
2722            columns,
2723            if_not_exists,
2724            foreign_keys,
2725            table_constraints,
2726        }))
2727    }
2728
2729    /// v7.14.0 — true when the next tokens look like an inline
2730    /// MySQL index declaration: KEY / INDEX / UNIQUE KEY /
2731    /// UNIQUE INDEX / FULLTEXT [KEY|INDEX] / SPATIAL [KEY|INDEX]
2732    /// — each followed by an optional name + `(...)`. Critical:
2733    /// a column NAMED `key` / `index` (PG accepts as ident) must
2734    /// NOT be mistaken for the KEY constraint shape. We disambig
2735    /// by requiring the keyword to be followed by either `(` or
2736    /// `<ident> (`.
2737    fn peek_mysql_inline_key_start(&self) -> bool {
2738        let cur = self.peek();
2739        // Shapes:
2740        //   KEY (cols)
2741        //   KEY name (cols)
2742        //   INDEX (cols)
2743        //   INDEX name (cols)
2744        //   UNIQUE KEY [name] (cols)
2745        //   UNIQUE INDEX [name] (cols)
2746        //   FULLTEXT [KEY|INDEX] [name] (cols)
2747        //   SPATIAL [KEY|INDEX] [name] (cols)
2748        let after_keyword_followed_by_paren_or_ident_paren = |skip: usize| -> bool {
2749            // tokens at skip = the position AFTER the index-form
2750            // keywords (KEY/INDEX) have been consumed.
2751            match self.tokens.get(skip) {
2752                Some(Token::LParen) => true,
2753                Some(Token::Ident(_) | Token::QuotedIdent(_)) => {
2754                    matches!(self.tokens.get(skip + 1), Some(Token::LParen))
2755                }
2756                _ => false,
2757            }
2758        };
2759        match cur {
2760            Token::Ident(s)
2761                if s.eq_ignore_ascii_case("key") || s.eq_ignore_ascii_case("index") =>
2762            {
2763                after_keyword_followed_by_paren_or_ident_paren(self.pos + 1)
2764            }
2765            Token::Ident(s)
2766                if s.eq_ignore_ascii_case("fulltext") || s.eq_ignore_ascii_case("spatial") =>
2767            {
2768                let nxt = self.tokens.get(self.pos + 1);
2769                let after_after = if matches!(
2770                    nxt,
2771                    Some(Token::Ident(t))
2772                        if t.eq_ignore_ascii_case("key") || t.eq_ignore_ascii_case("index")
2773                ) {
2774                    self.pos + 2
2775                } else {
2776                    self.pos + 1
2777                };
2778                after_keyword_followed_by_paren_or_ident_paren(after_after)
2779            }
2780            Token::Ident(s) if s.eq_ignore_ascii_case("unique") => {
2781                let nxt = self.tokens.get(self.pos + 1);
2782                if !matches!(
2783                    nxt,
2784                    Some(Token::Ident(t))
2785                        if t.eq_ignore_ascii_case("key") || t.eq_ignore_ascii_case("index")
2786                ) {
2787                    return false;
2788                }
2789                after_keyword_followed_by_paren_or_ident_paren(self.pos + 2)
2790            }
2791            _ => false,
2792        }
2793    }
2794
2795    /// v7.14.0 — parse the MySQL inline KEY/INDEX form. Returns
2796    /// Some(TableConstraint::Unique) for UNIQUE KEY (so SPG
2797    /// enforces uniqueness on INSERT); returns None for plain
2798    /// KEY/INDEX/FULLTEXT/SPATIAL (accepted but doesn't create
2799    /// an index in v7.14 — only the surface is unblocked).
2800    fn parse_mysql_inline_key(
2801        &mut self,
2802    ) -> Result<Option<crate::ast::TableConstraint>, ParseError> {
2803        // Detect UNIQUE prefix.
2804        let is_unique = if matches!(self.peek(), Token::Ident(s) if s.eq_ignore_ascii_case("unique"))
2805        {
2806            self.advance();
2807            true
2808        } else {
2809            false
2810        };
2811        // Consume FULLTEXT / SPATIAL prefix (silent).
2812        if matches!(
2813            self.peek(),
2814            Token::Ident(s) if s.eq_ignore_ascii_case("fulltext") || s.eq_ignore_ascii_case("spatial")
2815        ) {
2816            self.advance();
2817        }
2818        // KEY / INDEX keyword.
2819        match self.peek() {
2820            Token::Ident(s) if s.eq_ignore_ascii_case("key") || s.eq_ignore_ascii_case("index") => {
2821                self.advance();
2822            }
2823            other => {
2824                return Err(self.err(alloc::format!(
2825                    "expected KEY/INDEX in inline index declaration, got {other:?}"
2826                )));
2827            }
2828        }
2829        // Optional index name (an ident before the `(`).
2830        if matches!(self.peek(), Token::Ident(_) | Token::QuotedIdent(_))
2831            && !matches!(self.tokens.get(self.pos + 1), Some(Token::LParen) | None)
2832        {
2833            // Identifier IS the name when followed by `(`; otherwise
2834            // it's part of a USING clause we don't model.
2835        }
2836        if matches!(self.peek(), Token::Ident(_) | Token::QuotedIdent(_))
2837            && matches!(
2838                self.tokens.get(self.pos + 1),
2839                Some(Token::LParen)
2840                    | Some(Token::Ident(_))
2841                    | Some(Token::QuotedIdent(_))
2842            )
2843        {
2844            // Skip name if it precedes the `(`.
2845            if matches!(self.tokens.get(self.pos + 1), Some(Token::LParen)) {
2846                self.advance();
2847            }
2848        }
2849        // Optional `USING BTREE` / `USING HASH` (MySQL).
2850        if matches!(self.peek(), Token::Ident(s) if s.eq_ignore_ascii_case("using")) {
2851            self.advance();
2852            if matches!(self.peek(), Token::Ident(_) | Token::QuotedIdent(_)) {
2853                self.advance();
2854            }
2855        }
2856        // Required column list `(col [, col]*)`.
2857        if !matches!(self.peek(), Token::LParen) {
2858            return Err(self.err(alloc::format!(
2859                "expected '(' in inline KEY/INDEX, got {:?}",
2860                self.peek()
2861            )));
2862        }
2863        self.advance();
2864        let mut cols: Vec<String> = Vec::new();
2865        loop {
2866            match self.peek().clone() {
2867                Token::Ident(s) | Token::QuotedIdent(s) => {
2868                    self.advance();
2869                    cols.push(s);
2870                }
2871                _ => break,
2872            }
2873            // Skip optional `(length)` per-column prefix.
2874            if matches!(self.peek(), Token::LParen) {
2875                let mut depth = 1usize;
2876                self.advance();
2877                while depth > 0 {
2878                    match self.peek() {
2879                        Token::LParen => depth += 1,
2880                        Token::RParen => depth -= 1,
2881                        Token::Eof => break,
2882                        _ => {}
2883                    }
2884                    self.advance();
2885                }
2886            }
2887            // Skip optional ASC / DESC.
2888            if matches!(self.peek(), Token::Ident(s) if s.eq_ignore_ascii_case("asc") || s.eq_ignore_ascii_case("desc"))
2889                || matches!(self.peek(), Token::Asc | Token::Desc)
2890            {
2891                self.advance();
2892            }
2893            if matches!(self.peek(), Token::Comma) {
2894                self.advance();
2895                continue;
2896            }
2897            break;
2898        }
2899        if matches!(self.peek(), Token::RParen) {
2900            self.advance();
2901        }
2902        // Trailing options on the inline index — comment / etc.
2903        // Skip until comma or `)`.
2904        while !matches!(self.peek(), Token::Comma | Token::RParen | Token::Eof) {
2905            self.advance();
2906        }
2907        if is_unique && !cols.is_empty() {
2908            Ok(Some(crate::ast::TableConstraint::Unique {
2909                name: None,
2910                columns: cols,
2911                nulls_not_distinct: false,
2912            }))
2913        } else {
2914            Ok(None)
2915        }
2916    }
2917
2918    /// v7.14.0 — consume MySQL/MariaDB table-options tail after
2919    /// the closing `)`: ENGINE=..., DEFAULT CHARSET=...,
2920    /// COLLATE=..., AUTO_INCREMENT=N, ROW_FORMAT=..., COMMENT='...'
2921    /// (in any order, separated by whitespace).
2922    fn consume_mysql_table_options(&mut self) {
2923        loop {
2924            // Heuristic: a table option is an ident (or `DEFAULT`
2925            // reserved keyword) followed by `=` and an
2926            // ident / string / integer.
2927            let name_lc = match self.peek().clone() {
2928                Token::Ident(s) | Token::QuotedIdent(s) => s.to_ascii_lowercase(),
2929                Token::Default => alloc::string::String::from("default"),
2930                _ => break,
2931            };
2932            let known = matches!(
2933                name_lc.as_str(),
2934                "engine"
2935                    | "default"
2936                    | "charset"
2937                    | "collate"
2938                    | "auto_increment"
2939                    | "row_format"
2940                    | "comment"
2941                    | "pack_keys"
2942                    | "stats_persistent"
2943                    | "stats_auto_recalc"
2944                    | "stats_sample_pages"
2945                    | "key_block_size"
2946                    | "tablespace"
2947                    | "min_rows"
2948                    | "max_rows"
2949                    | "checksum"
2950                    | "delay_key_write"
2951                    | "insert_method"
2952                    | "data"
2953                    | "index"
2954                    | "encryption"
2955                    | "compression"
2956            );
2957            if !known {
2958                break;
2959            }
2960            self.advance(); // option name
2961            // `DEFAULT` optional prefix is followed by `CHARSET` /
2962            // `COLLATE`; consume the next ident too.
2963            if name_lc == "default" {
2964                if matches!(self.peek(), Token::Ident(_) | Token::QuotedIdent(_)) {
2965                    self.advance();
2966                }
2967            }
2968            if matches!(self.peek(), Token::Eq) {
2969                self.advance();
2970            }
2971            match self.peek() {
2972                Token::Ident(_) | Token::QuotedIdent(_) | Token::String(_) | Token::Integer(_) => {
2973                    self.advance();
2974                }
2975                _ => {}
2976            }
2977        }
2978    }
2979
2980    /// v7.9.18 — true when the next tokens are `PRIMARY KEY (…)`.
2981    /// PRIMARY and KEY are bare idents; we look-ahead 2 to be
2982    /// sure (otherwise a column literally named `primary` would
2983    /// be mistaken).
2984    fn peek_table_level_pk_start(&self) -> bool {
2985        let cur = self.peek();
2986        let nxt = self.tokens.get(self.pos + 1);
2987        let nxt2 = self.tokens.get(self.pos + 2);
2988        let is_primary = matches!(cur, Token::Ident(s) if s.eq_ignore_ascii_case("primary"));
2989        let is_key = matches!(nxt, Some(Token::Ident(s)) if s.eq_ignore_ascii_case("key"));
2990        let is_lparen = matches!(nxt2, Some(Token::LParen));
2991        is_primary && is_key && is_lparen
2992    }
2993
2994    /// v7.9.18 — true when the next tokens are `UNIQUE (…)`.
2995    /// v7.13.0 — also matches `UNIQUE NULLS [NOT] DISTINCT (…)`
2996    /// (mailrs round-5 G10).
2997    fn peek_table_level_unique_start(&self) -> bool {
2998        let cur = self.peek();
2999        let is_unique = matches!(cur, Token::Ident(s) if s.eq_ignore_ascii_case("unique"));
3000        if !is_unique {
3001            return false;
3002        }
3003        let n1 = self.tokens.get(self.pos + 1);
3004        // Plain `UNIQUE (…)`.
3005        if matches!(n1, Some(Token::LParen)) {
3006            return true;
3007        }
3008        // `UNIQUE NULLS [NOT] DISTINCT (…)`.
3009        let is_nulls = matches!(n1, Some(Token::Ident(s)) if s.eq_ignore_ascii_case("nulls"));
3010        if !is_nulls {
3011            return false;
3012        }
3013        let n2 = self.tokens.get(self.pos + 2);
3014        let n3 = self.tokens.get(self.pos + 3);
3015        let n4 = self.tokens.get(self.pos + 4);
3016        // `UNIQUE NULLS DISTINCT (…)` — 4 tokens before `(`.
3017        if matches!(n2, Some(Token::Distinct)) && matches!(n3, Some(Token::LParen)) {
3018            return true;
3019        }
3020        // `UNIQUE NULLS NOT DISTINCT (…)` — 5 tokens before `(`.
3021        if matches!(n2, Some(Token::Not))
3022            && matches!(n3, Some(Token::Distinct))
3023            && matches!(n4, Some(Token::LParen))
3024        {
3025            return true;
3026        }
3027        false
3028    }
3029
3030    fn parse_table_level_primary_key(&mut self) -> Result<crate::ast::TableConstraint, ParseError> {
3031        self.advance(); // PRIMARY
3032        self.advance(); // KEY
3033        let columns = self.parse_paren_ident_list("PRIMARY KEY")?;
3034        Ok(crate::ast::TableConstraint::PrimaryKey {
3035            name: None,
3036            columns,
3037        })
3038    }
3039
3040    fn parse_table_level_unique(&mut self) -> Result<crate::ast::TableConstraint, ParseError> {
3041        self.advance(); // UNIQUE
3042        // v7.13.0 — optional `NULLS NOT DISTINCT` modifier
3043        // (mailrs round-5 G10, PG 15+ surface). Default behaviour
3044        // is `NULLS DISTINCT` per the SQL standard.
3045        let mut nulls_not_distinct = false;
3046        if matches!(self.peek(), Token::Ident(s) if s.eq_ignore_ascii_case("nulls")) {
3047            let n1 = self.tokens.get(self.pos + 1);
3048            let n2 = self.tokens.get(self.pos + 2);
3049            let is_not = matches!(n1, Some(Token::Not));
3050            let is_distinct = matches!(n2, Some(Token::Distinct));
3051            if is_not && is_distinct {
3052                self.advance(); // NULLS
3053                self.advance(); // NOT
3054                self.advance(); // DISTINCT
3055                nulls_not_distinct = true;
3056            } else if matches!(n1, Some(Token::Distinct)) {
3057                self.advance(); // NULLS
3058                self.advance(); // DISTINCT
3059            }
3060        }
3061        let columns = self.parse_paren_ident_list("UNIQUE")?;
3062        Ok(crate::ast::TableConstraint::Unique {
3063            name: None,
3064            columns,
3065            nulls_not_distinct,
3066        })
3067    }
3068
3069    /// v7.13.0 — table-level `CHECK (<expr>)` constraint
3070    /// (mailrs round-5 G3). Consumes `CHECK` then a parenthesised
3071    /// expression.
3072    fn parse_table_level_check(&mut self) -> Result<crate::ast::TableConstraint, ParseError> {
3073        self.advance(); // CHECK
3074        if !matches!(self.peek(), Token::LParen) {
3075            return Err(self.err(alloc::format!(
3076                "expected '(' after CHECK, got {:?}",
3077                self.peek()
3078            )));
3079        }
3080        self.advance();
3081        let expr = self.parse_expr(0)?;
3082        if !matches!(self.peek(), Token::RParen) {
3083            return Err(self.err(alloc::format!(
3084                "expected ')' to close CHECK predicate, got {:?}",
3085                self.peek()
3086            )));
3087        }
3088        self.advance();
3089        Ok(crate::ast::TableConstraint::Check { name: None, expr })
3090    }
3091
3092    /// v7.13.0 — `true` when the next token is `CHECK` (a bare ident).
3093    fn peek_table_level_check_start(&self) -> bool {
3094        matches!(self.peek(), Token::Ident(s) if s.eq_ignore_ascii_case("check"))
3095    }
3096
3097    fn parse_paren_ident_list(&mut self, ctx: &str) -> Result<Vec<String>, ParseError> {
3098        if !matches!(self.peek(), Token::LParen) {
3099            return Err(self.err(alloc::format!(
3100                "expected '(' after {ctx}, got {:?}",
3101                self.peek()
3102            )));
3103        }
3104        self.advance();
3105        let mut out = Vec::new();
3106        loop {
3107            out.push(self.expect_ident_like()?);
3108            match self.peek() {
3109                Token::Comma => {
3110                    self.advance();
3111                }
3112                Token::RParen => {
3113                    self.advance();
3114                    break;
3115                }
3116                other => {
3117                    return Err(self.err(alloc::format!(
3118                        "expected ',' or ')' in {ctx} list, got {other:?}"
3119                    )));
3120                }
3121            }
3122        }
3123        if out.is_empty() {
3124            return Err(self.err(alloc::format!("{ctx} requires at least one column")));
3125        }
3126        Ok(out)
3127    }
3128
3129    /// v7.6.0 — true when the next tokens are `CONSTRAINT <name>
3130    /// FOREIGN KEY` or bare `FOREIGN KEY`. Both introduce a
3131    /// table-level FK; a column def never starts with either keyword
3132    /// (column names are not in this reserved set).
3133    fn peek_constraint_or_fk_start(&self) -> bool {
3134        let is_constraint_kw = matches!(
3135            self.peek(),
3136            Token::Ident(s) if s.eq_ignore_ascii_case("constraint")
3137        );
3138        let is_foreign_kw = matches!(
3139            self.peek(),
3140            Token::Ident(s) if s.eq_ignore_ascii_case("foreign")
3141        );
3142        is_constraint_kw || is_foreign_kw
3143    }
3144
3145    /// v7.6.0 — parse a table-level FK clause:
3146    /// `[CONSTRAINT <name>] FOREIGN KEY (<col>[,<col>]*) REFERENCES
3147    /// <tbl> [(<pcol>[,<pcol>]*)] [ON DELETE <action>] [ON UPDATE <action>]`.
3148    fn parse_table_level_fk(&mut self) -> Result<ForeignKeyConstraint, ParseError> {
3149        let mut name: Option<String> = None;
3150        if matches!(self.peek(), Token::Ident(s) if s.eq_ignore_ascii_case("constraint")) {
3151            self.advance();
3152            name = Some(self.expect_ident_like()?);
3153        }
3154        // `FOREIGN`
3155        match self.advance() {
3156            Token::Ident(s) if s.eq_ignore_ascii_case("foreign") => {}
3157            other => return Err(self.err(format!("expected FOREIGN, got {other:?}"))),
3158        }
3159        // `KEY`
3160        match self.advance() {
3161            Token::Ident(s) if s.eq_ignore_ascii_case("key") => {}
3162            other => return Err(self.err(format!("expected KEY after FOREIGN, got {other:?}"))),
3163        }
3164        // `(col, col, ...)`
3165        if !matches!(self.peek(), Token::LParen) {
3166            return Err(self.err(format!(
3167                "expected '(' after FOREIGN KEY, got {:?}",
3168                self.peek()
3169            )));
3170        }
3171        self.advance();
3172        let mut columns = Vec::new();
3173        loop {
3174            columns.push(self.expect_ident_like()?);
3175            match self.peek() {
3176                Token::Comma => {
3177                    self.advance();
3178                }
3179                Token::RParen => {
3180                    self.advance();
3181                    break;
3182                }
3183                other => {
3184                    return Err(self.err(format!(
3185                        "expected ',' or ')' in FK column list, got {other:?}"
3186                    )));
3187                }
3188            }
3189        }
3190        if columns.is_empty() {
3191            return Err(self.err("FOREIGN KEY requires at least one column".into()));
3192        }
3193        let (parent_table, parent_columns, on_delete, on_update) =
3194            self.parse_references_tail(columns.len())?;
3195        Ok(ForeignKeyConstraint {
3196            name,
3197            columns,
3198            parent_table,
3199            parent_columns,
3200            on_delete,
3201            on_update,
3202        })
3203    }
3204
3205    /// v7.6.0 — parse the tail `REFERENCES <tbl> [(<pcol>...)] [ON
3206    /// DELETE <action>] [ON UPDATE <action>]`. `expected_arity` is
3207    /// the local column count, used to default the parent column
3208    /// list when omitted (SQL spec: parent's PK is implied).
3209    fn parse_references_tail(
3210        &mut self,
3211        expected_arity: usize,
3212    ) -> Result<(String, Vec<String>, FkAction, FkAction), ParseError> {
3213        match self.advance() {
3214            Token::Ident(s) if s.eq_ignore_ascii_case("references") => {}
3215            other => return Err(self.err(format!("expected REFERENCES, got {other:?}"))),
3216        }
3217        let parent_table = self.expect_ident_like()?;
3218        let mut parent_columns: Vec<String> = Vec::new();
3219        if matches!(self.peek(), Token::LParen) {
3220            self.advance();
3221            loop {
3222                parent_columns.push(self.expect_ident_like()?);
3223                match self.peek() {
3224                    Token::Comma => {
3225                        self.advance();
3226                    }
3227                    Token::RParen => {
3228                        self.advance();
3229                        break;
3230                    }
3231                    other => {
3232                        return Err(self.err(format!(
3233                            "expected ',' or ')' in REFERENCES column list, got {other:?}"
3234                        )));
3235                    }
3236                }
3237            }
3238        }
3239        if !parent_columns.is_empty() && parent_columns.len() != expected_arity {
3240            return Err(self.err(format!(
3241                "FK arity mismatch: {} local column(s) vs {} parent column(s)",
3242                expected_arity,
3243                parent_columns.len()
3244            )));
3245        }
3246        // v7.6.7 — accept and reject `[NOT] DEFERRABLE [INITIALLY
3247        // {DEFERRED | IMMEDIATE}]` so existing PG dumps don't fail
3248        // at parse time. SPG's single-writer model has no deferred
3249        // constraint window, so we surface this as a clean
3250        // unsupported-feature error rather than a syntax error.
3251        loop {
3252            if matches!(self.peek(), Token::Ident(s) if s.eq_ignore_ascii_case("deferrable")) {
3253                return Err(self.err(
3254                    "DEFERRABLE constraints are not supported (SPG is single-writer; \
3255                     constraints are always evaluated immediately at commit)"
3256                        .into(),
3257                ));
3258            }
3259            if matches!(self.peek(), Token::Not) {
3260                let look = self.tokens.get(self.pos + 1);
3261                if matches!(look, Some(Token::Ident(s)) if s.eq_ignore_ascii_case("deferrable")) {
3262                    // NOT DEFERRABLE — accept as the SPG default
3263                    // and consume both tokens silently.
3264                    self.advance();
3265                    self.advance();
3266                    // Optional `INITIALLY IMMEDIATE` clause.
3267                    if matches!(self.peek(), Token::Ident(s) if s.eq_ignore_ascii_case("initially"))
3268                    {
3269                        self.advance();
3270                        match self.advance() {
3271                            Token::Ident(s) if s.eq_ignore_ascii_case("immediate") => {}
3272                            other => {
3273                                return Err(self.err(format!(
3274                                    "expected IMMEDIATE after INITIALLY for NOT DEFERRABLE, \
3275                                     got {other:?}"
3276                                )));
3277                            }
3278                        }
3279                    }
3280                    continue;
3281                }
3282                break;
3283            }
3284            break;
3285        }
3286        // Optional `ON DELETE <action>` and `ON UPDATE <action>` in
3287        // either order, each at most once.
3288        let mut on_delete = FkAction::Restrict;
3289        let mut on_update = FkAction::Restrict;
3290        let mut seen_on_delete = false;
3291        let mut seen_on_update = false;
3292        loop {
3293            if !matches!(self.peek(), Token::On) {
3294                break;
3295            }
3296            self.advance();
3297            let which = self.advance();
3298            let action = self.parse_fk_action()?;
3299            match which {
3300                Token::Ident(ref s) if s.eq_ignore_ascii_case("delete") => {
3301                    if seen_on_delete {
3302                        return Err(self.err("ON DELETE specified twice".into()));
3303                    }
3304                    seen_on_delete = true;
3305                    on_delete = action;
3306                }
3307                Token::Ident(ref s) if s.eq_ignore_ascii_case("update") => {
3308                    if seen_on_update {
3309                        return Err(self.err("ON UPDATE specified twice".into()));
3310                    }
3311                    seen_on_update = true;
3312                    on_update = action;
3313                }
3314                other => {
3315                    return Err(
3316                        self.err(format!("expected DELETE or UPDATE after ON, got {other:?}"))
3317                    );
3318                }
3319            }
3320        }
3321        Ok((parent_table, parent_columns, on_delete, on_update))
3322    }
3323
3324    /// v7.6.0 — parse `CASCADE | RESTRICT | SET NULL | SET DEFAULT |
3325    /// NO ACTION`.
3326    fn parse_fk_action(&mut self) -> Result<FkAction, ParseError> {
3327        match self.advance() {
3328            Token::Ident(s) if s.eq_ignore_ascii_case("cascade") => Ok(FkAction::Cascade),
3329            Token::Ident(s) if s.eq_ignore_ascii_case("restrict") => Ok(FkAction::Restrict),
3330            Token::Ident(s) if s.eq_ignore_ascii_case("set") => match self.advance() {
3331                Token::Null => Ok(FkAction::SetNull),
3332                Token::Default => Ok(FkAction::SetDefault),
3333                other => Err(self.err(format!(
3334                    "expected NULL or DEFAULT after SET in FK action, got {other:?}"
3335                ))),
3336            },
3337            Token::Ident(s) if s.eq_ignore_ascii_case("no") => match self.advance() {
3338                Token::Ident(s) if s.eq_ignore_ascii_case("action") => Ok(FkAction::NoAction),
3339                other => Err(self.err(format!(
3340                    "expected ACTION after NO in FK action, got {other:?}"
3341                ))),
3342            },
3343            other => Err(self.err(format!(
3344                "expected CASCADE | RESTRICT | SET NULL | SET DEFAULT | NO ACTION, got {other:?}"
3345            ))),
3346        }
3347    }
3348
3349    /// Recognise the optional `IF NOT EXISTS` prefix shared by `CREATE
3350    /// TABLE` and `CREATE INDEX`. Returns `true` if consumed.
3351    fn consume_if_not_exists(&mut self) -> bool {
3352        // `IF` arrives as a bare Ident (we don't reserve it because it
3353        // also appears mid-expression in PG, though we don't support
3354        // those forms yet).
3355        let looks_like_if = matches!(self.peek(), Token::Ident(s) if s.eq_ignore_ascii_case("if"));
3356        if !looks_like_if {
3357            return false;
3358        }
3359        // Peek one ahead before committing: only consume IF when it's
3360        // actually `IF NOT EXISTS`.
3361        if !matches!(self.tokens.get(self.pos + 1), Some(Token::Not)) {
3362            return false;
3363        }
3364        if !matches!(
3365            self.tokens.get(self.pos + 2),
3366            Some(Token::Ident(s)) if s.eq_ignore_ascii_case("exists")
3367        ) {
3368            return false;
3369        }
3370        self.advance(); // IF
3371        self.advance(); // NOT
3372        self.advance(); // EXISTS
3373        true
3374    }
3375
3376    /// v7.12.4 — `IF EXISTS` modifier for DROP statements.
3377    /// Consumes IF EXISTS as a pair; returns false otherwise
3378    /// without consuming any tokens.
3379    fn consume_if_exists(&mut self) -> bool {
3380        let looks_like_if = matches!(self.peek(), Token::Ident(s) if s.eq_ignore_ascii_case("if"));
3381        if !looks_like_if {
3382            return false;
3383        }
3384        if !matches!(
3385            self.tokens.get(self.pos + 1),
3386            Some(Token::Ident(s)) if s.eq_ignore_ascii_case("exists")
3387        ) {
3388            return false;
3389        }
3390        self.advance(); // IF
3391        self.advance(); // EXISTS
3392        true
3393    }
3394
3395    /// v7.9.14 — consume `ASC | DESC | NULLS FIRST | NULLS LAST`
3396    /// qualifiers after an index column ref. ASC / DESC are
3397    /// reserved tokens; NULLS / FIRST / LAST are bare idents.
3398    /// We accept and discard them since single-column BTree
3399    /// stores rows in natural key order today.
3400    fn consume_optional_index_column_qualifiers(&mut self) {
3401        loop {
3402            match self.peek() {
3403                Token::Asc | Token::Desc => {
3404                    self.advance();
3405                }
3406                Token::Ident(s) if s.eq_ignore_ascii_case("nulls") => {
3407                    let look = self.tokens.get(self.pos + 1);
3408                    if matches!(
3409                        look,
3410                        Some(Token::Ident(k)) if k.eq_ignore_ascii_case("first")
3411                            || k.eq_ignore_ascii_case("last")
3412                    ) {
3413                        self.advance();
3414                        self.advance();
3415                    } else {
3416                        break;
3417                    }
3418                }
3419                _ => break,
3420            }
3421        }
3422    }
3423
3424    fn parse_create_index_stmt_after_create(
3425        &mut self,
3426        is_unique: bool,
3427    ) -> Result<Statement, ParseError> {
3428        // Caller consumed CREATE (and the optional UNIQUE); we're on INDEX.
3429        debug_assert!(matches!(self.peek(), Token::Index));
3430        self.advance();
3431        let if_not_exists = self.consume_if_not_exists();
3432        let name = self.expect_ident_like()?;
3433        if !matches!(self.peek(), Token::On) {
3434            return Err(self.err(format!(
3435                "expected ON after CREATE INDEX <name>, got {:?}",
3436                self.peek()
3437            )));
3438        }
3439        self.advance();
3440        let table = self.expect_ident_like()?;
3441        // Optional `USING <method>` — only recognised method in v2.0 is
3442        // `hnsw` (a single-layer NSW graph for kNN). `USING` is the bare
3443        // ident `using` (we don't promote it to a reserved keyword
3444        // because it isn't reserved anywhere else in our SQL surface).
3445        let method = if matches!(self.peek(), Token::Ident(s) if s.eq_ignore_ascii_case("using")) {
3446            self.advance();
3447            let m = self.expect_ident_like()?;
3448            match m.to_ascii_lowercase().as_str() {
3449                "hnsw" => IndexMethod::Hnsw,
3450                "btree" => IndexMethod::BTree,
3451                "brin" => IndexMethod::Brin,
3452                // v7.12.3 — real GIN inverted index over `tsvector`.
3453                // v7.9.26b's `USING gin` → BTree silent fallback is
3454                // gone; the engine validates that the indexed column
3455                // is `tsvector` at CREATE INDEX time.
3456                "gin" => IndexMethod::Gin,
3457                // v7.9.26b — PG `pg_dump` emits `USING gist` /
3458                // `USING spgist` / `USING hash` for their built-in
3459                // AMs that SPG doesn't have a matching
3460                // implementation for; degrade to BTree on the
3461                // leading column so the schema loads + the index
3462                // catalogue stays consistent. Operator pays the
3463                // planner cost only for the queries that would have
3464                // used the specialised AM.
3465                "gist" | "spgist" | "hash" => IndexMethod::BTree,
3466                // v7.11.3 — pgvector ships both `ivfflat` and
3467                // `hnsw`. Customers shouldn't have to choose
3468                // their on-disk index method based on what SPG
3469                // implements; accept `ivfflat` as a synonym for
3470                // `hnsw` so PG schemas using either method drop
3471                // in. The vector distance op (`<->` / `<#>` /
3472                // `<=>`) at query time still picks the metric.
3473                "ivfflat" => IndexMethod::Hnsw,
3474                other => {
3475                    return Err(self.err(alloc::format!(
3476                        "unknown index method {other:?}; supported: hnsw, btree, brin, gin (gist/spgist/hash accepted as BTree fallback)"
3477                    )));
3478                }
3479            }
3480        } else {
3481            IndexMethod::BTree
3482        };
3483        if !matches!(self.peek(), Token::LParen) {
3484            return Err(self.err(format!(
3485                "expected '(' before indexed column, got {:?}",
3486                self.peek()
3487            )));
3488        }
3489        self.advance();
3490        // v6.8.2 — accept either a bare column ident (legacy) or
3491        // an expression `fn(col, …)` for expression indexes.
3492        // Distinguish by peeking the token *after* the current
3493        // ident: `ident )` is the legacy column-only path;
3494        // anything else triggers the Pratt expression parser.
3495        // (`advance()` uses `mem::replace` to nil out the current
3496        // slot, so we can't save+rewind cleanly — peek-ahead via
3497        // direct index avoids the mutation.)
3498        let (column, expression): (String, Option<Expr>) = match self.peek().clone() {
3499            // Single column with `)` immediately after — fast path.
3500            // v7.9.29 — also: bare column followed by `,` (the
3501            // multi-column form `(a, b, c)`). Without this branch
3502            // the leading ident gets pulled into `parse_expr`
3503            // which then sets `expression = Some(Column(a))` and
3504            // breaks Display round-trip on the multi-column shape.
3505            Token::Ident(s) | Token::QuotedIdent(s)
3506                if matches!(
3507                    self.tokens.get(self.pos + 1),
3508                    Some(Token::RParen | Token::Comma)
3509                ) =>
3510            {
3511                self.advance();
3512                (s, None)
3513            }
3514            // v7.9.22 — single column followed by a pgvector
3515            // opclass ident: `(col vector_cosine_ops)`. mailrs G5.
3516            // SPG's HNSW currently picks its distance metric from
3517            // the query's operator (`<->` / `<#>` / `<=>`), so the
3518            // opclass is informational — accepted and discarded.
3519            // Recognised opclasses: vector_cosine_ops, vector_l2_ops,
3520            // vector_ip_ops, halfvec_*_ops, sq8_*_ops.
3521            Token::Ident(s) | Token::QuotedIdent(s)
3522                if matches!(
3523                    self.tokens.get(self.pos + 1),
3524                    Some(Token::Ident(op) | Token::QuotedIdent(op))
3525                        if is_vector_opclass_name(op)
3526                ) =>
3527            {
3528                self.advance(); // column name
3529                self.advance(); // opclass ident — drop
3530                (s, None)
3531            }
3532            Token::Ident(_) | Token::QuotedIdent(_) => {
3533                let key_expr = self.parse_expr(0)?;
3534                let primary = extract_first_column(&key_expr).ok_or_else(|| {
3535                    self.err("expression index key must reference at least one column".into())
3536                })?;
3537                (primary, Some(key_expr))
3538            }
3539            other => {
3540                return Err(self.err(format!(
3541                    "expected column ident or expression, got {other:?}"
3542                )));
3543            }
3544        };
3545        // v7.9.14 — accept extra comma-separated columns inside
3546        // the index key parens (`CREATE INDEX … (a, b, c)`).
3547        // mailrs F2. Each extra column may carry an optional
3548        // `ASC` / `DESC` / `NULLS FIRST` / `NULLS LAST` clause
3549        // — parsed and discarded; SPG doesn't honour direction
3550        // on a BTree index today (column ordering is intrinsic
3551        // to the storage). v7.10 will widen to genuine composite
3552        // index keys.
3553        let mut extra_columns: Vec<String> = Vec::new();
3554        // The leading column may also have ASC/DESC after it.
3555        self.consume_optional_index_column_qualifiers();
3556        while matches!(self.peek(), Token::Comma) {
3557            self.advance();
3558            let extra = self.expect_ident_like()?;
3559            self.consume_optional_index_column_qualifiers();
3560            extra_columns.push(extra);
3561        }
3562        if !matches!(self.peek(), Token::RParen) {
3563            return Err(self.err(format!(
3564                "expected ')' after indexed column / expression, got {:?}",
3565                self.peek()
3566            )));
3567        }
3568        self.advance();
3569        // v6.8.0 — optional `INCLUDE (col1, col2, …)` clause for
3570        // index-only-scan annotation. Bare ident (not a reserved
3571        // keyword) so we test by case-insensitive string match.
3572        let included_columns = if matches!(self.peek(), Token::Ident(s) if s.eq_ignore_ascii_case("include"))
3573        {
3574            self.advance();
3575            if !matches!(self.peek(), Token::LParen) {
3576                return Err(self.err(format!("expected '(' after INCLUDE, got {:?}", self.peek())));
3577            }
3578            self.advance();
3579            let mut cols = Vec::new();
3580            loop {
3581                cols.push(self.expect_ident_like()?);
3582                match self.peek() {
3583                    Token::Comma => {
3584                        self.advance();
3585                    }
3586                    Token::RParen => {
3587                        self.advance();
3588                        break;
3589                    }
3590                    other => {
3591                        return Err(self.err(format!(
3592                            "expected ',' or ')' in INCLUDE list, got {other:?}"
3593                        )));
3594                    }
3595                }
3596            }
3597            cols
3598        } else {
3599            Vec::new()
3600        };
3601        // v7.11.3 — accept and discard PG `WITH (k = v, ...)` index
3602        // storage parameters. pgvector emits `WITH (lists = N)` for
3603        // ivfflat and `WITH (m = N, ef_construction = M)` for hnsw;
3604        // SPG's HNSW picks its own parameters today (tunable via
3605        // env vars), so the WITH clause is informational and dropped.
3606        if matches!(self.peek(), Token::Ident(s) if s.eq_ignore_ascii_case("with")) {
3607            self.advance();
3608            if !matches!(self.peek(), Token::LParen) {
3609                return Err(self.err(format!(
3610                    "expected '(' after WITH in CREATE INDEX, got {:?}",
3611                    self.peek()
3612                )));
3613            }
3614            self.advance();
3615            loop {
3616                if matches!(self.peek(), Token::RParen) {
3617                    self.advance();
3618                    break;
3619                }
3620                // Drain `key = value` or bare `key` tokens.
3621                let _ = self.advance(); // key
3622                if matches!(self.peek(), Token::Eq) {
3623                    self.advance();
3624                    let _ = self.advance(); // value (int / string / ident)
3625                }
3626                match self.peek() {
3627                    Token::Comma => {
3628                        self.advance();
3629                    }
3630                    Token::RParen => {
3631                        self.advance();
3632                        break;
3633                    }
3634                    other => {
3635                        return Err(self.err(format!(
3636                            "expected ',' or ')' in WITH (…) clause, got {other:?}"
3637                        )));
3638                    }
3639                }
3640            }
3641        }
3642        // v6.8.1 — optional `WHERE <expr>` partial-index predicate.
3643        let partial_predicate = if matches!(self.peek(), Token::Where) {
3644            self.advance();
3645            Some(self.parse_expr(0)?)
3646        } else {
3647            None
3648        };
3649        // v7.9.29 — UNIQUE on a vector index (HNSW) makes no
3650        // sense: uniqueness over an ANN structure has no clean
3651        // semantics. Reject early. (BRIN UNIQUE is similarly
3652        // meaningless — block both.)
3653        if is_unique && !matches!(method, IndexMethod::BTree) {
3654            return Err(self.err(alloc::format!(
3655                "UNIQUE is only supported on BTree indexes, got USING {:?}",
3656                method
3657            )));
3658        }
3659        Ok(Statement::CreateIndex(CreateIndexStatement {
3660            name,
3661            table,
3662            column,
3663            method,
3664            if_not_exists,
3665            included_columns,
3666            partial_predicate,
3667            extra_columns: extra_columns.clone(),
3668            expression,
3669            is_unique,
3670        }))
3671    }
3672
3673    /// v7.6.0 — wraps `parse_column_def` and consumes an optional
3674    /// column-level `REFERENCES ...` clause. The trailing FK is
3675    /// normalised into table-level shape (single-element columns +
3676    /// parent_columns) so the engine sees one uniform constraint list.
3677    fn parse_column_def_with_fk(
3678        &mut self,
3679    ) -> Result<(ColumnDef, Option<ForeignKeyConstraint>), ParseError> {
3680        let col = self.parse_column_def()?;
3681        // Inline form: `col INT REFERENCES tbl(pcol) [ON DELETE ...] [ON UPDATE ...]`.
3682        let inline_references = matches!(
3683            self.peek(),
3684            Token::Ident(s) if s.eq_ignore_ascii_case("references")
3685        );
3686        if !inline_references {
3687            return Ok((col, None));
3688        }
3689        let (parent_table, parent_columns, on_delete, on_update) = self.parse_references_tail(1)?;
3690        let fk = ForeignKeyConstraint {
3691            name: None,
3692            columns: vec![col.name.clone()],
3693            parent_table,
3694            parent_columns,
3695            on_delete,
3696            on_update,
3697        };
3698        Ok((col, Some(fk)))
3699    }
3700
3701    /// v7.13.0 — parse a column type (consuming the type ident and
3702    /// any trailing parameters / `[]`), without surrounding column
3703    /// constraints. Used by ALTER COLUMN TYPE (mailrs round-5 G8).
3704    /// Returns the resolved `ColumnTypeName` plus implied
3705    /// `(auto_increment, not_null)` flags from PG SERIAL family
3706    /// shorthands — callers that don't expect those (ALTER COLUMN
3707    /// TYPE) can discard them.
3708    fn parse_column_type_name(&mut self) -> Result<ColumnTypeName, ParseError> {
3709        let (ty, _, _) = self.parse_type_with_implied_flags()?;
3710        Ok(ty)
3711    }
3712
3713    fn parse_type_with_implied_flags(
3714        &mut self,
3715    ) -> Result<(ColumnTypeName, bool, bool), ParseError> {
3716        let ty_ident = match self.advance() {
3717            Token::Ident(s) => s,
3718            other => {
3719                return Err(ParseError {
3720                    message: format!("expected column type, got {other:?}"),
3721                    token_pos: self.pos.saturating_sub(1),
3722                });
3723            }
3724        };
3725        let mut implied_auto_increment = false;
3726        let mut implied_not_null = false;
3727        let mut ty = match ty_ident.as_str() {
3728            // PG SERIAL family. Implies NOT NULL + AUTO_INCREMENT.
3729            "smallserial" | "serial2" => {
3730                implied_auto_increment = true;
3731                implied_not_null = true;
3732                ColumnTypeName::SmallInt
3733            }
3734            "serial" | "serial4" => {
3735                implied_auto_increment = true;
3736                implied_not_null = true;
3737                ColumnTypeName::Int
3738            }
3739            "bigserial" | "serial8" => {
3740                implied_auto_increment = true;
3741                implied_not_null = true;
3742                ColumnTypeName::BigInt
3743            }
3744            // MySQL flavours we accept by aliasing to the closest SPG
3745            // type. TINYINT covers MySQL's i8 — held inside SMALLINT
3746            // since SPG doesn't have a dedicated i8. MEDIUMINT (MySQL
3747            // 24-bit) → INT. UNSIGNED modifiers are consumed below
3748            // without semantic effect.
3749            "smallint" | "tinyint" => {
3750                // v7.14.0 — MySQL display-width on integers
3751                // (`TINYINT(1)`, `INT(11)`, `BIGINT(20)`). The
3752                // parenthesised number is purely cosmetic — it
3753                // doesn't change storage. Accept + discard.
3754                self.consume_optional_paren_size();
3755                ColumnTypeName::SmallInt
3756            }
3757            "int" | "integer" | "mediumint" => {
3758                self.consume_optional_paren_size();
3759                ColumnTypeName::Int
3760            }
3761            "bigint" => {
3762                self.consume_optional_paren_size();
3763                ColumnTypeName::BigInt
3764            }
3765            // DOUBLE / REAL are 64-bit IEEE — same as our FLOAT.
3766            // v7.13.0 — `DOUBLE PRECISION` (PG canonical spelling)
3767            // (mailrs round-5 G6). Consume the optional `PRECISION`
3768            // tail when the type keyword was `double` / `DOUBLE`.
3769            "float" | "double" | "real" => {
3770                if ty_ident.eq_ignore_ascii_case("double")
3771                    && matches!(self.peek(), Token::Ident(s) if s.eq_ignore_ascii_case("precision"))
3772                {
3773                    self.advance();
3774                }
3775                ColumnTypeName::Float
3776            }
3777            // v7.13.0 — `FLOAT8` (PG short form) maps the same as FLOAT.
3778            "float4" | "float8" => ColumnTypeName::Float,
3779            "text" => ColumnTypeName::Text,
3780            "bool" | "boolean" => ColumnTypeName::Bool,
3781            "varchar" => ColumnTypeName::Varchar(self.parse_paren_size("VARCHAR")?),
3782            "char" => ColumnTypeName::Char(self.parse_paren_size("CHAR")?),
3783            "vector" => {
3784                let dim = self.parse_paren_size("VECTOR")?;
3785                let encoding = self.parse_optional_vector_encoding()?;
3786                ColumnTypeName::Vector { dim, encoding }
3787            }
3788            "numeric" => {
3789                let (precision, scale) = self.parse_optional_numeric_params()?;
3790                ColumnTypeName::Numeric(precision, scale)
3791            }
3792            "date" => ColumnTypeName::Date,
3793            // MySQL's `DATETIME` is the same domain as standard
3794            // `TIMESTAMP` — accept both spellings.
3795            "timestamp" | "datetime" => {
3796                // v7.14.0 — PG canonical `TIMESTAMP WITH TIME ZONE`
3797                // / `TIMESTAMP WITHOUT TIME ZONE`. pg_dump emits
3798                // the full form. SPG canonicalises:
3799                //   - WITH TIME ZONE    → Timestamptz
3800                //   - WITHOUT TIME ZONE → Timestamp
3801                if matches!(self.peek(), Token::Ident(s) if s.eq_ignore_ascii_case("with"))
3802                    && matches!(self.tokens.get(self.pos + 1), Some(Token::Ident(s)) if s.eq_ignore_ascii_case("time"))
3803                    && matches!(self.tokens.get(self.pos + 2), Some(Token::Ident(s)) if s.eq_ignore_ascii_case("zone"))
3804                {
3805                    self.advance(); // WITH
3806                    self.advance(); // TIME
3807                    self.advance(); // ZONE
3808                    ColumnTypeName::Timestamptz
3809                } else if matches!(self.peek(), Token::Ident(s) if s.eq_ignore_ascii_case("without"))
3810                    && matches!(self.tokens.get(self.pos + 1), Some(Token::Ident(s)) if s.eq_ignore_ascii_case("time"))
3811                    && matches!(self.tokens.get(self.pos + 2), Some(Token::Ident(s)) if s.eq_ignore_ascii_case("zone"))
3812                {
3813                    self.advance(); // WITHOUT
3814                    self.advance(); // TIME
3815                    self.advance(); // ZONE
3816                    ColumnTypeName::Timestamp
3817                } else {
3818                    // Optional `(precision)` parenthesised modifier
3819                    // (PG fractional seconds precision). SPG stores
3820                    // µs always; accept + discard.
3821                    self.consume_optional_paren_size();
3822                    ColumnTypeName::Timestamp
3823                }
3824            }
3825            // v7.9.2 — `TIMESTAMPTZ` and full PG spelling
3826            // `TIMESTAMP WITH TIME ZONE`. Same storage as TIMESTAMP;
3827            // only PG-wire OID differs.
3828            "timestamptz" => ColumnTypeName::Timestamptz,
3829            // v4.9: JSON / JSONB. Stored as raw text — no parse-time
3830            // validation. We accept the JSONB spelling too because
3831            // most PG clients default to it; SPG doesn't distinguish
3832            // the two (no path-operator perf advantage to model).
3833            "json" => ColumnTypeName::Json,
3834            "jsonb" => ColumnTypeName::Jsonb,
3835            // v7.10.4 — PG `BYTEA` and the SPG `BYTES` alias both
3836            // surface here. Same storage shape; mapping happens at
3837            // the engine side via the ColumnTypeName → DataType
3838            // resolver. Literal forms are handled at coerce_value
3839            // time so the lexer stays untouched.
3840            "bytea" | "bytes" => ColumnTypeName::Bytes,
3841            // v7.12.0 — PG full-text search types. mailrs G-CRIT-3.
3842            // The actual `to_tsvector` / `@@` / `ts_rank` surface
3843            // arrives in v7.12.1+; the type itself loads here so
3844            // mailrs's `scripts/init-schema.sql` runs unmodified.
3845            "tsvector" => ColumnTypeName::TsVector,
3846            "tsquery" => ColumnTypeName::TsQuery,
3847            other => {
3848                return Err(ParseError {
3849                    message: format!("unsupported column type {other:?}"),
3850                    token_pos: self.pos.saturating_sub(1),
3851                });
3852            }
3853        };
3854        // MySQL's `UNSIGNED` modifier sits right after the type
3855        // keyword. SPG doesn't carry a separate unsigned variant —
3856        // accepting the keyword keeps existing schemas compatible
3857        // without changing semantics. Drop it silently.
3858        if matches!(self.peek(), Token::Ident(s) if s.eq_ignore_ascii_case("unsigned")) {
3859            self.advance();
3860        }
3861        // v7.14.0 — mysqldump emits `<type> CHARACTER SET <name>` and
3862        // `<type> COLLATE <name>` post-fixes on text columns. SPG
3863        // stores text as UTF-8 always and orders bytewise; charset /
3864        // collate are accepted as no-ops so PG / MySQL / MariaDB
3865        // dumps load without parser noise.
3866        loop {
3867            if matches!(self.peek(), Token::Ident(s) if s.eq_ignore_ascii_case("character"))
3868                && matches!(self.tokens.get(self.pos + 1), Some(Token::Ident(s)) if s.eq_ignore_ascii_case("set"))
3869            {
3870                self.advance(); // CHARACTER
3871                self.advance(); // SET
3872                if matches!(self.peek(), Token::Ident(_) | Token::QuotedIdent(_) | Token::String(_))
3873                {
3874                    self.advance();
3875                }
3876                continue;
3877            }
3878            if matches!(self.peek(), Token::Ident(s) if s.eq_ignore_ascii_case("collate")) {
3879                self.advance(); // COLLATE
3880                if matches!(self.peek(), Token::Ident(_) | Token::QuotedIdent(_) | Token::String(_))
3881                {
3882                    self.advance();
3883                }
3884                continue;
3885            }
3886            break;
3887        }
3888        // v7.10.10 — postfix `[]` widens TEXT → TEXT[]. PG accepts
3889        // `TYPE[]` after any base type; v7.10 only models TEXT[]
3890        // so we reject other base types here. mailrs uses TEXT[]
3891        // for labels / addresses / message-on-thread.
3892        if matches!(self.peek(), Token::LBracket) {
3893            self.advance();
3894            if !matches!(self.peek(), Token::RBracket) {
3895                return Err(self.err(alloc::format!(
3896                    "TEXT[] takes no dimension; got {:?}",
3897                    self.peek()
3898                )));
3899            }
3900            self.advance();
3901            // v7.11.13 — widened to INT[] and BIGINT[] in addition
3902            // to TEXT[]. Other base types (BOOL[], NUMERIC[], etc.)
3903            // still error here.
3904            ty = match ty {
3905                ColumnTypeName::Text => ColumnTypeName::TextArray,
3906                ColumnTypeName::Int => ColumnTypeName::IntArray,
3907                ColumnTypeName::BigInt => ColumnTypeName::BigIntArray,
3908                other => {
3909                    return Err(self.err(alloc::format!(
3910                        "v7.11 supports TEXT[] / INT[] / BIGINT[] only; got {other:?}[]"
3911                    )));
3912                }
3913            };
3914        }
3915        Ok((ty, implied_auto_increment, implied_not_null))
3916    }
3917
3918    fn parse_column_def(&mut self) -> Result<ColumnDef, ParseError> {
3919        let name = self.expect_ident_like()?;
3920        let (ty, implied_auto_increment, implied_not_null) =
3921            self.parse_type_with_implied_flags()?;
3922        // Column constraints: `DEFAULT <expr>`, `NOT NULL`, and the
3923        // MySQL-flavoured `AUTO_INCREMENT` may appear in any order;
3924        // each at most once.
3925        let mut default: Option<Expr> = None;
3926        let mut nullable = !implied_not_null;
3927        let mut nullability_seen = implied_not_null;
3928        let mut auto_increment = implied_auto_increment;
3929        let mut is_primary_key = false;
3930        let mut is_unique = false;
3931        let mut check: Option<Expr> = None;
3932        loop {
3933            if matches!(self.peek(), Token::Default) {
3934                if default.is_some() {
3935                    return Err(self.err("DEFAULT specified twice".into()));
3936                }
3937                self.advance();
3938                default = Some(self.parse_expr(0)?);
3939                continue;
3940            }
3941            if matches!(self.peek(), Token::Not) {
3942                if nullability_seen {
3943                    return Err(self.err("NOT NULL specified twice".into()));
3944                }
3945                self.advance();
3946                if !matches!(self.peek(), Token::Null) {
3947                    return Err(self.err(format!(
3948                        "expected NULL after NOT in column def, got {:?}",
3949                        self.peek()
3950                    )));
3951                }
3952                self.advance();
3953                nullable = false;
3954                nullability_seen = true;
3955                continue;
3956            }
3957            // v7.14.0 — MySQL accepts a bare `NULL` as an explicit
3958            // "this column is nullable" marker (the default in
3959            // standard SQL anyway). mysqldump emits it routinely
3960            // (`col TYPE NULL DEFAULT NULL` for nullable
3961            // timestamps etc). Accept + no-op.
3962            if matches!(self.peek(), Token::Null) {
3963                if nullability_seen && !nullable {
3964                    return Err(self.err(
3965                        "column declared NOT NULL then NULL — pick one".into(),
3966                    ));
3967                }
3968                self.advance();
3969                nullable = true;
3970                nullability_seen = true;
3971                continue;
3972            }
3973            // `AUTO_INCREMENT` or its abbreviated form `AUTOINCREMENT`
3974            // arrives as a bare Ident. Match either, case-insensitive.
3975            if let Token::Ident(s) = self.peek()
3976                && (s.eq_ignore_ascii_case("auto_increment")
3977                    || s.eq_ignore_ascii_case("autoincrement"))
3978            {
3979                if auto_increment {
3980                    return Err(self.err("AUTO_INCREMENT specified twice".into()));
3981                }
3982                self.advance();
3983                auto_increment = true;
3984                continue;
3985            }
3986            // v7.9.13 — inline `PRIMARY KEY` column constraint
3987            // (mailrs F1). Implies `NOT NULL`. The engine creates
3988            // a BTree index for the PK column at CREATE TABLE time
3989            // so FK parent-side index lookups resolve.
3990            if let Token::Ident(s) = self.peek()
3991                && s.eq_ignore_ascii_case("primary")
3992            {
3993                if is_primary_key {
3994                    return Err(self.err("PRIMARY KEY specified twice".into()));
3995                }
3996                // Peek-ahead for the required `KEY` token.
3997                let next = self.tokens.get(self.pos + 1);
3998                let next_is_key = matches!(
3999                    next,
4000                    Some(Token::Ident(k)) if k.eq_ignore_ascii_case("key")
4001                );
4002                if !next_is_key {
4003                    return Err(self.err(format!(
4004                        "expected KEY after PRIMARY in column def, got {:?}",
4005                        next
4006                    )));
4007                }
4008                self.advance(); // PRIMARY
4009                self.advance(); // KEY
4010                is_primary_key = true;
4011                if nullability_seen && nullable {
4012                    return Err(self.err(
4013                        "column declared NULL but inline PRIMARY KEY implies NOT NULL".into(),
4014                    ));
4015                }
4016                nullable = false;
4017                nullability_seen = true;
4018                continue;
4019            }
4020            // v7.13.0 — inline `UNIQUE` column constraint
4021            // (mailrs round-5 G2). Fold into a single-column
4022            // table-level UNIQUE at CREATE TABLE post-process time.
4023            if let Token::Ident(s) = self.peek()
4024                && s.eq_ignore_ascii_case("unique")
4025            {
4026                if is_unique {
4027                    return Err(self.err("UNIQUE specified twice".into()));
4028                }
4029                self.advance();
4030                is_unique = true;
4031                continue;
4032            }
4033            // v7.13.0 — inline `CHECK (<expr>)` column constraint
4034            // (mailrs round-5 G3). PG semantics: column-level
4035            // CHECK is equivalent to a table-level CHECK. Multiple
4036            // inline CHECKs on the same column AND together.
4037            if let Token::Ident(s) = self.peek()
4038                && s.eq_ignore_ascii_case("check")
4039            {
4040                self.advance();
4041                if !matches!(self.peek(), Token::LParen) {
4042                    return Err(self.err(alloc::format!(
4043                        "expected '(' after CHECK in column def, got {:?}",
4044                        self.peek()
4045                    )));
4046                }
4047                self.advance();
4048                let pred = self.parse_expr(0)?;
4049                if !matches!(self.peek(), Token::RParen) {
4050                    return Err(self.err(alloc::format!(
4051                        "expected ')' to close CHECK predicate, got {:?}",
4052                        self.peek()
4053                    )));
4054                }
4055                self.advance();
4056                check = Some(match check.take() {
4057                    Some(prev) => Expr::Binary {
4058                        op: BinOp::And,
4059                        lhs: Box::new(prev),
4060                        rhs: Box::new(pred),
4061                    },
4062                    None => pred,
4063                });
4064                continue;
4065            }
4066            break;
4067        }
4068        Ok(ColumnDef {
4069            name,
4070            ty,
4071            nullable,
4072            default,
4073            auto_increment,
4074            is_primary_key,
4075            is_unique,
4076            check,
4077        })
4078    }
4079
4080    /// `NUMERIC` may appear without parameters, with one (precision
4081    /// only, scale=0), or with both. Returns `(precision, scale)` with
4082    /// 0 = unspecified for the bare form.
4083    fn parse_optional_numeric_params(&mut self) -> Result<(u8, u8), ParseError> {
4084        if !matches!(self.peek(), Token::LParen) {
4085            // Bare `NUMERIC` — PG treats this as "unlimited precision";
4086            // we surface it as precision=0 to mean "unconstrained" so
4087            // the engine doesn't need a separate variant.
4088            return Ok((0, 0));
4089        }
4090        self.advance();
4091        let precision = match self.advance() {
4092            Token::Integer(n) if (1..=38).contains(&n) => u8::try_from(n).expect("range-checked"),
4093            other => {
4094                return Err(ParseError {
4095                    message: format!(
4096                        "NUMERIC precision must be an integer in 1..=38, got {other:?}"
4097                    ),
4098                    token_pos: self.pos.saturating_sub(1),
4099                });
4100            }
4101        };
4102        let scale = if matches!(self.peek(), Token::Comma) {
4103            self.advance();
4104            match self.advance() {
4105                Token::Integer(n) if (0..=i64::from(precision)).contains(&n) => {
4106                    u8::try_from(n).expect("range-checked")
4107                }
4108                other => {
4109                    return Err(ParseError {
4110                        message: format!(
4111                            "NUMERIC scale must be a non-negative integer ≤ precision, got {other:?}"
4112                        ),
4113                        token_pos: self.pos.saturating_sub(1),
4114                    });
4115                }
4116            }
4117        } else {
4118            0
4119        };
4120        if !matches!(self.peek(), Token::RParen) {
4121            return Err(self.err(format!(
4122                "expected ')' to close NUMERIC params, got {:?}",
4123                self.peek()
4124            )));
4125        }
4126        self.advance();
4127        Ok((precision, scale))
4128    }
4129
4130    /// Parse `(N)` where `N` is a positive integer literal — used by the
4131    /// `VARCHAR`/`CHAR`/`VECTOR` column types. `label` is the type name
4132    /// for the error message.
4133    /// v6.0.1: parse the optional `USING <encoding>` clause that
4134    /// follows `VECTOR(N)` in a column definition. Missing clause
4135    /// → `VecEncoding::F32` (pre-v6 default). Unknown encoding
4136    /// ident → `ParseError` listing the encodings recognised today.
4137    fn parse_optional_vector_encoding(&mut self) -> Result<VecEncoding, ParseError> {
4138        if !matches!(self.peek(), Token::Ident(s) if s.eq_ignore_ascii_case("using")) {
4139            return Ok(VecEncoding::F32);
4140        }
4141        // v7.13.2 — mailrs round-6 S6: `USING` after a vector type
4142        // overlaps with `ALTER COLUMN TYPE … USING <expr>`. Only
4143        // consume the token when the very next token is a known
4144        // vector-encoding keyword (SQ8 / HALF). Otherwise leave
4145        // `USING` for the caller — it's the rewrite-expression form.
4146        let n1 = self.tokens.get(self.pos + 1);
4147        let next_is_encoding = matches!(
4148            n1,
4149            Some(Token::Ident(s))
4150                if s.eq_ignore_ascii_case("sq8") || s.eq_ignore_ascii_case("half")
4151        );
4152        if !next_is_encoding {
4153            return Ok(VecEncoding::F32);
4154        }
4155        self.advance();
4156        let enc_ident = match self.advance() {
4157            Token::Ident(s) => s,
4158            other => {
4159                return Err(self.err(format!(
4160                    "expected vector encoding after USING, got {other:?}"
4161                )));
4162            }
4163        };
4164        match enc_ident.to_ascii_lowercase().as_str() {
4165            "sq8" => Ok(VecEncoding::Sq8),
4166            // v6.0.3: `HALF` (pgvector convention) selects IEEE-754
4167            // binary16 per-element storage.
4168            "half" => Ok(VecEncoding::F16),
4169            other => Err(self.err(format!(
4170                "unknown vector encoding {other:?}; supported: SQ8, HALF"
4171            ))),
4172        }
4173    }
4174
4175    /// v7.14.0 — consume an optional MySQL display-width
4176    /// parenthesised number after an integer type, returning
4177    /// nothing. `TINYINT(1)` etc.
4178    fn consume_optional_paren_size(&mut self) {
4179        if !matches!(self.peek(), Token::LParen) {
4180            return;
4181        }
4182        self.advance();
4183        // Skip until matching RParen (allow nested or any tokens).
4184        let mut depth = 1usize;
4185        while depth > 0 {
4186            match self.peek() {
4187                Token::LParen => depth += 1,
4188                Token::RParen => depth -= 1,
4189                Token::Eof => return,
4190                _ => {}
4191            }
4192            self.advance();
4193        }
4194    }
4195
4196    fn parse_paren_size(&mut self, label: &str) -> Result<u32, ParseError> {
4197        if !matches!(self.peek(), Token::LParen) {
4198            return Err(self.err(format!("{label} type requires (N), got {:?}", self.peek())));
4199        }
4200        self.advance();
4201        let n = match self.advance() {
4202            Token::Integer(n) if n > 0 => u32::try_from(n).map_err(|_| ParseError {
4203                message: format!("{label} size too large: {n}"),
4204                token_pos: self.pos.saturating_sub(1),
4205            })?,
4206            other => {
4207                return Err(ParseError {
4208                    message: format!("expected positive integer {label} size, got {other:?}"),
4209                    token_pos: self.pos.saturating_sub(1),
4210                });
4211            }
4212        };
4213        if !matches!(self.peek(), Token::RParen) {
4214            return Err(self.err(format!(
4215                "expected ')' after {label} size, got {:?}",
4216                self.peek()
4217            )));
4218        }
4219        self.advance();
4220        Ok(n)
4221    }
4222
4223    fn parse_insert_stmt(&mut self) -> Result<Statement, ParseError> {
4224        debug_assert!(matches!(self.peek(), Token::Insert));
4225        self.advance();
4226        if !matches!(self.peek(), Token::Into) {
4227            return Err(self.err(format!("expected INTO after INSERT, got {:?}", self.peek())));
4228        }
4229        self.advance();
4230        let table = self.expect_ident_like()?;
4231        // Optional column list — `INSERT INTO t (a, b) VALUES ...`.
4232        let columns = if matches!(self.peek(), Token::LParen) {
4233            self.advance();
4234            let mut names = Vec::new();
4235            loop {
4236                names.push(self.expect_ident_like()?);
4237                match self.peek() {
4238                    Token::Comma => {
4239                        self.advance();
4240                    }
4241                    Token::RParen => {
4242                        self.advance();
4243                        break;
4244                    }
4245                    other => {
4246                        return Err(self.err(format!(
4247                            "expected ',' or ')' in INSERT column list, got {other:?}"
4248                        )));
4249                    }
4250                }
4251            }
4252            Some(names)
4253        } else {
4254            None
4255        };
4256        // v7.13.0 — `INSERT INTO t [(cols)] SELECT …` (mailrs
4257        // round-5 G4). Dispatch on VALUES vs SELECT.
4258        if matches!(self.peek(), Token::Select) {
4259            let select_stmt = match self.parse_select_stmt()? {
4260                Statement::Select(s) => s,
4261                other => {
4262                    return Err(self.err(alloc::format!(
4263                        "expected SELECT after INSERT INTO ... target, got {other:?}"
4264                    )));
4265                }
4266            };
4267            let on_conflict = self.parse_optional_on_conflict()?;
4268            let returning = self.parse_optional_returning()?;
4269            return Ok(Statement::Insert(InsertStatement {
4270                table,
4271                columns,
4272                rows: Vec::new(),
4273                select_source: Some(Box::new(select_stmt)),
4274                on_conflict,
4275                returning,
4276            }));
4277        }
4278        if !matches!(self.peek(), Token::Values) {
4279            return Err(self.err(format!(
4280                "expected VALUES or SELECT after table name, got {:?}",
4281                self.peek()
4282            )));
4283        }
4284        self.advance();
4285        if !matches!(self.peek(), Token::LParen) {
4286            return Err(self.err(format!("expected '(' after VALUES, got {:?}", self.peek())));
4287        }
4288        let mut rows = Vec::new();
4289        loop {
4290            // Each iteration consumes one `(expr, expr, …)` tuple.
4291            if !matches!(self.peek(), Token::LParen) {
4292                return Err(self.err(format!(
4293                    "expected '(' for next VALUES tuple, got {:?}",
4294                    self.peek()
4295                )));
4296            }
4297            self.advance();
4298            let mut tuple = Vec::new();
4299            loop {
4300                tuple.push(self.parse_expr(0)?);
4301                match self.peek() {
4302                    Token::Comma => {
4303                        self.advance();
4304                    }
4305                    Token::RParen => {
4306                        self.advance();
4307                        break;
4308                    }
4309                    other => {
4310                        return Err(self.err(format!(
4311                            "expected ',' or ')' in VALUES tuple, got {other:?}"
4312                        )));
4313                    }
4314                }
4315            }
4316            if tuple.is_empty() {
4317                return Err(self.err("INSERT VALUES tuple requires at least one value".into()));
4318            }
4319            rows.push(tuple);
4320            // Continue with comma-separated tuples.
4321            if matches!(self.peek(), Token::Comma) {
4322                self.advance();
4323            } else {
4324                break;
4325            }
4326        }
4327        let on_conflict = self.parse_optional_on_conflict()?;
4328        let returning = self.parse_optional_returning()?;
4329        Ok(Statement::Insert(InsertStatement {
4330            table,
4331            columns,
4332            rows,
4333            select_source: None,
4334            on_conflict,
4335            returning,
4336        }))
4337    }
4338
4339    /// v7.9.7 — parse the optional `ON CONFLICT (cols) DO …`
4340    /// clause sitting between the INSERT body and the trailing
4341    /// RETURNING. All keywords come in as bare idents; `ON` is
4342    /// a reserved Token though.
4343    fn parse_optional_on_conflict(
4344        &mut self,
4345    ) -> Result<Option<crate::ast::OnConflictClause>, ParseError> {
4346        if !matches!(self.peek(), Token::On) {
4347            return Ok(None);
4348        }
4349        // Peek further: we want exactly "ON CONFLICT ...". If the
4350        // next ident isn't "conflict", let some other parser handle.
4351        let next_is_conflict = matches!(
4352            self.tokens.get(self.pos + 1),
4353            Some(Token::Ident(s) | Token::QuotedIdent(s)) if s.eq_ignore_ascii_case("conflict")
4354        );
4355        if !next_is_conflict {
4356            return Ok(None);
4357        }
4358        self.advance(); // ON
4359        self.advance(); // CONFLICT
4360        // Optional `(col [, col]*)` target list.
4361        let mut target_columns: Vec<String> = Vec::new();
4362        if matches!(self.peek(), Token::LParen) {
4363            self.advance();
4364            loop {
4365                target_columns.push(self.expect_ident_like()?);
4366                match self.peek() {
4367                    Token::Comma => {
4368                        self.advance();
4369                    }
4370                    Token::RParen => {
4371                        self.advance();
4372                        break;
4373                    }
4374                    other => {
4375                        return Err(self.err(alloc::format!(
4376                            "expected ',' or ')' in ON CONFLICT target list, got {other:?}"
4377                        )));
4378                    }
4379                }
4380            }
4381        }
4382        // Required `DO`.
4383        match self.advance() {
4384            Token::Ident(s) | Token::QuotedIdent(s) if s.eq_ignore_ascii_case("do") => {}
4385            other => {
4386                return Err(self.err(alloc::format!(
4387                    "expected DO after ON CONFLICT [(…)], got {other:?}"
4388                )));
4389            }
4390        }
4391        // Action: NOTHING | UPDATE SET …
4392        let action = match self.advance() {
4393            Token::Ident(s) | Token::QuotedIdent(s) if s.eq_ignore_ascii_case("nothing") => {
4394                crate::ast::OnConflictAction::Nothing
4395            }
4396            Token::Ident(s) | Token::QuotedIdent(s) if s.eq_ignore_ascii_case("update") => {
4397                self.parse_on_conflict_update_action()?
4398            }
4399            other => {
4400                return Err(self.err(alloc::format!(
4401                    "expected NOTHING or UPDATE after ON CONFLICT DO, got {other:?}"
4402                )));
4403            }
4404        };
4405        Ok(Some(crate::ast::OnConflictClause {
4406            target_columns,
4407            action,
4408        }))
4409    }
4410
4411    /// v7.9.7 — tail of `ON CONFLICT … DO UPDATE`: parse
4412    /// `SET col = expr [, …] [WHERE cond]`. Caller already
4413    /// consumed `UPDATE`.
4414    fn parse_on_conflict_update_action(
4415        &mut self,
4416    ) -> Result<crate::ast::OnConflictAction, ParseError> {
4417        // `SET`
4418        match self.advance() {
4419            Token::Ident(s) | Token::QuotedIdent(s) if s.eq_ignore_ascii_case("set") => {}
4420            other => {
4421                return Err(self.err(alloc::format!(
4422                    "expected SET after ON CONFLICT DO UPDATE, got {other:?}"
4423                )));
4424            }
4425        }
4426        let mut assignments: Vec<(String, Expr)> = Vec::new();
4427        loop {
4428            let col = self.expect_ident_like()?;
4429            if !matches!(self.peek(), Token::Eq) {
4430                return Err(self.err(alloc::format!(
4431                    "expected `=` after column in ON CONFLICT DO UPDATE SET, got {:?}",
4432                    self.peek()
4433                )));
4434            }
4435            self.advance();
4436            let value = self.parse_expr(0)?;
4437            assignments.push((col, value));
4438            if matches!(self.peek(), Token::Comma) {
4439                self.advance();
4440                continue;
4441            }
4442            break;
4443        }
4444        let where_ = if matches!(self.peek(), Token::Where) {
4445            self.advance();
4446            Some(self.parse_expr(0)?)
4447        } else {
4448            None
4449        };
4450        Ok(crate::ast::OnConflictAction::Update {
4451            assignments,
4452            where_,
4453        })
4454    }
4455
4456    fn parse_select_list(&mut self) -> Result<Vec<SelectItem>, ParseError> {
4457        let mut items = Vec::new();
4458        loop {
4459            items.push(self.parse_select_item()?);
4460            if matches!(self.peek(), Token::Comma) {
4461                self.advance();
4462            } else {
4463                break;
4464            }
4465        }
4466        Ok(items)
4467    }
4468
4469    fn parse_select_item(&mut self) -> Result<SelectItem, ParseError> {
4470        if matches!(self.peek(), Token::Star) {
4471            self.advance();
4472            return Ok(SelectItem::Wildcard);
4473        }
4474        let expr = self.parse_expr(0)?;
4475        let alias = self.parse_optional_alias();
4476        Ok(SelectItem::Expr { expr, alias })
4477    }
4478
4479    fn parse_table_ref(&mut self) -> Result<TableRef, ParseError> {
4480        // v7.11.7 — `FROM unnest(<expr>) [AS] <alias>` set-returning
4481        // source. Detect at the head before the bare-ident fallback;
4482        // unnest is not a reserved token.
4483        if matches!(self.peek(), Token::Ident(s) | Token::QuotedIdent(s) if s.eq_ignore_ascii_case("unnest"))
4484            && matches!(self.tokens.get(self.pos + 1), Some(Token::LParen))
4485        {
4486            self.advance(); // unnest
4487            self.advance(); // (
4488            let expr = self.parse_expr(0)?;
4489            if !matches!(self.peek(), Token::RParen) {
4490                return Err(self.err(alloc::format!(
4491                    "expected ')' after unnest() argument, got {:?}",
4492                    self.peek()
4493                )));
4494            }
4495            self.advance();
4496            let (alias_ident, unnest_column_aliases) = self.parse_optional_alias_with_columns();
4497            let name = alias_ident.clone().unwrap_or_else(|| "unnest".to_string());
4498            return Ok(TableRef {
4499                name,
4500                alias: alias_ident,
4501                as_of_segment: None,
4502                unnest_expr: Some(Box::new(expr)),
4503                unnest_column_aliases,
4504            });
4505        }
4506        let name = self.expect_ident_like()?;
4507        // v6.10.2 — optional `AS OF SEGMENT '<id>'` cold-tier
4508        // time-travel clause. Parse BEFORE the alias so the
4509        // alias can still ride at the tail (`tbl AS OF SEGMENT
4510        // '5' alias`). `AS` is a reserved keyword token, while
4511        // `OF` and `SEGMENT` are bare idents.
4512        let as_of_segment = if matches!(self.peek(), Token::As)
4513            && matches!(self.tokens.get(self.pos + 1), Some(Token::Ident(s) | Token::QuotedIdent(s)) if s.eq_ignore_ascii_case("of"))
4514        {
4515            self.advance(); // AS
4516            self.advance(); // OF
4517            let kw = match self.peek().clone() {
4518                Token::Ident(s) | Token::QuotedIdent(s) => s,
4519                other => {
4520                    return Err(self.err(format!("expected SEGMENT after AS OF, got {other:?}")));
4521                }
4522            };
4523            if !kw.eq_ignore_ascii_case("segment") {
4524                return Err(self.err(format!(
4525                    "expected SEGMENT after AS OF, got {kw:?}; v6.10.2 supports SEGMENT only"
4526                )));
4527            }
4528            self.advance();
4529            // Segment id literal — accept either a string or
4530            // integer for operator ergonomics.
4531            let id = match self.advance() {
4532                Token::String(s) => s
4533                    .parse::<u32>()
4534                    .map_err(|e| self.err(format!("AS OF SEGMENT id parse: {e}")))?,
4535                Token::Integer(n) => u32::try_from(n)
4536                    .map_err(|e| self.err(format!("AS OF SEGMENT id parse: {e}")))?,
4537                other => {
4538                    return Err(self.err(format!(
4539                        "expected segment id literal after AS OF SEGMENT, got {other:?}"
4540                    )));
4541                }
4542            };
4543            Some(id)
4544        } else {
4545            None
4546        };
4547        let alias = self.parse_optional_alias();
4548        Ok(TableRef {
4549            name,
4550            alias,
4551            as_of_segment,
4552            unnest_expr: None,
4553            unnest_column_aliases: Vec::new(),
4554        })
4555    }
4556
4557    /// v7.13.2 — mailrs round-6 S5. Like `parse_optional_alias`
4558    /// but also accepts `AS alias(col [, col, …])` — the
4559    /// PG-standard table-function column-list form. The column
4560    /// list is only honoured when paired with `UNNEST(...)` in
4561    /// the parent; other call sites currently discard it.
4562    fn parse_optional_alias_with_columns(&mut self) -> (Option<String>, Vec<String>) {
4563        let alias = self.parse_optional_alias();
4564        if alias.is_none() {
4565            return (None, Vec::new());
4566        }
4567        let mut cols: Vec<String> = Vec::new();
4568        if matches!(self.peek(), Token::LParen) {
4569            self.advance();
4570            loop {
4571                match self.peek().clone() {
4572                    Token::Ident(s) | Token::QuotedIdent(s) => {
4573                        self.advance();
4574                        cols.push(s);
4575                    }
4576                    _ => break,
4577                }
4578                if matches!(self.peek(), Token::Comma) {
4579                    self.advance();
4580                    continue;
4581                }
4582                break;
4583            }
4584            if matches!(self.peek(), Token::RParen) {
4585                self.advance();
4586            }
4587        }
4588        (alias, cols)
4589    }
4590
4591    /// FROM-clause: a primary table reference plus zero-or-more joined
4592    /// peers expressed via either `, <table>` (cross-product, no ON) or
4593    /// `[INNER|LEFT [OUTER]|CROSS] JOIN <table> [ON expr]`. v1.10 keeps
4594    /// the join list flat (left-associative nested-loop semantics).
4595    fn parse_from_clause(&mut self) -> Result<FromClause, ParseError> {
4596        let primary = self.parse_table_ref()?;
4597        let mut joins = Vec::new();
4598        loop {
4599            // `, <table>` — cross-product with no ON.
4600            if matches!(self.peek(), Token::Comma) {
4601                self.advance();
4602                let table = self.parse_table_ref()?;
4603                joins.push(FromJoin {
4604                    kind: JoinKind::Cross,
4605                    table,
4606                    on: None,
4607                });
4608                continue;
4609            }
4610            // Explicit JOIN syntax. Accept INNER JOIN, LEFT [OUTER] JOIN,
4611            // CROSS JOIN, and bare JOIN (defaults to INNER).
4612            let kind =
4613                match self.peek() {
4614                    Token::Inner => {
4615                        self.advance();
4616                        if !matches!(self.peek(), Token::Join) {
4617                            return Err(self
4618                                .err(format!("expected JOIN after INNER, got {:?}", self.peek())));
4619                        }
4620                        self.advance();
4621                        JoinKind::Inner
4622                    }
4623                    Token::Left => {
4624                        self.advance();
4625                        if matches!(self.peek(), Token::Outer) {
4626                            self.advance();
4627                        }
4628                        if !matches!(self.peek(), Token::Join) {
4629                            return Err(self.err(format!(
4630                                "expected JOIN after LEFT [OUTER], got {:?}",
4631                                self.peek()
4632                            )));
4633                        }
4634                        self.advance();
4635                        JoinKind::Left
4636                    }
4637                    Token::Cross => {
4638                        self.advance();
4639                        if !matches!(self.peek(), Token::Join) {
4640                            return Err(self
4641                                .err(format!("expected JOIN after CROSS, got {:?}", self.peek())));
4642                        }
4643                        self.advance();
4644                        JoinKind::Cross
4645                    }
4646                    Token::Join => {
4647                        self.advance();
4648                        JoinKind::Inner
4649                    }
4650                    _ => break,
4651                };
4652            let table = self.parse_table_ref()?;
4653            let on = if matches!(self.peek(), Token::On) {
4654                self.advance();
4655                Some(self.parse_expr(0)?)
4656            } else if kind == JoinKind::Cross {
4657                None
4658            } else {
4659                return Err(self.err(format!(
4660                    "expected ON after {:?} JOIN, got {:?}",
4661                    kind,
4662                    self.peek()
4663                )));
4664            };
4665            joins.push(FromJoin { kind, table, on });
4666        }
4667        Ok(FromClause { primary, joins })
4668    }
4669
4670    /// Optional alias after an expression or table:
4671    /// `AS <ident>` is unambiguous; a bare `<ident>` directly after is also
4672    /// accepted (PG-style implicit alias). Returns `None` if the next token
4673    /// is not alias-shaped (e.g. comma, FROM, WHERE, semicolon, EOF, operator).
4674    fn parse_optional_alias(&mut self) -> Option<String> {
4675        if matches!(self.peek(), Token::As) {
4676            self.advance();
4677            // After AS, the next token MUST be an identifier-like — if not,
4678            // we still return None and let the caller surface the error on the
4679            // next expectation. v0.2 keeps the alias path forgiving; the
4680            // corpus tests don't exercise the malformed case.
4681            if let Token::Ident(_) | Token::QuotedIdent(_) = self.peek() {
4682                return self.expect_ident_like().ok();
4683            }
4684            return None;
4685        }
4686        if let Token::Ident(_) | Token::QuotedIdent(_) = self.peek() {
4687            return self.expect_ident_like().ok();
4688        }
4689        None
4690    }
4691
4692    /// Pratt loop. `min_prec` is the minimum binary-op precedence we'll accept.
4693    fn parse_expr(&mut self, min_prec: u8) -> Result<Expr, ParseError> {
4694        let mut lhs = self.parse_unary()?;
4695        while let Some((op, prec)) = binop_from(self.peek()) {
4696            if prec < min_prec {
4697                break;
4698            }
4699            self.advance();
4700            // v7.10.12 — `x <op> ANY(arr)` / `x <op> ALL(arr)`.
4701            // ANY is a bare ident; ALL is a reserved Token. Both
4702            // require an immediate `(` to disambiguate from
4703            // identifier columns named `any` / `all`.
4704            let any_kind = match self.peek() {
4705                Token::All if matches!(self.tokens.get(self.pos + 1), Some(Token::LParen)) => {
4706                    Some(false)
4707                }
4708                Token::Ident(s) | Token::QuotedIdent(s)
4709                    if (s.eq_ignore_ascii_case("any") || s.eq_ignore_ascii_case("all"))
4710                        && matches!(self.tokens.get(self.pos + 1), Some(Token::LParen)) =>
4711                {
4712                    Some(s.eq_ignore_ascii_case("any"))
4713                }
4714                _ => None,
4715            };
4716            if let Some(is_any) = any_kind {
4717                self.advance(); // ident
4718                self.advance(); // (
4719                let arr = self.parse_expr(0)?;
4720                if !matches!(self.peek(), Token::RParen) {
4721                    return Err(self.err(alloc::format!(
4722                        "expected ')' after ANY/ALL argument, got {:?}",
4723                        self.peek()
4724                    )));
4725                }
4726                self.advance();
4727                lhs = Expr::AnyAll {
4728                    expr: Box::new(lhs),
4729                    op,
4730                    array: Box::new(arr),
4731                    is_any,
4732                };
4733                continue;
4734            }
4735            let rhs = self.parse_expr(prec + 1)?;
4736            lhs = Expr::Binary {
4737                lhs: Box::new(lhs),
4738                op,
4739                rhs: Box::new(rhs),
4740            };
4741        }
4742        Ok(lhs)
4743    }
4744
4745    fn parse_unary(&mut self) -> Result<Expr, ParseError> {
4746        match self.peek() {
4747            Token::Not => {
4748                self.advance();
4749                // NOT sits between AND (2) and comparisons (4) — bind everything
4750                // ≥3, which leaves AND/OR outside.
4751                let e = self.parse_expr(3)?;
4752                Ok(Expr::Unary {
4753                    op: UnOp::Not,
4754                    expr: Box::new(e),
4755                })
4756            }
4757            Token::Minus => {
4758                self.advance();
4759                // Unary minus binds tighter than `*`/`/` (now at prec 7 after
4760                // `<->` slotted into 5 and arithmetic shifted up).
4761                let e = self.parse_expr(8)?;
4762                Ok(Expr::Unary {
4763                    op: UnOp::Neg,
4764                    expr: Box::new(e),
4765                })
4766            }
4767            _ => self.parse_atom(),
4768        }
4769    }
4770
4771    fn parse_atom(&mut self) -> Result<Expr, ParseError> {
4772        let tok_pos = self.pos;
4773        match self.advance() {
4774            Token::Integer(n) => Ok(Expr::Literal(Literal::Integer(n))),
4775            Token::Float(x) => Ok(Expr::Literal(Literal::Float(x))),
4776            Token::String(s) => Ok(Expr::Literal(Literal::String(s))),
4777            Token::True => Ok(Expr::Literal(Literal::Bool(true))),
4778            Token::False => Ok(Expr::Literal(Literal::Bool(false))),
4779            Token::Null => Ok(Expr::Literal(Literal::Null)),
4780            // v6.1.1 — `$N` placeholder. The actual Value lookup
4781            // happens in the engine eval path against the prepared-
4782            // statement bind buffer.
4783            Token::Placeholder(n) => Ok(Expr::Placeholder(n)),
4784            Token::LParen => {
4785                // v4.10: `(SELECT ...)` in expression position is a
4786                // scalar subquery; otherwise it's a parenthesised
4787                // expression. Peek for SELECT keyword to dispatch.
4788                if matches!(self.peek(), Token::Select) {
4789                    let inner = self.parse_select_stmt()?;
4790                    match self.advance() {
4791                        Token::RParen => {
4792                            let Statement::Select(s) = inner else {
4793                                unreachable!("parse_select_stmt returns Select")
4794                            };
4795                            Ok(Expr::ScalarSubquery(Box::new(s)))
4796                        }
4797                        other => Err(ParseError {
4798                            message: format!("expected ')' after scalar subquery, got {other:?}"),
4799                            token_pos: self.pos.saturating_sub(1),
4800                        }),
4801                    }
4802                } else {
4803                    let e = self.parse_expr(0)?;
4804                    match self.advance() {
4805                        Token::RParen => Ok(e),
4806                        other => Err(ParseError {
4807                            message: format!("expected ')', got {other:?}"),
4808                            token_pos: self.pos.saturating_sub(1),
4809                        }),
4810                    }
4811                }
4812            }
4813            Token::LBracket => self.parse_vector_literal_body(),
4814            Token::Extract => self.parse_extract_atom(),
4815            Token::Interval => self.parse_interval_atom(),
4816            // v4.10: EXISTS / NOT EXISTS. EXISTS isn't a reserved
4817            // token; we match on the bare ident. NOT is a token
4818            // (consumed in the comparison rung), but `EXISTS (...)`
4819            // at the top of an expression starts here.
4820            Token::Ident(s) | Token::QuotedIdent(s) if s.eq_ignore_ascii_case("exists") => {
4821                self.parse_exists_atom(false)
4822            }
4823            // v7.13.0 — `CASE [<operand>] WHEN <cond> THEN <val>
4824            // [WHEN ...] [ELSE <val>] END` (mailrs round-5 G9).
4825            // CASE is a bare ident; we dispatch on lowercase match.
4826            Token::Ident(s) | Token::QuotedIdent(s) if s.eq_ignore_ascii_case("case") => {
4827                self.parse_case_atom()
4828            }
4829            // v7.10.10 — `ARRAY[expr, expr, …]` constructor. ARRAY
4830            // is not a reserved token; we match by case-insensitive
4831            // ident. The opening `[` must follow immediately.
4832            Token::Ident(s) | Token::QuotedIdent(s)
4833                if s.eq_ignore_ascii_case("array") && matches!(self.peek(), Token::LBracket) =>
4834            {
4835                self.advance(); // consume `[`
4836                let mut items: Vec<Expr> = Vec::new();
4837                if !matches!(self.peek(), Token::RBracket) {
4838                    loop {
4839                        items.push(self.parse_expr(0)?);
4840                        match self.peek() {
4841                            Token::Comma => {
4842                                self.advance();
4843                            }
4844                            Token::RBracket => break,
4845                            other => {
4846                                return Err(self.err(alloc::format!(
4847                                    "expected ',' or ']' in ARRAY literal, got {other:?}"
4848                                )));
4849                            }
4850                        }
4851                    }
4852                }
4853                self.advance(); // consume `]`
4854                Ok(Expr::Array(items))
4855            }
4856            Token::Ident(s) | Token::QuotedIdent(s) => self.finish_ident_atom(s),
4857            other => Err(ParseError {
4858                message: format!("unexpected token {other:?} in expression"),
4859                token_pos: tok_pos,
4860            }),
4861        }
4862        // After parsing the atom, fold any postfix `::vector` casts.
4863        .and_then(|atom| self.finish_postfix_casts(atom))
4864    }
4865
4866    /// Postfix operators on an atom: `::TYPE` cast and `IS [NOT] NULL`.
4867    /// Both bind tighter than any binary op.
4868    fn finish_postfix_casts(&mut self, mut expr: Expr) -> Result<Expr, ParseError> {
4869        loop {
4870            if matches!(self.peek(), Token::DoubleColon) {
4871                self.advance();
4872                // v7.9.25 / v7.9.26 — broaden the postfix `::` cast
4873                // target set to include INTERVAL (reserved Token),
4874                // TIMESTAMPTZ, and PG catalog regtype / regclass.
4875                // mailrs follow-up H3a + H3b.
4876                let target = match self.advance() {
4877                    Token::Ident(s) => match s.to_ascii_lowercase().as_str() {
4878                        "int" | "integer" | "int4" => {
4879                            if matches!(self.peek(), Token::LBracket)
4880                                && matches!(self.tokens.get(self.pos + 1), Some(Token::RBracket))
4881                            {
4882                                self.advance();
4883                                self.advance();
4884                                CastTarget::IntArray
4885                            } else {
4886                                CastTarget::Int
4887                            }
4888                        }
4889                        "bigint" | "int8" => {
4890                            if matches!(self.peek(), Token::LBracket)
4891                                && matches!(self.tokens.get(self.pos + 1), Some(Token::RBracket))
4892                            {
4893                                self.advance();
4894                                self.advance();
4895                                CastTarget::BigIntArray
4896                            } else {
4897                                CastTarget::BigInt
4898                            }
4899                        }
4900                        "float" | "double" | "real" => CastTarget::Float,
4901                        "text" => {
4902                            // v7.10.11 — `::TEXT[]` widens to TextArray.
4903                            if matches!(self.peek(), Token::LBracket)
4904                                && matches!(self.tokens.get(self.pos + 1), Some(Token::RBracket))
4905                            {
4906                                self.advance();
4907                                self.advance();
4908                                CastTarget::TextArray
4909                            } else {
4910                                CastTarget::Text
4911                            }
4912                        }
4913                        "bool" | "boolean" => CastTarget::Bool,
4914                        "vector" => CastTarget::Vector,
4915                        "date" => CastTarget::Date,
4916                        "timestamp" | "datetime" => CastTarget::Timestamp,
4917                        "timestamptz" => CastTarget::Timestamptz,
4918                        "interval" => CastTarget::Interval,
4919                        "json" => CastTarget::Json,
4920                        "jsonb" => CastTarget::Jsonb,
4921                        "regtype" => CastTarget::RegType,
4922                        "regclass" => CastTarget::RegClass,
4923                        // v7.12.0 — `::tsvector` / `::tsquery`.
4924                        // Engine decodes the LHS text via the PG
4925                        // external form parser.
4926                        "tsvector" => CastTarget::TsVector,
4927                        "tsquery" => CastTarget::TsQuery,
4928                        other => {
4929                            return Err(ParseError {
4930                                message: format!("unsupported cast target `::{other}`"),
4931                                token_pos: self.pos.saturating_sub(1),
4932                            });
4933                        }
4934                    },
4935                    Token::Interval => CastTarget::Interval,
4936                    other => {
4937                        return Err(ParseError {
4938                            message: format!("expected type ident after `::`, got {other:?}"),
4939                            token_pos: self.pos.saturating_sub(1),
4940                        });
4941                    }
4942                };
4943                expr = Expr::Cast {
4944                    expr: Box::new(expr),
4945                    target,
4946                };
4947                continue;
4948            }
4949            if matches!(self.peek(), Token::Is) {
4950                self.advance();
4951                let negated = if matches!(self.peek(), Token::Not) {
4952                    self.advance();
4953                    true
4954                } else {
4955                    false
4956                };
4957                // v7.9.27b — `IS [NOT] DISTINCT FROM <rhs>`.
4958                // mailrs pg_dump.
4959                if matches!(self.peek(), Token::Distinct) {
4960                    self.advance();
4961                    if !matches!(self.peek(), Token::From) {
4962                        return Err(self.err(format!(
4963                            "expected FROM after IS{} DISTINCT, got {:?}",
4964                            if negated { " NOT" } else { "" },
4965                            self.peek()
4966                        )));
4967                    }
4968                    self.advance();
4969                    // Right-hand side: parse at the same precedence
4970                    // tier as comparison so `x IS DISTINCT FROM a + b`
4971                    // groups as `x IS DISTINCT FROM (a + b)`.
4972                    let rhs = self.parse_expr(20)?;
4973                    let op = if negated {
4974                        BinOp::IsNotDistinctFrom
4975                    } else {
4976                        BinOp::IsDistinctFrom
4977                    };
4978                    expr = Expr::Binary {
4979                        op,
4980                        lhs: Box::new(expr),
4981                        rhs: Box::new(rhs),
4982                    };
4983                    continue;
4984                }
4985                if !matches!(self.peek(), Token::Null) {
4986                    return Err(self.err(format!(
4987                        "expected NULL or DISTINCT after IS{}, got {:?}",
4988                        if negated { " NOT" } else { "" },
4989                        self.peek()
4990                    )));
4991                }
4992                self.advance();
4993                expr = Expr::IsNull {
4994                    expr: Box::new(expr),
4995                    negated,
4996                };
4997                continue;
4998            }
4999            // `x [NOT] BETWEEN a AND b`, `x [NOT] IN (...)`, `x [NOT] LIKE p`.
5000            // Look one token ahead so a stray `NOT` not followed by any of
5001            // these flows through to the early return below untouched.
5002            let negated = if matches!(self.peek(), Token::Not) {
5003                let next = self.tokens.get(self.pos + 1);
5004                matches!(next, Some(Token::Between | Token::In | Token::Like))
5005            } else {
5006                false
5007            };
5008            if negated {
5009                self.advance();
5010            }
5011            if matches!(self.peek(), Token::Between) {
5012                expr = self.parse_between_tail(expr, negated)?;
5013                continue;
5014            }
5015            if matches!(self.peek(), Token::In) {
5016                expr = self.parse_in_tail(expr, negated)?;
5017                continue;
5018            }
5019            if matches!(self.peek(), Token::Like) {
5020                self.advance();
5021                // Pattern at the same precedence as other comparison RHSes —
5022                // 5 leaves AND/OR alone so `a LIKE 'x%' AND b` parses right.
5023                let pattern = self.parse_expr(5)?;
5024                expr = Expr::Like {
5025                    expr: Box::new(expr),
5026                    pattern: Box::new(pattern),
5027                    negated,
5028                };
5029                continue;
5030            }
5031            // v7.10.12 — `arr[i]` subscript. PG 1-based; engine
5032            // returns NULL for out-of-range. Multiple subscripts
5033            // chain: `a[i][j]` parses left-to-right.
5034            if matches!(self.peek(), Token::LBracket) {
5035                self.advance();
5036                let index = self.parse_expr(0)?;
5037                if !matches!(self.peek(), Token::RBracket) {
5038                    return Err(self.err(alloc::format!(
5039                        "expected ']' after array index, got {:?}",
5040                        self.peek()
5041                    )));
5042                }
5043                self.advance();
5044                expr = Expr::ArraySubscript {
5045                    target: Box::new(expr),
5046                    index: Box::new(index),
5047                };
5048                continue;
5049            }
5050            return Ok(expr);
5051        }
5052    }
5053
5054    /// `x BETWEEN low AND high`  →  `(x >= low) AND (x <= high)`, wrapped in
5055    /// `NOT` when `negated`. Bounds parse at precedence 5 so the trailing
5056    /// `AND` is not swallowed.
5057    fn parse_between_tail(&mut self, expr: Expr, negated: bool) -> Result<Expr, ParseError> {
5058        self.advance(); // BETWEEN
5059        let low = self.parse_expr(5)?;
5060        if !matches!(self.peek(), Token::And) {
5061            return Err(self.err(format!(
5062                "expected AND after BETWEEN low bound, got {:?}",
5063                self.peek()
5064            )));
5065        }
5066        self.advance();
5067        let high = self.parse_expr(5)?;
5068        let target = Box::new(expr);
5069        let combined = Expr::Binary {
5070            lhs: Box::new(Expr::Binary {
5071                lhs: target.clone(),
5072                op: BinOp::GtEq,
5073                rhs: Box::new(low),
5074            }),
5075            op: BinOp::And,
5076            rhs: Box::new(Expr::Binary {
5077                lhs: target,
5078                op: BinOp::LtEq,
5079                rhs: Box::new(high),
5080            }),
5081        };
5082        Ok(maybe_not(combined, negated))
5083    }
5084
5085    /// `x IN (a, b, c)`  →  chained OR of equalities. Empty list collapses
5086    /// to FALSE (TRUE under NOT IN), matching standard SQL semantics.
5087    /// v4.11: parse `WITH name AS (SELECT ...) [, ...] SELECT ...`.
5088    /// Caller already consumed the leading `WITH` ident.
5089    fn parse_with_cte_then_select(&mut self) -> Result<Statement, ParseError> {
5090        // v4.22: WITH RECURSIVE — optional keyword right after WITH.
5091        // Comes through as an identifier; consume it if present and
5092        // mark every CTE in the clause as recursive (PG semantics —
5093        // the flag is per-WITH, not per-CTE).
5094        let mut recursive = false;
5095        if let Token::Ident(s) | Token::QuotedIdent(s) = self.peek()
5096            && s.eq_ignore_ascii_case("recursive")
5097        {
5098            self.advance();
5099            recursive = true;
5100        }
5101        let mut ctes = Vec::new();
5102        loop {
5103            let name = self.expect_ident_like()?;
5104            // v4.22: optional column-name list — `WITH t(a,b,c) AS ...`.
5105            // PG uses these to rename the body's output columns; we
5106            // do the same below by overriding `columns[i].name`.
5107            let column_overrides: Vec<String> = if matches!(self.peek(), Token::LParen) {
5108                self.advance();
5109                let mut names = Vec::new();
5110                loop {
5111                    names.push(self.expect_ident_like()?);
5112                    if matches!(self.peek(), Token::Comma) {
5113                        self.advance();
5114                        continue;
5115                    }
5116                    break;
5117                }
5118                if !matches!(self.peek(), Token::RParen) {
5119                    return Err(self.err(format!(
5120                        "expected ')' to close CTE column list, got {:?}",
5121                        self.peek()
5122                    )));
5123                }
5124                self.advance();
5125                names
5126            } else {
5127                Vec::new()
5128            };
5129            // AS is a reserved Token::As (used by SELECT-item / FROM
5130            // aliasing) — handle it specially rather than as a bare
5131            // ident.
5132            if !matches!(self.peek(), Token::As) {
5133                return Err(self.err(format!(
5134                    "expected AS after CTE name {name:?}, got {:?}",
5135                    self.peek()
5136                )));
5137            }
5138            self.advance();
5139            if !matches!(self.peek(), Token::LParen) {
5140                return Err(self.err(format!(
5141                    "expected '(' after AS in WITH clause, got {:?}",
5142                    self.peek()
5143                )));
5144            }
5145            self.advance();
5146            if !matches!(self.peek(), Token::Select) {
5147                return Err(self.err(format!("WITH body must be a SELECT, got {:?}", self.peek())));
5148            }
5149            let inner = self.parse_select_stmt()?;
5150            if !matches!(self.peek(), Token::RParen) {
5151                return Err(self.err(format!(
5152                    "expected ')' after CTE body, got {:?}",
5153                    self.peek()
5154                )));
5155            }
5156            self.advance();
5157            let Statement::Select(body) = inner else {
5158                unreachable!("parse_select_stmt returns Select")
5159            };
5160            ctes.push(crate::ast::Cte {
5161                name,
5162                body,
5163                recursive,
5164                column_overrides,
5165            });
5166            if matches!(self.peek(), Token::Comma) {
5167                self.advance();
5168                continue;
5169            }
5170            break;
5171        }
5172        // The body SELECT follows. Must start with SELECT.
5173        if !matches!(self.peek(), Token::Select) {
5174            return Err(self.err(format!(
5175                "expected SELECT after WITH clause, got {:?}",
5176                self.peek()
5177            )));
5178        }
5179        let body_stmt = self.parse_select_stmt()?;
5180        let Statement::Select(mut body) = body_stmt else {
5181            unreachable!()
5182        };
5183        body.ctes = ctes;
5184        Ok(Statement::Select(body))
5185    }
5186
5187    /// v4.10: parse `EXISTS (SELECT ...)`. Caller (`parse_atom`)
5188    /// already consumed the leading `EXISTS` ident via
5189    /// `self.advance()`.
5190    /// v7.13.0 — parse the rest of a `CASE … END` expression after
5191    /// the leading `CASE` ident has been consumed (mailrs round-5
5192    /// G9). Supports both the searched form
5193    /// (`CASE WHEN cond THEN val …`) and the simple form
5194    /// (`CASE operand WHEN val THEN val …`).
5195    fn parse_case_atom(&mut self) -> Result<Expr, ParseError> {
5196        // Disambiguate searched vs simple form: if the next token
5197        // is `WHEN`, we're in the searched form. Otherwise the
5198        // intervening expression is the operand.
5199        let operand = if matches!(self.peek(), Token::Ident(s) if s.eq_ignore_ascii_case("when")) {
5200            None
5201        } else {
5202            Some(Box::new(self.parse_expr(0)?))
5203        };
5204        let mut branches: Vec<(Expr, Expr)> = Vec::new();
5205        loop {
5206            match self.peek() {
5207                Token::Ident(s) if s.eq_ignore_ascii_case("when") => {
5208                    self.advance();
5209                    let cond = self.parse_expr(0)?;
5210                    match self.peek() {
5211                        Token::Ident(t) if t.eq_ignore_ascii_case("then") => {
5212                            self.advance();
5213                        }
5214                        other => {
5215                            return Err(self.err(alloc::format!(
5216                                "expected THEN after CASE WHEN <expr>, got {other:?}"
5217                            )));
5218                        }
5219                    }
5220                    let value = self.parse_expr(0)?;
5221                    branches.push((cond, value));
5222                }
5223                _ => break,
5224            }
5225        }
5226        if branches.is_empty() {
5227            return Err(self.err("CASE requires at least one WHEN … THEN … branch".into()));
5228        }
5229        let else_branch = if matches!(self.peek(), Token::Ident(s) if s.eq_ignore_ascii_case("else"))
5230        {
5231            self.advance();
5232            Some(Box::new(self.parse_expr(0)?))
5233        } else {
5234            None
5235        };
5236        match self.peek() {
5237            Token::Ident(s) if s.eq_ignore_ascii_case("end") => {
5238                self.advance();
5239            }
5240            other => {
5241                return Err(self.err(alloc::format!(
5242                    "expected END to close CASE expression, got {other:?}"
5243                )));
5244            }
5245        }
5246        Ok(Expr::Case {
5247            operand,
5248            branches,
5249            else_branch,
5250        })
5251    }
5252
5253    fn parse_exists_atom(&mut self, negated: bool) -> Result<Expr, ParseError> {
5254        if !matches!(self.peek(), Token::LParen) {
5255            return Err(self.err(format!("expected '(' after EXISTS, got {:?}", self.peek())));
5256        }
5257        self.advance();
5258        let inner = self.parse_select_stmt()?;
5259        if !matches!(self.peek(), Token::RParen) {
5260            return Err(self.err(format!(
5261                "expected ')' after EXISTS-subquery, got {:?}",
5262                self.peek()
5263            )));
5264        }
5265        self.advance();
5266        let Statement::Select(s) = inner else {
5267            unreachable!("parse_select_stmt returns Select")
5268        };
5269        Ok(Expr::Exists {
5270            subquery: Box::new(s),
5271            negated,
5272        })
5273    }
5274
5275    fn parse_in_tail(&mut self, expr: Expr, negated: bool) -> Result<Expr, ParseError> {
5276        self.advance(); // IN
5277        if !matches!(self.peek(), Token::LParen) {
5278            return Err(self.err(format!("expected '(' after IN, got {:?}", self.peek())));
5279        }
5280        self.advance();
5281        // v4.10: `IN (SELECT ...)` — subquery branch.
5282        if matches!(self.peek(), Token::Select) {
5283            let inner = self.parse_select_stmt()?;
5284            if !matches!(self.peek(), Token::RParen) {
5285                return Err(self.err(format!(
5286                    "expected ')' after IN-subquery, got {:?}",
5287                    self.peek()
5288                )));
5289            }
5290            self.advance();
5291            let Statement::Select(s) = inner else {
5292                unreachable!("parse_select_stmt always returns Statement::Select")
5293            };
5294            return Ok(Expr::InSubquery {
5295                expr: Box::new(expr),
5296                subquery: Box::new(s),
5297                negated,
5298            });
5299        }
5300        let mut elements = Vec::new();
5301        if !matches!(self.peek(), Token::RParen) {
5302            loop {
5303                elements.push(self.parse_expr(0)?);
5304                match self.peek() {
5305                    Token::Comma => {
5306                        self.advance();
5307                    }
5308                    Token::RParen => break,
5309                    other => {
5310                        return Err(
5311                            self.err(format!("expected ',' or ')' in IN list, got {other:?}"))
5312                        );
5313                    }
5314                }
5315            }
5316        }
5317        self.advance(); // ')'
5318        let target = Box::new(expr);
5319        let combined = if elements.is_empty() {
5320            Expr::Literal(Literal::Bool(false))
5321        } else {
5322            let mut iter = elements.into_iter();
5323            let first = iter.next().unwrap();
5324            let mut acc = Expr::Binary {
5325                lhs: target.clone(),
5326                op: BinOp::Eq,
5327                rhs: Box::new(first),
5328            };
5329            for elt in iter {
5330                acc = Expr::Binary {
5331                    lhs: Box::new(acc),
5332                    op: BinOp::Or,
5333                    rhs: Box::new(Expr::Binary {
5334                        lhs: target.clone(),
5335                        op: BinOp::Eq,
5336                        rhs: Box::new(elt),
5337                    }),
5338                };
5339            }
5340            acc
5341        };
5342        Ok(maybe_not(combined, negated))
5343    }
5344
5345    /// Parse a pgvector array literal `[ x1, x2, ... ]`. The opening `[` is
5346    /// already consumed by the caller. Elements must be numeric literals
5347    /// (with optional unary `-`); any compound expression is rejected at
5348    /// parse time so the runtime never needs to evaluate inside a vector.
5349    /// `EXTRACT(<field> FROM <source>)`. The dispatching `parse_atom`
5350    /// has already consumed the `EXTRACT` token before calling us —
5351    /// we pick up at the opening `(`.
5352    fn parse_extract_atom(&mut self) -> Result<Expr, ParseError> {
5353        if !matches!(self.peek(), Token::LParen) {
5354            return Err(self.err(format!("expected '(' after EXTRACT, got {:?}", self.peek())));
5355        }
5356        self.advance();
5357        let field_name = self.expect_ident_like()?;
5358        let field = match field_name.to_ascii_lowercase().as_str() {
5359            "year" => ExtractField::Year,
5360            "month" => ExtractField::Month,
5361            "day" => ExtractField::Day,
5362            "hour" => ExtractField::Hour,
5363            "minute" => ExtractField::Minute,
5364            "second" => ExtractField::Second,
5365            "microsecond" | "microseconds" => ExtractField::Microsecond,
5366            other => {
5367                return Err(self.err(format!(
5368                    "unknown EXTRACT field {other:?}; \
5369                     supported: YEAR, MONTH, DAY, HOUR, MINUTE, SECOND, MICROSECOND"
5370                )));
5371            }
5372        };
5373        if !matches!(self.peek(), Token::From) {
5374            return Err(self.err(format!(
5375                "expected FROM after EXTRACT field, got {:?}",
5376                self.peek()
5377            )));
5378        }
5379        self.advance();
5380        let source = self.parse_expr(0)?;
5381        if !matches!(self.peek(), Token::RParen) {
5382            return Err(self.err(format!(
5383                "expected ')' to close EXTRACT, got {:?}",
5384                self.peek()
5385            )));
5386        }
5387        self.advance();
5388        Ok(Expr::Extract {
5389            field,
5390            source: Box::new(source),
5391        })
5392    }
5393
5394    /// `INTERVAL '<n> <unit> [<n> <unit> ...]'` — the `INTERVAL` keyword
5395    /// is already consumed; we expect a single string literal next and
5396    /// resolve it into `Literal::Interval` at parse time so the engine
5397    /// never has to re-tokenise inside the string.
5398    fn parse_interval_atom(&mut self) -> Result<Expr, ParseError> {
5399        let tok = self.advance();
5400        let Token::String(text) = tok else {
5401            return Err(self.err(format!(
5402                "expected string literal after INTERVAL, got {tok:?}"
5403            )));
5404        };
5405        let (months, micros) = parse_interval_text(&text).ok_or_else(|| ParseError {
5406            message: format!(
5407                "cannot parse INTERVAL {text:?}; \
5408                     expected `<n> <unit> [<n> <unit> ...]` with units \
5409                     microsecond[s], millisecond[s], second[s], minute[s], \
5410                     hour[s], day[s], week[s], month[s], year[s]"
5411            ),
5412            token_pos: self.pos.saturating_sub(1),
5413        })?;
5414        Ok(Expr::Literal(Literal::Interval {
5415            months,
5416            micros,
5417            text,
5418        }))
5419    }
5420
5421    fn parse_vector_literal_body(&mut self) -> Result<Expr, ParseError> {
5422        let mut elems = Vec::new();
5423        if matches!(self.peek(), Token::RBracket) {
5424            self.advance();
5425            return Ok(Expr::Literal(Literal::Vector(elems)));
5426        }
5427        loop {
5428            let e = self.parse_expr(0)?;
5429            let x = extract_numeric_literal(&e).ok_or_else(|| ParseError {
5430                message: format!("vector element must be a numeric literal, got {e:?}"),
5431                token_pos: self.pos,
5432            })?;
5433            elems.push(x);
5434            match self.peek() {
5435                Token::Comma => {
5436                    self.advance();
5437                }
5438                Token::RBracket => {
5439                    self.advance();
5440                    break;
5441                }
5442                other => {
5443                    return Err(self.err(format!("expected ',' or ']' in vector, got {other:?}")));
5444                }
5445            }
5446        }
5447        Ok(Expr::Literal(Literal::Vector(elems)))
5448    }
5449
5450    /// Atom that started with an identifier: could be `t.col`, `col`, or
5451    /// `func(arg, ...)`. Detect each shape by looking at the next token.
5452    /// v4.12: parse `(PARTITION BY expr, ... ORDER BY expr [DESC]
5453    /// [, ...])`. Caller has already consumed `OVER`. Either clause
5454    /// is optional; an empty `()` is also legal (PG semantics).
5455    /// v6.4.2 — consume an optional `IGNORE NULLS` / `RESPECT NULLS`
5456    /// modifier between `name(args)` and `OVER (...)`. Default is
5457    /// `Respect`. Unrecognised idents leave the stream unchanged.
5458    fn parse_null_treatment_modifier(&mut self) -> NullTreatment {
5459        let Token::Ident(s) = self.peek().clone() else {
5460            return NullTreatment::Respect;
5461        };
5462        let is_ignore = s.eq_ignore_ascii_case("ignore");
5463        let is_respect = s.eq_ignore_ascii_case("respect");
5464        if !is_ignore && !is_respect {
5465            return NullTreatment::Respect;
5466        }
5467        // Lookahead for NULLS — only consume both tokens together.
5468        // pos+1 must hold a "nulls" ident.
5469        if self.pos + 1 < self.tokens.len()
5470            && let Token::Ident(s2) = &self.tokens[self.pos + 1]
5471            && s2.eq_ignore_ascii_case("nulls")
5472        {
5473            self.advance();
5474            self.advance();
5475            return if is_ignore {
5476                NullTreatment::Ignore
5477            } else {
5478                NullTreatment::Respect
5479            };
5480        }
5481        NullTreatment::Respect
5482    }
5483
5484    /// No frame clause is supported.
5485    #[allow(clippy::type_complexity)] // (partitions, ordered-keys-with-desc) is the natural shape
5486    fn parse_over_clause(
5487        &mut self,
5488    ) -> Result<(Vec<Expr>, Vec<(Expr, bool)>, Option<WindowFrame>), ParseError> {
5489        if !matches!(self.peek(), Token::LParen) {
5490            return Err(self.err(format!("expected '(' after OVER, got {:?}", self.peek())));
5491        }
5492        self.advance();
5493        let mut partition_by = Vec::new();
5494        let mut order_by = Vec::new();
5495        // PARTITION BY ?
5496        if let Token::Ident(s) | Token::QuotedIdent(s) = self.peek()
5497            && s.eq_ignore_ascii_case("partition")
5498        {
5499            self.advance();
5500            if !matches!(self.peek(), Token::By) {
5501                return Err(self.err(format!(
5502                    "expected BY after PARTITION, got {:?}",
5503                    self.peek()
5504                )));
5505            }
5506            self.advance();
5507            loop {
5508                partition_by.push(self.parse_expr(0)?);
5509                if matches!(self.peek(), Token::Comma) {
5510                    self.advance();
5511                    continue;
5512                }
5513                break;
5514            }
5515        }
5516        // ORDER BY ?
5517        if matches!(self.peek(), Token::Order) {
5518            self.advance();
5519            if !matches!(self.peek(), Token::By) {
5520                return Err(self.err(format!("expected BY after ORDER, got {:?}", self.peek())));
5521            }
5522            self.advance();
5523            loop {
5524                let e = self.parse_expr(0)?;
5525                let desc = if matches!(self.peek(), Token::Desc) {
5526                    self.advance();
5527                    true
5528                } else if matches!(self.peek(), Token::Asc) {
5529                    self.advance();
5530                    false
5531                } else {
5532                    false
5533                };
5534                order_by.push((e, desc));
5535                if matches!(self.peek(), Token::Comma) {
5536                    self.advance();
5537                    continue;
5538                }
5539                break;
5540            }
5541        }
5542        // v4.20: optional explicit frame, `ROWS ...` / `RANGE ...`.
5543        // Both keywords come through the lexer as identifiers; match
5544        // case-insensitively.
5545        let mut frame: Option<WindowFrame> = None;
5546        if let Token::Ident(s) | Token::QuotedIdent(s) = self.peek() {
5547            let kind = if s.eq_ignore_ascii_case("rows") {
5548                Some(FrameKind::Rows)
5549            } else if s.eq_ignore_ascii_case("range") {
5550                Some(FrameKind::Range)
5551            } else {
5552                None
5553            };
5554            if let Some(kind) = kind {
5555                self.advance();
5556                frame = Some(self.parse_frame_tail(kind)?);
5557            }
5558        }
5559        if !matches!(self.peek(), Token::RParen) {
5560            return Err(self.err(format!(
5561                "expected ')' to close OVER clause, got {:?}",
5562                self.peek()
5563            )));
5564        }
5565        self.advance();
5566        Ok((partition_by, order_by, frame))
5567    }
5568
5569    /// v4.20: parse the tail of an explicit frame, given the `ROWS`
5570    /// or `RANGE` keyword was just consumed. Accepts both
5571    /// `BETWEEN <bound> AND <bound>` and the single-bound shorthand
5572    /// (`ROWS UNBOUNDED PRECEDING`, `ROWS 5 PRECEDING`, etc.) which
5573    /// PG normalises to `BETWEEN <bound> AND CURRENT ROW`.
5574    fn parse_frame_tail(&mut self, kind: FrameKind) -> Result<WindowFrame, ParseError> {
5575        if matches!(self.peek(), Token::Between) {
5576            self.advance();
5577            let start = self.parse_frame_bound()?;
5578            if !matches!(self.peek(), Token::And) {
5579                return Err(self.err(format!("expected AND in frame spec, got {:?}", self.peek())));
5580            }
5581            self.advance();
5582            let end = self.parse_frame_bound()?;
5583            Ok(WindowFrame {
5584                kind,
5585                start,
5586                end: Some(end),
5587            })
5588        } else {
5589            let start = self.parse_frame_bound()?;
5590            Ok(WindowFrame {
5591                kind,
5592                start,
5593                end: None,
5594            })
5595        }
5596    }
5597
5598    /// Parse one frame bound: `UNBOUNDED PRECEDING`, `<n> PRECEDING`,
5599    /// `CURRENT ROW`, `<n> FOLLOWING`, `UNBOUNDED FOLLOWING`.
5600    fn parse_frame_bound(&mut self) -> Result<FrameBound, ParseError> {
5601        // Number-led: "<n> PRECEDING" / "<n> FOLLOWING".
5602        if let Token::Integer(n) = *self.peek() {
5603            self.advance();
5604            let n: u64 = u64::try_from(n).map_err(|_| {
5605                self.err(format!(
5606                    "invalid frame offset {n} — expected non-negative integer"
5607                ))
5608            })?;
5609            let dir = self.expect_ident_like()?;
5610            return if dir.eq_ignore_ascii_case("preceding") {
5611                Ok(FrameBound::OffsetPreceding(n))
5612            } else if dir.eq_ignore_ascii_case("following") {
5613                Ok(FrameBound::OffsetFollowing(n))
5614            } else {
5615                Err(self.err(format!(
5616                    "expected PRECEDING or FOLLOWING after offset, got {dir:?}"
5617                )))
5618            };
5619        }
5620        let first = self.expect_ident_like()?;
5621        if first.eq_ignore_ascii_case("unbounded") {
5622            let dir = self.expect_ident_like()?;
5623            return if dir.eq_ignore_ascii_case("preceding") {
5624                Ok(FrameBound::UnboundedPreceding)
5625            } else if dir.eq_ignore_ascii_case("following") {
5626                Ok(FrameBound::UnboundedFollowing)
5627            } else {
5628                Err(self.err(format!(
5629                    "expected PRECEDING or FOLLOWING after UNBOUNDED, got {dir:?}"
5630                )))
5631            };
5632        }
5633        if first.eq_ignore_ascii_case("current") {
5634            let row = self.expect_ident_like()?;
5635            if !row.eq_ignore_ascii_case("row") {
5636                return Err(self.err(format!("expected ROW after CURRENT, got {row:?}")));
5637            }
5638            return Ok(FrameBound::CurrentRow);
5639        }
5640        Err(self.err(format!(
5641            "expected frame bound (UNBOUNDED/CURRENT/<n>), got {first:?}"
5642        )))
5643    }
5644
5645    fn finish_ident_atom(&mut self, first: String) -> Result<Expr, ParseError> {
5646        if matches!(self.peek(), Token::Dot) {
5647            self.advance();
5648            let name = self.expect_ident_like()?;
5649            // v7.14.0 — schema-qualified function call
5650            // `<schema>.<fn>(args)`. PG dumps emit
5651            // `pg_catalog.set_config(...)` in the preamble. SPG
5652            // is single-namespace: drop the schema prefix and
5653            // route the dispatch on the bare function name.
5654            if matches!(self.peek(), Token::LParen) {
5655                return self.finish_ident_atom(name);
5656            }
5657            return Ok(Expr::Column(ColumnName {
5658                qualifier: Some(first),
5659                name,
5660            }));
5661        }
5662        if matches!(self.peek(), Token::LParen) {
5663            self.advance();
5664            // `COUNT(*)` — special-cased here because `*` isn't a normal
5665            // expression token. Lower-case match on `first` since the lexer
5666            // folds identifiers.
5667            if first.eq_ignore_ascii_case("count") && matches!(self.peek(), Token::Star) {
5668                self.advance();
5669                if !matches!(self.peek(), Token::RParen) {
5670                    return Err(self.err(format!(
5671                        "expected ')' after COUNT(*), got {:?}",
5672                        self.peek()
5673                    )));
5674                }
5675                self.advance();
5676                // v4.12: COUNT(*) OVER (...) — same window tail.
5677                let null_treatment = self.parse_null_treatment_modifier();
5678                if let Token::Ident(s) | Token::QuotedIdent(s) = self.peek()
5679                    && s.eq_ignore_ascii_case("over")
5680                {
5681                    self.advance();
5682                    let (partition_by, order_by, frame) = self.parse_over_clause()?;
5683                    return Ok(Expr::WindowFunction {
5684                        name: "count_star".into(),
5685                        args: Vec::new(),
5686                        partition_by,
5687                        order_by,
5688                        frame,
5689                        null_treatment,
5690                    });
5691                }
5692                return Ok(Expr::FunctionCall {
5693                    name: "count_star".into(),
5694                    args: Vec::new(),
5695                });
5696            }
5697            // Function call. PG-style: zero-or-more comma-separated args.
5698            let mut args = Vec::new();
5699            if !matches!(self.peek(), Token::RParen) {
5700                loop {
5701                    args.push(self.parse_expr(0)?);
5702                    match self.peek() {
5703                        Token::Comma => {
5704                            self.advance();
5705                        }
5706                        Token::RParen => break,
5707                        other => {
5708                            return Err(self.err(format!(
5709                                "expected ',' or ')' in function args, got {other:?}"
5710                            )));
5711                        }
5712                    }
5713                }
5714            }
5715            self.advance(); // consume ')'
5716            // v4.12: window-function tail — `name(args) OVER (...)`.
5717            // Promotes the just-parsed FunctionCall into a
5718            // WindowFunction node carrying partition + order.
5719            // v6.4.2: also accepts `name(args) IGNORE NULLS OVER (...)`
5720            // / `RESPECT NULLS OVER (...)` between the closing paren
5721            // and `OVER`.
5722            let null_treatment = self.parse_null_treatment_modifier();
5723            if let Token::Ident(s) | Token::QuotedIdent(s) = self.peek()
5724                && s.eq_ignore_ascii_case("over")
5725            {
5726                self.advance();
5727                let (partition_by, order_by, frame) = self.parse_over_clause()?;
5728                return Ok(Expr::WindowFunction {
5729                    name: first,
5730                    args,
5731                    partition_by,
5732                    order_by,
5733                    frame,
5734                    null_treatment,
5735                });
5736            }
5737            return Ok(Expr::FunctionCall { name: first, args });
5738        }
5739        // v7.9.20 — SQL-standard parenless keyword expressions
5740        // (PG treats these as functions called without parens).
5741        // Resolve to a synthetic FunctionCall so the engine's
5742        // eval path reuses the existing function-call routing.
5743        // mailrs G3.
5744        let lc = first.to_ascii_lowercase();
5745        if matches!(
5746            lc.as_str(),
5747            "current_date" | "current_time" | "current_timestamp" | "localtimestamp" | "localtime"
5748        ) {
5749            return Ok(Expr::FunctionCall {
5750                name: lc,
5751                args: Vec::new(),
5752            });
5753        }
5754        Ok(Expr::Column(ColumnName {
5755            qualifier: None,
5756            name: first,
5757        }))
5758    }
5759}
5760
5761/// v6.8.2 — walk an expression tree and return the first column
5762/// reference's bare name. Used by `parse_create_index_stmt_after_create`
5763/// to derive `CreateIndexStatement.column` from an expression
5764/// key (so downstream planner code resolving a primary column
5765/// position keeps working with expression indexes). Returns
5766/// `None` when the expression has no column ref at all — caller
5767/// surfaces that as a parse error.
5768fn extract_first_column(expr: &Expr) -> Option<String> {
5769    match expr {
5770        Expr::Column(cn) => Some(cn.name.clone()),
5771        Expr::FunctionCall { args, .. } => args.iter().find_map(extract_first_column),
5772        Expr::Binary { lhs, rhs, .. } => {
5773            extract_first_column(lhs).or_else(|| extract_first_column(rhs))
5774        }
5775        Expr::Unary { expr: e, .. } => extract_first_column(e),
5776        _ => None,
5777    }
5778}
5779
5780fn maybe_not(expr: Expr, negated: bool) -> Expr {
5781    if negated {
5782        Expr::Unary {
5783            op: UnOp::Not,
5784            expr: Box::new(expr),
5785        }
5786    } else {
5787        expr
5788    }
5789}
5790
5791fn binop_from(tok: &Token) -> Option<(BinOp, u8)> {
5792    let pair = match tok {
5793        Token::Or => (BinOp::Or, 1),
5794        Token::And => (BinOp::And, 2),
5795        Token::Eq => (BinOp::Eq, 4),
5796        Token::NotEq => (BinOp::NotEq, 4),
5797        Token::Lt => (BinOp::Lt, 4),
5798        Token::LtEq => (BinOp::LtEq, 4),
5799        Token::Gt => (BinOp::Gt, 4),
5800        Token::GtEq => (BinOp::GtEq, 4),
5801        // pgvector distance ops all sit on the same rung — tighter than
5802        // comparisons (4) so `col <-> v < threshold` parses correctly.
5803        Token::L2Distance => (BinOp::L2Distance, 5),
5804        Token::InnerProduct => (BinOp::InnerProduct, 5),
5805        Token::CosineDistance => (BinOp::CosineDistance, 5),
5806        Token::Plus => (BinOp::Add, 6),
5807        Token::Minus => (BinOp::Sub, 6),
5808        // `||` sits beside `+`/`-` (matches PG conceptually — concat groups
5809        // by the same level as binary additive arithmetic).
5810        Token::Concat => (BinOp::Concat, 6),
5811        Token::Star => (BinOp::Mul, 7),
5812        Token::Slash => (BinOp::Div, 7),
5813        // v4.14: JSON path ops bind tighter than comparisons (4)
5814        // and additive (6) so `doc->'k' = 'v'` parses correctly.
5815        // Same rung as the multiplicative ops.
5816        Token::JsonGet => (BinOp::JsonGet, 7),
5817        Token::JsonGetText => (BinOp::JsonGetText, 7),
5818        Token::JsonGetPath => (BinOp::JsonGetPath, 7),
5819        Token::JsonGetPathText => (BinOp::JsonGetPathText, 7),
5820        Token::JsonContains => (BinOp::JsonContains, 7),
5821        // v7.12.2 — `@@` binds at the comparison rung (looser than
5822        // arithmetic, tighter than AND / OR). PG places `@@` at
5823        // the same precedence as `=` / `<`, so we follow.
5824        Token::TsMatch => (BinOp::TsMatch, 4),
5825        _ => return None,
5826    };
5827    Some(pair)
5828}
5829
5830#[allow(clippy::cast_possible_truncation, clippy::cast_precision_loss)]
5831// `as f32` here is intentional: vector elements widen / narrow into f32 on
5832// purpose. i64 → f32 loses precision past 2^24, f64 → f32 loses precision
5833// past ~15 decimal digits — both are acceptable for a fixed-precision
5834// pgvector column.
5835fn extract_numeric_literal(e: &Expr) -> Option<f32> {
5836    match e {
5837        Expr::Literal(Literal::Integer(n)) => Some(*n as f32),
5838        Expr::Literal(Literal::Float(x)) => Some(*x as f32),
5839        Expr::Unary {
5840            op: UnOp::Neg,
5841            expr,
5842        } => extract_numeric_literal(expr).map(|x| -x),
5843        _ => None,
5844    }
5845}
5846
/// Convert the text inside `INTERVAL '...'` into `(months, micros)`.
///
/// The input is a whitespace-separated sequence of `<n> <unit>` pairs;
/// `<n>` is a signed integer. The result is `None` when the input is
/// empty, has a dangling token, contains an unparsable number or an
/// unknown unit, or when an accumulator overflows.
///
/// Recognised units (case-insensitive, optional trailing `s`):
/// `microsecond`, `millisecond`, `second`, `minute`, `hour`, `day`,
/// `week`, `month`, `year`. `week` widens to 7 days; `year` widens to
/// 12 months.
pub fn parse_interval_text(s: &str) -> Option<(i32, i64)> {
    /// Which accumulator a unit feeds, and its multiplier into it.
    enum Scale {
        Micros(i64),
        Months(i32),
    }

    let tokens: Vec<&str> = s.split_whitespace().collect();
    let pairs = tokens.chunks_exact(2);
    // Reject empty input and any odd token count before parsing anything.
    if tokens.is_empty() || !pairs.remainder().is_empty() {
        return None;
    }

    let mut total_months: i32 = 0;
    let mut total_micros: i64 = 0;
    for pair in pairs {
        let count: i64 = pair[0].parse().ok()?;
        let lowered = pair[1].to_ascii_lowercase();
        let scale = match lowered.strip_suffix('s').unwrap_or(&lowered) {
            "microsecond" => Scale::Micros(1),
            "millisecond" => Scale::Micros(1_000),
            "second" => Scale::Micros(1_000_000),
            "minute" => Scale::Micros(60_000_000),
            "hour" => Scale::Micros(3_600_000_000),
            "day" => Scale::Micros(86_400_000_000),
            "week" => Scale::Micros(604_800_000_000),
            "month" => Scale::Months(1),
            "year" => Scale::Months(12),
            _ => return None,
        };
        match scale {
            Scale::Micros(per_unit) => {
                total_micros = total_micros.checked_add(count.checked_mul(per_unit)?)?;
            }
            Scale::Months(per_unit) => {
                let count32 = i32::try_from(count).ok()?;
                total_months = total_months.checked_add(count32.checked_mul(per_unit)?)?;
            }
        }
    }
    Some((total_months, total_micros))
}
5888
5889/// v7.12.4 — map a bare type-name identifier (the form that
5890/// appears in a function arg list or RETURNS clause) to a
5891/// [`ColumnTypeName`]. Returns `None` for unknown / extension
5892/// types so the caller can preserve them as
5893/// [`FunctionArgType::Raw`] / [`FunctionReturn::Other`].
5894///
5895/// Subset of the full column-type grammar — we deliberately
5896/// don't parse parameterised forms (`VARCHAR(n)`, `NUMERIC(p,s)`)
5897/// here because function-arg types in v7.12.4 are mostly the
5898/// bare form (`text`, `int`, `bytea`, …).
5899fn map_type_ident_to_column_type_name(ident: &str) -> Option<ColumnTypeName> {
5900    Some(match ident.to_ascii_lowercase().as_str() {
5901        "smallint" | "tinyint" => ColumnTypeName::SmallInt,
5902        "int" | "integer" | "mediumint" => ColumnTypeName::Int,
5903        "bigint" => ColumnTypeName::BigInt,
5904        "float" | "double" | "real" => ColumnTypeName::Float,
5905        "text" => ColumnTypeName::Text,
5906        "bool" | "boolean" => ColumnTypeName::Bool,
5907        "date" => ColumnTypeName::Date,
5908        "timestamp" | "datetime" => ColumnTypeName::Timestamp,
5909        "timestamptz" => ColumnTypeName::Timestamptz,
5910        "json" => ColumnTypeName::Json,
5911        "jsonb" => ColumnTypeName::Jsonb,
5912        "bytea" | "bytes" => ColumnTypeName::Bytes,
5913        "tsvector" => ColumnTypeName::TsVector,
5914        "tsquery" => ColumnTypeName::TsQuery,
5915        _ => return None,
5916    })
5917}
5918
5919/// v7.12.4 — parse a PL/pgSQL function body (the bytes between
5920/// `$$ ... $$`). Returns the parsed `BEGIN ... END;` block.
5921///
5922/// v7.12.4 grammar (strict subset — IF / LOOP / DECLARE / RAISE
5923/// / embedded SQL land in v7.12.5+):
5924///
5925/// ```text
5926///   body          := [ws] block [ws]
5927///   block         := BEGIN stmt ( ; stmt )* [ ; ] END [ ; ]
5928///   stmt          := assign | return
5929///   assign        := assign_target := expr
5930///   assign_target := ( NEW | OLD ) . ident | ident
5931///   return        := RETURN ( NEW | OLD | NULL | expr )
5932/// ```
5933///
5934/// `expr` is parsed by recursing into the regular `Parser` — so a
5935/// PL/pgSQL `NEW.search_vector := to_tsvector('english',
5936/// NEW.subject || ' ' || NEW.sender)` body shape works without
5937/// the body parser knowing what `to_tsvector` is.
5938///
5939/// Errors here cause the caller to fall back to
5940/// `FunctionBody::Raw` — keeping the CREATE FUNCTION DDL itself
5941/// successful, but the executor will refuse to invoke the
5942/// function with an "unparseable body" error.
5943/// v7.12.4 — public alias for [`parse_plpgsql_body`] re-exported
5944/// from the crate root as `spg_sql::parse_function_body`.
5945pub fn parse_function_body(body: &str) -> Result<PlPgSqlBlock, ParseError> {
5946    parse_plpgsql_body(body)
5947}
5948
5949fn parse_plpgsql_body(body: &str) -> Result<PlPgSqlBlock, ParseError> {
5950    // Use the regular lexer on the body text. The trailing
5951    // `END;` may or may not have a semicolon; the lexer treats
5952    // both forms identically.
5953    let tokens = lexer::tokenize(body).map_err(|e| ParseError {
5954        message: alloc::format!("plpgsql body lex error: {e}"),
5955        token_pos: 0,
5956    })?;
5957    let mut parser = Parser::new(tokens);
5958    parser.parse_plpgsql_block()
5959}
5960
5961#[cfg(test)]
5962mod tests {
5963    use super::*;
5964    use alloc::string::ToString;
5965
5966    fn parse(s: &str) -> Statement {
5967        parse_statement(s).expect("parse ok")
5968    }
5969
5970    fn lit_int(n: i64) -> Expr {
5971        Expr::Literal(Literal::Integer(n))
5972    }
5973
5974    fn col(name: &str) -> Expr {
5975        Expr::Column(ColumnName {
5976            qualifier: None,
5977            name: name.into(),
5978        })
5979    }
5980
5981    #[test]
5982    fn select_single_integer() {
5983        let s = parse("SELECT 1");
5984        let Statement::Select(s) = s else {
5985            panic!("expected SELECT")
5986        };
5987        assert_eq!(s.items.len(), 1);
5988        assert!(s.from.is_none());
5989        assert!(s.where_.is_none());
5990    }
5991
5992    #[test]
5993    fn select_multiple_literal_kinds() {
5994        let s = parse("SELECT 1, 'hi', NULL, TRUE, 1.5");
5995        let Statement::Select(s) = s else {
5996            panic!("expected SELECT")
5997        };
5998        assert_eq!(s.items.len(), 5);
5999    }
6000
6001    #[test]
6002    fn select_wildcard_from_table() {
6003        let s = parse("SELECT * FROM users");
6004        let Statement::Select(s) = s else {
6005            panic!("expected SELECT")
6006        };
6007        assert!(matches!(s.items[..], [SelectItem::Wildcard]));
6008        assert_eq!(s.from.as_ref().unwrap().primary.name, "users");
6009    }
6010
6011    #[test]
6012    fn select_with_table_alias() {
6013        let s = parse("SELECT * FROM users AS u");
6014        let Statement::Select(s) = s else {
6015            panic!("expected SELECT")
6016        };
6017        let t = &s.from.as_ref().unwrap().primary;
6018        assert_eq!(t.name, "users");
6019        assert_eq!(t.alias.as_deref(), Some("u"));
6020    }
6021
6022    #[test]
6023    fn select_with_where_eq() {
6024        let s = parse("SELECT a FROM t WHERE a = 1");
6025        let Statement::Select(s) = s else {
6026            panic!("expected SELECT")
6027        };
6028        let w = s.where_.unwrap();
6029        assert_eq!(
6030            w,
6031            Expr::Binary {
6032                lhs: Box::new(col("a")),
6033                op: BinOp::Eq,
6034                rhs: Box::new(lit_int(1)),
6035            }
6036        );
6037    }
6038
6039    #[test]
6040    fn arithmetic_precedence() {
6041        let s = parse("SELECT 1 + 2 * 3");
6042        let Statement::Select(s) = s else {
6043            panic!("expected SELECT")
6044        };
6045        let SelectItem::Expr { expr, .. } = &s.items[0] else {
6046            panic!("wildcard?")
6047        };
6048        assert_eq!(
6049            expr,
6050            &Expr::Binary {
6051                lhs: Box::new(lit_int(1)),
6052                op: BinOp::Add,
6053                rhs: Box::new(Expr::Binary {
6054                    lhs: Box::new(lit_int(2)),
6055                    op: BinOp::Mul,
6056                    rhs: Box::new(lit_int(3)),
6057                }),
6058            }
6059        );
6060    }
6061
6062    #[test]
6063    fn parentheses_override_precedence() {
6064        let s = parse("SELECT (1 + 2) * 3");
6065        let Statement::Select(s) = s else {
6066            panic!("expected SELECT")
6067        };
6068        let SelectItem::Expr { expr, .. } = &s.items[0] else {
6069            panic!()
6070        };
6071        assert_eq!(
6072            expr,
6073            &Expr::Binary {
6074                lhs: Box::new(Expr::Binary {
6075                    lhs: Box::new(lit_int(1)),
6076                    op: BinOp::Add,
6077                    rhs: Box::new(lit_int(2)),
6078                }),
6079                op: BinOp::Mul,
6080                rhs: Box::new(lit_int(3)),
6081            }
6082        );
6083    }
6084
6085    #[test]
6086    fn not_binds_below_comparison() {
6087        // `NOT a = 1` should parse as `NOT (a = 1)`.
6088        let s = parse("SELECT NOT a = 1 FROM t");
6089        let Statement::Select(s) = s else {
6090            panic!("expected SELECT")
6091        };
6092        let SelectItem::Expr { expr, .. } = &s.items[0] else {
6093            panic!()
6094        };
6095        assert_eq!(
6096            expr,
6097            &Expr::Unary {
6098                op: UnOp::Not,
6099                expr: Box::new(Expr::Binary {
6100                    lhs: Box::new(col("a")),
6101                    op: BinOp::Eq,
6102                    rhs: Box::new(lit_int(1)),
6103                }),
6104            }
6105        );
6106    }
6107
6108    #[test]
6109    fn unary_minus_binds_above_multiplication() {
6110        // `-a * 2` should be `(-a) * 2`.
6111        let s = parse("SELECT -a * 2 FROM t");
6112        let Statement::Select(s) = s else {
6113            panic!("expected SELECT")
6114        };
6115        let SelectItem::Expr { expr, .. } = &s.items[0] else {
6116            panic!()
6117        };
6118        assert_eq!(
6119            expr,
6120            &Expr::Binary {
6121                lhs: Box::new(Expr::Unary {
6122                    op: UnOp::Neg,
6123                    expr: Box::new(col("a")),
6124                }),
6125                op: BinOp::Mul,
6126                rhs: Box::new(lit_int(2)),
6127            }
6128        );
6129    }
6130
6131    #[test]
6132    fn qualified_column() {
6133        let s = parse("SELECT t.col FROM t");
6134        let Statement::Select(s) = s else {
6135            panic!("expected SELECT")
6136        };
6137        let SelectItem::Expr { expr, .. } = &s.items[0] else {
6138            panic!()
6139        };
6140        assert_eq!(
6141            expr,
6142            &Expr::Column(ColumnName {
6143                qualifier: Some("t".into()),
6144                name: "col".into()
6145            })
6146        );
6147    }
6148
6149    #[test]
6150    fn select_item_alias_with_as() {
6151        let s = parse("SELECT a AS y FROM t");
6152        let Statement::Select(s) = s else {
6153            panic!("expected SELECT")
6154        };
6155        let SelectItem::Expr { alias, .. } = &s.items[0] else {
6156            panic!()
6157        };
6158        assert_eq!(alias.as_deref(), Some("y"));
6159    }
6160
6161    #[test]
6162    fn trailing_semicolon_accepted() {
6163        let s = parse("SELECT 1;");
6164        let Statement::Select(s) = s else {
6165            panic!("expected SELECT")
6166        };
6167        assert_eq!(s.items.len(), 1);
6168    }
6169
6170    #[test]
6171    fn boolean_chain_with_and_or_not() {
6172        // (NOT a) OR (b AND (NOT c))
6173        let s = parse("SELECT NOT a OR b AND NOT c FROM t");
6174        let Statement::Select(s) = s else {
6175            panic!("expected SELECT")
6176        };
6177        let SelectItem::Expr { expr, .. } = &s.items[0] else {
6178            panic!()
6179        };
6180        let expected = Expr::Binary {
6181            lhs: Box::new(Expr::Unary {
6182                op: UnOp::Not,
6183                expr: Box::new(col("a")),
6184            }),
6185            op: BinOp::Or,
6186            rhs: Box::new(Expr::Binary {
6187                lhs: Box::new(col("b")),
6188                op: BinOp::And,
6189                rhs: Box::new(Expr::Unary {
6190                    op: UnOp::Not,
6191                    expr: Box::new(col("c")),
6192                }),
6193            }),
6194        };
6195        assert_eq!(expr, &expected);
6196    }
6197
6198    #[test]
6199    fn empty_input_errors() {
6200        let err = parse_statement("").unwrap_err();
6201        assert!(err.message.contains("SELECT"));
6202    }
6203
6204    #[test]
6205    fn unmatched_paren_errors() {
6206        assert!(parse_statement("SELECT (1 + 2").is_err());
6207    }
6208
6209    #[test]
6210    fn display_round_trip_simple_select() {
6211        let original = parse("SELECT a + 1 FROM t WHERE a > 0");
6212        let text = original.to_string();
6213        let again = parse_statement(&text).expect("re-parse");
6214        assert_eq!(original, again);
6215    }
6216
6217    // --- CREATE TABLE & INSERT (v0.3) ---------------------------------------
6218
6219    #[test]
6220    fn create_table_single_column() {
6221        let s = parse("CREATE TABLE foo (a INT)");
6222        let Statement::CreateTable(c) = s else {
6223            panic!("expected CreateTable")
6224        };
6225        assert_eq!(c.name, "foo");
6226        assert_eq!(c.columns.len(), 1);
6227        assert_eq!(c.columns[0].name, "a");
6228        assert_eq!(c.columns[0].ty, ColumnTypeName::Int);
6229        assert!(c.columns[0].nullable);
6230    }
6231
6232    #[test]
6233    fn create_table_multi_column_with_not_null_mix() {
6234        let s = parse("CREATE TABLE u (id INT NOT NULL, name TEXT, score FLOAT NOT NULL, ok BOOL)");
6235        let Statement::CreateTable(c) = s else {
6236            panic!()
6237        };
6238        assert_eq!(c.columns.len(), 4);
6239        assert_eq!(c.columns[0].ty, ColumnTypeName::Int);
6240        assert!(!c.columns[0].nullable);
6241        assert_eq!(c.columns[1].ty, ColumnTypeName::Text);
6242        assert!(c.columns[1].nullable);
6243        assert_eq!(c.columns[2].ty, ColumnTypeName::Float);
6244        assert!(!c.columns[2].nullable);
6245        assert_eq!(c.columns[3].ty, ColumnTypeName::Bool);
6246    }
6247
6248    #[test]
6249    fn create_table_bigint_supported() {
6250        let s = parse("CREATE TABLE accounts (id BIGINT NOT NULL)");
6251        let Statement::CreateTable(c) = s else {
6252            panic!()
6253        };
6254        assert_eq!(c.columns[0].ty, ColumnTypeName::BigInt);
6255    }
6256
6257    #[test]
6258    fn create_table_vector_default_is_f32() {
6259        let s = parse("CREATE TABLE t (v VECTOR(128))");
6260        let Statement::CreateTable(c) = s else {
6261            panic!()
6262        };
6263        assert_eq!(
6264            c.columns[0].ty,
6265            ColumnTypeName::Vector {
6266                dim: 128,
6267                encoding: VecEncoding::F32,
6268            },
6269        );
6270    }
6271
6272    #[test]
6273    fn create_table_vector_using_sq8() {
6274        // v6.0.1: `USING SQ8` selects scalar-quantised encoding.
6275        // Case-insensitive on both `USING` and the encoding name.
6276        for sql in [
6277            "CREATE TABLE t (v VECTOR(128) USING SQ8)",
6278            "CREATE TABLE t (v VECTOR(128) using sq8)",
6279        ] {
6280            let s = parse(sql);
6281            let Statement::CreateTable(c) = s else {
6282                panic!()
6283            };
6284            assert_eq!(
6285                c.columns[0].ty,
6286                ColumnTypeName::Vector {
6287                    dim: 128,
6288                    encoding: VecEncoding::Sq8,
6289                },
6290                "{sql}",
6291            );
6292        }
6293    }
6294
6295    #[test]
6296    fn create_table_vector_using_unknown_errors() {
6297        let err = parse_statement("CREATE TABLE t (v VECTOR(8) USING PQ8)").unwrap_err();
6298        assert!(
6299            err.message.contains("unknown vector encoding"),
6300            "got: {}",
6301            err.message
6302        );
6303    }
6304
6305    #[test]
6306    fn vector_using_sq8_display_roundtrips() {
6307        // The Display impl must produce text that re-parses to the
6308        // same AST. Guard for the v6.0.1 `USING SQ8` suffix.
6309        let s = parse("CREATE TABLE t (v VECTOR(64) USING SQ8)");
6310        let Statement::CreateTable(c) = s else {
6311            panic!()
6312        };
6313        assert_eq!(c.columns[0].ty.to_string(), "VECTOR(64) USING SQ8");
6314    }
6315
6316    #[test]
6317    fn parser_recognises_placeholders() {
6318        use crate::ast::{Expr, SelectItem, Statement};
6319        // $N in expression position parses as Expr::Placeholder(N).
6320        let s = parse("SELECT $1, $2 + 1 FROM t WHERE x = $3");
6321        let Statement::Select(sel) = s else { panic!() };
6322        assert!(matches!(
6323            sel.items[0],
6324            SelectItem::Expr {
6325                expr: Expr::Placeholder(1),
6326                alias: None
6327            }
6328        ));
6329        // $2 + 1
6330        let SelectItem::Expr {
6331            expr: Expr::Binary { lhs, rhs, .. },
6332            ..
6333        } = &sel.items[1]
6334        else {
6335            panic!()
6336        };
6337        assert!(matches!(**lhs, Expr::Placeholder(2)));
6338        assert!(matches!(**rhs, Expr::Literal(Literal::Integer(1))));
6339        // WHERE x = $3
6340        let Some(Expr::Binary { rhs, .. }) = sel.where_.as_ref() else {
6341            panic!()
6342        };
6343        assert!(matches!(**rhs, Expr::Placeholder(3)));
6344    }
6345
6346    #[test]
6347    fn parser_rejects_dollar_zero() {
6348        // $0 is not valid in PG; the lexer rejects it.
6349        assert!(parse_statement("SELECT $0").is_err());
6350    }
6351
6352    #[test]
6353    fn placeholder_display_roundtrips() {
6354        // The Display impl must produce text that re-lexes to the
6355        // same Placeholder token.
6356        let s = parse("SELECT $42 FROM t");
6357        let printed = s.to_string();
6358        assert!(printed.contains("$42"));
6359        let again = parse(&printed);
6360        assert_eq!(s, again);
6361    }
6362
6363    #[test]
6364    fn alter_index_rebuild_bare() {
6365        use crate::ast::{AlterIndexTarget, Statement};
6366        let s = parse("ALTER INDEX my_idx REBUILD");
6367        let Statement::AlterIndex(a) = s else {
6368            panic!("expected AlterIndex, got {s:?}")
6369        };
6370        assert_eq!(a.name, "my_idx");
6371        assert_eq!(a.target, AlterIndexTarget::Rebuild { encoding: None });
6372    }
6373
6374    #[test]
6375    fn alter_index_rebuild_with_encoding() {
6376        use crate::ast::{AlterIndexTarget, Statement};
6377        for (sql, want) in [
6378            (
6379                "ALTER INDEX my_idx REBUILD WITH (encoding = F32)",
6380                VecEncoding::F32,
6381            ),
6382            (
6383                "ALTER INDEX my_idx REBUILD WITH (encoding = sq8)",
6384                VecEncoding::Sq8,
6385            ),
6386            (
6387                "ALTER INDEX my_idx REBUILD WITH (encoding = HALF)",
6388                VecEncoding::F16,
6389            ),
6390        ] {
6391            let s = parse(sql);
6392            let Statement::AlterIndex(a) = s else {
6393                panic!("{sql}: expected AlterIndex")
6394            };
6395            assert_eq!(a.name, "my_idx");
6396            assert_eq!(
6397                a.target,
6398                AlterIndexTarget::Rebuild {
6399                    encoding: Some(want)
6400                },
6401                "{sql}"
6402            );
6403        }
6404    }
6405
6406    #[test]
6407    fn alter_index_rebuild_unknown_encoding_errors() {
6408        let err = parse_statement("ALTER INDEX my_idx REBUILD WITH (encoding = PQ8)").unwrap_err();
6409        assert!(
6410            err.message.contains("unknown vector encoding"),
6411            "got: {}",
6412            err.message
6413        );
6414    }
6415
6416    #[test]
6417    fn alter_index_rebuild_display_roundtrips() {
6418        for (input, want) in [
6419            ("ALTER INDEX my_idx REBUILD", "ALTER INDEX my_idx REBUILD"),
6420            (
6421                "ALTER INDEX my_idx REBUILD WITH (encoding = SQ8)",
6422                "ALTER INDEX my_idx REBUILD WITH (encoding = SQ8)",
6423            ),
6424            (
6425                "ALTER INDEX my_idx REBUILD WITH (encoding = HALF)",
6426                "ALTER INDEX my_idx REBUILD WITH (encoding = HALF)",
6427            ),
6428        ] {
6429            let s = parse(input);
6430            assert_eq!(s.to_string(), want);
6431        }
6432    }
6433
6434    #[test]
6435    fn create_table_unknown_type_errors() {
6436        // v4.9: JSON is now real; pick an actually unsupported keyword
6437        // (XML never landed and isn't planned).
6438        let err = parse_statement("CREATE TABLE x (a xml)").unwrap_err();
6439        assert!(err.message.contains("unsupported column type"));
6440    }
6441
6442    #[test]
6443    fn create_table_missing_table_keyword_errors() {
6444        assert!(parse_statement("CREATE x (a INT)").is_err());
6445    }
6446
6447    #[test]
6448    fn insert_single_value() {
6449        let s = parse("INSERT INTO foo VALUES (42)");
6450        let Statement::Insert(i) = s else {
6451            panic!("expected Insert")
6452        };
6453        assert_eq!(i.table, "foo");
6454        assert_eq!(i.rows.len(), 1);
6455        assert_eq!(i.rows[0].len(), 1);
6456        assert!(matches!(i.rows[0][0], Expr::Literal(Literal::Integer(42))));
6457    }
6458
6459    #[test]
6460    fn insert_multi_value_with_mixed_literals() {
6461        let s = parse("INSERT INTO foo VALUES (1, 'hi', 3.14, TRUE, NULL)");
6462        let Statement::Insert(i) = s else { panic!() };
6463        assert_eq!(i.rows.len(), 1);
6464        assert_eq!(i.rows[0].len(), 5);
6465    }
6466
6467    #[test]
6468    fn insert_missing_into_errors() {
6469        assert!(parse_statement("INSERT foo VALUES (1)").is_err());
6470    }
6471
6472    #[test]
6473    fn create_table_round_trip() {
6474        let original =
6475            parse("CREATE TABLE foo (id BIGINT NOT NULL, label TEXT, score FLOAT NOT NULL)");
6476        let text = original.to_string();
6477        let again = parse_statement(&text).expect("re-parse");
6478        assert_eq!(original, again);
6479    }
6480
6481    #[test]
6482    fn insert_round_trip_with_negation_and_string() {
6483        let original = parse("INSERT INTO t VALUES (-1, 'it''s', NULL)");
6484        let text = original.to_string();
6485        let again = parse_statement(&text).expect("re-parse");
6486        assert_eq!(original, again);
6487    }
6488
6489    #[test]
6490    fn unknown_keyword_at_statement_start_errors() {
6491        // v4.4: UPDATE is real SQL now. Use a fabricated keyword so
6492        // the top-level dispatch still has no branch to take.
6493        let err = parse_statement("FROBNICATE foo SET x = 1").unwrap_err();
6494        assert!(err.message.contains("expected SELECT"));
6495    }
6496
6497    // --- v0.8 CREATE INDEX --------------------------------------------------
6498
6499    #[test]
6500    fn create_index_basic() {
6501        let s = parse("CREATE INDEX idx_id ON users (id)");
6502        let Statement::CreateIndex(c) = s else {
6503            panic!("expected CreateIndex")
6504        };
6505        assert_eq!(c.name, "idx_id");
6506        assert_eq!(c.table, "users");
6507        assert_eq!(c.column, "id");
6508    }
6509
6510    #[test]
6511    fn create_index_missing_on_errors() {
6512        assert!(parse_statement("CREATE INDEX foo users (id)").is_err());
6513    }
6514
6515    #[test]
6516    fn create_index_missing_paren_errors() {
6517        assert!(parse_statement("CREATE INDEX foo ON users id").is_err());
6518    }
6519
6520    #[test]
6521    fn create_index_round_trip() {
6522        let original = parse("CREATE INDEX by_name ON users (name)");
6523        let again = parse_statement(&original.to_string()).unwrap();
6524        assert_eq!(original, again);
6525    }
6526
6527    // --- v7.9.29 CREATE UNIQUE INDEX [WHERE pred] (mailrs K1) -------------
6528
6529    #[test]
6530    fn create_unique_index_basic() {
6531        let s = parse("CREATE UNIQUE INDEX uq_x ON t (a)");
6532        let Statement::CreateIndex(c) = s else {
6533            panic!("expected CreateIndex");
6534        };
6535        assert!(c.is_unique);
6536        assert_eq!(c.column, "a");
6537        assert!(c.partial_predicate.is_none());
6538    }
6539
6540    #[test]
6541    fn create_unique_index_partial() {
6542        // mailrs's email_templates "one default per user" shape.
6543        let s = parse(
6544            "CREATE UNIQUE INDEX idx_email_templates_user_default \
6545             ON email_templates (user_address) WHERE is_default = true",
6546        );
6547        let Statement::CreateIndex(c) = s else {
6548            panic!("expected CreateIndex");
6549        };
6550        assert!(c.is_unique);
6551        assert_eq!(c.table, "email_templates");
6552        assert_eq!(c.column, "user_address");
6553        assert!(c.partial_predicate.is_some());
6554    }
6555
6556    #[test]
6557    fn create_unique_index_composite_with_predicate() {
6558        // mailrs's calendar_events instance: composite columns.
6559        let s = parse(
6560            "CREATE UNIQUE INDEX uq_calendar_events_instance \
6561             ON calendar_events (calendar_id, uid, recurrence_id) \
6562             WHERE recurrence_id IS NOT NULL",
6563        );
6564        let Statement::CreateIndex(c) = s else {
6565            panic!("expected CreateIndex");
6566        };
6567        assert!(c.is_unique);
6568        assert_eq!(c.column, "calendar_id");
6569        assert_eq!(
6570            c.extra_columns,
6571            vec!["uid".to_string(), "recurrence_id".to_string()]
6572        );
6573        assert!(c.partial_predicate.is_some());
6574    }
6575
6576    #[test]
6577    fn create_unique_index_using_btree_ok() {
6578        let s = parse("CREATE UNIQUE INDEX uq_x ON t USING btree (a)");
6579        assert!(matches!(s, Statement::CreateIndex(ref c) if c.is_unique));
6580    }
6581
6582    #[test]
6583    fn create_unique_index_using_hnsw_rejected() {
6584        let err =
6585            parse_statement("CREATE UNIQUE INDEX uq_v ON t USING hnsw (embedding)").unwrap_err();
6586        assert!(err.message.contains("UNIQUE"), "{}", err.message);
6587    }
6588
6589    #[test]
6590    fn create_unique_index_round_trip() {
6591        let original = parse(
6592            "CREATE UNIQUE INDEX uq_calendar_events_master \
6593             ON calendar_events (calendar_id, uid) WHERE recurrence_id IS NULL",
6594        );
6595        let again = parse_statement(&original.to_string()).unwrap();
6596        assert_eq!(original, again);
6597    }
6598
6599    #[test]
6600    fn create_unique_without_index_errors() {
6601        let err = parse_statement("CREATE UNIQUE TABLE t (a INT)").unwrap_err();
6602        assert!(err.message.contains("INDEX"), "{}", err.message);
6603    }
6604
6605    // --- v7.10.4 BYTES / BYTEA column type (Epic 1) ----------------------
6606
6607    #[test]
6608    fn create_table_bytea_column() {
6609        let s = parse("CREATE TABLE t (id INT NOT NULL, payload BYTEA NOT NULL)");
6610        let Statement::CreateTable(c) = s else {
6611            panic!("expected CreateTable");
6612        };
6613        assert_eq!(c.columns.len(), 2);
6614        assert_eq!(c.columns[1].ty, ColumnTypeName::Bytes);
6615        assert!(!c.columns[1].nullable);
6616    }
6617
6618    #[test]
6619    fn create_table_bytes_alias_column() {
6620        let s = parse("CREATE TABLE t (blob BYTES)");
6621        let Statement::CreateTable(c) = s else {
6622            panic!("expected CreateTable");
6623        };
6624        assert_eq!(c.columns[0].ty, ColumnTypeName::Bytes);
6625    }
6626
6627    #[test]
6628    fn bytea_round_trip_display() {
6629        let original = parse("CREATE TABLE t (a BYTEA NOT NULL)");
6630        let again = parse_statement(&original.to_string()).unwrap();
6631        assert_eq!(original, again);
6632    }
6633
6634    // --- v0.9 transactions -------------------------------------------------
6635
6636    #[test]
6637    fn begin_commit_rollback_parse_as_unit_variants() {
6638        assert_eq!(parse("BEGIN"), Statement::Begin);
6639        assert_eq!(parse("COMMIT"), Statement::Commit);
6640        assert_eq!(parse("ROLLBACK"), Statement::Rollback);
6641        // Trailing semicolons accepted too.
6642        assert_eq!(parse("BEGIN;"), Statement::Begin);
6643    }
6644
6645    // --- v1.2: pgvector distance ops + ::vector cast --------------------
6646
6647    #[test]
6648    fn inner_product_binop_parses() {
6649        let s = parse("SELECT v <#> [1.0, 2.0] FROM t");
6650        let Statement::Select(s) = s else { panic!() };
6651        let SelectItem::Expr { expr, .. } = &s.items[0] else {
6652            panic!()
6653        };
6654        assert!(matches!(
6655            expr,
6656            Expr::Binary {
6657                op: BinOp::InnerProduct,
6658                ..
6659            }
6660        ));
6661    }
6662
6663    #[test]
6664    fn cosine_distance_binop_parses() {
6665        let s = parse("SELECT v <=> [1.0, 2.0] FROM t");
6666        let Statement::Select(s) = s else { panic!() };
6667        let SelectItem::Expr { expr, .. } = &s.items[0] else {
6668            panic!()
6669        };
6670        assert!(matches!(
6671            expr,
6672            Expr::Binary {
6673                op: BinOp::CosineDistance,
6674                ..
6675            }
6676        ));
6677    }
6678
6679    #[test]
6680    fn vector_cast_postfix_wraps_string_literal() {
6681        let s = parse("SELECT '[1,2,3]'::vector FROM t");
6682        let Statement::Select(s) = s else { panic!() };
6683        let SelectItem::Expr { expr, .. } = &s.items[0] else {
6684            panic!()
6685        };
6686        assert!(matches!(
6687            expr,
6688            Expr::Cast {
6689                target: CastTarget::Vector,
6690                ..
6691            }
6692        ));
6693    }
6694
6695    #[test]
6696    fn unsupported_cast_target_errors() {
6697        // `::numeric` isn't in the v1.3 cast target set.
6698        let err = parse_statement("SELECT 1::numeric FROM t").unwrap_err();
6699        assert!(err.message.contains("unsupported cast target"));
6700    }
6701
6702    #[test]
6703    fn tx_statements_round_trip() {
6704        for q in ["BEGIN", "COMMIT", "ROLLBACK"] {
6705            let original = parse(q);
6706            let again = parse_statement(&original.to_string()).unwrap();
6707            assert_eq!(original, again);
6708        }
6709    }
6710
6711    #[test]
6712    fn interval_text_parsing_units() {
6713        // Single unit.
6714        assert_eq!(parse_interval_text("1 day"), Some((0, 86_400_000_000)));
6715        assert_eq!(parse_interval_text("1 second"), Some((0, 1_000_000)));
6716        assert_eq!(parse_interval_text("1 month"), Some((1, 0)));
6717        assert_eq!(parse_interval_text("2 years"), Some((24, 0)));
6718        // Compound spans accumulate.
6719        assert_eq!(parse_interval_text("1 year 6 months"), Some((18, 0)));
6720        assert_eq!(
6721            parse_interval_text("1 day 2 hours"),
6722            Some((0, 86_400_000_000 + 7_200_000_000))
6723        );
6724        // Negative numbers carry through.
6725        assert_eq!(parse_interval_text("-1 day"), Some((0, -86_400_000_000)));
6726        // Bad shapes return None.
6727        assert_eq!(parse_interval_text(""), None);
6728        assert_eq!(parse_interval_text("garbage"), None);
6729        assert_eq!(parse_interval_text("1 fortnight"), None);
6730        assert_eq!(parse_interval_text("1"), None);
6731    }
6732
6733    #[test]
6734    fn interval_literal_roundtrips_via_display() {
6735        let parsed = parse("SELECT INTERVAL '1 day 2 hours'");
6736        let s = parsed.to_string();
6737        // Display preserves the original text verbatim.
6738        assert!(s.contains("INTERVAL '1 day 2 hours'"), "got: {s}");
6739        // And re-parsing yields a structurally equal statement.
6740        let again = parse_statement(&s).unwrap();
6741        assert_eq!(parsed, again);
6742    }
6743
6744    // ── v6.1.2: CREATE / DROP PUBLICATION ────────────────────
6745
6746    #[test]
6747    fn parser_recognises_create_publication_bare() {
6748        let s = parse("CREATE PUBLICATION pub_a");
6749        let Statement::CreatePublication(p) = s else {
6750            panic!("expected CreatePublication, got {s:?}")
6751        };
6752        assert_eq!(p.name, "pub_a");
6753        assert_eq!(p.scope, PublicationScope::AllTables);
6754    }
6755
6756    #[test]
6757    fn parser_recognises_create_publication_for_all_tables() {
6758        let s = parse("CREATE PUBLICATION pub_a FOR ALL TABLES");
6759        let Statement::CreatePublication(p) = s else {
6760            panic!("expected CreatePublication, got {s:?}")
6761        };
6762        assert_eq!(p.name, "pub_a");
6763        assert_eq!(p.scope, PublicationScope::AllTables);
6764    }
6765
6766    #[test]
6767    fn parser_recognises_drop_publication() {
6768        let s = parse("DROP PUBLICATION pub_a");
6769        let Statement::DropPublication(name) = s else {
6770            panic!("expected DropPublication, got {s:?}")
6771        };
6772        assert_eq!(name, "pub_a");
6773    }
6774
6775    #[test]
6776    fn parser_recognises_for_table_list() {
6777        let s = parse("CREATE PUBLICATION pub_a FOR TABLE t1, t2, t3");
6778        let Statement::CreatePublication(p) = s else {
6779            panic!("expected CreatePublication, got {s:?}")
6780        };
6781        assert_eq!(p.name, "pub_a");
6782        let PublicationScope::ForTables(ts) = p.scope else {
6783            panic!("expected ForTables scope")
6784        };
6785        assert_eq!(ts, alloc::vec!["t1", "t2", "t3"]);
6786    }
6787
6788    #[test]
6789    fn parser_recognises_for_tables_plural() {
6790        // PG 19 accepts both `FOR TABLE` and `FOR TABLES` — match.
6791        let s = parse("CREATE PUBLICATION pub_a FOR TABLES t1, t2");
6792        let Statement::CreatePublication(p) = s else {
6793            panic!("expected CreatePublication, got {s:?}")
6794        };
6795        let PublicationScope::ForTables(ts) = p.scope else {
6796            panic!("expected ForTables")
6797        };
6798        assert_eq!(ts, alloc::vec!["t1", "t2"]);
6799    }
6800
6801    #[test]
6802    fn parser_recognises_for_all_tables_except_list() {
6803        let s = parse("CREATE PUBLICATION p FOR ALL TABLES EXCEPT t1, t2");
6804        let Statement::CreatePublication(p) = s else {
6805            panic!()
6806        };
6807        let PublicationScope::AllTablesExcept(ts) = p.scope else {
6808            panic!("expected AllTablesExcept")
6809        };
6810        assert_eq!(ts, alloc::vec!["t1", "t2"]);
6811    }
6812
6813    #[test]
6814    fn parser_rejects_for_table_with_empty_list() {
6815        // `FOR TABLE` with nothing after is a parse error.
6816        let err = parse_statement("CREATE PUBLICATION p FOR TABLE")
6817            .expect_err("must error on empty list");
6818        // No specific message asserted — the call falls through to
6819        // expect_ident_like which yields "expected identifier, got …".
6820        assert!(!err.message.is_empty());
6821    }
6822
6823    #[test]
6824    fn parser_recognises_show_publications() {
6825        // v6.1.3 — SHOW PUBLICATIONS lands here. PUBLICATIONS is a
6826        // bare ident in this position, NOT a reserved keyword.
6827        let s = parse("SHOW PUBLICATIONS");
6828        assert!(matches!(s, Statement::ShowPublications));
6829    }
6830
6831    // ── v6.1.4: CREATE / DROP SUBSCRIPTION + SHOW SUBSCRIPTIONS ─
6832
6833    #[test]
6834    fn parser_recognises_create_subscription_single_publication() {
6835        let s = parse(
6836            "CREATE SUBSCRIPTION sub_a CONNECTION 'host=127.0.0.1 port=20002' PUBLICATION pub_a",
6837        );
6838        let Statement::CreateSubscription(c) = s else {
6839            panic!("expected CreateSubscription, got {s:?}")
6840        };
6841        assert_eq!(c.name, "sub_a");
6842        assert_eq!(c.conn_str, "host=127.0.0.1 port=20002");
6843        assert_eq!(c.publications, alloc::vec!["pub_a"]);
6844    }
6845
6846    #[test]
6847    fn parser_recognises_create_subscription_multi_publication() {
6848        let s = parse("CREATE SUBSCRIPTION sub_a CONNECTION 'host=h' PUBLICATION p1, p2, p3");
6849        let Statement::CreateSubscription(c) = s else {
6850            panic!()
6851        };
6852        assert_eq!(c.publications, alloc::vec!["p1", "p2", "p3"]);
6853    }
6854
6855    #[test]
6856    fn parser_rejects_create_subscription_missing_connection() {
6857        let err = parse_statement("CREATE SUBSCRIPTION s PUBLICATION p")
6858            .expect_err("must error on missing CONNECTION");
6859        assert!(err.message.contains("CONNECTION"), "got: {}", err.message);
6860    }
6861
6862    #[test]
6863    fn parser_rejects_create_subscription_missing_publication() {
6864        let err = parse_statement("CREATE SUBSCRIPTION s CONNECTION 'host=x'")
6865            .expect_err("must error on missing PUBLICATION");
6866        assert!(err.message.contains("PUBLICATION"), "got: {}", err.message);
6867    }
6868
6869    #[test]
6870    fn parser_recognises_drop_subscription() {
6871        let s = parse("DROP SUBSCRIPTION sub_a");
6872        let Statement::DropSubscription(name) = s else {
6873            panic!("expected DropSubscription, got {s:?}")
6874        };
6875        assert_eq!(name, "sub_a");
6876    }
6877
6878    #[test]
6879    fn parser_recognises_show_subscriptions() {
6880        let s = parse("SHOW SUBSCRIPTIONS");
6881        assert!(matches!(s, Statement::ShowSubscriptions));
6882    }
6883
6884    #[test]
6885    fn parser_recognises_wait_for_wal_position_no_timeout() {
6886        let s = parse("WAIT FOR WAL POSITION 12345");
6887        let Statement::WaitForWalPosition { pos, timeout_ms } = s else {
6888            panic!("expected WaitForWalPosition, got {s:?}")
6889        };
6890        assert_eq!(pos, 12345);
6891        assert!(timeout_ms.is_none());
6892    }
6893
6894    #[test]
6895    fn parser_recognises_wait_for_wal_position_with_timeout() {
6896        let s = parse("WAIT FOR WAL POSITION 67890 WITH TIMEOUT 5000");
6897        let Statement::WaitForWalPosition { pos, timeout_ms } = s else {
6898            panic!()
6899        };
6900        assert_eq!(pos, 67890);
6901        assert_eq!(timeout_ms, Some(5000));
6902    }
6903
6904    #[test]
6905    fn parser_rejects_wait_with_negative_position() {
6906        // The lexer treats `-` as a token; `expect_u64_literal`
6907        // only sees the Integer that follows, so the negative
6908        // arrives as a unary-minus expression at higher levels.
6909        // Bare `WAIT FOR WAL POSITION -1` thus surfaces as a
6910        // parse error one way or another.
6911        let err = parse_statement("WAIT FOR WAL POSITION -1").unwrap_err();
6912        assert!(!err.message.is_empty());
6913    }
6914
6915    #[test]
6916    fn parser_recognises_bare_analyze() {
6917        let s = parse("ANALYZE");
6918        assert!(matches!(s, Statement::Analyze(None)));
6919    }
6920
6921    #[test]
6922    fn parser_recognises_analyze_with_table() {
6923        let s = parse("ANALYZE users");
6924        let Statement::Analyze(Some(name)) = s else {
6925            panic!("expected Analyze, got {s:?}")
6926        };
6927        assert_eq!(name, "users");
6928    }
6929
6930    #[test]
6931    fn parser_recognises_analyze_with_quoted_table() {
6932        let s = parse("ANALYZE \"Mixed Case\"");
6933        let Statement::Analyze(Some(name)) = s else {
6934            panic!()
6935        };
6936        assert_eq!(name, "Mixed Case");
6937    }
6938
6939    #[test]
6940    fn parser_rejects_analyze_with_garbage_token() {
6941        let err = parse_statement("ANALYZE 42").expect_err("must error");
6942        assert!(!err.message.is_empty());
6943    }
6944
6945    #[test]
6946    fn analyze_display_roundtrips() {
6947        for sql in ["ANALYZE", "ANALYZE users"] {
6948            let s = parse(sql);
6949            let printed = s.to_string();
6950            let again = parse_statement(&printed)
6951                .unwrap_or_else(|e| panic!("re-parse failed for {printed:?}: {e}"));
6952            assert_eq!(s, again);
6953        }
6954    }
6955
6956    #[test]
6957    fn wait_for_display_roundtrips() {
6958        for sql in [
6959            "WAIT FOR WAL POSITION 12345",
6960            "WAIT FOR WAL POSITION 67890 WITH TIMEOUT 5000",
6961        ] {
6962            let s = parse(sql);
6963            let printed = s.to_string();
6964            let again = parse_statement(&printed)
6965                .unwrap_or_else(|e| panic!("re-parse failed for {printed:?}: {e}"));
6966            assert_eq!(s, again, "round-trip mismatch for {sql:?}");
6967        }
6968    }
6969
6970    #[test]
6971    fn subscription_ddl_display_roundtrips() {
6972        for sql in [
6973            "CREATE SUBSCRIPTION sub_a CONNECTION 'host=h port=20002' PUBLICATION pub_a",
6974            "CREATE SUBSCRIPTION sub_b CONNECTION 'host=h' PUBLICATION p1, p2",
6975            "DROP SUBSCRIPTION sub_a",
6976            "SHOW SUBSCRIPTIONS",
6977        ] {
6978            let s = parse(sql);
6979            let printed = s.to_string();
6980            let again = parse_statement(&printed)
6981                .unwrap_or_else(|e| panic!("re-parse failed for {printed:?}: {e}"));
6982            assert_eq!(s, again, "round-trip mismatch for {sql:?}");
6983        }
6984    }
6985
6986    #[test]
6987    fn parser_drop_dispatches_user_vs_publication() {
6988        // Pre-v6.1.2 DROP USER took the bare-ident path; v6.1.2
6989        // tokenises DROP. Both targets must still parse.
6990        let s = parse("DROP USER 'alice'");
6991        let Statement::DropUser(name) = s else {
6992            panic!("expected DropUser, got {s:?}")
6993        };
6994        assert_eq!(name, "alice");
6995        // And DROP PUBLICATION lands the new variant.
6996        let s = parse("DROP PUBLICATION p1");
6997        assert!(matches!(s, Statement::DropPublication(_)));
6998    }
6999
7000    #[test]
7001    fn publication_ddl_display_roundtrips() {
7002        // Every CREATE PUBLICATION variant must Display → parse →
7003        // same AST. v6.1.3 covers all three scope shapes.
7004        for sql in [
7005            "CREATE PUBLICATION pub_a",
7006            "CREATE PUBLICATION pub_a FOR ALL TABLES",
7007            "CREATE PUBLICATION pub_a FOR TABLE t1, t2",
7008            "CREATE PUBLICATION pub_a FOR ALL TABLES EXCEPT t1",
7009            "DROP PUBLICATION pub_a",
7010            "SHOW PUBLICATIONS",
7011        ] {
7012            let s = parse(sql);
7013            let printed = s.to_string();
7014            let again = parse_statement(&printed)
7015                .unwrap_or_else(|e| panic!("re-parse failed for {printed:?}: {e}"));
7016            assert_eq!(s, again, "round-trip mismatch for {sql:?}");
7017        }
7018    }
7019
7020    // --- v7.12.4: CREATE FUNCTION + CREATE TRIGGER + PL/pgSQL ---
7021
7022    #[test]
7023    fn create_function_returns_trigger_plpgsql_minimal() {
7024        let sql = "CREATE FUNCTION noop() RETURNS TRIGGER LANGUAGE plpgsql AS $$ BEGIN RETURN NEW; END; $$";
7025        let s = parse(sql);
7026        let Statement::CreateFunction(f) = s else {
7027            panic!("expected CreateFunction");
7028        };
7029        assert_eq!(f.name, "noop");
7030        assert!(!f.or_replace);
7031        assert!(f.args.is_empty());
7032        assert!(matches!(f.returns, FunctionReturn::Trigger));
7033        assert_eq!(f.language, "plpgsql");
7034        let FunctionBody::PlPgSql(block) = f.body else {
7035            panic!("expected PlPgSql body");
7036        };
7037        assert_eq!(block.statements.len(), 1);
7038        assert!(matches!(
7039            block.statements[0],
7040            PlPgSqlStmt::Return(ReturnTarget::New)
7041        ));
7042    }
7043
7044    #[test]
7045    fn create_function_or_replace_with_assignment() {
7046        // mailrs-shape trigger function: NEW.col := to_tsvector(...);
7047        // RETURN NEW.
7048        let sql = "CREATE OR REPLACE FUNCTION update_sv() RETURNS TRIGGER LANGUAGE plpgsql AS $$
7049BEGIN
7050  NEW.search_vector := to_tsvector('english', NEW.subject);
7051  RETURN NEW;
7052END;
7053$$";
7054        let s = parse(sql);
7055        let Statement::CreateFunction(f) = s else {
7056            panic!("expected CreateFunction");
7057        };
7058        assert!(f.or_replace);
7059        let FunctionBody::PlPgSql(block) = &f.body else {
7060            panic!("expected PlPgSql body");
7061        };
7062        assert_eq!(block.statements.len(), 2);
7063        // First statement: NEW.search_vector := to_tsvector(...)
7064        let PlPgSqlStmt::Assign { target, .. } = &block.statements[0] else {
7065            panic!("expected Assign as first stmt");
7066        };
7067        match target {
7068            AssignTarget::NewColumn(c) => assert_eq!(c, "search_vector"),
7069            other => panic!("expected NEW.col, got {other:?}"),
7070        }
7071        // Second statement: RETURN NEW
7072        assert!(matches!(
7073            block.statements[1],
7074            PlPgSqlStmt::Return(ReturnTarget::New)
7075        ));
7076    }
7077
7078    #[test]
7079    fn create_trigger_after_insert_or_update() {
7080        let sql = "CREATE TRIGGER tg AFTER INSERT OR UPDATE ON messages FOR EACH ROW EXECUTE FUNCTION update_sv()";
7081        let s = parse(sql);
7082        let Statement::CreateTrigger(t) = s else {
7083            panic!("expected CreateTrigger");
7084        };
7085        assert_eq!(t.name, "tg");
7086        assert_eq!(t.table, "messages");
7087        assert_eq!(t.timing, TriggerTiming::After);
7088        assert_eq!(t.events, vec![TriggerEvent::Insert, TriggerEvent::Update]);
7089        assert_eq!(t.for_each, TriggerForEach::Row);
7090        assert_eq!(t.function, "update_sv");
7091    }
7092
7093    #[test]
7094    fn create_trigger_before_delete_execute_procedure_alias() {
7095        // PG also accepts the legacy `EXECUTE PROCEDURE` spelling.
7096        let sql =
7097            "CREATE TRIGGER guard BEFORE DELETE ON t FOR EACH ROW EXECUTE PROCEDURE block_delete()";
7098        let s = parse(sql);
7099        let Statement::CreateTrigger(t) = s else {
7100            panic!("expected CreateTrigger");
7101        };
7102        assert_eq!(t.timing, TriggerTiming::Before);
7103        assert_eq!(t.events, vec![TriggerEvent::Delete]);
7104    }
7105
7106    #[test]
7107    fn drop_trigger_if_exists_round_trips() {
7108        // No parser support for DROP TRIGGER yet — added in v7.12.5
7109        // alongside the broader DROP …{IF EXISTS} cleanup. The
7110        // AST + Display impls are in place so we round-trip via
7111        // construction:
7112        let s = Statement::DropTrigger {
7113            name: "tg".into(),
7114            table: "messages".into(),
7115            if_exists: true,
7116        };
7117        assert_eq!(s.to_string(), "DROP TRIGGER IF EXISTS tg ON messages");
7118    }
7119
7120    #[test]
7121    fn trigger_ddl_display_roundtrips_through_parser() {
7122        // CREATE TRIGGER + its referenced CREATE FUNCTION must
7123        // Display → parse → same AST (modulo PL/pgSQL body
7124        // formatting which is parser-canonicalised).
7125        for sql in [
7126            "CREATE TRIGGER tg AFTER INSERT ON t FOR EACH ROW EXECUTE FUNCTION f()",
7127            "CREATE TRIGGER tg2 BEFORE UPDATE OR DELETE ON t FOR EACH ROW EXECUTE FUNCTION g()",
7128        ] {
7129            let s = parse(sql);
7130            let printed = s.to_string();
7131            let again = parse_statement(&printed)
7132                .unwrap_or_else(|e| panic!("re-parse failed for {printed:?}: {e}"));
7133            assert_eq!(s, again, "round-trip mismatch for {sql:?}");
7134        }
7135    }
7136}