// spg_sql/parser.rs

//! Recursive-descent parser with a Pratt (precedence-climbing) sub-parser for
//! expressions.
//!
//! Operator precedence, from loosest binding (1) to tightest (7):
//!
//! 1. `OR`
//! 2. `AND`
//! 3. unary `NOT`
//! 4. comparisons: `=` `<>` `<` `<=` `>` `>=`
//! 5. `+` `-`
//! 6. `*` `/`
//! 7. unary `-`
//!
//! Parenthesised expressions and atoms bind tighter than any operator.
//!
//! This matches PG's behaviour for the operators we support — e.g. `NOT a = b`
//! parses as `NOT (a = b)` and `-a * b` as `(-a) * b`.
11
12use alloc::boxed::Box;
13use alloc::format;
14use alloc::string::{String, ToString};
15use alloc::vec;
16use alloc::vec::Vec;
17use core::fmt;
18use core::mem;
19
20use crate::ast::{
21    AssignTarget, BinOp, CastTarget, ColumnDef, ColumnName, ColumnTypeName,
22    CreateFunctionStatement, CreateIndexStatement, CreatePublicationStatement,
23    CreateSubscriptionStatement, CreateTableStatement, CreateTriggerStatement, Expr, ExtractField,
24    FkAction, ForeignKeyConstraint, FrameBound, FrameKind, FromClause, FromJoin, FunctionArg,
25    FunctionArgMode, FunctionArgType, FunctionBody, FunctionReturn, IndexMethod, InsertStatement,
26    JoinKind, Literal, NullTreatment, OrderBy, PlPgSqlBlock, PlPgSqlDeclare, PlPgSqlStmt,
27    PublicationScope, RaiseLevel, ReturnTarget, SelectItem, SelectStatement, Statement, TableRef,
28    TriggerEvent, TriggerForEach, TriggerTiming, UnOp, UnionKind, VecEncoding, WindowFrame,
29};
30use crate::lexer::{self, LexError, Token};
31
/// v7.14.0 — reports whether `lc`, the leading keyword of a top-level
/// statement, is one of the dump-emitted forms that SPG accepts as a no-op
/// (they have no behavioural effect on the single-schema / single-database
/// model).
///
/// The comparison is exact, so `lc` must already be lowercased by the
/// caller. The statement dispatcher consumes a matching statement up to the
/// next `;` / EOF and returns `Statement::Empty` for it.
fn is_dump_noise_statement(lc: &str) -> bool {
    const NOISE_KEYWORDS: &[&str] = &[
        // Object comments / privileges / ownership — none of these change
        // schema semantics on SPG.
        "comment",
        "grant",
        "revoke",
        // MySQL bulk-load brackets (`LOCK TABLES` / `UNLOCK TABLES`).
        "lock",
        "unlock",
        // MySQL OPTIMIZE TABLE / CHECK TABLE diagnostics that dump tools
        // emit post-restore, plus `USE <db>`. ANALYZE is deliberately not
        // listed here: it has its own arm in the statement dispatcher.
        "optimize",
        "check",
        "use",
        // PG psql backslash meta-commands that newer pg_dump versions emit
        // unescaped (\restrict / \unrestrict). Real psql intercepts these;
        // SPG's PG-wire sees them as raw text.
        "\\restrict",
        "\\unrestrict",
    ];
    NOISE_KEYWORDS.contains(&lc)
}
62
/// v7.9.22 — recognise pgvector / SPG vector-index opclass names in
/// CREATE INDEX. SPG's HNSW already routes by query operator; the opclass is
/// accepted for `pg_dump` compatibility (mailrs migration follow-up G5).
///
/// v7.13.0 — extended to recognise PG built-in / pg_trgm opclasses (mailrs
/// round-5 G5). These are tokens-only acceptance — SPG doesn't change index
/// behaviour based on them. The function name is therefore narrower than what
/// it accepts: btree and trigram opclasses also return `true`.
///
/// Matching is ASCII case-insensitive and exact (no trimming, no prefix
/// matching). The comparison is done in place, so no lowercased copy of
/// `name` is allocated.
fn is_vector_opclass_name(name: &str) -> bool {
    // Every entry must stay lowercase: `eq_ignore_ascii_case` folds both
    // sides, but keeping the table canonical makes it greppable.
    const ACCEPTED_OPCLASSES: &[&str] = &[
        // pgvector / SPG vector opclasses.
        "vector_cosine_ops",
        "vector_l2_ops",
        "vector_ip_ops",
        "halfvec_cosine_ops",
        "halfvec_l2_ops",
        "halfvec_ip_ops",
        "sq8_cosine_ops",
        "sq8_l2_ops",
        "sq8_ip_ops",
        // pg_trgm — trigram operator class. SPG's GIN index already uses
        // tsvector tokens; trigram-style LIKE pattern matching still routes
        // through a sequential scan, but the opclass name is accepted so PG
        // schemas load.
        "gin_trgm_ops",
        "gist_trgm_ops",
        // PG built-in btree opclasses occasionally appear in pg_dump output
        // for column types with multiple sort orders.
        "text_pattern_ops",
        "varchar_pattern_ops",
        "bpchar_pattern_ops",
        // Default btree opclasses for integer / text columns.
        "int4_ops",
        "int8_ops",
        "text_ops",
    ];
    ACCEPTED_OPCLASSES
        .iter()
        .any(|opclass| name.eq_ignore_ascii_case(opclass))
}
102
/// Error produced when the parser (or the lexer feeding it) rejects input.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct ParseError {
    /// Human-readable description of what was expected and what was found.
    pub message: String,
    /// Index into the token stream where parsing tripped. This counts
    /// tokens, not bytes, so it cannot be used directly as an offset into
    /// the source text.
    pub token_pos: usize,
}
109
110impl fmt::Display for ParseError {
111    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
112        write!(
113            f,
114            "parse error at token #{}: {}",
115            self.token_pos, self.message
116        )
117    }
118}
119
120impl From<LexError> for ParseError {
121    fn from(e: LexError) -> Self {
122        Self {
123            message: format!("lex: {e}"),
124            token_pos: 0,
125        }
126    }
127}
128
129/// v7.9.30 — parse a single expression (no trailing junk). Used by
130/// the engine to re-hydrate stored partial-index / unique-index
131/// predicates from their canonical Display form. The same Pratt
132/// parser the statement path uses; this entry point just skips the
133/// statement dispatch.
134pub fn parse_expression(input: &str) -> Result<Expr, ParseError> {
135    let tokens = lexer::tokenize(input)?;
136    let mut p = Parser::new(tokens);
137    let expr = p.parse_expr(0)?;
138    p.expect_eof()?;
139    Ok(expr)
140}
141
142/// Parse exactly one statement, swallow an optional trailing `;`, and require
143/// the token stream to end there.
144pub fn parse_statement(input: &str) -> Result<Statement, ParseError> {
145    let tokens = lexer::tokenize(input)?;
146    let mut p = Parser::new(tokens);
147    let stmt = p.parse_one_statement()?;
148    if matches!(p.peek(), Token::Semicolon) {
149        p.advance();
150    }
151    p.expect_eof()?;
152    Ok(stmt)
153}
154
155struct Parser {
156    tokens: Vec<Token>,
157    pos: usize,
158}
159
160impl Parser {
161    fn new(tokens: Vec<Token>) -> Self {
162        Self { tokens, pos: 0 }
163    }
164
165    fn peek(&self) -> &Token {
166        // tokens always ends with Eof; pos is clamped in advance().
167        &self.tokens[self.pos]
168    }
169
170    fn advance(&mut self) -> Token {
171        let t = mem::replace(&mut self.tokens[self.pos], Token::Eof);
172        if self.pos + 1 < self.tokens.len() {
173            self.pos += 1;
174        }
175        t
176    }
177
178    fn err(&self, message: String) -> ParseError {
179        ParseError {
180            message,
181            token_pos: self.pos,
182        }
183    }
184
185    fn expect_eof(&self) -> Result<(), ParseError> {
186        if matches!(self.peek(), Token::Eof) {
187            Ok(())
188        } else {
189            Err(self.err(format!("expected end of input, got {:?}", self.peek())))
190        }
191    }
192
193    /// v7.14.0 — swallow every token up to (but not including) the
194    /// next semicolon / EOF. Used by the dump-noise dispatcher
195    /// to consume `COMMENT ON …`, `GRANT …`, `LOCK TABLES …`,
196    /// etc. without modeling each grammar.
197    fn consume_until_statement_boundary(&mut self) {
198        loop {
199            match self.peek() {
200                Token::Semicolon | Token::Eof => return,
201                _ => self.advance(),
202            };
203        }
204    }
205
206    fn expect_ident_like(&mut self) -> Result<String, ParseError> {
207        let first = match self.advance() {
208            Token::Ident(s) | Token::QuotedIdent(s) => s,
209            other => {
210                return Err(ParseError {
211                    message: format!("expected identifier, got {other:?}"),
212                    token_pos: self.pos.saturating_sub(1),
213                });
214            }
215        };
216        // v7.14.0 — strip optional `<schema>.` prefix. PG dumps
217        // qualify every name with `public.` (and pg_catalog.* for
218        // functions); SPG is single-schema so we discard the
219        // prefix and return only the trailing ident. Same shape
220        // also handles MySQL `db.tbl` cross-database refs (SPG
221        // ignores the db part).
222        if matches!(self.peek(), Token::Dot) {
223            self.advance();
224            match self.advance() {
225                Token::Ident(s) | Token::QuotedIdent(s) => return Ok(s),
226                other => {
227                    return Err(ParseError {
228                        message: format!(
229                            "expected identifier after '{first}.', got {other:?}"
230                        ),
231                        token_pos: self.pos.saturating_sub(1),
232                    });
233                }
234            }
235        }
236        Ok(first)
237    }
238
239    #[allow(clippy::too_many_lines)]
240    fn parse_one_statement(&mut self) -> Result<Statement, ParseError> {
241        // v7.14.0 — empty / comment-only / semicolon-only input
242        // (after the lexer strips line + block + MySQL
243        // conditional comments) lands as Statement::Empty.
244        // pg_dump and mysqldump emit several wrappers that
245        // collapse to nothing after stripping (`/*!40101 SET …
246        // */;`, blank lines between statements); the engine
247        // returns CommandOk no-op so the dump loads cleanly.
248        if matches!(self.peek(), Token::Eof | Token::Semicolon) {
249            return Ok(Statement::Empty);
250        }
251        // v7.14.0 — pg_dump / mysqldump "noise" statements:
252        // catalog / metadata DDL that has no behavioural effect
253        // on SPG's single-schema, single-database, single-user
254        // model. Consume the whole statement up to the next
255        // semicolon / EOF and return Empty. This is broader than
256        // the per-keyword DROP / SET / COMMENT arms but lets the
257        // long tail of `LOCK TABLES`, `UNLOCK TABLES`, `GRANT`,
258        // `REVOKE`, `ALTER OWNER TO`, `\restrict`, `\unrestrict`,
259        // `BEGIN; COMMIT;` wrappers, etc. all pass through.
260        if let Token::Ident(s) | Token::QuotedIdent(s) = self.peek() {
261            let lc = s.to_ascii_lowercase();
262            if is_dump_noise_statement(&lc) {
263                self.consume_until_statement_boundary();
264                return Ok(Statement::Empty);
265            }
266        }
267        match self.peek() {
268            Token::Select => self.parse_select_stmt(),
269            // v7.9.27 — `DO $$ … $$ [LANGUAGE plpgsql]`. PG-only;
270            // SPG has no PL/pgSQL so the body is consumed (lexer
271            // already turned it into a Token::String) and the whole
272            // DO statement returns CommandOk no-op. mailrs H1 +
273            // pg_dump compat.
274            Token::Ident(s) | Token::QuotedIdent(s) if s.eq_ignore_ascii_case("do") => {
275                self.advance();
276                // Body — single string token (dollar-quoted or
277                // ordinary).
278                match self.advance() {
279                    Token::String(_) => {}
280                    other => {
281                        return Err(self.err(alloc::format!(
282                            "expected dollar-quoted body after DO, got {other:?}"
283                        )));
284                    }
285                }
286                // Optional `LANGUAGE <name>` trailer (idents only).
287                if matches!(self.peek(), Token::Ident(s) if s.eq_ignore_ascii_case("language")) {
288                    self.advance();
289                    let _ = self.expect_ident_like()?;
290                }
291                Ok(Statement::DoBlock)
292            }
293            // v4.11: `WITH name AS (SELECT ...) [, ...] SELECT ...`.
294            // WITH isn't a reserved token in our lexer — comes through
295            // as `Token::Ident("with")` (case-insensitive).
296            Token::Ident(s) | Token::QuotedIdent(s) if s.eq_ignore_ascii_case("with") => {
297                self.advance();
298                self.parse_with_cte_then_select()
299            }
300            // v4.26: `EXPLAIN [ANALYZE] <select>`. Comes through as
301            // an identifier — not a reserved keyword.
302            Token::Ident(s) | Token::QuotedIdent(s) if s.eq_ignore_ascii_case("explain") => {
303                self.advance();
304                let mut analyze = false;
305                let mut suggest = false;
306                // v6.8.3 — `EXPLAIN (SUGGEST)` opt-in.
307                if matches!(self.peek(), Token::LParen) {
308                    self.advance();
309                    let opt = match self.peek().clone() {
310                        Token::Ident(s) | Token::QuotedIdent(s) => s,
311                        other => {
312                            return Err(self.err(format!(
313                                "expected option keyword inside EXPLAIN (…), got {other:?}"
314                            )));
315                        }
316                    };
317                    if !opt.eq_ignore_ascii_case("suggest") {
318                        return Err(self.err(format!(
319                            "unknown EXPLAIN option {opt:?}; v6.8.3 supports SUGGEST"
320                        )));
321                    }
322                    self.advance();
323                    if !matches!(self.peek(), Token::RParen) {
324                        return Err(self.err(format!(
325                            "expected ')' after EXPLAIN option, got {:?}",
326                            self.peek()
327                        )));
328                    }
329                    self.advance();
330                    suggest = true;
331                } else if let Token::Ident(s) | Token::QuotedIdent(s) = self.peek()
332                    && (s.eq_ignore_ascii_case("analyze") || s.eq_ignore_ascii_case("analyse"))
333                {
334                    self.advance();
335                    analyze = true;
336                }
337                let inner = self.parse_select_stmt()?;
338                let Statement::Select(s) = inner else {
339                    return Err(self.err(format!("EXPLAIN body must be a SELECT, got {inner:?}")));
340                };
341                Ok(Statement::Explain(crate::ast::ExplainStatement {
342                    analyze,
343                    inner: Box::new(s),
344                    suggest,
345                }))
346            }
347            Token::Create => self.parse_create_stmt(),
348            Token::Insert => self.parse_insert_stmt(),
349            Token::Begin => {
350                self.advance();
351                Ok(Statement::Begin)
352            }
353            Token::Commit => {
354                self.advance();
355                Ok(Statement::Commit)
356            }
357            Token::Rollback => {
358                self.advance();
359                // `ROLLBACK TO [SAVEPOINT] <name>` returns to that
360                // savepoint without ending the transaction. Bare
361                // `ROLLBACK` drops the whole TX.
362                if matches!(self.peek(), Token::To) {
363                    self.advance();
364                    if matches!(self.peek(), Token::Savepoint) {
365                        self.advance();
366                    }
367                    let name = self.expect_ident_like()?;
368                    Ok(Statement::RollbackToSavepoint(name))
369                } else {
370                    Ok(Statement::Rollback)
371                }
372            }
373            Token::Savepoint => {
374                self.advance();
375                let name = self.expect_ident_like()?;
376                Ok(Statement::Savepoint(name))
377            }
378            Token::Release => {
379                self.advance();
380                // `RELEASE [SAVEPOINT] <name>` — the `SAVEPOINT` keyword
381                // is optional in standard SQL.
382                if matches!(self.peek(), Token::Savepoint) {
383                    self.advance();
384                }
385                let name = self.expect_ident_like()?;
386                Ok(Statement::ReleaseSavepoint(name))
387            }
388            Token::Show => {
389                self.advance();
390                // `SHOW TABLES` / `SHOW USERS` / `SHOW COLUMNS FROM <table>`.
391                // v6.1.2 promoted TABLES to a reserved keyword (for
392                // `CREATE PUBLICATION … FOR ALL TABLES`), so it now
393                // arrives as `Token::Tables` rather than a bare ident.
394                // USERS / COLUMNS remain bare idents.
395                let target = match self.advance() {
396                    Token::Tables => "tables".to_string(),
397                    Token::Ident(s) | Token::QuotedIdent(s) => s.to_ascii_lowercase(),
398                    other => {
399                        return Err(self.err(format!(
400                            "expected SHOW target, got {other:?}"
401                        )));
402                    }
403                };
404                match target.as_str() {
405                    "tables" => Ok(Statement::ShowTables),
406                    "users" => Ok(Statement::ShowUsers),
407                    // v6.1.3 — PUBLICATIONS plural is NOT a reserved
408                    // keyword on its own; it lands here as a bare
409                    // ident. Returning all publications + their
410                    // scope summary.
411                    "publications" => Ok(Statement::ShowPublications),
412                    // v6.1.4 — same shape for SUBSCRIPTIONS plural.
413                    "subscriptions" => Ok(Statement::ShowSubscriptions),
414                    "columns" => {
415                        if !matches!(self.peek(), Token::From) {
416                            return Err(self.err(format!(
417                                "expected FROM after SHOW COLUMNS, got {:?}",
418                                self.peek()
419                            )));
420                        }
421                        self.advance();
422                        let table = self.expect_ident_like()?;
423                        Ok(Statement::ShowColumns(table))
424                    }
425                    other => Err(self.err(format!(
426                        "unknown SHOW target {other:?}; supported: TABLES, COLUMNS, USERS, PUBLICATIONS"
427                    ))),
428                }
429            }
430            // v6.1.2: `DROP` is now a reserved keyword (it dispatches
431            // to DROP USER and DROP PUBLICATION today; DROP TABLE /
432            // DROP INDEX are still SHOW-shaped admin ops). Pre-6.1.2
433            // arrived as a bare ident; tokenising it dedicatedly
434            // keeps the dispatch tree small.
435            Token::Drop => {
436                self.advance();
437                match self.peek() {
438                    Token::Publication => {
439                        self.advance();
440                        let name = self.expect_ident_or_string()?;
441                        Ok(Statement::DropPublication(name))
442                    }
443                    Token::Subscription => {
444                        self.advance();
445                        let name = self.expect_ident_or_string()?;
446                        Ok(Statement::DropSubscription(name))
447                    }
448                    Token::Ident(s) | Token::QuotedIdent(s) if s.eq_ignore_ascii_case("user") => {
449                        self.advance();
450                        let name = self.expect_ident_or_string()?;
451                        Ok(Statement::DropUser(name))
452                    }
453                    // v7.12.4 — DROP TRIGGER [IF EXISTS] name ON table.
454                    Token::Ident(s) | Token::QuotedIdent(s) if s.eq_ignore_ascii_case("trigger") => {
455                        self.advance();
456                        let if_exists = self.consume_if_exists();
457                        let name = self.expect_ident_like()?;
458                        // ON <table>
459                        if !matches!(self.peek(), Token::On) {
460                            return Err(self.err(alloc::format!(
461                                "expected ON <table> after DROP TRIGGER {name:?}, got {:?}",
462                                self.peek()
463                            )));
464                        }
465                        self.advance();
466                        let table = self.expect_ident_like()?;
467                        Ok(Statement::DropTrigger {
468                            name,
469                            table,
470                            if_exists,
471                        })
472                    }
473                    // v7.12.4 — DROP FUNCTION [IF EXISTS] name [(args)].
474                    // v7.12.4 ignores any optional arg-list (signature-
475                    // based overload disambiguation lands in v7.12.5+).
476                    Token::Ident(s) | Token::QuotedIdent(s) if s.eq_ignore_ascii_case("function") => {
477                        self.advance();
478                        let if_exists = self.consume_if_exists();
479                        let name = self.expect_ident_like()?;
480                        // Optional `()` — consume + discard.
481                        if matches!(self.peek(), Token::LParen) {
482                            self.advance();
483                            // Skip until matching RParen, accepting any tokens (typed args we don't model yet).
484                            let mut depth = 1usize;
485                            while depth > 0 {
486                                match self.peek() {
487                                    Token::LParen => depth += 1,
488                                    Token::RParen => depth -= 1,
489                                    Token::Eof => {
490                                        return Err(self.err(alloc::format!(
491                                            "unterminated arg list in DROP FUNCTION {name:?}"
492                                        )));
493                                    }
494                                    _ => {}
495                                }
496                                self.advance();
497                            }
498                        }
499                        Ok(Statement::DropFunction { name, if_exists })
500                    }
501                    // v7.14.0 — DROP TABLE [IF EXISTS] name [, name…]
502                    // [CASCADE|RESTRICT]. pg_dump and mysqldump both
503                    // emit DROP TABLE IF EXISTS at the head of every
504                    // CREATE TABLE block so re-importing a dump
505                    // overwrites prior state. SPG accepts and removes
506                    // matching tables; CASCADE/RESTRICT trailers
507                    // accepted silently.
508                    Token::Table => {
509                        self.advance();
510                        let if_exists = self.consume_if_exists();
511                        let mut names: Vec<String> = Vec::new();
512                        loop {
513                            names.push(self.expect_ident_like()?);
514                            if matches!(self.peek(), Token::Comma) {
515                                self.advance();
516                                continue;
517                            }
518                            break;
519                        }
520                        if matches!(
521                            self.peek(),
522                            Token::Ident(s) if s.eq_ignore_ascii_case("cascade")
523                                || s.eq_ignore_ascii_case("restrict")
524                        ) {
525                            self.advance();
526                        }
527                        Ok(Statement::DropTable { names, if_exists })
528                    }
529                    // v7.14.0 — DROP INDEX [IF EXISTS] name
530                    // [CASCADE|RESTRICT]. PG / mysqldump emit this
531                    // for partial-index renames and pgvector
532                    // migrations. SPG removes the matching index;
533                    // IF EXISTS makes the drop idempotent.
534                    Token::Index => {
535                        self.advance();
536                        let if_exists = self.consume_if_exists();
537                        let name = self.expect_ident_like()?;
538                        if matches!(
539                            self.peek(),
540                            Token::Ident(s) if s.eq_ignore_ascii_case("cascade")
541                                || s.eq_ignore_ascii_case("restrict")
542                        ) {
543                            self.advance();
544                        }
545                        Ok(Statement::DropIndex { name, if_exists })
546                    }
547                    // v7.14.0 — DROP SCHEMA [IF EXISTS] name
548                    // [CASCADE|RESTRICT]. SPG is single-database;
549                    // schemas are accepted as no-ops (any name
550                    // resolves to the single catalog).
551                    Token::Ident(s) | Token::QuotedIdent(s) if s.eq_ignore_ascii_case("schema") => {
552                        self.advance();
553                        let _ = self.consume_if_exists();
554                        let _ = self.expect_ident_like()?;
555                        if matches!(
556                            self.peek(),
557                            Token::Ident(s) if s.eq_ignore_ascii_case("cascade")
558                                || s.eq_ignore_ascii_case("restrict")
559                        ) {
560                            self.advance();
561                        }
562                        Ok(Statement::Empty)
563                    }
564                    // v7.14.0 — DROP SEQUENCE [IF EXISTS] name
565                    // [CASCADE|RESTRICT]. SPG has no separate
566                    // sequence object — SERIAL/BIGSERIAL is column-
567                    // local AUTO_INCREMENT — so DROP SEQUENCE
568                    // resolves as a no-op.
569                    Token::Ident(s) | Token::QuotedIdent(s) if s.eq_ignore_ascii_case("sequence") => {
570                        self.advance();
571                        let _ = self.consume_if_exists();
572                        let _ = self.expect_ident_like()?;
573                        if matches!(
574                            self.peek(),
575                            Token::Ident(s) if s.eq_ignore_ascii_case("cascade")
576                                || s.eq_ignore_ascii_case("restrict")
577                        ) {
578                            self.advance();
579                        }
580                        Ok(Statement::Empty)
581                    }
582                    other => Err(self.err(format!(
583                        "expected TABLE / INDEX / SCHEMA / SEQUENCE / USER / PUBLICATION / \
584                         SUBSCRIPTION / TRIGGER / FUNCTION after DROP, got {other:?}"
585                    ))),
586                }
587            }
588            Token::Ident(s) | Token::QuotedIdent(s) if s.eq_ignore_ascii_case("update") => {
589                self.advance();
590                self.parse_update_after_keyword()
591            }
592            Token::Ident(s) | Token::QuotedIdent(s) if s.eq_ignore_ascii_case("delete") => {
593                self.advance();
594                self.parse_delete_after_keyword()
595            }
596            // v6.0.4: ALTER INDEX <name> REBUILD [WITH (encoding = ...)].
597            // ALTER is not a reserved keyword in the lexer — handled
598            // as a bare ident here.
599            Token::Ident(s) | Token::QuotedIdent(s) if s.eq_ignore_ascii_case("alter") => {
600                self.advance();
601                self.parse_alter_after_keyword()
602            }
603            // v6.1.7: WAIT FOR WAL POSITION <pos> [WITH TIMEOUT <ms>].
604            // WAIT / POSITION / TIMEOUT are bare idents — no lexer
605            // additions needed.
606            Token::Ident(s) | Token::QuotedIdent(s) if s.eq_ignore_ascii_case("wait") => {
607                self.advance();
608                self.parse_wait_after_keyword()
609            }
610            // v6.2.0: ANALYZE [<table>]. ANALYZE is a bare ident.
611            // Bare ANALYZE → analyse every user table; ANALYZE
612            // <name> → re-stats one. The argument is an optional
613            // ident (or quoted ident); anything else is a parse
614            // error.
615            // v6.7.3 — `COMPACT COLD SEGMENTS`. No arguments, no
616            // `WHERE` filter (carved out per V6_7_DESIGN.md
617            // STABILITY). Lex order: identifier "compact" → "cold"
618            // → "segments". Anything else after `COMPACT` is a
619            // parse error.
620            Token::Ident(s) | Token::QuotedIdent(s) if s.eq_ignore_ascii_case("compact") => {
621                self.advance();
622                let next = self.peek().clone();
623                let cold = match next {
624                    Token::Ident(s) | Token::QuotedIdent(s) => s,
625                    _ => {
626                        return Err(
627                            self.err(format!("expected COLD after COMPACT, got {:?}", self.peek()))
628                        );
629                    }
630                };
631                if !cold.eq_ignore_ascii_case("cold") {
632                    return Err(self.err(format!("expected COLD after COMPACT, got {cold:?}")));
633                }
634                self.advance();
635                let next = self.peek().clone();
636                let segments = match next {
637                    Token::Ident(s) | Token::QuotedIdent(s) => s,
638                    _ => {
639                        return Err(self.err(format!(
640                            "expected SEGMENTS after COMPACT COLD, got {:?}",
641                            self.peek()
642                        )));
643                    }
644                };
645                if !segments.eq_ignore_ascii_case("segments") {
646                    return Err(self.err(format!(
647                        "expected SEGMENTS after COMPACT COLD, got {segments:?}"
648                    )));
649                }
650                self.advance();
651                Ok(Statement::CompactColdSegments)
652            }
653            Token::Ident(s) | Token::QuotedIdent(s) if s.eq_ignore_ascii_case("analyze") => {
654                self.advance();
655                let target = match self.peek() {
656                    Token::Eof | Token::Semicolon => None,
657                    Token::Ident(_) | Token::QuotedIdent(_) => {
658                        Some(self.expect_ident_like()?)
659                    }
660                    other => {
661                        return Err(self.err(format!(
662                            "expected table name or end of statement after ANALYZE, got {other:?}"
663                        )));
664                    }
665                };
666                Ok(Statement::Analyze(target))
667            }
668            // v7.12.1 — `SET <name> [TO|=] <value>`. The
669            // `default_text_search_config` parameter is consumed
670            // by the FTS function dispatcher; other parameter
671            // names are recorded but treated as a no-op so PG
672            // dump output loads.
673            Token::Ident(s) | Token::QuotedIdent(s) if s.eq_ignore_ascii_case("set") => {
674                self.advance();
675                // PG allows `SET LOCAL` / `SET SESSION` qualifiers
676                // — accept and ignore. MySQL adds `SET GLOBAL` too
677                // (and the alias `SET @@global.name = …` which the
678                // SessionVar path handles).
679                if matches!(self.peek(), Token::Ident(s) | Token::QuotedIdent(s) if s.eq_ignore_ascii_case("local") || s.eq_ignore_ascii_case("session") || s.eq_ignore_ascii_case("global"))
680                {
681                    self.advance();
682                }
683                // v7.14.0 — MySQL `SET NAMES <charset> [COLLATE
684                // <collation>]` — change the connection client
685                // charset. SPG stores UTF-8 always and orders
686                // bytewise; accept as a no-op.
687                if matches!(self.peek(), Token::Ident(s) | Token::QuotedIdent(s) if s.eq_ignore_ascii_case("names"))
688                {
689                    self.advance();
690                    // Charset ident-or-string.
691                    if matches!(
692                        self.peek(),
693                        Token::Ident(_) | Token::QuotedIdent(_) | Token::String(_)
694                    ) {
695                        self.advance();
696                    }
697                    // Optional `COLLATE <name>`.
698                    if matches!(self.peek(), Token::Ident(s) if s.eq_ignore_ascii_case("collate"))
699                    {
700                        self.advance();
701                        if matches!(
702                            self.peek(),
703                            Token::Ident(_) | Token::QuotedIdent(_) | Token::String(_)
704                        ) {
705                            self.advance();
706                        }
707                    }
708                    return Ok(Statement::Empty);
709                }
710                // v7.14.0 — MySQL `SET CHARACTER SET <charset>`
711                // alias — same accept-as-no-op as SET NAMES.
712                if matches!(self.peek(), Token::Ident(s) if s.eq_ignore_ascii_case("character"))
713                    && matches!(self.tokens.get(self.pos + 1), Some(Token::Ident(s)) if s.eq_ignore_ascii_case("set"))
714                {
715                    self.advance(); // CHARACTER
716                    self.advance(); // SET
717                    if matches!(
718                        self.peek(),
719                        Token::Ident(_) | Token::QuotedIdent(_) | Token::String(_)
720                    ) {
721                        self.advance();
722                    }
723                    return Ok(Statement::Empty);
724                }
725                // v7.14.0 — multi-assignment form
726                // `SET a = 1, b = 2, …`. Single-assignment is the
727                // 1-element case. Each LHS may be a regular ident
728                // or a SessionVar (`@VAR` / `@@VAR`).
729                let mut pairs: Vec<(String, crate::ast::SetValue)> = Vec::new();
730                loop {
731                    let lhs = match self.peek().clone() {
732                        Token::SessionVar(s) => {
733                            self.advance();
734                            s
735                        }
736                        Token::Ident(_) | Token::QuotedIdent(_) => self.parse_set_param_name()?,
737                        other => {
738                            return Err(self.err(format!(
739                                "expected parameter name after SET, got {other:?}"
740                            )));
741                        }
742                    };
743                    // Accept either `=` or the bare `TO` keyword.
744                    match self.peek() {
745                        Token::Eq => {
746                            self.advance();
747                        }
748                        Token::To => {
749                            self.advance();
750                        }
751                        other => {
752                            return Err(self.err(format!(
753                                "expected `=` or TO after SET {lhs}, got {other:?}"
754                            )));
755                        }
756                    }
757                    let value = self.parse_set_value()?;
758                    pairs.push((lhs, value));
759                    if matches!(self.peek(), Token::Comma) {
760                        self.advance();
761                        continue;
762                    }
763                    break;
764                }
765                if pairs.len() == 1 {
766                    let (name, value) = pairs.into_iter().next().unwrap();
767                    Ok(Statement::SetParameter { name, value })
768                } else {
769                    Ok(Statement::SetParameterList(pairs))
770                }
771            }
772            // v7.12.1 — `RESET <name>` / `RESET ALL`.
773            Token::Ident(s) | Token::QuotedIdent(s) if s.eq_ignore_ascii_case("reset") => {
774                self.advance();
775                match self.peek().clone() {
776                    Token::All => {
777                        self.advance();
778                        Ok(Statement::ResetParameter(None))
779                    }
780                    Token::Ident(s) | Token::QuotedIdent(s) if s.eq_ignore_ascii_case("all") => {
781                        self.advance();
782                        Ok(Statement::ResetParameter(None))
783                    }
784                    _ => {
785                        let name = self.parse_set_param_name()?;
786                        Ok(Statement::ResetParameter(Some(name)))
787                    }
788                }
789            }
790            other => Err(self.err(format!(
791                "expected SELECT / CREATE / DROP / INSERT / UPDATE / DELETE / ALTER / BEGIN / COMMIT / \
792                 ROLLBACK / SAVEPOINT / RELEASE / SHOW at start of statement, got {other:?}"
793            ))),
794        }
795    }
796
797    fn parse_create_stmt(&mut self) -> Result<Statement, ParseError> {
798        debug_assert!(matches!(self.peek(), Token::Create));
799        self.advance();
800        match self.peek() {
801            Token::Table => self.parse_create_table_stmt_after_create(),
802            Token::Index => self.parse_create_index_stmt_after_create(false),
803            // v7.9.29 — `CREATE UNIQUE INDEX … [WHERE pred]`.
804            // The `UNIQUE` modifier turns a partial index into a
805            // partial-uniqueness invariant (only rows matching the
806            // WHERE predicate are checked for duplicates). mailrs
807            // K1 (3 hits: email_templates default, calendar_events
808            // master, calendar_events instance).
809            Token::Ident(s) | Token::QuotedIdent(s) if s.eq_ignore_ascii_case("unique") => {
810                self.advance();
811                if !matches!(self.peek(), Token::Index) {
812                    return Err(self.err(alloc::format!(
813                        "expected INDEX after CREATE UNIQUE, got {:?}",
814                        self.peek()
815                    )));
816                }
817                self.parse_create_index_stmt_after_create(true)
818            }
819            Token::Publication => {
820                self.advance();
821                self.parse_create_publication_after_keyword()
822            }
823            Token::Subscription => {
824                self.advance();
825                self.parse_create_subscription_after_keyword()
826            }
827            // v4.1: CREATE USER 'name' WITH PASSWORD 'pw' [ROLE 'role'].
828            // USER isn't a reserved keyword — we look for the bare
829            // identifier so the lexer doesn't have to grow a token.
830            Token::Ident(s) | Token::QuotedIdent(s) if s.eq_ignore_ascii_case("user") => {
831                self.advance();
832                self.parse_create_user_after_keyword()
833            }
834            // v7.9.15 — `CREATE EXTENSION [IF NOT EXISTS] <name>
835            // [WITH SCHEMA …] [VERSION '…'] [CASCADE]` as a
836            // no-op. mailrs follow-up F3.
837            Token::Ident(s) | Token::QuotedIdent(s) if s.eq_ignore_ascii_case("extension") => {
838                self.advance();
839                self.parse_create_extension_after_keyword()
840            }
841            // v7.12.4 — `CREATE [OR REPLACE] FUNCTION …` and
842            // `CREATE [OR REPLACE] TRIGGER …`. `OR REPLACE` is
843            // optional; absorb it here and forward to the
844            // per-kind parsers with the flag. OR is a reserved
845            // keyword token.
846            Token::Or => {
847                self.advance();
848                let next = self.peek();
849                let (Token::Ident(s2) | Token::QuotedIdent(s2)) = next else {
850                    return Err(self.err(alloc::format!(
851                        "expected REPLACE after CREATE OR, got {next:?}"
852                    )));
853                };
854                if !s2.eq_ignore_ascii_case("replace") {
855                    return Err(self.err(alloc::format!(
856                        "expected REPLACE after CREATE OR, got {s2:?}"
857                    )));
858                }
859                self.advance();
860                self.parse_create_function_or_trigger_after_or_replace(true)
861            }
862            Token::Ident(s) | Token::QuotedIdent(s) if s.eq_ignore_ascii_case("function") => {
863                self.advance();
864                self.parse_create_function_after_keyword(false)
865            }
866            Token::Ident(s) | Token::QuotedIdent(s) if s.eq_ignore_ascii_case("trigger") => {
867                self.advance();
868                self.parse_create_trigger_after_keyword(false)
869            }
870            // v7.14.0 — pg_dump / mysqldump emit
871            // `CREATE SEQUENCE / SCHEMA / VIEW / MATERIALIZED VIEW
872            // / TYPE / DOMAIN / DATABASE / ROLE / POLICY / OPERATOR`.
873            // SPG is single-schema / single-database; these have
874            // no behavioural effect, so consume + return Empty.
875            Token::Ident(s) | Token::QuotedIdent(s)
876                if matches!(
877                    s.to_ascii_lowercase().as_str(),
878                    "sequence"
879                        | "schema"
880                        | "view"
881                        | "materialized"
882                        | "type"
883                        | "domain"
884                        | "database"
885                        | "role"
886                        | "policy"
887                        | "operator"
888                        | "cast"
889                        | "rule"
890                        | "aggregate"
891                        | "language"
892                        | "collation"
893                        | "conversion"
894                ) =>
895            {
896                self.consume_until_statement_boundary();
897                return Ok(Statement::Empty);
898            }
899            other => Err(self.err(format!(
900                "expected TABLE / INDEX / USER / EXTENSION / PUBLICATION / SUBSCRIPTION / FUNCTION / TRIGGER / SEQUENCE / SCHEMA / VIEW / TYPE / DOMAIN [OR REPLACE …] after CREATE, got {other:?}"
901            ))),
902        }
903    }
904
905    /// v7.12.4 — `CREATE OR REPLACE` already consumed; the next
906    /// keyword decides whether we parse a function or trigger
907    /// body. PG accepts other `OR REPLACE`-able objects (VIEW,
908    /// PROCEDURE) — those land in later releases.
909    fn parse_create_function_or_trigger_after_or_replace(
910        &mut self,
911        or_replace: bool,
912    ) -> Result<Statement, ParseError> {
913        let tok = self.peek();
914        let (Token::Ident(s) | Token::QuotedIdent(s)) = tok else {
915            return Err(self.err(alloc::format!(
916                "expected FUNCTION / TRIGGER after CREATE OR REPLACE, got {tok:?}"
917            )));
918        };
919        if s.eq_ignore_ascii_case("function") {
920            self.advance();
921            self.parse_create_function_after_keyword(or_replace)
922        } else if s.eq_ignore_ascii_case("trigger") {
923            self.advance();
924            self.parse_create_trigger_after_keyword(or_replace)
925        } else {
926            Err(self.err(alloc::format!(
927                "expected FUNCTION / TRIGGER after CREATE OR REPLACE, got {s:?}"
928            )))
929        }
930    }
931
932    /// v7.9.15 — accept and discard `CREATE EXTENSION` DDL.
933    /// SPG doesn't have a registry; pgvector / similar are
934    /// either builtin (VECTOR(N) ↔ pgvector) or n/a. Parsing
935    /// the syntax lets dual-target schemas keep the line.
936    fn parse_create_extension_after_keyword(&mut self) -> Result<Statement, ParseError> {
937        // Optional `IF NOT EXISTS`.
938        self.consume_if_not_exists();
939        let name = self.expect_ident_like()?;
940        // Drain optional WITH SCHEMA <ident> / VERSION '<v>' /
941        // CASCADE / FROM '<v>' clauses; we don't model them.
942        loop {
943            match self.peek() {
944                Token::Ident(s) if s.eq_ignore_ascii_case("with") => {
945                    self.advance();
946                    continue;
947                }
948                Token::Ident(s) if s.eq_ignore_ascii_case("schema") => {
949                    self.advance();
950                    let _ = self.expect_ident_like()?;
951                    continue;
952                }
953                Token::Ident(s) if s.eq_ignore_ascii_case("version") => {
954                    self.advance();
955                    // String or ident literal.
956                    let _ = self.advance();
957                    continue;
958                }
959                Token::Ident(s) if s.eq_ignore_ascii_case("from") => {
960                    self.advance();
961                    let _ = self.advance();
962                    continue;
963                }
964                Token::Ident(s) if s.eq_ignore_ascii_case("cascade") => {
965                    self.advance();
966                    continue;
967                }
968                _ => break,
969            }
970        }
971        Ok(Statement::CreateExtension(name))
972    }
973
974    /// v7.12.4 — body of `CREATE [OR REPLACE] FUNCTION`. The
975    /// `[OR REPLACE]` flag (and the `FUNCTION` keyword) have
976    /// already been consumed by the caller. Grammar accepted:
977    ///
978    ///   name `(` arg-list `)`
979    ///   `RETURNS` return-type
980    ///   [ `LANGUAGE` ident ]
981    ///   `AS` $$ body $$
982    ///   [ `LANGUAGE` ident ]
983    ///
984    /// Either `LANGUAGE` position is allowed; PG accepts both.
985    fn parse_create_function_after_keyword(
986        &mut self,
987        or_replace: bool,
988    ) -> Result<Statement, ParseError> {
989        let name = self.expect_ident_like()?;
990        // Argument list. v7.12.4 commonly sees the empty `()`
991        // (trigger functions); typed args parse and round-trip
992        // but the executor only invokes nullary functions.
993        if !matches!(self.peek(), Token::LParen) {
994            return Err(self.err(alloc::format!(
995                "expected '(' after function name {name:?}, got {:?}",
996                self.peek()
997            )));
998        }
999        self.advance();
1000        let args = self.parse_function_arg_list()?;
1001        // RETURNS clause.
1002        let tok = self.peek();
1003        let (Token::Ident(s) | Token::QuotedIdent(s)) = tok else {
1004            return Err(self.err(alloc::format!(
1005                "expected RETURNS after function arg list, got {tok:?}"
1006            )));
1007        };
1008        if !s.eq_ignore_ascii_case("returns") {
1009            return Err(self.err(alloc::format!(
1010                "expected RETURNS after function arg list, got {s:?}"
1011            )));
1012        }
1013        self.advance();
1014        let returns = self.parse_function_return()?;
1015        // Optional LANGUAGE clause (PG also accepts after AS — we'll
1016        // re-check after the body too).
1017        let mut language: Option<String> = self.parse_optional_language()?;
1018        // `AS` followed by a $$-quoted body (lexer already
1019        // collapses both `$$…$$` and `$tag$…$tag$` to a single
1020        // Token::String). AS is a reserved keyword (Token::As).
1021        if !matches!(self.peek(), Token::As) {
1022            return Err(self.err(alloc::format!(
1023                "expected AS before function body, got {:?}",
1024                self.peek()
1025            )));
1026        }
1027        self.advance();
1028        let body_text = match self.peek() {
1029            Token::String(s) => {
1030                let body = s.clone();
1031                self.advance();
1032                body
1033            }
1034            other => {
1035                return Err(self.err(alloc::format!(
1036                    "expected $$-quoted function body after AS, got {other:?}"
1037                )));
1038            }
1039        };
1040        // Trailing optional LANGUAGE clause (the other PG position).
1041        if language.is_none() {
1042            language = self.parse_optional_language()?;
1043        }
1044        let language = language.unwrap_or_else(|| String::from("sql"));
1045        // PL/pgSQL bodies get structure-parsed. Other languages
1046        // (or PL/pgSQL bodies the v7.12.4 parser doesn't yet
1047        // recognise) round-trip as Raw text — the executor errors
1048        // when invoked with a clear unsupported message.
1049        let body = if language.eq_ignore_ascii_case("plpgsql") {
1050            match parse_plpgsql_body(&body_text) {
1051                Ok(block) => FunctionBody::PlPgSql(block),
1052                // Best-effort: if the body parser doesn't yet
1053                // support a construct used inside, fall back to
1054                // raw — keeps `CREATE FUNCTION` itself working
1055                // (catalogue accepts), executor errors on
1056                // invocation only.
1057                Err(_) => FunctionBody::Raw(body_text),
1058            }
1059        } else {
1060            FunctionBody::Raw(body_text)
1061        };
1062        Ok(Statement::CreateFunction(CreateFunctionStatement {
1063            name,
1064            or_replace,
1065            args,
1066            returns,
1067            language,
1068            body,
1069        }))
1070    }
1071
1072    /// Closing `)`-terminated argument list. v7.12.4 commonly
1073    /// sees the empty `()`; typed args round-trip but the
1074    /// executor (yet) doesn't invoke them.
1075    fn parse_function_arg_list(&mut self) -> Result<Vec<FunctionArg>, ParseError> {
1076        let mut args: Vec<FunctionArg> = Vec::new();
1077        if matches!(self.peek(), Token::RParen) {
1078            self.advance();
1079            return Ok(args);
1080        }
1081        loop {
1082            // Optional `IN` / `OUT` / `INOUT` mode keyword. IN is
1083            // a reserved token; OUT / INOUT are bare idents.
1084            let mode = if matches!(self.peek(), Token::In) {
1085                self.advance();
1086                FunctionArgMode::In
1087            } else if matches!(self.peek(), Token::Ident(s) | Token::QuotedIdent(s) if s.eq_ignore_ascii_case("out"))
1088            {
1089                self.advance();
1090                FunctionArgMode::Out
1091            } else if matches!(self.peek(), Token::Ident(s) | Token::QuotedIdent(s) if s.eq_ignore_ascii_case("inout"))
1092            {
1093                self.advance();
1094                FunctionArgMode::InOut
1095            } else {
1096                FunctionArgMode::In
1097            };
1098            // Optional name. The next token is either a name
1099            // (followed by a type ident) or the type itself.
1100            // Disambiguate by peeking ahead: if the token after
1101            // the next ident is also an ident, we treat the
1102            // first as the name.
1103            let (name, ty_token) = {
1104                let first = self.expect_ident_like()?;
1105                // Peek next: if it's an ident (i.e. a type
1106                // name) the `first` was the arg name.
1107                match self.peek() {
1108                    Token::Ident(_) | Token::QuotedIdent(_) => {
1109                        let ty = self.expect_ident_like()?;
1110                        (Some(first), ty)
1111                    }
1112                    _ => (None, first),
1113                }
1114            };
1115            // Type — try to map to ColumnTypeName, else Raw.
1116            let ty = match map_type_ident_to_column_type_name(&ty_token) {
1117                Some(t) => FunctionArgType::Typed(t),
1118                None => FunctionArgType::Raw(ty_token),
1119            };
1120            args.push(FunctionArg { mode, name, ty });
1121            match self.peek() {
1122                Token::Comma => {
1123                    self.advance();
1124                    continue;
1125                }
1126                Token::RParen => {
1127                    self.advance();
1128                    return Ok(args);
1129                }
1130                other => {
1131                    return Err(self.err(alloc::format!(
1132                        "expected , or ) in function arg list, got {other:?}"
1133                    )));
1134                }
1135            }
1136        }
1137    }
1138
1139    fn parse_function_return(&mut self) -> Result<FunctionReturn, ParseError> {
1140        let ident = self.expect_ident_like()?;
1141        if ident.eq_ignore_ascii_case("trigger") {
1142            return Ok(FunctionReturn::Trigger);
1143        }
1144        if ident.eq_ignore_ascii_case("void") {
1145            return Ok(FunctionReturn::Void);
1146        }
1147        match map_type_ident_to_column_type_name(&ident) {
1148            Some(t) => Ok(FunctionReturn::Type(t)),
1149            None => Ok(FunctionReturn::Other(ident)),
1150        }
1151    }
1152
1153    fn parse_optional_language(&mut self) -> Result<Option<String>, ParseError> {
1154        match self.peek() {
1155            Token::Ident(s) | Token::QuotedIdent(s) if s.eq_ignore_ascii_case("language") => {
1156                self.advance();
1157                let lang = self.expect_ident_like()?;
1158                Ok(Some(lang.to_ascii_lowercase()))
1159            }
1160            _ => Ok(None),
1161        }
1162    }
1163
1164    /// v7.12.4 — body of `CREATE [OR REPLACE] TRIGGER`. The
1165    /// `[OR REPLACE]` flag and the `TRIGGER` keyword have already
1166    /// been consumed.
1167    fn parse_create_trigger_after_keyword(
1168        &mut self,
1169        or_replace: bool,
1170    ) -> Result<Statement, ParseError> {
1171        let name = self.expect_ident_like()?;
1172        let timing = {
1173            let ident = self.expect_ident_like()?;
1174            if ident.eq_ignore_ascii_case("before") {
1175                TriggerTiming::Before
1176            } else if ident.eq_ignore_ascii_case("after") {
1177                TriggerTiming::After
1178            } else if ident.eq_ignore_ascii_case("instead") {
1179                let next = self.expect_ident_like()?;
1180                if !next.eq_ignore_ascii_case("of") {
1181                    return Err(self.err(alloc::format!(
1182                        "expected OF after INSTEAD in trigger timing, got {next:?}"
1183                    )));
1184                }
1185                TriggerTiming::InsteadOf
1186            } else {
1187                return Err(self.err(alloc::format!(
1188                    "expected BEFORE / AFTER / INSTEAD OF in trigger timing, got {ident:?}"
1189                )));
1190            }
1191        };
1192        // Events: INSERT [ OR UPDATE [ OR DELETE [ OR TRUNCATE ] ] ].
1193        // OR is a reserved keyword token (Token::Or), not an Ident.
1194        // v7.13.0 — after an UPDATE event we may optionally see
1195        // `OF col, col, …` (mailrs round-5 G7). Columns are
1196        // captured into `update_columns` once across the whole
1197        // events list; multiple `UPDATE OF` clauses are rejected.
1198        let mut events: Vec<TriggerEvent> = Vec::new();
1199        let mut update_columns: Vec<String> = Vec::new();
1200        let (first_ev, first_cols) = self.parse_trigger_event_with_optional_of()?;
1201        events.push(first_ev);
1202        if !first_cols.is_empty() {
1203            update_columns = first_cols;
1204        }
1205        while matches!(self.peek(), Token::Or) {
1206            self.advance();
1207            let (ev, cols) = self.parse_trigger_event_with_optional_of()?;
1208            events.push(ev);
1209            if !cols.is_empty() {
1210                if !update_columns.is_empty() {
1211                    return Err(self.err(
1212                        "CREATE TRIGGER: `UPDATE OF cols` may appear at most once".into(),
1213                    ));
1214                }
1215                update_columns = cols;
1216            }
1217        }
1218        // ON <table>
1219        let tok = self.peek();
1220        let Token::On = tok else {
1221            return Err(self.err(alloc::format!(
1222                "expected ON after trigger events, got {tok:?}"
1223            )));
1224        };
1225        self.advance();
1226        let table = self.expect_ident_like()?;
1227        // FOR EACH ROW / FOR EACH STATEMENT. FOR is a reserved
1228        // keyword (Token::For); EACH / ROW / STATEMENT are bare
1229        // idents.
1230        if !matches!(self.peek(), Token::For) {
1231            return Err(self.err(alloc::format!(
1232                "expected FOR EACH ROW / STATEMENT, got {:?}",
1233                self.peek()
1234            )));
1235        }
1236        self.advance();
1237        let for_each = {
1238            let e = self.expect_ident_like()?;
1239            if !e.eq_ignore_ascii_case("each") {
1240                return Err(self.err(alloc::format!("expected EACH after FOR, got {e:?}")));
1241            }
1242            let unit = self.expect_ident_like()?;
1243            if unit.eq_ignore_ascii_case("row") {
1244                TriggerForEach::Row
1245            } else if unit.eq_ignore_ascii_case("statement") {
1246                TriggerForEach::Statement
1247            } else {
1248                return Err(self.err(alloc::format!(
1249                    "expected ROW / STATEMENT after FOR EACH, got {unit:?}"
1250                )));
1251            }
1252        };
1253        // EXECUTE FUNCTION/PROCEDURE name(...)
1254        let exec = self.expect_ident_like()?;
1255        if !exec.eq_ignore_ascii_case("execute") {
1256            return Err(self.err(alloc::format!(
1257                "expected EXECUTE FUNCTION/PROCEDURE in CREATE TRIGGER, got {exec:?}"
1258            )));
1259        }
1260        let fn_or_proc = self.expect_ident_like()?;
1261        if !(fn_or_proc.eq_ignore_ascii_case("function")
1262            || fn_or_proc.eq_ignore_ascii_case("procedure"))
1263        {
1264            return Err(self.err(alloc::format!(
1265                "expected FUNCTION / PROCEDURE after EXECUTE, got {fn_or_proc:?}"
1266            )));
1267        }
1268        let function = self.expect_ident_like()?;
1269        // Optional empty arg list `()`.
1270        if matches!(self.peek(), Token::LParen) {
1271            self.advance();
1272            if !matches!(self.peek(), Token::RParen) {
1273                return Err(self.err(alloc::format!(
1274                    "v7.12.4 trigger function calls take no args; got {:?}",
1275                    self.peek()
1276                )));
1277            }
1278            self.advance();
1279        }
1280        Ok(Statement::CreateTrigger(CreateTriggerStatement {
1281            name,
1282            or_replace,
1283            timing,
1284            events,
1285            table,
1286            for_each,
1287            function,
1288            update_columns,
1289        }))
1290    }
1291
1292    /// v7.13.0 — parse one trigger event, then optionally consume
1293    /// `OF col, col, …` after `UPDATE` (mailrs round-5 G7). Other
1294    /// events (INSERT/DELETE/TRUNCATE) don't accept the OF tail.
1295    fn parse_trigger_event_with_optional_of(
1296        &mut self,
1297    ) -> Result<(TriggerEvent, Vec<String>), ParseError> {
1298        let ev = self.parse_trigger_event()?;
1299        if !matches!(ev, TriggerEvent::Update) {
1300            return Ok((ev, Vec::new()));
1301        }
1302        // `OF` is a bare ident.
1303        if !matches!(self.peek(), Token::Ident(s) if s.eq_ignore_ascii_case("of")) {
1304            return Ok((ev, Vec::new()));
1305        }
1306        self.advance(); // OF
1307        let mut cols: Vec<String> = Vec::new();
1308        loop {
1309            cols.push(self.expect_ident_like()?);
1310            if matches!(self.peek(), Token::Comma) {
1311                self.advance();
1312                continue;
1313            }
1314            break;
1315        }
1316        if cols.is_empty() {
1317            return Err(self.err(
1318                "CREATE TRIGGER: `UPDATE OF` requires at least one column name".into(),
1319            ));
1320        }
1321        Ok((ev, cols))
1322    }
1323
1324    /// v7.12.4 — `BEGIN stmt; stmt; … END[;]` PL/pgSQL block.
1325    /// v7.12.6 — optional `DECLARE var TYPE [:= init];` prelude
1326    /// before `BEGIN`, and IF / RAISE / embedded SQL statements
1327    /// inside the body.
1328    /// Called by [`parse_plpgsql_body`] after the body's tokens
1329    /// have been lexed into this temporary parser.
1330    pub(crate) fn parse_plpgsql_block(&mut self) -> Result<PlPgSqlBlock, ParseError> {
1331        // v7.12.6 — optional DECLARE prelude.
1332        let declarations = if matches!(
1333            self.peek(),
1334            Token::Ident(s) | Token::QuotedIdent(s) if s.eq_ignore_ascii_case("declare")
1335        ) {
1336            self.advance();
1337            self.parse_plpgsql_declare_block()?
1338        } else {
1339            Vec::new()
1340        };
1341        // BEGIN keyword (PL/pgSQL — distinct from the SQL
1342        // `BEGIN` transaction-start, but we can reuse the
1343        // reserved Token::Begin since the body is a separate
1344        // lex/parse context).
1345        if !matches!(self.peek(), Token::Begin) {
1346            return Err(self.err(alloc::format!(
1347                "expected BEGIN at start of plpgsql block, got {:?}",
1348                self.peek()
1349            )));
1350        }
1351        self.advance();
1352        let statements = self.parse_plpgsql_stmt_list_until_end()?;
1353        Ok(PlPgSqlBlock {
1354            declarations,
1355            statements,
1356        })
1357    }
1358
1359    /// v7.12.6 — parse the `DECLARE ... [var TYPE [:= init];]+`
1360    /// prelude. Caller has already consumed `DECLARE`. We stop
1361    /// reading entries when we hit `BEGIN`.
1362    fn parse_plpgsql_declare_block(&mut self) -> Result<Vec<PlPgSqlDeclare>, ParseError> {
1363        let mut out: Vec<PlPgSqlDeclare> = Vec::new();
1364        loop {
1365            if matches!(self.peek(), Token::Begin) {
1366                return Ok(out);
1367            }
1368            let name = self.expect_ident_like()?;
1369            let ty_token = self.expect_ident_like()?;
1370            let ty = match map_type_ident_to_column_type_name(&ty_token) {
1371                Some(t) => FunctionArgType::Typed(t),
1372                None => FunctionArgType::Raw(ty_token),
1373            };
1374            let default = match self.peek() {
1375                Token::ColonEq => {
1376                    self.advance();
1377                    Some(self.parse_expr(0)?)
1378                }
1379                Token::Eq => {
1380                    // PL/pgSQL also accepts `=` for the
1381                    // DECLARE default (PG treats them the same
1382                    // in this position).
1383                    self.advance();
1384                    Some(self.parse_expr(0)?)
1385                }
1386                _ => None,
1387            };
1388            // Mandatory `;` between declarations.
1389            if !matches!(self.peek(), Token::Semicolon) {
1390                return Err(self.err(alloc::format!(
1391                    "expected ; after DECLARE entry for {name:?}, got {:?}",
1392                    self.peek()
1393                )));
1394            }
1395            self.advance();
1396            out.push(PlPgSqlDeclare { name, ty, default });
1397        }
1398    }
1399
1400    /// v7.12.6 — parse PL/pgSQL statements up to (and consuming)
1401    /// the terminating `END;` (or `END IF;` etc — handled by the
1402    /// per-construct sub-parsers). Used by both the outer block
1403    /// and the IF/ELSE branch bodies.
1404    fn parse_plpgsql_stmt_list_until_end(&mut self) -> Result<Vec<PlPgSqlStmt>, ParseError> {
1405        let mut statements: Vec<PlPgSqlStmt> = Vec::new();
1406        loop {
1407            // Allow trailing semicolons + END.
1408            while matches!(self.peek(), Token::Semicolon) {
1409                self.advance();
1410            }
1411            // END / ELSE / ELSIF — handled by the caller.
1412            if matches!(
1413                self.peek(),
1414                Token::Ident(s) | Token::QuotedIdent(s)
1415                    if s.eq_ignore_ascii_case("end")
1416                        || s.eq_ignore_ascii_case("else")
1417                        || s.eq_ignore_ascii_case("elsif")
1418                        || s.eq_ignore_ascii_case("elseif")
1419            ) {
1420                return Ok(statements);
1421            }
1422            // Otherwise: one statement, then expect `;` or
1423            // a block-terminator keyword.
1424            let stmt = self.parse_plpgsql_stmt()?;
1425            statements.push(stmt);
1426            match self.peek() {
1427                Token::Semicolon => {
1428                    self.advance();
1429                }
1430                Token::Ident(s) | Token::QuotedIdent(s)
1431                    if s.eq_ignore_ascii_case("end")
1432                        || s.eq_ignore_ascii_case("else")
1433                        || s.eq_ignore_ascii_case("elsif")
1434                        || s.eq_ignore_ascii_case("elseif") =>
1435                {
1436                    // Final statement of the block without `;`.
1437                }
1438                other => {
1439                    return Err(self.err(alloc::format!(
1440                        "expected ; or END/ELSE/ELSIF after plpgsql statement, got {other:?}"
1441                    )));
1442                }
1443            }
1444        }
1445    }
1446
1447    fn parse_plpgsql_stmt(&mut self) -> Result<PlPgSqlStmt, ParseError> {
1448        // RETURN keyword?
1449        if matches!(self.peek(), Token::Ident(s) | Token::QuotedIdent(s) if s.eq_ignore_ascii_case("return"))
1450        {
1451            self.advance();
1452            return self.parse_plpgsql_return();
1453        }
1454        // v7.12.6 — IF block.
1455        if matches!(self.peek(), Token::Ident(s) | Token::QuotedIdent(s) if s.eq_ignore_ascii_case("if"))
1456        {
1457            self.advance();
1458            return self.parse_plpgsql_if();
1459        }
1460        // v7.12.6 — RAISE.
1461        if matches!(self.peek(), Token::Ident(s) | Token::QuotedIdent(s) if s.eq_ignore_ascii_case("raise"))
1462        {
1463            self.advance();
1464            return self.parse_plpgsql_raise();
1465        }
1466        // v7.12.6 — embedded SQL statements. INSERT/UPDATE/DELETE/
1467        // SELECT can appear directly inside a trigger body; we
1468        // recurse into the regular Statement parser, which will
1469        // stop at the trailing `;` (which our caller then
1470        // consumes).
1471        if matches!(self.peek(), Token::Insert)
1472            || matches!(self.peek(), Token::Select)
1473            || matches!(self.peek(), Token::Ident(s) | Token::QuotedIdent(s) if s.eq_ignore_ascii_case("update") || s.eq_ignore_ascii_case("delete"))
1474        {
1475            let stmt = self.parse_one_statement()?;
1476            return Ok(PlPgSqlStmt::EmbeddedSql(Box::new(stmt)));
1477        }
1478        // Otherwise: assignment. `NEW.col` / `OLD.col` / `var`
1479        // followed by `:=` and an expression.
1480        let target = self.parse_plpgsql_assign_target()?;
1481        // PL/pgSQL assignment uses `:=`. The lexer represents
1482        // this as a colon followed by `=`; check both shapes.
1483        match self.peek() {
1484            Token::ColonEq => {
1485                self.advance();
1486            }
1487            Token::Colon => {
1488                self.advance();
1489                if !matches!(self.peek(), Token::Eq) {
1490                    return Err(self.err(alloc::format!(
1491                        "expected := after plpgsql assign target, got `:` then {:?}",
1492                        self.peek()
1493                    )));
1494                }
1495                self.advance();
1496            }
1497            other => {
1498                return Err(self.err(alloc::format!(
1499                    "expected := after plpgsql assign target, got {other:?}"
1500                )));
1501            }
1502        }
1503        let value = self.parse_expr(0)?;
1504        Ok(PlPgSqlStmt::Assign { target, value })
1505    }
1506
1507    /// v7.12.6 — `IF cond THEN body [ELSIF cond THEN body]*
1508    /// [ELSE body] END IF`. `IF` keyword already consumed.
1509    fn parse_plpgsql_if(&mut self) -> Result<PlPgSqlStmt, ParseError> {
1510        let mut branches: Vec<(Expr, Vec<PlPgSqlStmt>)> = Vec::new();
1511        let mut else_branch: Vec<PlPgSqlStmt> = Vec::new();
1512        loop {
1513            // <expr> THEN
1514            let cond = self.parse_expr(0)?;
1515            let then_kw = self.expect_ident_like()?;
1516            if !then_kw.eq_ignore_ascii_case("then") {
1517                return Err(self.err(alloc::format!(
1518                    "expected THEN after IF/ELSIF condition, got {then_kw:?}"
1519                )));
1520            }
1521            let body = self.parse_plpgsql_stmt_list_until_end()?;
1522            branches.push((cond, body));
1523            // Look at terminator: ELSIF/ELSEIF, ELSE, or END IF.
1524            match self.peek() {
1525                Token::Ident(s) | Token::QuotedIdent(s)
1526                    if s.eq_ignore_ascii_case("elsif") || s.eq_ignore_ascii_case("elseif") =>
1527                {
1528                    self.advance();
1529                    continue;
1530                }
1531                Token::Ident(s) | Token::QuotedIdent(s) if s.eq_ignore_ascii_case("else") => {
1532                    self.advance();
1533                    else_branch = self.parse_plpgsql_stmt_list_until_end()?;
1534                    break;
1535                }
1536                Token::Ident(s) | Token::QuotedIdent(s) if s.eq_ignore_ascii_case("end") => {
1537                    break;
1538                }
1539                other => {
1540                    return Err(self.err(alloc::format!(
1541                        "expected ELSIF / ELSE / END after IF branch body, got {other:?}"
1542                    )));
1543                }
1544            }
1545        }
1546        // Expect `END IF` (the END keyword is the one we're
1547        // looking at right now).
1548        let end_kw = self.expect_ident_like()?;
1549        if !end_kw.eq_ignore_ascii_case("end") {
1550            return Err(self.err(alloc::format!("expected END IF, got {end_kw:?}")));
1551        }
1552        let if_kw = self.expect_ident_like()?;
1553        if !if_kw.eq_ignore_ascii_case("if") {
1554            return Err(self.err(alloc::format!("expected END IF, got END {if_kw:?}")));
1555        }
1556        Ok(PlPgSqlStmt::If {
1557            branches,
1558            else_branch,
1559        })
1560    }
1561
1562    /// v7.12.6 — `RAISE { NOTICE | WARNING | INFO | LOG | DEBUG
1563    /// | EXCEPTION } '<message>' [, args]*`. The `RAISE` keyword
1564    /// is already consumed.
1565    fn parse_plpgsql_raise(&mut self) -> Result<PlPgSqlStmt, ParseError> {
1566        let lvl_ident = self.expect_ident_like()?;
1567        let level = match lvl_ident.to_ascii_lowercase().as_str() {
1568            "notice" => RaiseLevel::Notice,
1569            "warning" => RaiseLevel::Warning,
1570            "info" => RaiseLevel::Info,
1571            "log" => RaiseLevel::Log,
1572            "debug" => RaiseLevel::Debug,
1573            "exception" => RaiseLevel::Exception,
1574            other => {
1575                return Err(self.err(alloc::format!(
1576                    "expected RAISE level (NOTICE/WARNING/INFO/LOG/DEBUG/EXCEPTION), got {other:?}"
1577                )));
1578            }
1579        };
1580        // Message: required for v7.12.6. PG accepts a bare
1581        // RAISE-rethrow form (no message), reserved for future
1582        // RAISE-no-args support.
1583        let Token::String(msg) = self.peek() else {
1584            return Err(self.err(alloc::format!(
1585                "expected RAISE message string, got {:?}",
1586                self.peek()
1587            )));
1588        };
1589        let message = msg.clone();
1590        self.advance();
1591        // Optional comma-separated args (PG `%` format substitution).
1592        let mut args: Vec<Expr> = Vec::new();
1593        while matches!(self.peek(), Token::Comma) {
1594            self.advance();
1595            args.push(self.parse_expr(0)?);
1596        }
1597        Ok(PlPgSqlStmt::Raise {
1598            level,
1599            message,
1600            args,
1601        })
1602    }
1603
1604    fn parse_plpgsql_assign_target(&mut self) -> Result<AssignTarget, ParseError> {
1605        let head = self.expect_ident_like()?;
1606        if matches!(self.peek(), Token::Dot) {
1607            self.advance();
1608            let col = self.expect_ident_like()?;
1609            if head.eq_ignore_ascii_case("new") {
1610                return Ok(AssignTarget::NewColumn(col));
1611            }
1612            if head.eq_ignore_ascii_case("old") {
1613                return Ok(AssignTarget::OldColumn(col));
1614            }
1615            return Err(self.err(alloc::format!(
1616                "v7.12.4 plpgsql assign target must be NEW.<col> / OLD.<col> / <local_var>; \
1617                 got {head:?}.<col>"
1618            )));
1619        }
1620        Ok(AssignTarget::Local(head))
1621    }
1622
1623    fn parse_plpgsql_return(&mut self) -> Result<PlPgSqlStmt, ParseError> {
1624        // RETURN NEW / OLD / NULL — bare-ident forms.
1625        match self.peek() {
1626            Token::Ident(s) | Token::QuotedIdent(s) if s.eq_ignore_ascii_case("new") => {
1627                self.advance();
1628                return Ok(PlPgSqlStmt::Return(ReturnTarget::New));
1629            }
1630            Token::Ident(s) | Token::QuotedIdent(s) if s.eq_ignore_ascii_case("old") => {
1631                self.advance();
1632                return Ok(PlPgSqlStmt::Return(ReturnTarget::Old));
1633            }
1634            Token::Null => {
1635                self.advance();
1636                return Ok(PlPgSqlStmt::Return(ReturnTarget::Null));
1637            }
1638            // Bare `RETURN;` (no value) — treated as `RETURN NULL`
1639            // per PL/pgSQL convention.
1640            Token::Semicolon => {
1641                return Ok(PlPgSqlStmt::Return(ReturnTarget::Null));
1642            }
1643            _ => {}
1644        }
1645        // Fall through: parse a full expression.
1646        let e = self.parse_expr(0)?;
1647        Ok(PlPgSqlStmt::Return(ReturnTarget::Expr(e)))
1648    }
1649
1650    fn parse_trigger_event(&mut self) -> Result<TriggerEvent, ParseError> {
1651        // INSERT is a reserved Token; UPDATE / DELETE / TRUNCATE
1652        // are ident-shaped (the parser keys off case-insensitive
1653        // match — same shape used by the top-level Update / Delete
1654        // dispatchers at parse_one_statement).
1655        if matches!(self.peek(), Token::Insert) {
1656            self.advance();
1657            return Ok(TriggerEvent::Insert);
1658        }
1659        match self.peek() {
1660            Token::Ident(s) | Token::QuotedIdent(s) if s.eq_ignore_ascii_case("update") => {
1661                self.advance();
1662                Ok(TriggerEvent::Update)
1663            }
1664            Token::Ident(s) | Token::QuotedIdent(s) if s.eq_ignore_ascii_case("delete") => {
1665                self.advance();
1666                Ok(TriggerEvent::Delete)
1667            }
1668            Token::Ident(s) | Token::QuotedIdent(s) if s.eq_ignore_ascii_case("truncate") => {
1669                self.advance();
1670                Ok(TriggerEvent::Truncate)
1671            }
1672            other => Err(self.err(alloc::format!(
1673                "expected INSERT / UPDATE / DELETE / TRUNCATE in trigger event list, got {other:?}"
1674            ))),
1675        }
1676    }
1677
1678    /// v6.1.2 → v6.1.3 — `CREATE PUBLICATION <name>` body. Accepts:
1679    ///   - (no clause) → implicit `FOR ALL TABLES`
1680    ///   - `FOR ALL TABLES`
1681    ///   - `FOR ALL TABLES EXCEPT t1, t2, …` (v6.1.3)
1682    ///   - `FOR TABLE t1, t2, …` (v6.1.3) — `FOR TABLES …` also
1683    ///     accepted (PG accepts both forms in PG 19).
1684    fn parse_create_publication_after_keyword(&mut self) -> Result<Statement, ParseError> {
1685        let name = self.expect_ident_or_string()?;
1686        // Bare DDL maps to FOR ALL TABLES — matches the v6.1.2
1687        // shape so existing publications keep parsing identically.
1688        let scope = if matches!(self.peek(), Token::For) {
1689            self.advance();
1690            if matches!(self.peek(), Token::All) {
1691                self.advance();
1692                if !matches!(self.peek(), Token::Tables) {
1693                    return Err(self.err(format!(
1694                        "expected TABLES after FOR ALL, got {:?}",
1695                        self.peek()
1696                    )));
1697                }
1698                self.advance();
1699                if matches!(self.peek(), Token::Except) {
1700                    self.advance();
1701                    let tables = self.parse_publication_table_list()?;
1702                    PublicationScope::AllTablesExcept(tables)
1703                } else {
1704                    PublicationScope::AllTables
1705                }
1706            } else if matches!(self.peek(), Token::Table | Token::Tables) {
1707                // PG 19 accepts both `FOR TABLE …` (singular) and
1708                // `FOR TABLES …` (plural); SPG matches.
1709                self.advance();
1710                let tables = self.parse_publication_table_list()?;
1711                PublicationScope::ForTables(tables)
1712            } else {
1713                return Err(self.err(format!(
1714                    "expected ALL TABLES or TABLE <list> after FOR, got {:?}",
1715                    self.peek()
1716                )));
1717            }
1718        } else {
1719            PublicationScope::AllTables
1720        };
1721        Ok(Statement::CreatePublication(CreatePublicationStatement {
1722            name,
1723            scope,
1724        }))
1725    }
1726
1727    /// v6.1.3 — Comma-separated identifier list for the publication
1728    /// FOR-clause. Requires at least one entry; empty list is a
1729    /// parse error (PG behaviour). Quoted idents are accepted; the
1730    /// names round-trip through `Display` as `quote_ident(name)`.
1731    fn parse_publication_table_list(&mut self) -> Result<Vec<String>, ParseError> {
1732        let first = self.expect_ident_like()?;
1733        let mut out = alloc::vec![first];
1734        while matches!(self.peek(), Token::Comma) {
1735            self.advance();
1736            out.push(self.expect_ident_like()?);
1737        }
1738        Ok(out)
1739    }
1740
1741    /// v6.1.4 — `CREATE SUBSCRIPTION <name>
1742    ///                 CONNECTION '<conn>'
1743    ///                 PUBLICATION <pub> [, <pub> ...]`.
1744    ///
1745    /// The clause order is fixed (CONNECTION first, then
1746    /// PUBLICATION) to match PG. No WITH-options accepted in
1747    /// v6.1.4 — `enabled` defaults to true, no other knobs ship.
1748    fn parse_create_subscription_after_keyword(&mut self) -> Result<Statement, ParseError> {
1749        let name = self.expect_ident_or_string()?;
1750        if !matches!(self.peek(), Token::Connection) {
1751            return Err(self.err(format!(
1752                "expected CONNECTION after CREATE SUBSCRIPTION <name>, got {:?}",
1753                self.peek()
1754            )));
1755        }
1756        self.advance();
1757        let conn_str = self.expect_string_literal()?;
1758        if !matches!(self.peek(), Token::Publication) {
1759            return Err(self.err(format!(
1760                "expected PUBLICATION after CONNECTION '<conn>', got {:?}",
1761                self.peek()
1762            )));
1763        }
1764        self.advance();
1765        // Reuse the publication FOR-list parser shape: at least one
1766        // identifier, comma-separated.
1767        let first = self.expect_ident_like()?;
1768        let mut publications = alloc::vec![first];
1769        while matches!(self.peek(), Token::Comma) {
1770            self.advance();
1771            publications.push(self.expect_ident_like()?);
1772        }
1773        Ok(Statement::CreateSubscription(CreateSubscriptionStatement {
1774            name,
1775            conn_str,
1776            publications,
1777        }))
1778    }
1779
1780    /// v6.1.7 — `WAIT FOR WAL POSITION <pos> [WITH TIMEOUT <ms>]`.
1781    /// All keywords after `WAIT` are bare idents in v6.1.x; no
1782    /// lexer churn. Both `<pos>` and `<ms>` are positive integers
1783    /// that fit `u64`.
1784    /// v7.12.1 — parameter name in `SET <name>` may be dotted
1785    /// (`pg_catalog.default_text_search_config` etc).
1786    fn parse_set_param_name(&mut self) -> Result<String, ParseError> {
1787        let mut name = self.expect_ident_like()?;
1788        while matches!(self.peek(), Token::Dot) {
1789            self.advance();
1790            let next = self.expect_ident_like()?;
1791            name.push('.');
1792            name.push_str(&next);
1793        }
1794        Ok(name.to_ascii_lowercase())
1795    }
1796
1797    fn parse_set_value(&mut self) -> Result<crate::ast::SetValue, ParseError> {
1798        match self.advance() {
1799            Token::String(s) => Ok(crate::ast::SetValue::String(s)),
1800            Token::Ident(s) | Token::QuotedIdent(s) if s.eq_ignore_ascii_case("default") => {
1801                Ok(crate::ast::SetValue::Default)
1802            }
1803            Token::Ident(s) | Token::QuotedIdent(s) => {
1804                let mut accum = s;
1805                while matches!(self.peek(), Token::Dot) {
1806                    self.advance();
1807                    let next = self.expect_ident_like()?;
1808                    accum.push('.');
1809                    accum.push_str(&next);
1810                }
1811                Ok(crate::ast::SetValue::Ident(accum))
1812            }
1813            Token::Integer(n) => Ok(crate::ast::SetValue::Number(n.to_string())),
1814            Token::Float(f) => Ok(crate::ast::SetValue::Number(f.to_string())),
1815            // v7.14.0 — MySQL session/user variable RHS
1816            // (e.g. `SET OLD_FOREIGN_KEY_CHECKS = @@FOREIGN_KEY_CHECKS`).
1817            // Wrap as Ident so the SET handler can record it; the
1818            // engine treats `@VAR` / `@@VAR` values as opaque
1819            // strings.
1820            Token::SessionVar(s) => Ok(crate::ast::SetValue::Ident(s)),
1821            // v7.14.0 — `SET sql_mode = 'NO_AUTO_VALUE_ON_ZERO,STRICT_TRANS_TABLES'`
1822            // is the common MySQL preamble shape. Allow a `+` or
1823            // `-` prefix on negative numerics for parity with PG
1824            // (some param defaults are negative).
1825            Token::Minus => match self.advance() {
1826                Token::Integer(n) => Ok(crate::ast::SetValue::Number(alloc::format!("-{n}"))),
1827                Token::Float(f) => Ok(crate::ast::SetValue::Number(alloc::format!("-{f}"))),
1828                other => Err(self.err(format!(
1829                    "expected numeric after `-` in SET value, got {other:?}"
1830                ))),
1831            },
1832            other => Err(self.err(format!(
1833                "expected literal, identifier, or DEFAULT after `=` in SET, got {other:?}"
1834            ))),
1835        }
1836    }
1837
1838    fn parse_wait_after_keyword(&mut self) -> Result<Statement, ParseError> {
1839        // FOR is a v6.1.2-reserved keyword (Token::For). The
1840        // other two are bare idents — they've never needed lexer
1841        // support and we keep it that way.
1842        if !matches!(self.peek(), Token::For) {
1843            return Err(self.err(format!("expected FOR after WAIT, got {:?}", self.peek())));
1844        }
1845        self.advance();
1846        self.expect_keyword_ident("wal")?;
1847        self.expect_keyword_ident("position")?;
1848        let pos = self.expect_u64_literal()?;
1849        let timeout_ms = if matches!(self.peek(), Token::Ident(s) | Token::QuotedIdent(s) if s.eq_ignore_ascii_case("with"))
1850        {
1851            self.advance();
1852            self.expect_keyword_ident("timeout")?;
1853            Some(self.expect_u64_literal()?)
1854        } else {
1855            None
1856        };
1857        Ok(Statement::WaitForWalPosition { pos, timeout_ms })
1858    }
1859
1860    /// v6.1.7 helper — consume a `Token::Integer` and check it
1861    /// fits `u64`. WAL positions and millisecond timeouts are
1862    /// non-negative.
1863    fn expect_u64_literal(&mut self) -> Result<u64, ParseError> {
1864        match self.advance() {
1865            Token::Integer(n) if n >= 0 => Ok(n as u64),
1866            Token::Integer(n) => Err(ParseError {
1867                message: format!("expected non-negative integer, got {n}"),
1868                token_pos: self.pos.saturating_sub(1),
1869            }),
1870            other => Err(ParseError {
1871                message: format!("expected integer literal, got {other:?}"),
1872                token_pos: self.pos.saturating_sub(1),
1873            }),
1874        }
1875    }
1876
1877    /// `CREATE USER` body — name + WITH PASSWORD '<pw>' + optional
1878    /// ROLE '<role>' (defaults to readonly). All string slots accept
1879    /// either a quoted ident or a quoted string literal.
1880    fn parse_create_user_after_keyword(&mut self) -> Result<Statement, ParseError> {
1881        let name = self.expect_ident_or_string()?;
1882        self.expect_keyword_ident("with")?;
1883        self.expect_keyword_ident("password")?;
1884        let password = self.expect_string_literal()?;
1885        let role = if let Token::Ident(s) | Token::QuotedIdent(s) = self.peek()
1886            && s.eq_ignore_ascii_case("role")
1887        {
1888            self.advance();
1889            self.expect_string_literal()?
1890        } else {
1891            "readonly".to_string()
1892        };
1893        Ok(Statement::CreateUser(crate::ast::CreateUserStatement {
1894            name,
1895            password,
1896            role,
1897        }))
1898    }
1899
1900    /// v4.4 `UPDATE <table> SET col = expr [, col = expr]* [WHERE cond]`.
1901    /// Caller already consumed the leading `UPDATE` ident.
1902    fn parse_update_after_keyword(&mut self) -> Result<Statement, ParseError> {
1903        let table = self.expect_ident_like()?;
1904        self.expect_keyword_ident("set")?;
1905        let mut assignments = Vec::new();
1906        loop {
1907            let col = self.expect_ident_like()?;
1908            if !matches!(self.peek(), Token::Eq) {
1909                return Err(self.err(format!(
1910                    "expected `=` after column name in UPDATE SET, got {:?}",
1911                    self.peek()
1912                )));
1913            }
1914            self.advance();
1915            let value = self.parse_expr(0)?;
1916            assignments.push((col, value));
1917            if matches!(self.peek(), Token::Comma) {
1918                self.advance();
1919                continue;
1920            }
1921            break;
1922        }
1923        let where_ = if matches!(self.peek(), Token::Where) {
1924            self.advance();
1925            Some(self.parse_expr(0)?)
1926        } else {
1927            None
1928        };
1929        let returning = self.parse_optional_returning()?;
1930        Ok(Statement::Update(crate::ast::UpdateStatement {
1931            table,
1932            assignments,
1933            where_,
1934            returning,
1935        }))
1936    }
1937
1938    /// v4.4 `DELETE FROM <table> [WHERE cond]`. Caller already consumed
1939    /// the leading `DELETE` ident.
1940    fn parse_delete_after_keyword(&mut self) -> Result<Statement, ParseError> {
1941        if !matches!(self.peek(), Token::From) {
1942            return Err(self.err(format!("expected FROM after DELETE, got {:?}", self.peek())));
1943        }
1944        self.advance();
1945        let table = self.expect_ident_like()?;
1946        let where_ = if matches!(self.peek(), Token::Where) {
1947            self.advance();
1948            Some(self.parse_expr(0)?)
1949        } else {
1950            None
1951        };
1952        let returning = self.parse_optional_returning()?;
1953        Ok(Statement::Delete(crate::ast::DeleteStatement {
1954            table,
1955            where_,
1956            returning,
1957        }))
1958    }
1959
1960    /// v7.9.4 — parse the optional trailing `RETURNING <projection>`
1961    /// clause on INSERT / UPDATE / DELETE. Same projection grammar
1962    /// as SELECT, so `RETURNING *`, `RETURNING col`,
1963    /// `RETURNING expr AS alias`, and `RETURNING a, b, c` all work.
1964    fn parse_optional_returning(
1965        &mut self,
1966    ) -> Result<Option<Vec<crate::ast::SelectItem>>, ParseError> {
1967        let is_returning_kw = matches!(
1968            self.peek(),
1969            Token::Ident(s) if s.eq_ignore_ascii_case("returning")
1970        );
1971        if !is_returning_kw {
1972            return Ok(None);
1973        }
1974        self.advance();
1975        let mut items = Vec::new();
1976        loop {
1977            items.push(self.parse_select_item()?);
1978            if matches!(self.peek(), Token::Comma) {
1979                self.advance();
1980                continue;
1981            }
1982            break;
1983        }
1984        Ok(Some(items))
1985    }
1986
1987    /// v6.0.4 — parse the tail of an ALTER statement after the
1988    /// leading `ALTER` keyword has been consumed. Only one form is
1989    /// supported in v6.0.4:
1990    ///
1991    /// ```text
1992    /// ALTER INDEX <name> REBUILD [WITH (encoding = <enc>)]
1993    /// ```
1994    fn parse_alter_after_keyword(&mut self) -> Result<Statement, ParseError> {
1995        // ALTER INDEX <name> ... | ALTER TABLE <name> SET hot_tier_bytes = <n>
1996        // v7.14.0 — `ALTER TABLE ONLY` modifier (PG partition-
1997        // exclusion) is accepted by stripping the `ONLY` keyword
1998        // before the table parse.
1999        // v7.14.0 — `ALTER SEQUENCE / ALTER VIEW / ALTER OWNER`
2000        // and the long PG-dump tail are accepted as no-ops.
2001        match self.advance() {
2002            Token::Index => {}
2003            Token::Ident(s) | Token::QuotedIdent(s) if s.eq_ignore_ascii_case("index") => {}
2004            // v6.7.2 — ALTER TABLE t SET hot_tier_bytes = X
2005            // v7.14.0 — ALTER TABLE ONLY t … strip the `ONLY`.
2006            Token::Table => {
2007                if matches!(self.peek(), Token::Ident(s) if s.eq_ignore_ascii_case("only")) {
2008                    self.advance();
2009                }
2010                return self.parse_alter_table_after_keyword();
2011            }
2012            Token::Ident(s) | Token::QuotedIdent(s) if s.eq_ignore_ascii_case("table") => {
2013                if matches!(self.peek(), Token::Ident(s) if s.eq_ignore_ascii_case("only")) {
2014                    self.advance();
2015                }
2016                return self.parse_alter_table_after_keyword();
2017            }
2018            // v7.14.0 — ALTER SEQUENCE / ALTER VIEW / ALTER
2019            // FUNCTION / ALTER TYPE / ALTER DOMAIN / ALTER
2020            // DATABASE / ALTER USER / ALTER ROLE / ALTER SCHEMA
2021            // / ALTER OWNER / ALTER DEFAULT PRIVILEGES — accept
2022            // as no-op so pg_dump's tail loads.
2023            Token::Ident(s) | Token::QuotedIdent(s)
2024                if matches!(
2025                    s.to_ascii_lowercase().as_str(),
2026                    "sequence"
2027                        | "view"
2028                        | "function"
2029                        | "type"
2030                        | "domain"
2031                        | "database"
2032                        | "role"
2033                        | "schema"
2034                        | "owner"
2035                        | "default"
2036                        | "extension"
2037                        | "materialized"
2038                        | "policy"
2039                        | "publication"
2040                        | "subscription"
2041                ) =>
2042            {
2043                self.consume_until_statement_boundary();
2044                return Ok(Statement::Empty);
2045            }
2046            other => {
2047                return Err(self.err(format!(
2048                    "expected INDEX / TABLE / SEQUENCE / VIEW / FUNCTION / TYPE / OWNER / etc \
2049                     after ALTER, got {other:?}"
2050                )));
2051            }
2052        }
2053        let name = self.expect_ident_like()?;
2054        // REBUILD
2055        self.expect_keyword_ident("rebuild")?;
2056        // Optional: WITH (encoding = <enc>)
2057        let encoding = if matches!(self.peek(), Token::Ident(s) if s.eq_ignore_ascii_case("with")) {
2058            self.advance();
2059            if !matches!(self.peek(), Token::LParen) {
2060                return Err(self.err(format!(
2061                    "expected '(' after WITH in ALTER INDEX REBUILD, got {:?}",
2062                    self.peek()
2063                )));
2064            }
2065            self.advance();
2066            self.expect_keyword_ident("encoding")?;
2067            if !matches!(self.peek(), Token::Eq) {
2068                return Err(self.err(format!(
2069                    "expected '=' after encoding in ALTER INDEX REBUILD, got {:?}",
2070                    self.peek()
2071                )));
2072            }
2073            self.advance();
2074            let enc_ident = match self.advance() {
2075                Token::Ident(s) | Token::QuotedIdent(s) => s,
2076                other => {
2077                    return Err(self.err(format!("expected encoding name after =, got {other:?}")));
2078                }
2079            };
2080            let enc = match enc_ident.to_ascii_lowercase().as_str() {
2081                "f32" => VecEncoding::F32,
2082                "sq8" => VecEncoding::Sq8,
2083                "half" => VecEncoding::F16,
2084                other => {
2085                    return Err(self.err(format!(
2086                        "unknown vector encoding {other:?} in ALTER INDEX REBUILD; supported: F32, SQ8, HALF"
2087                    )));
2088                }
2089            };
2090            if !matches!(self.peek(), Token::RParen) {
2091                return Err(self.err(format!(
2092                    "expected ')' after encoding value, got {:?}",
2093                    self.peek()
2094                )));
2095            }
2096            self.advance();
2097            Some(enc)
2098        } else {
2099            None
2100        };
2101        Ok(Statement::AlterIndex(crate::ast::AlterIndexStatement {
2102            name,
2103            target: crate::ast::AlterIndexTarget::Rebuild { encoding },
2104        }))
2105    }
2106
2107    /// v6.7.2 — `ALTER TABLE <name> SET hot_tier_bytes = <n>`. The
2108    /// only `SET` form currently supported; future v6.7.x can add
2109    /// more SET subjects without changing the dispatch shape.
2110    /// v7.13.2 — mailrs round-6 S1: accepts comma-separated
2111    /// subactions. Single-subaction shape stays a 1-element vec.
2112    fn parse_alter_table_after_keyword(&mut self) -> Result<Statement, ParseError> {
2113        let table_name = self.expect_ident_like()?;
2114        let mut targets: Vec<crate::ast::AlterTableTarget> = Vec::new();
2115        loop {
2116            let subaction = self.parse_alter_table_subaction()?;
2117            // ADD COLUMN with inline REFERENCES emits both an
2118            // AddColumn and an AddForeignKey subaction; the
2119            // helper returns 1 or 2 items.
2120            targets.extend(subaction);
2121            if matches!(self.peek(), Token::Comma) {
2122                self.advance();
2123                continue;
2124            }
2125            break;
2126        }
2127        Ok(Statement::AlterTable(crate::ast::AlterTableStatement {
2128            name: table_name,
2129            targets,
2130        }))
2131    }
2132
2133    /// Parse one ALTER TABLE subaction. Returns a Vec because
2134    /// inline `REFERENCES` on `ADD COLUMN` produces both an
2135    /// AddColumn and an AddForeignKey entry (mailrs round-6 S3).
2136    fn parse_alter_table_subaction(
2137        &mut self,
2138    ) -> Result<Vec<crate::ast::AlterTableTarget>, ParseError> {
2139        match self.peek() {
2140            Token::Ident(s) if s.eq_ignore_ascii_case("set") => {
2141                self.advance();
2142                let setting = self.expect_ident_like()?;
2143                if !setting.eq_ignore_ascii_case("hot_tier_bytes") {
2144                    return Err(self.err(alloc::format!(
2145                        "ALTER TABLE SET: unknown setting {setting:?}; supported: hot_tier_bytes"
2146                    )));
2147                }
2148                if !matches!(self.peek(), Token::Eq) {
2149                    return Err(self.err(alloc::format!(
2150                        "expected '=' after hot_tier_bytes, got {:?}",
2151                        self.peek()
2152                    )));
2153                }
2154                self.advance();
2155                let n = self.expect_u64_literal()?;
2156                Ok(alloc::vec![crate::ast::AlterTableTarget::SetHotTierBytes(n)])
2157            }
2158            Token::Ident(s) if s.eq_ignore_ascii_case("add") => {
2159                self.advance();
2160                // v7.14.0 — ADD CONSTRAINT <name> { FOREIGN KEY |
2161                // PRIMARY KEY | UNIQUE | CHECK }. pg_dump emits
2162                // PRIMARY KEY this way; mysqldump emits both.
2163                // Peek-only dispatch (no advance) — `advance()`
2164                // destructively replaces consumed tokens with Eof,
2165                // so saved-pos restore would land on Eofs.
2166                if matches!(self.peek(), Token::Ident(s) if s.eq_ignore_ascii_case("constraint"))
2167                {
2168                    // The next-but-one ident is the constraint
2169                    // name; the one after THAT is the kind.
2170                    let kind_pos = self.pos + 2;
2171                    let kind = self.tokens.get(kind_pos).cloned();
2172                    if matches!(&kind, Some(Token::Ident(s)) if s.eq_ignore_ascii_case("foreign"))
2173                    {
2174                        let fk = self.parse_table_level_fk()?;
2175                        return Ok(alloc::vec![
2176                            crate::ast::AlterTableTarget::AddForeignKey(fk)
2177                        ]);
2178                    }
2179                    if matches!(&kind, Some(Token::Ident(s)) if s.eq_ignore_ascii_case("primary"))
2180                    {
2181                        self.advance(); // CONSTRAINT
2182                        let _name = self.expect_ident_like()?;
2183                        self.advance(); // PRIMARY
2184                        self.expect_keyword_ident("key")?;
2185                        let cols = self.parse_paren_ident_list("PRIMARY KEY")?;
2186                        return Ok(alloc::vec![
2187                            crate::ast::AlterTableTarget::AddTableConstraint(
2188                                crate::ast::TableConstraint::PrimaryKey {
2189                                    name: None,
2190                                    columns: cols,
2191                                }
2192                            )
2193                        ]);
2194                    }
2195                    if matches!(&kind, Some(Token::Ident(s)) if s.eq_ignore_ascii_case("unique"))
2196                    {
2197                        self.advance(); // CONSTRAINT
2198                        let _name = self.expect_ident_like()?;
2199                        self.advance(); // UNIQUE
2200                        let cols = self.parse_paren_ident_list("UNIQUE")?;
2201                        return Ok(alloc::vec![
2202                            crate::ast::AlterTableTarget::AddTableConstraint(
2203                                crate::ast::TableConstraint::Unique {
2204                                    name: None,
2205                                    columns: cols,
2206                                    nulls_not_distinct: false,
2207                                }
2208                            )
2209                        ]);
2210                    }
2211                    if matches!(&kind, Some(Token::Ident(s)) if s.eq_ignore_ascii_case("check"))
2212                    {
2213                        self.advance(); // CONSTRAINT
2214                        let _name = self.expect_ident_like()?;
2215                        self.advance(); // CHECK
2216                        if !matches!(self.peek(), Token::LParen) {
2217                            return Err(self.err(alloc::format!(
2218                                "expected '(' after CHECK, got {:?}", self.peek()
2219                            )));
2220                        }
2221                        self.advance();
2222                        let expr = self.parse_expr(0)?;
2223                        if matches!(self.peek(), Token::RParen) {
2224                            self.advance();
2225                        }
2226                        return Ok(alloc::vec![
2227                            crate::ast::AlterTableTarget::AddTableConstraint(
2228                                crate::ast::TableConstraint::Check { name: None, expr }
2229                            )
2230                        ]);
2231                    }
2232                    // Unknown kind — fall through to FK path which
2233                    // produces a descriptive parse error.
2234                }
2235                let is_fk = matches!(
2236                    self.peek(),
2237                    Token::Ident(s) if s.eq_ignore_ascii_case("constraint")
2238                        || s.eq_ignore_ascii_case("foreign")
2239                );
2240                if is_fk {
2241                    let fk = self.parse_table_level_fk()?;
2242                    return Ok(alloc::vec![crate::ast::AlterTableTarget::AddForeignKey(fk)]);
2243                }
2244                // v7.14.0 — bare ADD PRIMARY KEY / UNIQUE / CHECK
2245                // (no CONSTRAINT prefix) — same dispatch.
2246                match self.peek().clone() {
2247                    Token::Ident(s) if s.eq_ignore_ascii_case("primary") => {
2248                        self.advance();
2249                        self.expect_keyword_ident("key")?;
2250                        let cols = self.parse_paren_ident_list("PRIMARY KEY")?;
2251                        return Ok(alloc::vec![
2252                            crate::ast::AlterTableTarget::AddTableConstraint(
2253                                crate::ast::TableConstraint::PrimaryKey {
2254                                    name: None,
2255                                    columns: cols,
2256                                }
2257                            )
2258                        ]);
2259                    }
2260                    Token::Ident(s) if s.eq_ignore_ascii_case("unique") => {
2261                        self.advance();
2262                        let cols = self.parse_paren_ident_list("UNIQUE")?;
2263                        return Ok(alloc::vec![
2264                            crate::ast::AlterTableTarget::AddTableConstraint(
2265                                crate::ast::TableConstraint::Unique {
2266                                    name: None,
2267                                    columns: cols,
2268                                    nulls_not_distinct: false,
2269                                }
2270                            )
2271                        ]);
2272                    }
2273                    _ => {}
2274                }
2275                if matches!(self.peek(), Token::Ident(s) if s.eq_ignore_ascii_case("column")) {
2276                    self.advance();
2277                }
2278                let mut if_not_exists = false;
2279                if matches!(self.peek(), Token::Ident(s) if s.eq_ignore_ascii_case("if")) {
2280                    self.advance();
2281                    if !matches!(self.peek(), Token::Not) {
2282                        return Err(self.err(alloc::format!(
2283                            "expected NOT after IF in ALTER TABLE ADD COLUMN, got {:?}",
2284                            self.peek()
2285                        )));
2286                    }
2287                    self.advance();
2288                    if !matches!(self.peek(), Token::Ident(s) if s.eq_ignore_ascii_case("exists")) {
2289                        return Err(self.err(alloc::format!(
2290                            "expected EXISTS after IF NOT in ALTER TABLE ADD COLUMN, got {:?}",
2291                            self.peek()
2292                        )));
2293                    }
2294                    self.advance();
2295                    if_not_exists = true;
2296                }
2297                // v7.13.2 — mailrs round-6 S3: `ADD COLUMN col TYPE
2298                // REFERENCES other(col) [ON DELETE …]`. parse_column_def
2299                // returns ColumnDef + an optional inline FK.
2300                let (column, col_level_fk) = self.parse_column_def_with_fk()?;
2301                let col_name = column.name.clone();
2302                let mut out = alloc::vec![crate::ast::AlterTableTarget::AddColumn {
2303                    column,
2304                    if_not_exists,
2305                }];
2306                if let Some(mut fk) = col_level_fk {
2307                    if fk.columns.is_empty() {
2308                        fk.columns.push(col_name);
2309                    }
2310                    out.push(crate::ast::AlterTableTarget::AddForeignKey(fk));
2311                }
2312                Ok(out)
2313            }
2314            Token::Drop => {
2315                self.advance();
2316                // v7.13.3 — dispatch on the next token. mailrs round-7
2317                // S8 closed DROP COLUMN; round-6 S7 closed
2318                // DROP CONSTRAINT. Both share IF EXISTS / CASCADE /
2319                // RESTRICT modifiers.
2320                //   DROP CONSTRAINT [IF EXISTS] <name> [CASCADE|RESTRICT]
2321                //   DROP [COLUMN] [IF EXISTS] <col> [CASCADE|RESTRICT]
2322                let subject = match self.peek() {
2323                    Token::Ident(s) if s.eq_ignore_ascii_case("constraint") => {
2324                        self.advance();
2325                        "constraint"
2326                    }
2327                    Token::Ident(s) if s.eq_ignore_ascii_case("column") => {
2328                        self.advance();
2329                        "column"
2330                    }
2331                    // PG-canonical bare `DROP <col>` without COLUMN
2332                    // keyword is also valid; treat any other ident
2333                    // as the column name.
2334                    Token::Ident(_) | Token::QuotedIdent(_) => "column",
2335                    other => {
2336                        return Err(self.err(alloc::format!(
2337                            "expected COLUMN / CONSTRAINT after DROP in ALTER TABLE, got {other:?}"
2338                        )));
2339                    }
2340                };
2341                let mut if_exists = false;
2342                if matches!(self.peek(), Token::Ident(s) if s.eq_ignore_ascii_case("if")) {
2343                    let n1 = self.tokens.get(self.pos + 1);
2344                    if matches!(n1, Some(Token::Ident(s)) if s.eq_ignore_ascii_case("exists")) {
2345                        self.advance();
2346                        self.advance();
2347                        if_exists = true;
2348                    }
2349                }
2350                let name = self.expect_ident_like()?;
2351                let mut cascade = false;
2352                if matches!(
2353                    self.peek(),
2354                    Token::Ident(s) if s.eq_ignore_ascii_case("cascade")
2355                        || s.eq_ignore_ascii_case("restrict")
2356                ) {
2357                    if matches!(self.peek(), Token::Ident(s) if s.eq_ignore_ascii_case("cascade"))
2358                    {
2359                        cascade = true;
2360                    }
2361                    self.advance();
2362                }
2363                if subject == "constraint" {
2364                    Ok(alloc::vec![crate::ast::AlterTableTarget::DropForeignKey {
2365                        name,
2366                        if_exists,
2367                    }])
2368                } else {
2369                    Ok(alloc::vec![crate::ast::AlterTableTarget::DropColumn {
2370                        column: name,
2371                        if_exists,
2372                        cascade,
2373                    }])
2374                }
2375            }
2376            Token::Ident(s) if s.eq_ignore_ascii_case("alter") => {
2377                self.advance();
2378                if matches!(self.peek(), Token::Ident(s) if s.eq_ignore_ascii_case("column")) {
2379                    self.advance();
2380                }
2381                let col_name = self.expect_ident_like()?;
2382                match self.peek() {
2383                    Token::Ident(s) if s.eq_ignore_ascii_case("type") => {
2384                        self.advance();
2385                    }
2386                    // v7.14.0 — pg_dump emits BIGSERIAL via
2387                    // `ALTER TABLE … ALTER COLUMN id SET DEFAULT
2388                    // nextval('seq')` (the sequence is created
2389                    // separately). SPG's BIGSERIAL already uses
2390                    // AUTO_INCREMENT; accept SET DEFAULT / DROP
2391                    // DEFAULT / SET NOT NULL / DROP NOT NULL as
2392                    // engine no-ops by consuming the tail.
2393                    Token::Ident(s) if s.eq_ignore_ascii_case("set") => {
2394                        // ALTER COLUMN col SET DEFAULT … / SET NOT
2395                        // NULL — accept as a no-op on SPG (BIGSERIAL
2396                        // already auto-increments; nullability change
2397                        // would need row scan — deferred).
2398                        self.consume_until_statement_boundary();
2399                        return Ok(Vec::new());
2400                    }
2401                    Token::Ident(s) if s.eq_ignore_ascii_case("drop") => {
2402                        // ALTER COLUMN col DROP DEFAULT / DROP NOT NULL.
2403                        self.consume_until_statement_boundary();
2404                        return Ok(Vec::new());
2405                    }
2406                    other => {
2407                        return Err(self.err(alloc::format!(
2408                            "expected TYPE / SET / DROP after ALTER COLUMN <name>, got {other:?}"
2409                        )));
2410                    }
2411                }
2412                let new_type = self.parse_column_type_name()?;
2413                let using = if matches!(self.peek(), Token::Ident(s) if s.eq_ignore_ascii_case("using"))
2414                {
2415                    self.advance();
2416                    Some(self.parse_expr(0)?)
2417                } else {
2418                    None
2419                };
2420                Ok(alloc::vec![crate::ast::AlterTableTarget::AlterColumnType {
2421                    column: col_name,
2422                    new_type,
2423                    using,
2424                }])
2425            }
2426            // v7.15.0 — `ALTER TABLE t RENAME [COLUMN] old TO new`.
2427            // PG also supports `RENAME TO new_table` for table-name
2428            // rename; that surface is deferred (pg_dump never emits
2429            // it). If the first post-RENAME ident is `TO`, the user
2430            // is asking for table rename — error with a clear
2431            // message rather than misparsing `TO` as a column name.
2432            Token::Ident(s) if s.eq_ignore_ascii_case("rename") => {
2433                self.advance();
2434                // `TO` is a reserved keyword token (Token::To), not
2435                // Token::Ident("to"); detect both shapes so the
2436                // table-rename surface (RENAME TO) produces a clear
2437                // error rather than misparsing.
2438                if matches!(self.peek(), Token::To)
2439                    || matches!(self.peek(), Token::Ident(s) if s.eq_ignore_ascii_case("to"))
2440                {
2441                    return Err(self.err(alloc::format!(
2442                        "ALTER TABLE RENAME TO <new_name> (table rename) is not supported; \
2443                         use RENAME COLUMN <old> TO <new> instead"
2444                    )));
2445                }
2446                if matches!(self.peek(), Token::Ident(s) if s.eq_ignore_ascii_case("column")) {
2447                    self.advance();
2448                }
2449                let old = self.expect_ident_like()?;
2450                // `TO` is a reserved keyword token; accept both
2451                // Token::To and Token::Ident("to") for consistency.
2452                if matches!(self.peek(), Token::To) {
2453                    self.advance();
2454                } else {
2455                    self.expect_keyword_ident("to")?;
2456                }
2457                let new = self.expect_ident_like()?;
2458                Ok(alloc::vec![crate::ast::AlterTableTarget::RenameColumn {
2459                    old,
2460                    new,
2461                }])
2462            }
2463            other => Err(self.err(alloc::format!(
2464                "expected SET / ADD / DROP / ALTER / RENAME in ALTER TABLE, got {other:?}"
2465            ))),
2466        }
2467    }
2468
2469    /// Consume a bare ident if its lowercase matches `kw`, else err.
2470    fn expect_keyword_ident(&mut self, kw: &str) -> Result<(), ParseError> {
2471        match self.advance() {
2472            Token::Ident(s) | Token::QuotedIdent(s) if s.eq_ignore_ascii_case(kw) => Ok(()),
2473            other => Err(ParseError {
2474                message: format!("expected {kw:?}, got {other:?}"),
2475                token_pos: self.pos.saturating_sub(1),
2476            }),
2477        }
2478    }
2479
2480    /// Accept either a quoted identifier (`"foo"`) or a quoted string
2481    /// literal (`'foo'`) — same shape used by CREATE USER for the
2482    /// username slot.
2483    fn expect_ident_or_string(&mut self) -> Result<String, ParseError> {
2484        match self.advance() {
2485            Token::Ident(s) | Token::QuotedIdent(s) | Token::String(s) => Ok(s),
2486            other => Err(ParseError {
2487                message: format!("expected identifier or string, got {other:?}"),
2488                token_pos: self.pos.saturating_sub(1),
2489            }),
2490        }
2491    }
2492
2493    fn expect_string_literal(&mut self) -> Result<String, ParseError> {
2494        match self.advance() {
2495            Token::String(s) => Ok(s),
2496            other => Err(ParseError {
2497                message: format!("expected quoted string, got {other:?}"),
2498                token_pos: self.pos.saturating_sub(1),
2499            }),
2500        }
2501    }
2502
2503    fn parse_select_stmt(&mut self) -> Result<Statement, ParseError> {
2504        // Caller dispatches on Token::Select; the inner helper handles
2505        // the rest. ORDER BY / LIMIT bind at this top level; UNION peers
2506        // get a fresh bare-select parse and may not have their own ORDER
2507        // BY / LIMIT.
2508        let mut head = self.parse_bare_select()?;
2509        while matches!(self.peek(), Token::Union) {
2510            self.advance();
2511            let kind = if matches!(self.peek(), Token::All) {
2512                self.advance();
2513                UnionKind::All
2514            } else {
2515                UnionKind::Distinct
2516            };
2517            let peer = self.parse_bare_select()?;
2518            head.unions.push((kind, peer));
2519        }
2520        head.order_by = if matches!(self.peek(), Token::Order) {
2521            self.advance();
2522            if !matches!(self.peek(), Token::By) {
2523                return Err(self.err(format!("expected BY after ORDER, got {:?}", self.peek())));
2524            }
2525            self.advance();
2526            // v6.4.0 — multi-key ORDER BY. Loop over comma-separated
2527            // `<expr> [ASC|DESC]` items.
2528            let mut keys = Vec::new();
2529            loop {
2530                let expr = self.parse_expr(0)?;
2531                let desc = if matches!(self.peek(), Token::Desc) {
2532                    self.advance();
2533                    true
2534                } else if matches!(self.peek(), Token::Asc) {
2535                    self.advance();
2536                    false
2537                } else {
2538                    false
2539                };
2540                keys.push(OrderBy { expr, desc });
2541                if matches!(self.peek(), Token::Comma) {
2542                    self.advance();
2543                } else {
2544                    break;
2545                }
2546            }
2547            keys
2548        } else {
2549            Vec::new()
2550        };
2551        head.limit = if matches!(self.peek(), Token::Limit) {
2552            self.advance();
2553            Some(self.parse_limit_expr("LIMIT")?)
2554        } else {
2555            None
2556        };
2557        head.offset = if matches!(self.peek(), Token::Offset) {
2558            self.advance();
2559            Some(self.parse_limit_expr("OFFSET")?)
2560        } else {
2561            None
2562        };
2563        Ok(Statement::Select(head))
2564    }
2565
2566    /// v7.9.24 — accept `LIMIT <int>` or `LIMIT $N`. mailrs H2.
2567    /// Bind value gets resolved during prepared-statement Execute;
2568    /// the Pratt expression parser would over-accept here (e.g.
2569    /// `LIMIT 5 + 5`), so we narrowly accept only the two PG forms.
2570    fn parse_limit_expr(&mut self, label: &str) -> Result<crate::ast::LimitExpr, ParseError> {
2571        match self.advance() {
2572            Token::Integer(n) if n >= 0 => u32::try_from(n)
2573                .map(crate::ast::LimitExpr::Literal)
2574                .map_err(|_| ParseError {
2575                    message: alloc::format!("{label} value too large: {n}"),
2576                    token_pos: self.pos.saturating_sub(1),
2577                }),
2578            Token::Placeholder(n) => Ok(crate::ast::LimitExpr::Placeholder(n)),
2579            other => Err(ParseError {
2580                message: alloc::format!(
2581                    "expected non-negative integer or $N placeholder after {label}, got {other:?}"
2582                ),
2583                token_pos: self.pos.saturating_sub(1),
2584            }),
2585        }
2586    }
2587
2588    /// Parse one SELECT block without ORDER BY / LIMIT / UNION chaining —
2589    /// just `[DISTINCT] items [FROM] [WHERE] [GROUP BY]`. Returned with
2590    /// `unions` empty and `order_by` / `limit` `None`; the top-level
2591    /// `parse_select_stmt` is responsible for filling those in.
2592    fn parse_bare_select(&mut self) -> Result<SelectStatement, ParseError> {
2593        if !matches!(self.peek(), Token::Select) {
2594            return Err(self.err(format!(
2595                "expected SELECT to start a query block, got {:?}",
2596                self.peek()
2597            )));
2598        }
2599        self.advance();
2600        let distinct = if matches!(self.peek(), Token::Distinct) {
2601            self.advance();
2602            true
2603        } else {
2604            false
2605        };
2606        let items = self.parse_select_list()?;
2607        let from = if matches!(self.peek(), Token::From) {
2608            self.advance();
2609            Some(self.parse_from_clause()?)
2610        } else {
2611            None
2612        };
2613        let where_ = if matches!(self.peek(), Token::Where) {
2614            self.advance();
2615            Some(self.parse_expr(0)?)
2616        } else {
2617            None
2618        };
2619        let mut group_by_all = false;
2620        let group_by = if matches!(self.peek(), Token::Group) {
2621            self.advance();
2622            if !matches!(self.peek(), Token::By) {
2623                return Err(self.err(format!("expected BY after GROUP, got {:?}", self.peek())));
2624            }
2625            self.advance();
2626            // v6.4.1 — `GROUP BY ALL` shortcut. Planner expands to
2627            // every non-aggregate SELECT-list item later.
2628            if matches!(self.peek(), Token::All) {
2629                self.advance();
2630                group_by_all = true;
2631                None
2632            } else {
2633                let mut groups = Vec::new();
2634                loop {
2635                    groups.push(self.parse_expr(0)?);
2636                    if matches!(self.peek(), Token::Comma) {
2637                        self.advance();
2638                    } else {
2639                        break;
2640                    }
2641                }
2642                Some(groups)
2643            }
2644        } else {
2645            None
2646        };
2647        let having = if matches!(self.peek(), Token::Having) {
2648            self.advance();
2649            Some(self.parse_expr(0)?)
2650        } else {
2651            None
2652        };
2653        Ok(SelectStatement {
2654            ctes: Vec::new(),
2655            distinct,
2656            items,
2657            from,
2658            where_,
2659            group_by,
2660            group_by_all,
2661            having,
2662            unions: Vec::new(),
2663            order_by: Vec::new(),
2664            limit: None,
2665            offset: None,
2666        })
2667    }
2668
2669    fn parse_create_table_stmt_after_create(&mut self) -> Result<Statement, ParseError> {
2670        // Caller already consumed CREATE; we're sitting on TABLE.
2671        debug_assert!(matches!(self.peek(), Token::Table));
2672        self.advance();
2673        let if_not_exists = self.consume_if_not_exists();
2674        let name = self.expect_ident_like()?;
2675        if !matches!(self.peek(), Token::LParen) {
2676            return Err(self.err(format!(
2677                "expected '(' after table name, got {:?}",
2678                self.peek()
2679            )));
2680        }
2681        self.advance();
2682        let mut columns = Vec::new();
2683        let mut foreign_keys: Vec<ForeignKeyConstraint> = Vec::new();
2684        let mut table_constraints: Vec<crate::ast::TableConstraint> = Vec::new();
2685        loop {
2686            // v7.6.0 / v7.9.18 — distinguish table-level constraint
2687            // clauses from column definitions. Constraints start
2688            // with `CONSTRAINT <name> …`, `FOREIGN KEY (…)`,
2689            // `PRIMARY KEY (…)`, or `UNIQUE (…)`. Anything else is
2690            // a column.
2691            if self.peek_table_level_pk_start() {
2692                table_constraints.push(self.parse_table_level_primary_key()?);
2693            } else if self.peek_table_level_unique_start() {
2694                table_constraints.push(self.parse_table_level_unique()?);
2695            } else if self.peek_table_level_check_start() {
2696                // v7.13.0 — table-level CHECK (mailrs round-5 G3).
2697                table_constraints.push(self.parse_table_level_check()?);
2698            } else if self.peek_mysql_inline_key_start() {
2699                // v7.14.0 — mysqldump emits inline `KEY name (cols)`,
2700                // `INDEX name (cols)`, `UNIQUE KEY name (cols)`,
2701                // `FULLTEXT KEY name (cols)`, `SPATIAL KEY name (cols)`
2702                // inside the column list. Skip name + paren list;
2703                // for UNIQUE KEY, register as a UC.
2704                if let Some(uc) = self.parse_mysql_inline_key()? {
2705                    table_constraints.push(uc);
2706                }
2707            } else if self.peek_constraint_or_fk_start() {
2708                foreign_keys.push(self.parse_table_level_fk()?);
2709            } else {
2710                let (col, col_level_fk) = self.parse_column_def_with_fk()?;
2711                // v7.13.0 — fold inline UNIQUE / CHECK column
2712                // constraints into table-level entries so the
2713                // engine path stays uniform.
2714                if col.is_unique {
2715                    table_constraints.push(crate::ast::TableConstraint::Unique {
2716                        name: None,
2717                        columns: alloc::vec![col.name.clone()],
2718                        nulls_not_distinct: false,
2719                    });
2720                }
2721                if let Some(check_expr) = col.check.clone() {
2722                    table_constraints.push(crate::ast::TableConstraint::Check {
2723                        name: None,
2724                        expr: check_expr,
2725                    });
2726                }
2727                columns.push(col);
2728                if let Some(fk) = col_level_fk {
2729                    foreign_keys.push(fk);
2730                }
2731            }
2732            match self.peek() {
2733                Token::Comma => {
2734                    self.advance();
2735                }
2736                Token::RParen => {
2737                    self.advance();
2738                    break;
2739                }
2740                other => {
2741                    return Err(
2742                        self.err(format!("expected ',' or ')' in column list, got {other:?}"))
2743                    );
2744                }
2745            }
2746        }
2747        if columns.is_empty() {
2748            return Err(self.err("CREATE TABLE requires at least one column".into()));
2749        }
2750        // v7.14.0 — consume MySQL/MariaDB table options after the
2751        // closing `)`. mysqldump emits things like
2752        // `ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_unicode_ci
2753        // AUTO_INCREMENT=42 ROW_FORMAT=DYNAMIC COMMENT='blog posts'`.
2754        // SPG accepts all forms as no-ops (each option is
2755        // `<ident> [=] <ident-or-string>` separated by whitespace).
2756        self.consume_mysql_table_options();
2757        Ok(Statement::CreateTable(CreateTableStatement {
2758            name,
2759            columns,
2760            if_not_exists,
2761            foreign_keys,
2762            table_constraints,
2763        }))
2764    }
2765
2766    /// v7.14.0 — true when the next tokens look like an inline
2767    /// MySQL index declaration: KEY / INDEX / UNIQUE KEY /
2768    /// UNIQUE INDEX / FULLTEXT [KEY|INDEX] / SPATIAL [KEY|INDEX]
2769    /// — each followed by an optional name + `(...)`. Critical:
2770    /// a column NAMED `key` / `index` (PG accepts as ident) must
2771    /// NOT be mistaken for the KEY constraint shape. We disambig
2772    /// by requiring the keyword to be followed by either `(` or
2773    /// `<ident> (`.
2774    fn peek_mysql_inline_key_start(&self) -> bool {
2775        let cur = self.peek();
2776        // Shapes:
2777        //   KEY (cols)
2778        //   KEY name (cols)
2779        //   INDEX (cols)
2780        //   INDEX name (cols)
2781        //   UNIQUE KEY [name] (cols)
2782        //   UNIQUE INDEX [name] (cols)
2783        //   FULLTEXT [KEY|INDEX] [name] (cols)
2784        //   SPATIAL [KEY|INDEX] [name] (cols)
2785        let after_keyword_followed_by_paren_or_ident_paren = |skip: usize| -> bool {
2786            // tokens at skip = the position AFTER the index-form
2787            // keywords (KEY/INDEX) have been consumed.
2788            match self.tokens.get(skip) {
2789                Some(Token::LParen) => true,
2790                Some(Token::Ident(_) | Token::QuotedIdent(_)) => {
2791                    matches!(self.tokens.get(skip + 1), Some(Token::LParen))
2792                }
2793                _ => false,
2794            }
2795        };
2796        // `INDEX` lexes as Token::Index (reserved), not as
2797        // Token::Ident("index"). Both shapes count as a KEY/INDEX
2798        // start; the peek helper below handles either.
2799        let is_key_or_index_tok = |t: &Token| -> bool {
2800            matches!(t, Token::Index)
2801                || matches!(t, Token::Ident(s) if s.eq_ignore_ascii_case("key") || s.eq_ignore_ascii_case("index"))
2802        };
2803        match cur {
2804            Token::Index => after_keyword_followed_by_paren_or_ident_paren(self.pos + 1),
2805            Token::Ident(s)
2806                if s.eq_ignore_ascii_case("key") || s.eq_ignore_ascii_case("index") =>
2807            {
2808                after_keyword_followed_by_paren_or_ident_paren(self.pos + 1)
2809            }
2810            Token::Ident(s)
2811                if s.eq_ignore_ascii_case("fulltext") || s.eq_ignore_ascii_case("spatial") =>
2812            {
2813                let nxt = self.tokens.get(self.pos + 1);
2814                let after_after = if nxt.is_some_and(is_key_or_index_tok) {
2815                    self.pos + 2
2816                } else {
2817                    self.pos + 1
2818                };
2819                after_keyword_followed_by_paren_or_ident_paren(after_after)
2820            }
2821            Token::Ident(s) if s.eq_ignore_ascii_case("unique") => {
2822                let nxt = self.tokens.get(self.pos + 1);
2823                if !nxt.is_some_and(is_key_or_index_tok) {
2824                    return false;
2825                }
2826                after_keyword_followed_by_paren_or_ident_paren(self.pos + 2)
2827            }
2828            _ => false,
2829        }
2830    }
2831
2832    /// v7.14.0 — parse the MySQL inline KEY/INDEX form. Returns
2833    /// Some(TableConstraint::Unique) for UNIQUE KEY (so SPG
2834    /// enforces uniqueness on INSERT). v7.15.0: plain KEY/INDEX
2835    /// returns Some(TableConstraint::Index) so the engine builds
2836    /// a real BTree index on the leading column (mysqldump
2837    /// `KEY idx_posts_author (author_id)` shape).
2838    /// FULLTEXT / SPATIAL still return None — accepted-as-no-op
2839    /// (the storage layer has no matching AM).
2840    fn parse_mysql_inline_key(
2841        &mut self,
2842    ) -> Result<Option<crate::ast::TableConstraint>, ParseError> {
2843        // Detect UNIQUE prefix.
2844        let is_unique = if matches!(self.peek(), Token::Ident(s) if s.eq_ignore_ascii_case("unique"))
2845        {
2846            self.advance();
2847            true
2848        } else {
2849            false
2850        };
2851        // Consume FULLTEXT / SPATIAL prefix and record it. SPG
2852        // has no native FULLTEXT / SPATIAL AM, so we still
2853        // accept-as-no-op for those (return None below); plain
2854        // KEY/INDEX builds a real BTree.
2855        let is_fulltext_or_spatial = if matches!(
2856            self.peek(),
2857            Token::Ident(s) if s.eq_ignore_ascii_case("fulltext") || s.eq_ignore_ascii_case("spatial")
2858        ) {
2859            self.advance();
2860            true
2861        } else {
2862            false
2863        };
2864        // KEY / INDEX keyword. `INDEX` lexes as Token::Index
2865        // (reserved); accept either token shape.
2866        match self.peek() {
2867            Token::Index => {
2868                self.advance();
2869            }
2870            Token::Ident(s) if s.eq_ignore_ascii_case("key") || s.eq_ignore_ascii_case("index") => {
2871                self.advance();
2872            }
2873            other => {
2874                return Err(self.err(alloc::format!(
2875                    "expected KEY/INDEX in inline index declaration, got {other:?}"
2876                )));
2877            }
2878        }
2879        // Optional index name (an ident before the `(`).
2880        // v7.15.0 — capture the name when present so the engine
2881        // builds the secondary index under the user's chosen
2882        // name (matches mysqldump's `KEY idx_x (col)` shape).
2883        let mut idx_name: Option<String> = None;
2884        if matches!(self.peek(), Token::Ident(_) | Token::QuotedIdent(_))
2885            && matches!(self.tokens.get(self.pos + 1), Some(Token::LParen))
2886        {
2887            if let Token::Ident(s) | Token::QuotedIdent(s) = self.advance() {
2888                idx_name = Some(s);
2889            }
2890        }
2891        // Optional `USING BTREE` / `USING HASH` (MySQL).
2892        if matches!(self.peek(), Token::Ident(s) if s.eq_ignore_ascii_case("using")) {
2893            self.advance();
2894            if matches!(self.peek(), Token::Ident(_) | Token::QuotedIdent(_)) {
2895                self.advance();
2896            }
2897        }
2898        // Required column list `(col [, col]*)`.
2899        if !matches!(self.peek(), Token::LParen) {
2900            return Err(self.err(alloc::format!(
2901                "expected '(' in inline KEY/INDEX, got {:?}",
2902                self.peek()
2903            )));
2904        }
2905        self.advance();
2906        let mut cols: Vec<String> = Vec::new();
2907        loop {
2908            match self.peek().clone() {
2909                Token::Ident(s) | Token::QuotedIdent(s) => {
2910                    self.advance();
2911                    cols.push(s);
2912                }
2913                _ => break,
2914            }
2915            // Skip optional `(length)` per-column prefix.
2916            if matches!(self.peek(), Token::LParen) {
2917                let mut depth = 1usize;
2918                self.advance();
2919                while depth > 0 {
2920                    match self.peek() {
2921                        Token::LParen => depth += 1,
2922                        Token::RParen => depth -= 1,
2923                        Token::Eof => break,
2924                        _ => {}
2925                    }
2926                    self.advance();
2927                }
2928            }
2929            // Skip optional ASC / DESC.
2930            if matches!(self.peek(), Token::Ident(s) if s.eq_ignore_ascii_case("asc") || s.eq_ignore_ascii_case("desc"))
2931                || matches!(self.peek(), Token::Asc | Token::Desc)
2932            {
2933                self.advance();
2934            }
2935            if matches!(self.peek(), Token::Comma) {
2936                self.advance();
2937                continue;
2938            }
2939            break;
2940        }
2941        if matches!(self.peek(), Token::RParen) {
2942            self.advance();
2943        }
2944        // Trailing options on the inline index — comment / etc.
2945        // Skip until comma or `)`.
2946        while !matches!(self.peek(), Token::Comma | Token::RParen | Token::Eof) {
2947            self.advance();
2948        }
2949        if cols.is_empty() {
2950            return Ok(None);
2951        }
2952        if is_unique {
2953            // Carry the captured idx_name on UNIQUE too so future
2954            // engine work can name the underlying BTree
2955            // accordingly; today the unique-constraint installer
2956            // synthesises the name itself, but Display round-trip
2957            // benefits from preserving it.
2958            Ok(Some(crate::ast::TableConstraint::Unique {
2959                name: idx_name,
2960                columns: cols,
2961                nulls_not_distinct: false,
2962            }))
2963        } else if is_fulltext_or_spatial {
2964            // SPG has no FULLTEXT / SPATIAL AM. Accept-as-no-op.
2965            Ok(None)
2966        } else {
2967            // v7.15.0 — plain KEY / INDEX builds a real BTree
2968            // secondary index.
2969            Ok(Some(crate::ast::TableConstraint::Index {
2970                name: idx_name,
2971                columns: cols,
2972            }))
2973        }
2974    }
2975
2976    /// v7.14.0 — consume MySQL/MariaDB table-options tail after
2977    /// the closing `)`: ENGINE=..., DEFAULT CHARSET=...,
2978    /// COLLATE=..., AUTO_INCREMENT=N, ROW_FORMAT=..., COMMENT='...'
2979    /// (in any order, separated by whitespace).
2980    fn consume_mysql_table_options(&mut self) {
2981        loop {
2982            // Heuristic: a table option is an ident (or `DEFAULT`
2983            // reserved keyword) followed by `=` and an
2984            // ident / string / integer.
2985            let name_lc = match self.peek().clone() {
2986                Token::Ident(s) | Token::QuotedIdent(s) => s.to_ascii_lowercase(),
2987                Token::Default => alloc::string::String::from("default"),
2988                _ => break,
2989            };
2990            let known = matches!(
2991                name_lc.as_str(),
2992                "engine"
2993                    | "default"
2994                    | "charset"
2995                    | "collate"
2996                    | "auto_increment"
2997                    | "row_format"
2998                    | "comment"
2999                    | "pack_keys"
3000                    | "stats_persistent"
3001                    | "stats_auto_recalc"
3002                    | "stats_sample_pages"
3003                    | "key_block_size"
3004                    | "tablespace"
3005                    | "min_rows"
3006                    | "max_rows"
3007                    | "checksum"
3008                    | "delay_key_write"
3009                    | "insert_method"
3010                    | "data"
3011                    | "index"
3012                    | "encryption"
3013                    | "compression"
3014            );
3015            if !known {
3016                break;
3017            }
3018            self.advance(); // option name
3019            // `DEFAULT` optional prefix is followed by `CHARSET` /
3020            // `COLLATE`; consume the next ident too.
3021            if name_lc == "default" {
3022                if matches!(self.peek(), Token::Ident(_) | Token::QuotedIdent(_)) {
3023                    self.advance();
3024                }
3025            }
3026            if matches!(self.peek(), Token::Eq) {
3027                self.advance();
3028            }
3029            match self.peek() {
3030                Token::Ident(_) | Token::QuotedIdent(_) | Token::String(_) | Token::Integer(_) => {
3031                    self.advance();
3032                }
3033                _ => {}
3034            }
3035        }
3036    }
3037
3038    /// v7.9.18 — true when the next tokens are `PRIMARY KEY (…)`.
3039    /// PRIMARY and KEY are bare idents; we look-ahead 2 to be
3040    /// sure (otherwise a column literally named `primary` would
3041    /// be mistaken).
3042    fn peek_table_level_pk_start(&self) -> bool {
3043        let cur = self.peek();
3044        let nxt = self.tokens.get(self.pos + 1);
3045        let nxt2 = self.tokens.get(self.pos + 2);
3046        let is_primary = matches!(cur, Token::Ident(s) if s.eq_ignore_ascii_case("primary"));
3047        let is_key = matches!(nxt, Some(Token::Ident(s)) if s.eq_ignore_ascii_case("key"));
3048        let is_lparen = matches!(nxt2, Some(Token::LParen));
3049        is_primary && is_key && is_lparen
3050    }
3051
3052    /// v7.9.18 — true when the next tokens are `UNIQUE (…)`.
3053    /// v7.13.0 — also matches `UNIQUE NULLS [NOT] DISTINCT (…)`
3054    /// (mailrs round-5 G10).
3055    fn peek_table_level_unique_start(&self) -> bool {
3056        let cur = self.peek();
3057        let is_unique = matches!(cur, Token::Ident(s) if s.eq_ignore_ascii_case("unique"));
3058        if !is_unique {
3059            return false;
3060        }
3061        let n1 = self.tokens.get(self.pos + 1);
3062        // Plain `UNIQUE (…)`.
3063        if matches!(n1, Some(Token::LParen)) {
3064            return true;
3065        }
3066        // `UNIQUE NULLS [NOT] DISTINCT (…)`.
3067        let is_nulls = matches!(n1, Some(Token::Ident(s)) if s.eq_ignore_ascii_case("nulls"));
3068        if !is_nulls {
3069            return false;
3070        }
3071        let n2 = self.tokens.get(self.pos + 2);
3072        let n3 = self.tokens.get(self.pos + 3);
3073        let n4 = self.tokens.get(self.pos + 4);
3074        // `UNIQUE NULLS DISTINCT (…)` — 4 tokens before `(`.
3075        if matches!(n2, Some(Token::Distinct)) && matches!(n3, Some(Token::LParen)) {
3076            return true;
3077        }
3078        // `UNIQUE NULLS NOT DISTINCT (…)` — 5 tokens before `(`.
3079        if matches!(n2, Some(Token::Not))
3080            && matches!(n3, Some(Token::Distinct))
3081            && matches!(n4, Some(Token::LParen))
3082        {
3083            return true;
3084        }
3085        false
3086    }
3087
3088    fn parse_table_level_primary_key(&mut self) -> Result<crate::ast::TableConstraint, ParseError> {
3089        self.advance(); // PRIMARY
3090        self.advance(); // KEY
3091        let columns = self.parse_paren_ident_list("PRIMARY KEY")?;
3092        Ok(crate::ast::TableConstraint::PrimaryKey {
3093            name: None,
3094            columns,
3095        })
3096    }
3097
3098    fn parse_table_level_unique(&mut self) -> Result<crate::ast::TableConstraint, ParseError> {
3099        self.advance(); // UNIQUE
3100        // v7.13.0 — optional `NULLS NOT DISTINCT` modifier
3101        // (mailrs round-5 G10, PG 15+ surface). Default behaviour
3102        // is `NULLS DISTINCT` per the SQL standard.
3103        let mut nulls_not_distinct = false;
3104        if matches!(self.peek(), Token::Ident(s) if s.eq_ignore_ascii_case("nulls")) {
3105            let n1 = self.tokens.get(self.pos + 1);
3106            let n2 = self.tokens.get(self.pos + 2);
3107            let is_not = matches!(n1, Some(Token::Not));
3108            let is_distinct = matches!(n2, Some(Token::Distinct));
3109            if is_not && is_distinct {
3110                self.advance(); // NULLS
3111                self.advance(); // NOT
3112                self.advance(); // DISTINCT
3113                nulls_not_distinct = true;
3114            } else if matches!(n1, Some(Token::Distinct)) {
3115                self.advance(); // NULLS
3116                self.advance(); // DISTINCT
3117            }
3118        }
3119        let columns = self.parse_paren_ident_list("UNIQUE")?;
3120        Ok(crate::ast::TableConstraint::Unique {
3121            name: None,
3122            columns,
3123            nulls_not_distinct,
3124        })
3125    }
3126
3127    /// v7.13.0 — table-level `CHECK (<expr>)` constraint
3128    /// (mailrs round-5 G3). Consumes `CHECK` then a parenthesised
3129    /// expression.
3130    fn parse_table_level_check(&mut self) -> Result<crate::ast::TableConstraint, ParseError> {
3131        self.advance(); // CHECK
3132        if !matches!(self.peek(), Token::LParen) {
3133            return Err(self.err(alloc::format!(
3134                "expected '(' after CHECK, got {:?}",
3135                self.peek()
3136            )));
3137        }
3138        self.advance();
3139        let expr = self.parse_expr(0)?;
3140        if !matches!(self.peek(), Token::RParen) {
3141            return Err(self.err(alloc::format!(
3142                "expected ')' to close CHECK predicate, got {:?}",
3143                self.peek()
3144            )));
3145        }
3146        self.advance();
3147        Ok(crate::ast::TableConstraint::Check { name: None, expr })
3148    }
3149
3150    /// v7.13.0 — `true` when the next token is `CHECK` (a bare ident).
3151    fn peek_table_level_check_start(&self) -> bool {
3152        matches!(self.peek(), Token::Ident(s) if s.eq_ignore_ascii_case("check"))
3153    }
3154
3155    fn parse_paren_ident_list(&mut self, ctx: &str) -> Result<Vec<String>, ParseError> {
3156        if !matches!(self.peek(), Token::LParen) {
3157            return Err(self.err(alloc::format!(
3158                "expected '(' after {ctx}, got {:?}",
3159                self.peek()
3160            )));
3161        }
3162        self.advance();
3163        let mut out = Vec::new();
3164        loop {
3165            out.push(self.expect_ident_like()?);
3166            match self.peek() {
3167                Token::Comma => {
3168                    self.advance();
3169                }
3170                Token::RParen => {
3171                    self.advance();
3172                    break;
3173                }
3174                other => {
3175                    return Err(self.err(alloc::format!(
3176                        "expected ',' or ')' in {ctx} list, got {other:?}"
3177                    )));
3178                }
3179            }
3180        }
3181        if out.is_empty() {
3182            return Err(self.err(alloc::format!("{ctx} requires at least one column")));
3183        }
3184        Ok(out)
3185    }
3186
3187    /// v7.6.0 — true when the next tokens are `CONSTRAINT <name>
3188    /// FOREIGN KEY` or bare `FOREIGN KEY`. Both introduce a
3189    /// table-level FK; a column def never starts with either keyword
3190    /// (column names are not in this reserved set).
3191    fn peek_constraint_or_fk_start(&self) -> bool {
3192        let is_constraint_kw = matches!(
3193            self.peek(),
3194            Token::Ident(s) if s.eq_ignore_ascii_case("constraint")
3195        );
3196        let is_foreign_kw = matches!(
3197            self.peek(),
3198            Token::Ident(s) if s.eq_ignore_ascii_case("foreign")
3199        );
3200        is_constraint_kw || is_foreign_kw
3201    }
3202
3203    /// v7.6.0 — parse a table-level FK clause:
3204    /// `[CONSTRAINT <name>] FOREIGN KEY (<col>[,<col>]*) REFERENCES
3205    /// <tbl> [(<pcol>[,<pcol>]*)] [ON DELETE <action>] [ON UPDATE <action>]`.
3206    fn parse_table_level_fk(&mut self) -> Result<ForeignKeyConstraint, ParseError> {
3207        let mut name: Option<String> = None;
3208        if matches!(self.peek(), Token::Ident(s) if s.eq_ignore_ascii_case("constraint")) {
3209            self.advance();
3210            name = Some(self.expect_ident_like()?);
3211        }
3212        // `FOREIGN`
3213        match self.advance() {
3214            Token::Ident(s) if s.eq_ignore_ascii_case("foreign") => {}
3215            other => return Err(self.err(format!("expected FOREIGN, got {other:?}"))),
3216        }
3217        // `KEY`
3218        match self.advance() {
3219            Token::Ident(s) if s.eq_ignore_ascii_case("key") => {}
3220            other => return Err(self.err(format!("expected KEY after FOREIGN, got {other:?}"))),
3221        }
3222        // `(col, col, ...)`
3223        if !matches!(self.peek(), Token::LParen) {
3224            return Err(self.err(format!(
3225                "expected '(' after FOREIGN KEY, got {:?}",
3226                self.peek()
3227            )));
3228        }
3229        self.advance();
3230        let mut columns = Vec::new();
3231        loop {
3232            columns.push(self.expect_ident_like()?);
3233            match self.peek() {
3234                Token::Comma => {
3235                    self.advance();
3236                }
3237                Token::RParen => {
3238                    self.advance();
3239                    break;
3240                }
3241                other => {
3242                    return Err(self.err(format!(
3243                        "expected ',' or ')' in FK column list, got {other:?}"
3244                    )));
3245                }
3246            }
3247        }
3248        if columns.is_empty() {
3249            return Err(self.err("FOREIGN KEY requires at least one column".into()));
3250        }
3251        let (parent_table, parent_columns, on_delete, on_update) =
3252            self.parse_references_tail(columns.len())?;
3253        Ok(ForeignKeyConstraint {
3254            name,
3255            columns,
3256            parent_table,
3257            parent_columns,
3258            on_delete,
3259            on_update,
3260        })
3261    }
3262
3263    /// v7.6.0 — parse the tail `REFERENCES <tbl> [(<pcol>...)] [ON
3264    /// DELETE <action>] [ON UPDATE <action>]`. `expected_arity` is
3265    /// the local column count, used to default the parent column
3266    /// list when omitted (SQL spec: parent's PK is implied).
3267    fn parse_references_tail(
3268        &mut self,
3269        expected_arity: usize,
3270    ) -> Result<(String, Vec<String>, FkAction, FkAction), ParseError> {
3271        match self.advance() {
3272            Token::Ident(s) if s.eq_ignore_ascii_case("references") => {}
3273            other => return Err(self.err(format!("expected REFERENCES, got {other:?}"))),
3274        }
3275        let parent_table = self.expect_ident_like()?;
3276        let mut parent_columns: Vec<String> = Vec::new();
3277        if matches!(self.peek(), Token::LParen) {
3278            self.advance();
3279            loop {
3280                parent_columns.push(self.expect_ident_like()?);
3281                match self.peek() {
3282                    Token::Comma => {
3283                        self.advance();
3284                    }
3285                    Token::RParen => {
3286                        self.advance();
3287                        break;
3288                    }
3289                    other => {
3290                        return Err(self.err(format!(
3291                            "expected ',' or ')' in REFERENCES column list, got {other:?}"
3292                        )));
3293                    }
3294                }
3295            }
3296        }
3297        if !parent_columns.is_empty() && parent_columns.len() != expected_arity {
3298            return Err(self.err(format!(
3299                "FK arity mismatch: {} local column(s) vs {} parent column(s)",
3300                expected_arity,
3301                parent_columns.len()
3302            )));
3303        }
3304        // v7.6.7 — accept and reject `[NOT] DEFERRABLE [INITIALLY
3305        // {DEFERRED | IMMEDIATE}]` so existing PG dumps don't fail
3306        // at parse time. SPG's single-writer model has no deferred
3307        // constraint window, so we surface this as a clean
3308        // unsupported-feature error rather than a syntax error.
3309        loop {
3310            if matches!(self.peek(), Token::Ident(s) if s.eq_ignore_ascii_case("deferrable")) {
3311                return Err(self.err(
3312                    "DEFERRABLE constraints are not supported (SPG is single-writer; \
3313                     constraints are always evaluated immediately at commit)"
3314                        .into(),
3315                ));
3316            }
3317            if matches!(self.peek(), Token::Not) {
3318                let look = self.tokens.get(self.pos + 1);
3319                if matches!(look, Some(Token::Ident(s)) if s.eq_ignore_ascii_case("deferrable")) {
3320                    // NOT DEFERRABLE — accept as the SPG default
3321                    // and consume both tokens silently.
3322                    self.advance();
3323                    self.advance();
3324                    // Optional `INITIALLY IMMEDIATE` clause.
3325                    if matches!(self.peek(), Token::Ident(s) if s.eq_ignore_ascii_case("initially"))
3326                    {
3327                        self.advance();
3328                        match self.advance() {
3329                            Token::Ident(s) if s.eq_ignore_ascii_case("immediate") => {}
3330                            other => {
3331                                return Err(self.err(format!(
3332                                    "expected IMMEDIATE after INITIALLY for NOT DEFERRABLE, \
3333                                     got {other:?}"
3334                                )));
3335                            }
3336                        }
3337                    }
3338                    continue;
3339                }
3340                break;
3341            }
3342            break;
3343        }
3344        // Optional `ON DELETE <action>` and `ON UPDATE <action>` in
3345        // either order, each at most once.
3346        let mut on_delete = FkAction::Restrict;
3347        let mut on_update = FkAction::Restrict;
3348        let mut seen_on_delete = false;
3349        let mut seen_on_update = false;
3350        loop {
3351            if !matches!(self.peek(), Token::On) {
3352                break;
3353            }
3354            self.advance();
3355            let which = self.advance();
3356            let action = self.parse_fk_action()?;
3357            match which {
3358                Token::Ident(ref s) if s.eq_ignore_ascii_case("delete") => {
3359                    if seen_on_delete {
3360                        return Err(self.err("ON DELETE specified twice".into()));
3361                    }
3362                    seen_on_delete = true;
3363                    on_delete = action;
3364                }
3365                Token::Ident(ref s) if s.eq_ignore_ascii_case("update") => {
3366                    if seen_on_update {
3367                        return Err(self.err("ON UPDATE specified twice".into()));
3368                    }
3369                    seen_on_update = true;
3370                    on_update = action;
3371                }
3372                other => {
3373                    return Err(
3374                        self.err(format!("expected DELETE or UPDATE after ON, got {other:?}"))
3375                    );
3376                }
3377            }
3378        }
3379        Ok((parent_table, parent_columns, on_delete, on_update))
3380    }
3381
3382    /// v7.6.0 — parse `CASCADE | RESTRICT | SET NULL | SET DEFAULT |
3383    /// NO ACTION`.
3384    fn parse_fk_action(&mut self) -> Result<FkAction, ParseError> {
3385        match self.advance() {
3386            Token::Ident(s) if s.eq_ignore_ascii_case("cascade") => Ok(FkAction::Cascade),
3387            Token::Ident(s) if s.eq_ignore_ascii_case("restrict") => Ok(FkAction::Restrict),
3388            Token::Ident(s) if s.eq_ignore_ascii_case("set") => match self.advance() {
3389                Token::Null => Ok(FkAction::SetNull),
3390                Token::Default => Ok(FkAction::SetDefault),
3391                other => Err(self.err(format!(
3392                    "expected NULL or DEFAULT after SET in FK action, got {other:?}"
3393                ))),
3394            },
3395            Token::Ident(s) if s.eq_ignore_ascii_case("no") => match self.advance() {
3396                Token::Ident(s) if s.eq_ignore_ascii_case("action") => Ok(FkAction::NoAction),
3397                other => Err(self.err(format!(
3398                    "expected ACTION after NO in FK action, got {other:?}"
3399                ))),
3400            },
3401            other => Err(self.err(format!(
3402                "expected CASCADE | RESTRICT | SET NULL | SET DEFAULT | NO ACTION, got {other:?}"
3403            ))),
3404        }
3405    }
3406
3407    /// Recognise the optional `IF NOT EXISTS` prefix shared by `CREATE
3408    /// TABLE` and `CREATE INDEX`. Returns `true` if consumed.
3409    fn consume_if_not_exists(&mut self) -> bool {
3410        // `IF` arrives as a bare Ident (we don't reserve it because it
3411        // also appears mid-expression in PG, though we don't support
3412        // those forms yet).
3413        let looks_like_if = matches!(self.peek(), Token::Ident(s) if s.eq_ignore_ascii_case("if"));
3414        if !looks_like_if {
3415            return false;
3416        }
3417        // Peek one ahead before committing: only consume IF when it's
3418        // actually `IF NOT EXISTS`.
3419        if !matches!(self.tokens.get(self.pos + 1), Some(Token::Not)) {
3420            return false;
3421        }
3422        if !matches!(
3423            self.tokens.get(self.pos + 2),
3424            Some(Token::Ident(s)) if s.eq_ignore_ascii_case("exists")
3425        ) {
3426            return false;
3427        }
3428        self.advance(); // IF
3429        self.advance(); // NOT
3430        self.advance(); // EXISTS
3431        true
3432    }
3433
3434    /// v7.12.4 — `IF EXISTS` modifier for DROP statements.
3435    /// Consumes IF EXISTS as a pair; returns false otherwise
3436    /// without consuming any tokens.
3437    fn consume_if_exists(&mut self) -> bool {
3438        let looks_like_if = matches!(self.peek(), Token::Ident(s) if s.eq_ignore_ascii_case("if"));
3439        if !looks_like_if {
3440            return false;
3441        }
3442        if !matches!(
3443            self.tokens.get(self.pos + 1),
3444            Some(Token::Ident(s)) if s.eq_ignore_ascii_case("exists")
3445        ) {
3446            return false;
3447        }
3448        self.advance(); // IF
3449        self.advance(); // EXISTS
3450        true
3451    }
3452
3453    /// v7.9.14 — consume `ASC | DESC | NULLS FIRST | NULLS LAST`
3454    /// qualifiers after an index column ref. ASC / DESC are
3455    /// reserved tokens; NULLS / FIRST / LAST are bare idents.
3456    /// We accept and discard them since single-column BTree
3457    /// stores rows in natural key order today.
3458    fn consume_optional_index_column_qualifiers(&mut self) {
3459        loop {
3460            match self.peek() {
3461                Token::Asc | Token::Desc => {
3462                    self.advance();
3463                }
3464                Token::Ident(s) if s.eq_ignore_ascii_case("nulls") => {
3465                    let look = self.tokens.get(self.pos + 1);
3466                    if matches!(
3467                        look,
3468                        Some(Token::Ident(k)) if k.eq_ignore_ascii_case("first")
3469                            || k.eq_ignore_ascii_case("last")
3470                    ) {
3471                        self.advance();
3472                        self.advance();
3473                    } else {
3474                        break;
3475                    }
3476                }
3477                _ => break,
3478            }
3479        }
3480    }
3481
3482    fn parse_create_index_stmt_after_create(
3483        &mut self,
3484        is_unique: bool,
3485    ) -> Result<Statement, ParseError> {
3486        // Caller consumed CREATE (and the optional UNIQUE); we're on INDEX.
3487        debug_assert!(matches!(self.peek(), Token::Index));
3488        self.advance();
3489        let if_not_exists = self.consume_if_not_exists();
3490        let name = self.expect_ident_like()?;
3491        if !matches!(self.peek(), Token::On) {
3492            return Err(self.err(format!(
3493                "expected ON after CREATE INDEX <name>, got {:?}",
3494                self.peek()
3495            )));
3496        }
3497        self.advance();
3498        let table = self.expect_ident_like()?;
3499        // Optional `USING <method>` — only recognised method in v2.0 is
3500        // `hnsw` (a single-layer NSW graph for kNN). `USING` is the bare
3501        // ident `using` (we don't promote it to a reserved keyword
3502        // because it isn't reserved anywhere else in our SQL surface).
3503        let method = if matches!(self.peek(), Token::Ident(s) if s.eq_ignore_ascii_case("using")) {
3504            self.advance();
3505            let m = self.expect_ident_like()?;
3506            match m.to_ascii_lowercase().as_str() {
3507                "hnsw" => IndexMethod::Hnsw,
3508                "btree" => IndexMethod::BTree,
3509                "brin" => IndexMethod::Brin,
3510                // v7.12.3 — real GIN inverted index over `tsvector`.
3511                // v7.9.26b's `USING gin` → BTree silent fallback is
3512                // gone; the engine validates that the indexed column
3513                // is `tsvector` at CREATE INDEX time.
3514                "gin" => IndexMethod::Gin,
3515                // v7.9.26b — PG `pg_dump` emits `USING gist` /
3516                // `USING spgist` / `USING hash` for their built-in
3517                // AMs that SPG doesn't have a matching
3518                // implementation for; degrade to BTree on the
3519                // leading column so the schema loads + the index
3520                // catalogue stays consistent. Operator pays the
3521                // planner cost only for the queries that would have
3522                // used the specialised AM.
3523                "gist" | "spgist" | "hash" => IndexMethod::BTree,
3524                // v7.11.3 — pgvector ships both `ivfflat` and
3525                // `hnsw`. Customers shouldn't have to choose
3526                // their on-disk index method based on what SPG
3527                // implements; accept `ivfflat` as a synonym for
3528                // `hnsw` so PG schemas using either method drop
3529                // in. The vector distance op (`<->` / `<#>` /
3530                // `<=>`) at query time still picks the metric.
3531                "ivfflat" => IndexMethod::Hnsw,
3532                other => {
3533                    return Err(self.err(alloc::format!(
3534                        "unknown index method {other:?}; supported: hnsw, btree, brin, gin (gist/spgist/hash accepted as BTree fallback)"
3535                    )));
3536                }
3537            }
3538        } else {
3539            IndexMethod::BTree
3540        };
3541        if !matches!(self.peek(), Token::LParen) {
3542            return Err(self.err(format!(
3543                "expected '(' before indexed column, got {:?}",
3544                self.peek()
3545            )));
3546        }
3547        self.advance();
3548        // v6.8.2 — accept either a bare column ident (legacy) or
3549        // an expression `fn(col, …)` for expression indexes.
3550        // Distinguish by peeking the token *after* the current
3551        // ident: `ident )` is the legacy column-only path;
3552        // anything else triggers the Pratt expression parser.
3553        // (`advance()` uses `mem::replace` to nil out the current
3554        // slot, so we can't save+rewind cleanly — peek-ahead via
3555        // direct index avoids the mutation.)
3556        let mut opclass: Option<String> = None;
3557        let (column, expression): (String, Option<Expr>) = match self.peek().clone() {
3558            // Single column with `)` immediately after — fast path.
3559            // v7.9.29 — also: bare column followed by `,` (the
3560            // multi-column form `(a, b, c)`). Without this branch
3561            // the leading ident gets pulled into `parse_expr`
3562            // which then sets `expression = Some(Column(a))` and
3563            // breaks Display round-trip on the multi-column shape.
3564            Token::Ident(s) | Token::QuotedIdent(s)
3565                if matches!(
3566                    self.tokens.get(self.pos + 1),
3567                    Some(Token::RParen | Token::Comma)
3568                ) =>
3569            {
3570                self.advance();
3571                (s, None)
3572            }
3573            // v7.9.22 — single column followed by a pgvector
3574            // opclass ident: `(col vector_cosine_ops)`. mailrs G5.
3575            // v7.15.0 — capture the opclass instead of discarding
3576            // it so the engine can dispatch (e.g. `gin_trgm_ops`
3577            // → real trigram-shingle GIN over a TEXT column).
3578            // Vector/HNSW opclasses still take their distance
3579            // metric from the query operator (`<->` / `<#>` /
3580            // `<=>`), so for those callers the opclass stays
3581            // informational.
3582            Token::Ident(s) | Token::QuotedIdent(s)
3583                if matches!(
3584                    self.tokens.get(self.pos + 1),
3585                    Some(Token::Ident(op) | Token::QuotedIdent(op))
3586                        if is_vector_opclass_name(op)
3587                ) =>
3588            {
3589                self.advance(); // column name
3590                // Capture the opclass token, lower-cased for
3591                // case-insensitive engine dispatch.
3592                let op_tok = self.advance();
3593                if let Token::Ident(op) | Token::QuotedIdent(op) = op_tok {
3594                    opclass = Some(op.to_ascii_lowercase());
3595                }
3596                (s, None)
3597            }
3598            Token::Ident(_) | Token::QuotedIdent(_) => {
3599                let key_expr = self.parse_expr(0)?;
3600                let primary = extract_first_column(&key_expr).ok_or_else(|| {
3601                    self.err("expression index key must reference at least one column".into())
3602                })?;
3603                (primary, Some(key_expr))
3604            }
3605            other => {
3606                return Err(self.err(format!(
3607                    "expected column ident or expression, got {other:?}"
3608                )));
3609            }
3610        };
3611        // v7.9.14 — accept extra comma-separated columns inside
3612        // the index key parens (`CREATE INDEX … (a, b, c)`).
3613        // mailrs F2. Each extra column may carry an optional
3614        // `ASC` / `DESC` / `NULLS FIRST` / `NULLS LAST` clause
3615        // — parsed and discarded; SPG doesn't honour direction
3616        // on a BTree index today (column ordering is intrinsic
3617        // to the storage). v7.10 will widen to genuine composite
3618        // index keys.
3619        let mut extra_columns: Vec<String> = Vec::new();
3620        // The leading column may also have ASC/DESC after it.
3621        self.consume_optional_index_column_qualifiers();
3622        while matches!(self.peek(), Token::Comma) {
3623            self.advance();
3624            let extra = self.expect_ident_like()?;
3625            self.consume_optional_index_column_qualifiers();
3626            extra_columns.push(extra);
3627        }
3628        if !matches!(self.peek(), Token::RParen) {
3629            return Err(self.err(format!(
3630                "expected ')' after indexed column / expression, got {:?}",
3631                self.peek()
3632            )));
3633        }
3634        self.advance();
3635        // v6.8.0 — optional `INCLUDE (col1, col2, …)` clause for
3636        // index-only-scan annotation. Bare ident (not a reserved
3637        // keyword) so we test by case-insensitive string match.
3638        let included_columns = if matches!(self.peek(), Token::Ident(s) if s.eq_ignore_ascii_case("include"))
3639        {
3640            self.advance();
3641            if !matches!(self.peek(), Token::LParen) {
3642                return Err(self.err(format!("expected '(' after INCLUDE, got {:?}", self.peek())));
3643            }
3644            self.advance();
3645            let mut cols = Vec::new();
3646            loop {
3647                cols.push(self.expect_ident_like()?);
3648                match self.peek() {
3649                    Token::Comma => {
3650                        self.advance();
3651                    }
3652                    Token::RParen => {
3653                        self.advance();
3654                        break;
3655                    }
3656                    other => {
3657                        return Err(self.err(format!(
3658                            "expected ',' or ')' in INCLUDE list, got {other:?}"
3659                        )));
3660                    }
3661                }
3662            }
3663            cols
3664        } else {
3665            Vec::new()
3666        };
3667        // v7.11.3 — accept and discard PG `WITH (k = v, ...)` index
3668        // storage parameters. pgvector emits `WITH (lists = N)` for
3669        // ivfflat and `WITH (m = N, ef_construction = M)` for hnsw;
3670        // SPG's HNSW picks its own parameters today (tunable via
3671        // env vars), so the WITH clause is informational and dropped.
3672        if matches!(self.peek(), Token::Ident(s) if s.eq_ignore_ascii_case("with")) {
3673            self.advance();
3674            if !matches!(self.peek(), Token::LParen) {
3675                return Err(self.err(format!(
3676                    "expected '(' after WITH in CREATE INDEX, got {:?}",
3677                    self.peek()
3678                )));
3679            }
3680            self.advance();
3681            loop {
3682                if matches!(self.peek(), Token::RParen) {
3683                    self.advance();
3684                    break;
3685                }
3686                // Drain `key = value` or bare `key` tokens.
3687                let _ = self.advance(); // key
3688                if matches!(self.peek(), Token::Eq) {
3689                    self.advance();
3690                    let _ = self.advance(); // value (int / string / ident)
3691                }
3692                match self.peek() {
3693                    Token::Comma => {
3694                        self.advance();
3695                    }
3696                    Token::RParen => {
3697                        self.advance();
3698                        break;
3699                    }
3700                    other => {
3701                        return Err(self.err(format!(
3702                            "expected ',' or ')' in WITH (…) clause, got {other:?}"
3703                        )));
3704                    }
3705                }
3706            }
3707        }
3708        // v6.8.1 — optional `WHERE <expr>` partial-index predicate.
3709        let partial_predicate = if matches!(self.peek(), Token::Where) {
3710            self.advance();
3711            Some(self.parse_expr(0)?)
3712        } else {
3713            None
3714        };
3715        // v7.9.29 — UNIQUE on a vector index (HNSW) makes no
3716        // sense: uniqueness over an ANN structure has no clean
3717        // semantics. Reject early. (BRIN UNIQUE is similarly
3718        // meaningless — block both.)
3719        if is_unique && !matches!(method, IndexMethod::BTree) {
3720            return Err(self.err(alloc::format!(
3721                "UNIQUE is only supported on BTree indexes, got USING {:?}",
3722                method
3723            )));
3724        }
3725        Ok(Statement::CreateIndex(CreateIndexStatement {
3726            name,
3727            table,
3728            column,
3729            method,
3730            if_not_exists,
3731            included_columns,
3732            partial_predicate,
3733            extra_columns: extra_columns.clone(),
3734            expression,
3735            is_unique,
3736            opclass,
3737        }))
3738    }
3739
3740    /// v7.6.0 — wraps `parse_column_def` and consumes an optional
3741    /// column-level `REFERENCES ...` clause. The trailing FK is
3742    /// normalised into table-level shape (single-element columns +
3743    /// parent_columns) so the engine sees one uniform constraint list.
3744    fn parse_column_def_with_fk(
3745        &mut self,
3746    ) -> Result<(ColumnDef, Option<ForeignKeyConstraint>), ParseError> {
3747        let col = self.parse_column_def()?;
3748        // Inline form: `col INT REFERENCES tbl(pcol) [ON DELETE ...] [ON UPDATE ...]`.
3749        let inline_references = matches!(
3750            self.peek(),
3751            Token::Ident(s) if s.eq_ignore_ascii_case("references")
3752        );
3753        if !inline_references {
3754            return Ok((col, None));
3755        }
3756        let (parent_table, parent_columns, on_delete, on_update) = self.parse_references_tail(1)?;
3757        let fk = ForeignKeyConstraint {
3758            name: None,
3759            columns: vec![col.name.clone()],
3760            parent_table,
3761            parent_columns,
3762            on_delete,
3763            on_update,
3764        };
3765        Ok((col, Some(fk)))
3766    }
3767
3768    /// v7.13.0 — parse a column type (consuming the type ident and
3769    /// any trailing parameters / `[]`), without surrounding column
3770    /// constraints. Used by ALTER COLUMN TYPE (mailrs round-5 G8).
3771    /// Returns the resolved `ColumnTypeName` plus implied
3772    /// `(auto_increment, not_null)` flags from PG SERIAL family
3773    /// shorthands — callers that don't expect those (ALTER COLUMN
3774    /// TYPE) can discard them.
3775    fn parse_column_type_name(&mut self) -> Result<ColumnTypeName, ParseError> {
3776        let (ty, _, _) = self.parse_type_with_implied_flags()?;
3777        Ok(ty)
3778    }
3779
3780    fn parse_type_with_implied_flags(
3781        &mut self,
3782    ) -> Result<(ColumnTypeName, bool, bool), ParseError> {
3783        let ty_ident = match self.advance() {
3784            Token::Ident(s) => s,
3785            other => {
3786                return Err(ParseError {
3787                    message: format!("expected column type, got {other:?}"),
3788                    token_pos: self.pos.saturating_sub(1),
3789                });
3790            }
3791        };
3792        let mut implied_auto_increment = false;
3793        let mut implied_not_null = false;
3794        let mut ty = match ty_ident.as_str() {
3795            // PG SERIAL family. Implies NOT NULL + AUTO_INCREMENT.
3796            "smallserial" | "serial2" => {
3797                implied_auto_increment = true;
3798                implied_not_null = true;
3799                ColumnTypeName::SmallInt
3800            }
3801            "serial" | "serial4" => {
3802                implied_auto_increment = true;
3803                implied_not_null = true;
3804                ColumnTypeName::Int
3805            }
3806            "bigserial" | "serial8" => {
3807                implied_auto_increment = true;
3808                implied_not_null = true;
3809                ColumnTypeName::BigInt
3810            }
3811            // MySQL flavours we accept by aliasing to the closest SPG
3812            // type. TINYINT covers MySQL's i8 — held inside SMALLINT
3813            // since SPG doesn't have a dedicated i8. MEDIUMINT (MySQL
3814            // 24-bit) → INT. UNSIGNED modifiers are consumed below
3815            // without semantic effect.
3816            "smallint" | "tinyint" => {
3817                // v7.14.0 — MySQL display-width on integers
3818                // (`TINYINT(1)`, `INT(11)`, `BIGINT(20)`). The
3819                // parenthesised number is purely cosmetic — it
3820                // doesn't change storage. Accept + discard.
3821                self.consume_optional_paren_size();
3822                ColumnTypeName::SmallInt
3823            }
3824            "int" | "integer" | "mediumint" => {
3825                self.consume_optional_paren_size();
3826                ColumnTypeName::Int
3827            }
3828            "bigint" => {
3829                self.consume_optional_paren_size();
3830                ColumnTypeName::BigInt
3831            }
3832            // DOUBLE / REAL are 64-bit IEEE — same as our FLOAT.
3833            // v7.13.0 — `DOUBLE PRECISION` (PG canonical spelling)
3834            // (mailrs round-5 G6). Consume the optional `PRECISION`
3835            // tail when the type keyword was `double` / `DOUBLE`.
3836            "float" | "double" | "real" => {
3837                if ty_ident.eq_ignore_ascii_case("double")
3838                    && matches!(self.peek(), Token::Ident(s) if s.eq_ignore_ascii_case("precision"))
3839                {
3840                    self.advance();
3841                }
3842                ColumnTypeName::Float
3843            }
3844            // v7.13.0 — `FLOAT8` (PG short form) maps the same as FLOAT.
3845            "float4" | "float8" => ColumnTypeName::Float,
3846            "text" => ColumnTypeName::Text,
3847            "bool" | "boolean" => ColumnTypeName::Bool,
3848            "varchar" => ColumnTypeName::Varchar(self.parse_paren_size("VARCHAR")?),
3849            "char" => ColumnTypeName::Char(self.parse_paren_size("CHAR")?),
3850            "vector" => {
3851                let dim = self.parse_paren_size("VECTOR")?;
3852                let encoding = self.parse_optional_vector_encoding()?;
3853                ColumnTypeName::Vector { dim, encoding }
3854            }
3855            "numeric" => {
3856                let (precision, scale) = self.parse_optional_numeric_params()?;
3857                ColumnTypeName::Numeric(precision, scale)
3858            }
3859            "date" => ColumnTypeName::Date,
3860            // MySQL's `DATETIME` is the same domain as standard
3861            // `TIMESTAMP` — accept both spellings.
3862            "timestamp" | "datetime" => {
3863                // v7.14.0 — PG canonical `TIMESTAMP WITH TIME ZONE`
3864                // / `TIMESTAMP WITHOUT TIME ZONE`. pg_dump emits
3865                // the full form. SPG canonicalises:
3866                //   - WITH TIME ZONE    → Timestamptz
3867                //   - WITHOUT TIME ZONE → Timestamp
3868                if matches!(self.peek(), Token::Ident(s) if s.eq_ignore_ascii_case("with"))
3869                    && matches!(self.tokens.get(self.pos + 1), Some(Token::Ident(s)) if s.eq_ignore_ascii_case("time"))
3870                    && matches!(self.tokens.get(self.pos + 2), Some(Token::Ident(s)) if s.eq_ignore_ascii_case("zone"))
3871                {
3872                    self.advance(); // WITH
3873                    self.advance(); // TIME
3874                    self.advance(); // ZONE
3875                    ColumnTypeName::Timestamptz
3876                } else if matches!(self.peek(), Token::Ident(s) if s.eq_ignore_ascii_case("without"))
3877                    && matches!(self.tokens.get(self.pos + 1), Some(Token::Ident(s)) if s.eq_ignore_ascii_case("time"))
3878                    && matches!(self.tokens.get(self.pos + 2), Some(Token::Ident(s)) if s.eq_ignore_ascii_case("zone"))
3879                {
3880                    self.advance(); // WITHOUT
3881                    self.advance(); // TIME
3882                    self.advance(); // ZONE
3883                    ColumnTypeName::Timestamp
3884                } else {
3885                    // Optional `(precision)` parenthesised modifier
3886                    // (PG fractional seconds precision). SPG stores
3887                    // µs always; accept + discard.
3888                    self.consume_optional_paren_size();
3889                    ColumnTypeName::Timestamp
3890                }
3891            }
3892            // v7.9.2 — `TIMESTAMPTZ` and full PG spelling
3893            // `TIMESTAMP WITH TIME ZONE`. Same storage as TIMESTAMP;
3894            // only PG-wire OID differs.
3895            "timestamptz" => ColumnTypeName::Timestamptz,
3896            // v4.9: JSON / JSONB. Stored as raw text — no parse-time
3897            // validation. We accept the JSONB spelling too because
3898            // most PG clients default to it; SPG doesn't distinguish
3899            // the two (no path-operator perf advantage to model).
3900            "json" => ColumnTypeName::Json,
3901            "jsonb" => ColumnTypeName::Jsonb,
3902            // v7.10.4 — PG `BYTEA` and the SPG `BYTES` alias both
3903            // surface here. Same storage shape; mapping happens at
3904            // the engine side via the ColumnTypeName → DataType
3905            // resolver. Literal forms are handled at coerce_value
3906            // time so the lexer stays untouched.
3907            "bytea" | "bytes" => ColumnTypeName::Bytes,
3908            // v7.12.0 — PG full-text search types. mailrs G-CRIT-3.
3909            // The actual `to_tsvector` / `@@` / `ts_rank` surface
3910            // arrives in v7.12.1+; the type itself loads here so
3911            // mailrs's `scripts/init-schema.sql` runs unmodified.
3912            "tsvector" => ColumnTypeName::TsVector,
3913            "tsquery" => ColumnTypeName::TsQuery,
3914            other => {
3915                return Err(ParseError {
3916                    message: format!("unsupported column type {other:?}"),
3917                    token_pos: self.pos.saturating_sub(1),
3918                });
3919            }
3920        };
3921        // MySQL's `UNSIGNED` modifier sits right after the type
3922        // keyword. SPG doesn't carry a separate unsigned variant —
3923        // accepting the keyword keeps existing schemas compatible
3924        // without changing semantics. Drop it silently.
3925        if matches!(self.peek(), Token::Ident(s) if s.eq_ignore_ascii_case("unsigned")) {
3926            self.advance();
3927        }
3928        // v7.14.0 — mysqldump emits `<type> CHARACTER SET <name>` and
3929        // `<type> COLLATE <name>` post-fixes on text columns. SPG
3930        // stores text as UTF-8 always and orders bytewise; charset /
3931        // collate are accepted as no-ops so PG / MySQL / MariaDB
3932        // dumps load without parser noise.
3933        loop {
3934            if matches!(self.peek(), Token::Ident(s) if s.eq_ignore_ascii_case("character"))
3935                && matches!(self.tokens.get(self.pos + 1), Some(Token::Ident(s)) if s.eq_ignore_ascii_case("set"))
3936            {
3937                self.advance(); // CHARACTER
3938                self.advance(); // SET
3939                if matches!(self.peek(), Token::Ident(_) | Token::QuotedIdent(_) | Token::String(_))
3940                {
3941                    self.advance();
3942                }
3943                continue;
3944            }
3945            if matches!(self.peek(), Token::Ident(s) if s.eq_ignore_ascii_case("collate")) {
3946                self.advance(); // COLLATE
3947                if matches!(self.peek(), Token::Ident(_) | Token::QuotedIdent(_) | Token::String(_))
3948                {
3949                    self.advance();
3950                }
3951                continue;
3952            }
3953            break;
3954        }
3955        // v7.10.10 — postfix `[]` widens TEXT → TEXT[]. PG accepts
3956        // `TYPE[]` after any base type; v7.10 only models TEXT[]
3957        // so we reject other base types here. mailrs uses TEXT[]
3958        // for labels / addresses / message-on-thread.
3959        if matches!(self.peek(), Token::LBracket) {
3960            self.advance();
3961            if !matches!(self.peek(), Token::RBracket) {
3962                return Err(self.err(alloc::format!(
3963                    "TEXT[] takes no dimension; got {:?}",
3964                    self.peek()
3965                )));
3966            }
3967            self.advance();
3968            // v7.11.13 — widened to INT[] and BIGINT[] in addition
3969            // to TEXT[]. Other base types (BOOL[], NUMERIC[], etc.)
3970            // still error here.
3971            ty = match ty {
3972                ColumnTypeName::Text => ColumnTypeName::TextArray,
3973                ColumnTypeName::Int => ColumnTypeName::IntArray,
3974                ColumnTypeName::BigInt => ColumnTypeName::BigIntArray,
3975                other => {
3976                    return Err(self.err(alloc::format!(
3977                        "v7.11 supports TEXT[] / INT[] / BIGINT[] only; got {other:?}[]"
3978                    )));
3979                }
3980            };
3981        }
3982        Ok((ty, implied_auto_increment, implied_not_null))
3983    }
3984
3985    fn parse_column_def(&mut self) -> Result<ColumnDef, ParseError> {
3986        let name = self.expect_ident_like()?;
3987        let (ty, implied_auto_increment, implied_not_null) =
3988            self.parse_type_with_implied_flags()?;
3989        // Column constraints: `DEFAULT <expr>`, `NOT NULL`, and the
3990        // MySQL-flavoured `AUTO_INCREMENT` may appear in any order;
3991        // each at most once.
3992        let mut default: Option<Expr> = None;
3993        let mut nullable = !implied_not_null;
3994        let mut nullability_seen = implied_not_null;
3995        let mut auto_increment = implied_auto_increment;
3996        let mut is_primary_key = false;
3997        let mut is_unique = false;
3998        let mut check: Option<Expr> = None;
3999        loop {
4000            if matches!(self.peek(), Token::Default) {
4001                if default.is_some() {
4002                    return Err(self.err("DEFAULT specified twice".into()));
4003                }
4004                self.advance();
4005                default = Some(self.parse_expr(0)?);
4006                continue;
4007            }
4008            if matches!(self.peek(), Token::Not) {
4009                if nullability_seen {
4010                    return Err(self.err("NOT NULL specified twice".into()));
4011                }
4012                self.advance();
4013                if !matches!(self.peek(), Token::Null) {
4014                    return Err(self.err(format!(
4015                        "expected NULL after NOT in column def, got {:?}",
4016                        self.peek()
4017                    )));
4018                }
4019                self.advance();
4020                nullable = false;
4021                nullability_seen = true;
4022                continue;
4023            }
4024            // v7.14.0 — MySQL accepts a bare `NULL` as an explicit
4025            // "this column is nullable" marker (the default in
4026            // standard SQL anyway). mysqldump emits it routinely
4027            // (`col TYPE NULL DEFAULT NULL` for nullable
4028            // timestamps etc). Accept + no-op.
4029            if matches!(self.peek(), Token::Null) {
4030                if nullability_seen && !nullable {
4031                    return Err(self.err(
4032                        "column declared NOT NULL then NULL — pick one".into(),
4033                    ));
4034                }
4035                self.advance();
4036                nullable = true;
4037                nullability_seen = true;
4038                continue;
4039            }
4040            // `AUTO_INCREMENT` or its abbreviated form `AUTOINCREMENT`
4041            // arrives as a bare Ident. Match either, case-insensitive.
4042            if let Token::Ident(s) = self.peek()
4043                && (s.eq_ignore_ascii_case("auto_increment")
4044                    || s.eq_ignore_ascii_case("autoincrement"))
4045            {
4046                if auto_increment {
4047                    return Err(self.err("AUTO_INCREMENT specified twice".into()));
4048                }
4049                self.advance();
4050                auto_increment = true;
4051                continue;
4052            }
4053            // v7.9.13 — inline `PRIMARY KEY` column constraint
4054            // (mailrs F1). Implies `NOT NULL`. The engine creates
4055            // a BTree index for the PK column at CREATE TABLE time
4056            // so FK parent-side index lookups resolve.
4057            if let Token::Ident(s) = self.peek()
4058                && s.eq_ignore_ascii_case("primary")
4059            {
4060                if is_primary_key {
4061                    return Err(self.err("PRIMARY KEY specified twice".into()));
4062                }
4063                // Peek-ahead for the required `KEY` token.
4064                let next = self.tokens.get(self.pos + 1);
4065                let next_is_key = matches!(
4066                    next,
4067                    Some(Token::Ident(k)) if k.eq_ignore_ascii_case("key")
4068                );
4069                if !next_is_key {
4070                    return Err(self.err(format!(
4071                        "expected KEY after PRIMARY in column def, got {:?}",
4072                        next
4073                    )));
4074                }
4075                self.advance(); // PRIMARY
4076                self.advance(); // KEY
4077                is_primary_key = true;
4078                if nullability_seen && nullable {
4079                    return Err(self.err(
4080                        "column declared NULL but inline PRIMARY KEY implies NOT NULL".into(),
4081                    ));
4082                }
4083                nullable = false;
4084                nullability_seen = true;
4085                continue;
4086            }
4087            // v7.13.0 — inline `UNIQUE` column constraint
4088            // (mailrs round-5 G2). Fold into a single-column
4089            // table-level UNIQUE at CREATE TABLE post-process time.
4090            if let Token::Ident(s) = self.peek()
4091                && s.eq_ignore_ascii_case("unique")
4092            {
4093                if is_unique {
4094                    return Err(self.err("UNIQUE specified twice".into()));
4095                }
4096                self.advance();
4097                is_unique = true;
4098                continue;
4099            }
4100            // v7.13.0 — inline `CHECK (<expr>)` column constraint
4101            // (mailrs round-5 G3). PG semantics: column-level
4102            // CHECK is equivalent to a table-level CHECK. Multiple
4103            // inline CHECKs on the same column AND together.
4104            if let Token::Ident(s) = self.peek()
4105                && s.eq_ignore_ascii_case("check")
4106            {
4107                self.advance();
4108                if !matches!(self.peek(), Token::LParen) {
4109                    return Err(self.err(alloc::format!(
4110                        "expected '(' after CHECK in column def, got {:?}",
4111                        self.peek()
4112                    )));
4113                }
4114                self.advance();
4115                let pred = self.parse_expr(0)?;
4116                if !matches!(self.peek(), Token::RParen) {
4117                    return Err(self.err(alloc::format!(
4118                        "expected ')' to close CHECK predicate, got {:?}",
4119                        self.peek()
4120                    )));
4121                }
4122                self.advance();
4123                check = Some(match check.take() {
4124                    Some(prev) => Expr::Binary {
4125                        op: BinOp::And,
4126                        lhs: Box::new(prev),
4127                        rhs: Box::new(pred),
4128                    },
4129                    None => pred,
4130                });
4131                continue;
4132            }
4133            break;
4134        }
4135        Ok(ColumnDef {
4136            name,
4137            ty,
4138            nullable,
4139            default,
4140            auto_increment,
4141            is_primary_key,
4142            is_unique,
4143            check,
4144        })
4145    }
4146
4147    /// `NUMERIC` may appear without parameters, with one (precision
4148    /// only, scale=0), or with both. Returns `(precision, scale)` with
4149    /// 0 = unspecified for the bare form.
4150    fn parse_optional_numeric_params(&mut self) -> Result<(u8, u8), ParseError> {
4151        if !matches!(self.peek(), Token::LParen) {
4152            // Bare `NUMERIC` — PG treats this as "unlimited precision";
4153            // we surface it as precision=0 to mean "unconstrained" so
4154            // the engine doesn't need a separate variant.
4155            return Ok((0, 0));
4156        }
4157        self.advance();
4158        let precision = match self.advance() {
4159            Token::Integer(n) if (1..=38).contains(&n) => u8::try_from(n).expect("range-checked"),
4160            other => {
4161                return Err(ParseError {
4162                    message: format!(
4163                        "NUMERIC precision must be an integer in 1..=38, got {other:?}"
4164                    ),
4165                    token_pos: self.pos.saturating_sub(1),
4166                });
4167            }
4168        };
4169        let scale = if matches!(self.peek(), Token::Comma) {
4170            self.advance();
4171            match self.advance() {
4172                Token::Integer(n) if (0..=i64::from(precision)).contains(&n) => {
4173                    u8::try_from(n).expect("range-checked")
4174                }
4175                other => {
4176                    return Err(ParseError {
4177                        message: format!(
4178                            "NUMERIC scale must be a non-negative integer ≤ precision, got {other:?}"
4179                        ),
4180                        token_pos: self.pos.saturating_sub(1),
4181                    });
4182                }
4183            }
4184        } else {
4185            0
4186        };
4187        if !matches!(self.peek(), Token::RParen) {
4188            return Err(self.err(format!(
4189                "expected ')' to close NUMERIC params, got {:?}",
4190                self.peek()
4191            )));
4192        }
4193        self.advance();
4194        Ok((precision, scale))
4195    }
4196
4197    /// Parse `(N)` where `N` is a positive integer literal — used by the
4198    /// `VARCHAR`/`CHAR`/`VECTOR` column types. `label` is the type name
4199    /// for the error message.
4200    /// v6.0.1: parse the optional `USING <encoding>` clause that
4201    /// follows `VECTOR(N)` in a column definition. Missing clause
4202    /// → `VecEncoding::F32` (pre-v6 default). Unknown encoding
4203    /// ident → `ParseError` listing the encodings recognised today.
4204    fn parse_optional_vector_encoding(&mut self) -> Result<VecEncoding, ParseError> {
4205        if !matches!(self.peek(), Token::Ident(s) if s.eq_ignore_ascii_case("using")) {
4206            return Ok(VecEncoding::F32);
4207        }
4208        // v7.13.2 — mailrs round-6 S6: `USING` after a vector type
4209        // overlaps with `ALTER COLUMN TYPE … USING <expr>`. Only
4210        // consume the token when the very next token is a known
4211        // vector-encoding keyword (SQ8 / HALF). Otherwise leave
4212        // `USING` for the caller — it's the rewrite-expression form.
4213        let n1 = self.tokens.get(self.pos + 1);
4214        let next_is_encoding = matches!(
4215            n1,
4216            Some(Token::Ident(s))
4217                if s.eq_ignore_ascii_case("sq8") || s.eq_ignore_ascii_case("half")
4218        );
4219        if !next_is_encoding {
4220            return Ok(VecEncoding::F32);
4221        }
4222        self.advance();
4223        let enc_ident = match self.advance() {
4224            Token::Ident(s) => s,
4225            other => {
4226                return Err(self.err(format!(
4227                    "expected vector encoding after USING, got {other:?}"
4228                )));
4229            }
4230        };
4231        match enc_ident.to_ascii_lowercase().as_str() {
4232            "sq8" => Ok(VecEncoding::Sq8),
4233            // v6.0.3: `HALF` (pgvector convention) selects IEEE-754
4234            // binary16 per-element storage.
4235            "half" => Ok(VecEncoding::F16),
4236            other => Err(self.err(format!(
4237                "unknown vector encoding {other:?}; supported: SQ8, HALF"
4238            ))),
4239        }
4240    }
4241
4242    /// v7.14.0 — consume an optional MySQL display-width
4243    /// parenthesised number after an integer type, returning
4244    /// nothing. `TINYINT(1)` etc.
4245    fn consume_optional_paren_size(&mut self) {
4246        if !matches!(self.peek(), Token::LParen) {
4247            return;
4248        }
4249        self.advance();
4250        // Skip until matching RParen (allow nested or any tokens).
4251        let mut depth = 1usize;
4252        while depth > 0 {
4253            match self.peek() {
4254                Token::LParen => depth += 1,
4255                Token::RParen => depth -= 1,
4256                Token::Eof => return,
4257                _ => {}
4258            }
4259            self.advance();
4260        }
4261    }
4262
4263    fn parse_paren_size(&mut self, label: &str) -> Result<u32, ParseError> {
4264        if !matches!(self.peek(), Token::LParen) {
4265            return Err(self.err(format!("{label} type requires (N), got {:?}", self.peek())));
4266        }
4267        self.advance();
4268        let n = match self.advance() {
4269            Token::Integer(n) if n > 0 => u32::try_from(n).map_err(|_| ParseError {
4270                message: format!("{label} size too large: {n}"),
4271                token_pos: self.pos.saturating_sub(1),
4272            })?,
4273            other => {
4274                return Err(ParseError {
4275                    message: format!("expected positive integer {label} size, got {other:?}"),
4276                    token_pos: self.pos.saturating_sub(1),
4277                });
4278            }
4279        };
4280        if !matches!(self.peek(), Token::RParen) {
4281            return Err(self.err(format!(
4282                "expected ')' after {label} size, got {:?}",
4283                self.peek()
4284            )));
4285        }
4286        self.advance();
4287        Ok(n)
4288    }
4289
4290    fn parse_insert_stmt(&mut self) -> Result<Statement, ParseError> {
4291        debug_assert!(matches!(self.peek(), Token::Insert));
4292        self.advance();
4293        if !matches!(self.peek(), Token::Into) {
4294            return Err(self.err(format!("expected INTO after INSERT, got {:?}", self.peek())));
4295        }
4296        self.advance();
4297        let table = self.expect_ident_like()?;
4298        // Optional column list — `INSERT INTO t (a, b) VALUES ...`.
4299        let columns = if matches!(self.peek(), Token::LParen) {
4300            self.advance();
4301            let mut names = Vec::new();
4302            loop {
4303                names.push(self.expect_ident_like()?);
4304                match self.peek() {
4305                    Token::Comma => {
4306                        self.advance();
4307                    }
4308                    Token::RParen => {
4309                        self.advance();
4310                        break;
4311                    }
4312                    other => {
4313                        return Err(self.err(format!(
4314                            "expected ',' or ')' in INSERT column list, got {other:?}"
4315                        )));
4316                    }
4317                }
4318            }
4319            Some(names)
4320        } else {
4321            None
4322        };
4323        // v7.13.0 — `INSERT INTO t [(cols)] SELECT …` (mailrs
4324        // round-5 G4). Dispatch on VALUES vs SELECT.
4325        if matches!(self.peek(), Token::Select) {
4326            let select_stmt = match self.parse_select_stmt()? {
4327                Statement::Select(s) => s,
4328                other => {
4329                    return Err(self.err(alloc::format!(
4330                        "expected SELECT after INSERT INTO ... target, got {other:?}"
4331                    )));
4332                }
4333            };
4334            let on_conflict = self.parse_optional_on_conflict()?;
4335            let returning = self.parse_optional_returning()?;
4336            return Ok(Statement::Insert(InsertStatement {
4337                table,
4338                columns,
4339                rows: Vec::new(),
4340                select_source: Some(Box::new(select_stmt)),
4341                on_conflict,
4342                returning,
4343            }));
4344        }
4345        if !matches!(self.peek(), Token::Values) {
4346            return Err(self.err(format!(
4347                "expected VALUES or SELECT after table name, got {:?}",
4348                self.peek()
4349            )));
4350        }
4351        self.advance();
4352        if !matches!(self.peek(), Token::LParen) {
4353            return Err(self.err(format!("expected '(' after VALUES, got {:?}", self.peek())));
4354        }
4355        let mut rows = Vec::new();
4356        loop {
4357            // Each iteration consumes one `(expr, expr, …)` tuple.
4358            if !matches!(self.peek(), Token::LParen) {
4359                return Err(self.err(format!(
4360                    "expected '(' for next VALUES tuple, got {:?}",
4361                    self.peek()
4362                )));
4363            }
4364            self.advance();
4365            let mut tuple = Vec::new();
4366            loop {
4367                tuple.push(self.parse_expr(0)?);
4368                match self.peek() {
4369                    Token::Comma => {
4370                        self.advance();
4371                    }
4372                    Token::RParen => {
4373                        self.advance();
4374                        break;
4375                    }
4376                    other => {
4377                        return Err(self.err(format!(
4378                            "expected ',' or ')' in VALUES tuple, got {other:?}"
4379                        )));
4380                    }
4381                }
4382            }
4383            if tuple.is_empty() {
4384                return Err(self.err("INSERT VALUES tuple requires at least one value".into()));
4385            }
4386            rows.push(tuple);
4387            // Continue with comma-separated tuples.
4388            if matches!(self.peek(), Token::Comma) {
4389                self.advance();
4390            } else {
4391                break;
4392            }
4393        }
4394        let on_conflict = self.parse_optional_on_conflict()?;
4395        let returning = self.parse_optional_returning()?;
4396        Ok(Statement::Insert(InsertStatement {
4397            table,
4398            columns,
4399            rows,
4400            select_source: None,
4401            on_conflict,
4402            returning,
4403        }))
4404    }
4405
4406    /// v7.9.7 — parse the optional `ON CONFLICT (cols) DO …`
4407    /// clause sitting between the INSERT body and the trailing
4408    /// RETURNING. All keywords come in as bare idents; `ON` is
4409    /// a reserved Token though.
4410    fn parse_optional_on_conflict(
4411        &mut self,
4412    ) -> Result<Option<crate::ast::OnConflictClause>, ParseError> {
4413        if !matches!(self.peek(), Token::On) {
4414            return Ok(None);
4415        }
4416        // Peek further: we want exactly "ON CONFLICT ...". If the
4417        // next ident isn't "conflict", let some other parser handle.
4418        let next_is_conflict = matches!(
4419            self.tokens.get(self.pos + 1),
4420            Some(Token::Ident(s) | Token::QuotedIdent(s)) if s.eq_ignore_ascii_case("conflict")
4421        );
4422        if !next_is_conflict {
4423            return Ok(None);
4424        }
4425        self.advance(); // ON
4426        self.advance(); // CONFLICT
4427        // Optional `(col [, col]*)` target list.
4428        let mut target_columns: Vec<String> = Vec::new();
4429        if matches!(self.peek(), Token::LParen) {
4430            self.advance();
4431            loop {
4432                target_columns.push(self.expect_ident_like()?);
4433                match self.peek() {
4434                    Token::Comma => {
4435                        self.advance();
4436                    }
4437                    Token::RParen => {
4438                        self.advance();
4439                        break;
4440                    }
4441                    other => {
4442                        return Err(self.err(alloc::format!(
4443                            "expected ',' or ')' in ON CONFLICT target list, got {other:?}"
4444                        )));
4445                    }
4446                }
4447            }
4448        }
4449        // Required `DO`.
4450        match self.advance() {
4451            Token::Ident(s) | Token::QuotedIdent(s) if s.eq_ignore_ascii_case("do") => {}
4452            other => {
4453                return Err(self.err(alloc::format!(
4454                    "expected DO after ON CONFLICT [(…)], got {other:?}"
4455                )));
4456            }
4457        }
4458        // Action: NOTHING | UPDATE SET …
4459        let action = match self.advance() {
4460            Token::Ident(s) | Token::QuotedIdent(s) if s.eq_ignore_ascii_case("nothing") => {
4461                crate::ast::OnConflictAction::Nothing
4462            }
4463            Token::Ident(s) | Token::QuotedIdent(s) if s.eq_ignore_ascii_case("update") => {
4464                self.parse_on_conflict_update_action()?
4465            }
4466            other => {
4467                return Err(self.err(alloc::format!(
4468                    "expected NOTHING or UPDATE after ON CONFLICT DO, got {other:?}"
4469                )));
4470            }
4471        };
4472        Ok(Some(crate::ast::OnConflictClause {
4473            target_columns,
4474            action,
4475        }))
4476    }
4477
4478    /// v7.9.7 — tail of `ON CONFLICT … DO UPDATE`: parse
4479    /// `SET col = expr [, …] [WHERE cond]`. Caller already
4480    /// consumed `UPDATE`.
4481    fn parse_on_conflict_update_action(
4482        &mut self,
4483    ) -> Result<crate::ast::OnConflictAction, ParseError> {
4484        // `SET`
4485        match self.advance() {
4486            Token::Ident(s) | Token::QuotedIdent(s) if s.eq_ignore_ascii_case("set") => {}
4487            other => {
4488                return Err(self.err(alloc::format!(
4489                    "expected SET after ON CONFLICT DO UPDATE, got {other:?}"
4490                )));
4491            }
4492        }
4493        let mut assignments: Vec<(String, Expr)> = Vec::new();
4494        loop {
4495            let col = self.expect_ident_like()?;
4496            if !matches!(self.peek(), Token::Eq) {
4497                return Err(self.err(alloc::format!(
4498                    "expected `=` after column in ON CONFLICT DO UPDATE SET, got {:?}",
4499                    self.peek()
4500                )));
4501            }
4502            self.advance();
4503            let value = self.parse_expr(0)?;
4504            assignments.push((col, value));
4505            if matches!(self.peek(), Token::Comma) {
4506                self.advance();
4507                continue;
4508            }
4509            break;
4510        }
4511        let where_ = if matches!(self.peek(), Token::Where) {
4512            self.advance();
4513            Some(self.parse_expr(0)?)
4514        } else {
4515            None
4516        };
4517        Ok(crate::ast::OnConflictAction::Update {
4518            assignments,
4519            where_,
4520        })
4521    }
4522
4523    fn parse_select_list(&mut self) -> Result<Vec<SelectItem>, ParseError> {
4524        let mut items = Vec::new();
4525        loop {
4526            items.push(self.parse_select_item()?);
4527            if matches!(self.peek(), Token::Comma) {
4528                self.advance();
4529            } else {
4530                break;
4531            }
4532        }
4533        Ok(items)
4534    }
4535
4536    fn parse_select_item(&mut self) -> Result<SelectItem, ParseError> {
4537        if matches!(self.peek(), Token::Star) {
4538            self.advance();
4539            return Ok(SelectItem::Wildcard);
4540        }
4541        let expr = self.parse_expr(0)?;
4542        let alias = self.parse_optional_alias();
4543        Ok(SelectItem::Expr { expr, alias })
4544    }
4545
4546    fn parse_table_ref(&mut self) -> Result<TableRef, ParseError> {
4547        // v7.11.7 — `FROM unnest(<expr>) [AS] <alias>` set-returning
4548        // source. Detect at the head before the bare-ident fallback;
4549        // unnest is not a reserved token.
4550        if matches!(self.peek(), Token::Ident(s) | Token::QuotedIdent(s) if s.eq_ignore_ascii_case("unnest"))
4551            && matches!(self.tokens.get(self.pos + 1), Some(Token::LParen))
4552        {
4553            self.advance(); // unnest
4554            self.advance(); // (
4555            let expr = self.parse_expr(0)?;
4556            if !matches!(self.peek(), Token::RParen) {
4557                return Err(self.err(alloc::format!(
4558                    "expected ')' after unnest() argument, got {:?}",
4559                    self.peek()
4560                )));
4561            }
4562            self.advance();
4563            let (alias_ident, unnest_column_aliases) = self.parse_optional_alias_with_columns();
4564            let name = alias_ident.clone().unwrap_or_else(|| "unnest".to_string());
4565            return Ok(TableRef {
4566                name,
4567                alias: alias_ident,
4568                as_of_segment: None,
4569                unnest_expr: Some(Box::new(expr)),
4570                unnest_column_aliases,
4571            });
4572        }
4573        let name = self.expect_ident_like()?;
4574        // v6.10.2 — optional `AS OF SEGMENT '<id>'` cold-tier
4575        // time-travel clause. Parse BEFORE the alias so the
4576        // alias can still ride at the tail (`tbl AS OF SEGMENT
4577        // '5' alias`). `AS` is a reserved keyword token, while
4578        // `OF` and `SEGMENT` are bare idents.
4579        let as_of_segment = if matches!(self.peek(), Token::As)
4580            && matches!(self.tokens.get(self.pos + 1), Some(Token::Ident(s) | Token::QuotedIdent(s)) if s.eq_ignore_ascii_case("of"))
4581        {
4582            self.advance(); // AS
4583            self.advance(); // OF
4584            let kw = match self.peek().clone() {
4585                Token::Ident(s) | Token::QuotedIdent(s) => s,
4586                other => {
4587                    return Err(self.err(format!("expected SEGMENT after AS OF, got {other:?}")));
4588                }
4589            };
4590            if !kw.eq_ignore_ascii_case("segment") {
4591                return Err(self.err(format!(
4592                    "expected SEGMENT after AS OF, got {kw:?}; v6.10.2 supports SEGMENT only"
4593                )));
4594            }
4595            self.advance();
4596            // Segment id literal — accept either a string or
4597            // integer for operator ergonomics.
4598            let id = match self.advance() {
4599                Token::String(s) => s
4600                    .parse::<u32>()
4601                    .map_err(|e| self.err(format!("AS OF SEGMENT id parse: {e}")))?,
4602                Token::Integer(n) => u32::try_from(n)
4603                    .map_err(|e| self.err(format!("AS OF SEGMENT id parse: {e}")))?,
4604                other => {
4605                    return Err(self.err(format!(
4606                        "expected segment id literal after AS OF SEGMENT, got {other:?}"
4607                    )));
4608                }
4609            };
4610            Some(id)
4611        } else {
4612            None
4613        };
4614        let alias = self.parse_optional_alias();
4615        Ok(TableRef {
4616            name,
4617            alias,
4618            as_of_segment,
4619            unnest_expr: None,
4620            unnest_column_aliases: Vec::new(),
4621        })
4622    }
4623
4624    /// v7.13.2 — mailrs round-6 S5. Like `parse_optional_alias`
4625    /// but also accepts `AS alias(col [, col, …])` — the
4626    /// PG-standard table-function column-list form. The column
4627    /// list is only honoured when paired with `UNNEST(...)` in
4628    /// the parent; other call sites currently discard it.
4629    fn parse_optional_alias_with_columns(&mut self) -> (Option<String>, Vec<String>) {
4630        let alias = self.parse_optional_alias();
4631        if alias.is_none() {
4632            return (None, Vec::new());
4633        }
4634        let mut cols: Vec<String> = Vec::new();
4635        if matches!(self.peek(), Token::LParen) {
4636            self.advance();
4637            loop {
4638                match self.peek().clone() {
4639                    Token::Ident(s) | Token::QuotedIdent(s) => {
4640                        self.advance();
4641                        cols.push(s);
4642                    }
4643                    _ => break,
4644                }
4645                if matches!(self.peek(), Token::Comma) {
4646                    self.advance();
4647                    continue;
4648                }
4649                break;
4650            }
4651            if matches!(self.peek(), Token::RParen) {
4652                self.advance();
4653            }
4654        }
4655        (alias, cols)
4656    }
4657
4658    /// FROM-clause: a primary table reference plus zero-or-more joined
4659    /// peers expressed via either `, <table>` (cross-product, no ON) or
4660    /// `[INNER|LEFT [OUTER]|CROSS] JOIN <table> [ON expr]`. v1.10 keeps
4661    /// the join list flat (left-associative nested-loop semantics).
4662    fn parse_from_clause(&mut self) -> Result<FromClause, ParseError> {
4663        let primary = self.parse_table_ref()?;
4664        let mut joins = Vec::new();
4665        loop {
4666            // `, <table>` — cross-product with no ON.
4667            if matches!(self.peek(), Token::Comma) {
4668                self.advance();
4669                let table = self.parse_table_ref()?;
4670                joins.push(FromJoin {
4671                    kind: JoinKind::Cross,
4672                    table,
4673                    on: None,
4674                });
4675                continue;
4676            }
4677            // Explicit JOIN syntax. Accept INNER JOIN, LEFT [OUTER] JOIN,
4678            // CROSS JOIN, and bare JOIN (defaults to INNER).
4679            let kind =
4680                match self.peek() {
4681                    Token::Inner => {
4682                        self.advance();
4683                        if !matches!(self.peek(), Token::Join) {
4684                            return Err(self
4685                                .err(format!("expected JOIN after INNER, got {:?}", self.peek())));
4686                        }
4687                        self.advance();
4688                        JoinKind::Inner
4689                    }
4690                    Token::Left => {
4691                        self.advance();
4692                        if matches!(self.peek(), Token::Outer) {
4693                            self.advance();
4694                        }
4695                        if !matches!(self.peek(), Token::Join) {
4696                            return Err(self.err(format!(
4697                                "expected JOIN after LEFT [OUTER], got {:?}",
4698                                self.peek()
4699                            )));
4700                        }
4701                        self.advance();
4702                        JoinKind::Left
4703                    }
4704                    Token::Cross => {
4705                        self.advance();
4706                        if !matches!(self.peek(), Token::Join) {
4707                            return Err(self
4708                                .err(format!("expected JOIN after CROSS, got {:?}", self.peek())));
4709                        }
4710                        self.advance();
4711                        JoinKind::Cross
4712                    }
4713                    Token::Join => {
4714                        self.advance();
4715                        JoinKind::Inner
4716                    }
4717                    _ => break,
4718                };
4719            let table = self.parse_table_ref()?;
4720            let on = if matches!(self.peek(), Token::On) {
4721                self.advance();
4722                Some(self.parse_expr(0)?)
4723            } else if kind == JoinKind::Cross {
4724                None
4725            } else {
4726                return Err(self.err(format!(
4727                    "expected ON after {:?} JOIN, got {:?}",
4728                    kind,
4729                    self.peek()
4730                )));
4731            };
4732            joins.push(FromJoin { kind, table, on });
4733        }
4734        Ok(FromClause { primary, joins })
4735    }
4736
4737    /// Optional alias after an expression or table:
4738    /// `AS <ident>` is unambiguous; a bare `<ident>` directly after is also
4739    /// accepted (PG-style implicit alias). Returns `None` if the next token
4740    /// is not alias-shaped (e.g. comma, FROM, WHERE, semicolon, EOF, operator).
4741    fn parse_optional_alias(&mut self) -> Option<String> {
4742        if matches!(self.peek(), Token::As) {
4743            self.advance();
4744            // After AS, the next token MUST be an identifier-like — if not,
4745            // we still return None and let the caller surface the error on the
4746            // next expectation. v0.2 keeps the alias path forgiving; the
4747            // corpus tests don't exercise the malformed case.
4748            if let Token::Ident(_) | Token::QuotedIdent(_) = self.peek() {
4749                return self.expect_ident_like().ok();
4750            }
4751            return None;
4752        }
4753        if let Token::Ident(_) | Token::QuotedIdent(_) = self.peek() {
4754            return self.expect_ident_like().ok();
4755        }
4756        None
4757    }
4758
4759    /// Pratt loop. `min_prec` is the minimum binary-op precedence we'll accept.
4760    fn parse_expr(&mut self, min_prec: u8) -> Result<Expr, ParseError> {
4761        let mut lhs = self.parse_unary()?;
4762        while let Some((op, prec)) = binop_from(self.peek()) {
4763            if prec < min_prec {
4764                break;
4765            }
4766            self.advance();
4767            // v7.10.12 — `x <op> ANY(arr)` / `x <op> ALL(arr)`.
4768            // ANY is a bare ident; ALL is a reserved Token. Both
4769            // require an immediate `(` to disambiguate from
4770            // identifier columns named `any` / `all`.
4771            let any_kind = match self.peek() {
4772                Token::All if matches!(self.tokens.get(self.pos + 1), Some(Token::LParen)) => {
4773                    Some(false)
4774                }
4775                Token::Ident(s) | Token::QuotedIdent(s)
4776                    if (s.eq_ignore_ascii_case("any") || s.eq_ignore_ascii_case("all"))
4777                        && matches!(self.tokens.get(self.pos + 1), Some(Token::LParen)) =>
4778                {
4779                    Some(s.eq_ignore_ascii_case("any"))
4780                }
4781                _ => None,
4782            };
4783            if let Some(is_any) = any_kind {
4784                self.advance(); // ident
4785                self.advance(); // (
4786                let arr = self.parse_expr(0)?;
4787                if !matches!(self.peek(), Token::RParen) {
4788                    return Err(self.err(alloc::format!(
4789                        "expected ')' after ANY/ALL argument, got {:?}",
4790                        self.peek()
4791                    )));
4792                }
4793                self.advance();
4794                lhs = Expr::AnyAll {
4795                    expr: Box::new(lhs),
4796                    op,
4797                    array: Box::new(arr),
4798                    is_any,
4799                };
4800                continue;
4801            }
4802            let rhs = self.parse_expr(prec + 1)?;
4803            lhs = Expr::Binary {
4804                lhs: Box::new(lhs),
4805                op,
4806                rhs: Box::new(rhs),
4807            };
4808        }
4809        Ok(lhs)
4810    }
4811
4812    fn parse_unary(&mut self) -> Result<Expr, ParseError> {
4813        match self.peek() {
4814            Token::Not => {
4815                self.advance();
4816                // NOT sits between AND (2) and comparisons (4) — bind everything
4817                // ≥3, which leaves AND/OR outside.
4818                let e = self.parse_expr(3)?;
4819                Ok(Expr::Unary {
4820                    op: UnOp::Not,
4821                    expr: Box::new(e),
4822                })
4823            }
4824            Token::Minus => {
4825                self.advance();
4826                // Unary minus binds tighter than `*`/`/` (now at prec 7 after
4827                // `<->` slotted into 5 and arithmetic shifted up).
4828                let e = self.parse_expr(8)?;
4829                Ok(Expr::Unary {
4830                    op: UnOp::Neg,
4831                    expr: Box::new(e),
4832                })
4833            }
4834            _ => self.parse_atom(),
4835        }
4836    }
4837
4838    fn parse_atom(&mut self) -> Result<Expr, ParseError> {
4839        let tok_pos = self.pos;
4840        match self.advance() {
4841            Token::Integer(n) => Ok(Expr::Literal(Literal::Integer(n))),
4842            Token::Float(x) => Ok(Expr::Literal(Literal::Float(x))),
4843            Token::String(s) => Ok(Expr::Literal(Literal::String(s))),
4844            Token::True => Ok(Expr::Literal(Literal::Bool(true))),
4845            Token::False => Ok(Expr::Literal(Literal::Bool(false))),
4846            Token::Null => Ok(Expr::Literal(Literal::Null)),
4847            // v6.1.1 — `$N` placeholder. The actual Value lookup
4848            // happens in the engine eval path against the prepared-
4849            // statement bind buffer.
4850            Token::Placeholder(n) => Ok(Expr::Placeholder(n)),
4851            Token::LParen => {
4852                // v4.10: `(SELECT ...)` in expression position is a
4853                // scalar subquery; otherwise it's a parenthesised
4854                // expression. Peek for SELECT keyword to dispatch.
4855                if matches!(self.peek(), Token::Select) {
4856                    let inner = self.parse_select_stmt()?;
4857                    match self.advance() {
4858                        Token::RParen => {
4859                            let Statement::Select(s) = inner else {
4860                                unreachable!("parse_select_stmt returns Select")
4861                            };
4862                            Ok(Expr::ScalarSubquery(Box::new(s)))
4863                        }
4864                        other => Err(ParseError {
4865                            message: format!("expected ')' after scalar subquery, got {other:?}"),
4866                            token_pos: self.pos.saturating_sub(1),
4867                        }),
4868                    }
4869                } else {
4870                    let e = self.parse_expr(0)?;
4871                    match self.advance() {
4872                        Token::RParen => Ok(e),
4873                        other => Err(ParseError {
4874                            message: format!("expected ')', got {other:?}"),
4875                            token_pos: self.pos.saturating_sub(1),
4876                        }),
4877                    }
4878                }
4879            }
4880            Token::LBracket => self.parse_vector_literal_body(),
4881            Token::Extract => self.parse_extract_atom(),
4882            Token::Interval => self.parse_interval_atom(),
4883            // v4.10: EXISTS / NOT EXISTS. EXISTS isn't a reserved
4884            // token; we match on the bare ident. NOT is a token
4885            // (consumed in the comparison rung), but `EXISTS (...)`
4886            // at the top of an expression starts here.
4887            Token::Ident(s) | Token::QuotedIdent(s) if s.eq_ignore_ascii_case("exists") => {
4888                self.parse_exists_atom(false)
4889            }
4890            // v7.13.0 — `CASE [<operand>] WHEN <cond> THEN <val>
4891            // [WHEN ...] [ELSE <val>] END` (mailrs round-5 G9).
4892            // CASE is a bare ident; we dispatch on lowercase match.
4893            Token::Ident(s) | Token::QuotedIdent(s) if s.eq_ignore_ascii_case("case") => {
4894                self.parse_case_atom()
4895            }
4896            // v7.10.10 — `ARRAY[expr, expr, …]` constructor. ARRAY
4897            // is not a reserved token; we match by case-insensitive
4898            // ident. The opening `[` must follow immediately.
4899            Token::Ident(s) | Token::QuotedIdent(s)
4900                if s.eq_ignore_ascii_case("array") && matches!(self.peek(), Token::LBracket) =>
4901            {
4902                self.advance(); // consume `[`
4903                let mut items: Vec<Expr> = Vec::new();
4904                if !matches!(self.peek(), Token::RBracket) {
4905                    loop {
4906                        items.push(self.parse_expr(0)?);
4907                        match self.peek() {
4908                            Token::Comma => {
4909                                self.advance();
4910                            }
4911                            Token::RBracket => break,
4912                            other => {
4913                                return Err(self.err(alloc::format!(
4914                                    "expected ',' or ']' in ARRAY literal, got {other:?}"
4915                                )));
4916                            }
4917                        }
4918                    }
4919                }
4920                self.advance(); // consume `]`
4921                Ok(Expr::Array(items))
4922            }
4923            Token::Ident(s) | Token::QuotedIdent(s) => self.finish_ident_atom(s),
4924            other => Err(ParseError {
4925                message: format!("unexpected token {other:?} in expression"),
4926                token_pos: tok_pos,
4927            }),
4928        }
4929        // After parsing the atom, fold any postfix `::vector` casts.
4930        .and_then(|atom| self.finish_postfix_casts(atom))
4931    }
4932
4933    /// Postfix operators on an atom: `::TYPE` cast and `IS [NOT] NULL`.
4934    /// Both bind tighter than any binary op.
4935    fn finish_postfix_casts(&mut self, mut expr: Expr) -> Result<Expr, ParseError> {
4936        loop {
4937            if matches!(self.peek(), Token::DoubleColon) {
4938                self.advance();
4939                // v7.9.25 / v7.9.26 — broaden the postfix `::` cast
4940                // target set to include INTERVAL (reserved Token),
4941                // TIMESTAMPTZ, and PG catalog regtype / regclass.
4942                // mailrs follow-up H3a + H3b.
4943                let target = match self.advance() {
4944                    Token::Ident(s) => match s.to_ascii_lowercase().as_str() {
4945                        "int" | "integer" | "int4" => {
4946                            if matches!(self.peek(), Token::LBracket)
4947                                && matches!(self.tokens.get(self.pos + 1), Some(Token::RBracket))
4948                            {
4949                                self.advance();
4950                                self.advance();
4951                                CastTarget::IntArray
4952                            } else {
4953                                CastTarget::Int
4954                            }
4955                        }
4956                        "bigint" | "int8" => {
4957                            if matches!(self.peek(), Token::LBracket)
4958                                && matches!(self.tokens.get(self.pos + 1), Some(Token::RBracket))
4959                            {
4960                                self.advance();
4961                                self.advance();
4962                                CastTarget::BigIntArray
4963                            } else {
4964                                CastTarget::BigInt
4965                            }
4966                        }
4967                        "float" | "double" | "real" => CastTarget::Float,
4968                        "text" => {
4969                            // v7.10.11 — `::TEXT[]` widens to TextArray.
4970                            if matches!(self.peek(), Token::LBracket)
4971                                && matches!(self.tokens.get(self.pos + 1), Some(Token::RBracket))
4972                            {
4973                                self.advance();
4974                                self.advance();
4975                                CastTarget::TextArray
4976                            } else {
4977                                CastTarget::Text
4978                            }
4979                        }
4980                        "bool" | "boolean" => CastTarget::Bool,
4981                        "vector" => CastTarget::Vector,
4982                        "date" => CastTarget::Date,
4983                        "timestamp" | "datetime" => CastTarget::Timestamp,
4984                        "timestamptz" => CastTarget::Timestamptz,
4985                        "interval" => CastTarget::Interval,
4986                        "json" => CastTarget::Json,
4987                        "jsonb" => CastTarget::Jsonb,
4988                        "regtype" => CastTarget::RegType,
4989                        "regclass" => CastTarget::RegClass,
4990                        // v7.12.0 — `::tsvector` / `::tsquery`.
4991                        // Engine decodes the LHS text via the PG
4992                        // external form parser.
4993                        "tsvector" => CastTarget::TsVector,
4994                        "tsquery" => CastTarget::TsQuery,
4995                        other => {
4996                            return Err(ParseError {
4997                                message: format!("unsupported cast target `::{other}`"),
4998                                token_pos: self.pos.saturating_sub(1),
4999                            });
5000                        }
5001                    },
5002                    Token::Interval => CastTarget::Interval,
5003                    other => {
5004                        return Err(ParseError {
5005                            message: format!("expected type ident after `::`, got {other:?}"),
5006                            token_pos: self.pos.saturating_sub(1),
5007                        });
5008                    }
5009                };
5010                expr = Expr::Cast {
5011                    expr: Box::new(expr),
5012                    target,
5013                };
5014                continue;
5015            }
5016            if matches!(self.peek(), Token::Is) {
5017                self.advance();
5018                let negated = if matches!(self.peek(), Token::Not) {
5019                    self.advance();
5020                    true
5021                } else {
5022                    false
5023                };
5024                // v7.9.27b — `IS [NOT] DISTINCT FROM <rhs>`.
5025                // mailrs pg_dump.
5026                if matches!(self.peek(), Token::Distinct) {
5027                    self.advance();
5028                    if !matches!(self.peek(), Token::From) {
5029                        return Err(self.err(format!(
5030                            "expected FROM after IS{} DISTINCT, got {:?}",
5031                            if negated { " NOT" } else { "" },
5032                            self.peek()
5033                        )));
5034                    }
5035                    self.advance();
5036                    // Right-hand side: parse at the same precedence
5037                    // tier as comparison so `x IS DISTINCT FROM a + b`
5038                    // groups as `x IS DISTINCT FROM (a + b)`.
5039                    let rhs = self.parse_expr(20)?;
5040                    let op = if negated {
5041                        BinOp::IsNotDistinctFrom
5042                    } else {
5043                        BinOp::IsDistinctFrom
5044                    };
5045                    expr = Expr::Binary {
5046                        op,
5047                        lhs: Box::new(expr),
5048                        rhs: Box::new(rhs),
5049                    };
5050                    continue;
5051                }
5052                if !matches!(self.peek(), Token::Null) {
5053                    return Err(self.err(format!(
5054                        "expected NULL or DISTINCT after IS{}, got {:?}",
5055                        if negated { " NOT" } else { "" },
5056                        self.peek()
5057                    )));
5058                }
5059                self.advance();
5060                expr = Expr::IsNull {
5061                    expr: Box::new(expr),
5062                    negated,
5063                };
5064                continue;
5065            }
5066            // `x [NOT] BETWEEN a AND b`, `x [NOT] IN (...)`, `x [NOT] LIKE p`.
5067            // Look one token ahead so a stray `NOT` not followed by any of
5068            // these flows through to the early return below untouched.
5069            let negated = if matches!(self.peek(), Token::Not) {
5070                let next = self.tokens.get(self.pos + 1);
5071                matches!(next, Some(Token::Between | Token::In | Token::Like))
5072            } else {
5073                false
5074            };
5075            if negated {
5076                self.advance();
5077            }
5078            if matches!(self.peek(), Token::Between) {
5079                expr = self.parse_between_tail(expr, negated)?;
5080                continue;
5081            }
5082            if matches!(self.peek(), Token::In) {
5083                expr = self.parse_in_tail(expr, negated)?;
5084                continue;
5085            }
5086            if matches!(self.peek(), Token::Like) {
5087                self.advance();
5088                // Pattern at the same precedence as other comparison RHSes —
5089                // 5 leaves AND/OR alone so `a LIKE 'x%' AND b` parses right.
5090                let pattern = self.parse_expr(5)?;
5091                expr = Expr::Like {
5092                    expr: Box::new(expr),
5093                    pattern: Box::new(pattern),
5094                    negated,
5095                };
5096                continue;
5097            }
5098            // v7.10.12 — `arr[i]` subscript. PG 1-based; engine
5099            // returns NULL for out-of-range. Multiple subscripts
5100            // chain: `a[i][j]` parses left-to-right.
5101            if matches!(self.peek(), Token::LBracket) {
5102                self.advance();
5103                let index = self.parse_expr(0)?;
5104                if !matches!(self.peek(), Token::RBracket) {
5105                    return Err(self.err(alloc::format!(
5106                        "expected ']' after array index, got {:?}",
5107                        self.peek()
5108                    )));
5109                }
5110                self.advance();
5111                expr = Expr::ArraySubscript {
5112                    target: Box::new(expr),
5113                    index: Box::new(index),
5114                };
5115                continue;
5116            }
5117            return Ok(expr);
5118        }
5119    }
5120
5121    /// `x BETWEEN low AND high`  →  `(x >= low) AND (x <= high)`, wrapped in
5122    /// `NOT` when `negated`. Bounds parse at precedence 5 so the trailing
5123    /// `AND` is not swallowed.
5124    fn parse_between_tail(&mut self, expr: Expr, negated: bool) -> Result<Expr, ParseError> {
5125        self.advance(); // BETWEEN
5126        let low = self.parse_expr(5)?;
5127        if !matches!(self.peek(), Token::And) {
5128            return Err(self.err(format!(
5129                "expected AND after BETWEEN low bound, got {:?}",
5130                self.peek()
5131            )));
5132        }
5133        self.advance();
5134        let high = self.parse_expr(5)?;
5135        let target = Box::new(expr);
5136        let combined = Expr::Binary {
5137            lhs: Box::new(Expr::Binary {
5138                lhs: target.clone(),
5139                op: BinOp::GtEq,
5140                rhs: Box::new(low),
5141            }),
5142            op: BinOp::And,
5143            rhs: Box::new(Expr::Binary {
5144                lhs: target,
5145                op: BinOp::LtEq,
5146                rhs: Box::new(high),
5147            }),
5148        };
5149        Ok(maybe_not(combined, negated))
5150    }
5151
5152    /// `x IN (a, b, c)`  →  chained OR of equalities. Empty list collapses
5153    /// to FALSE (TRUE under NOT IN), matching standard SQL semantics.
5154    /// v4.11: parse `WITH name AS (SELECT ...) [, ...] SELECT ...`.
5155    /// Caller already consumed the leading `WITH` ident.
5156    fn parse_with_cte_then_select(&mut self) -> Result<Statement, ParseError> {
5157        // v4.22: WITH RECURSIVE — optional keyword right after WITH.
5158        // Comes through as an identifier; consume it if present and
5159        // mark every CTE in the clause as recursive (PG semantics —
5160        // the flag is per-WITH, not per-CTE).
5161        let mut recursive = false;
5162        if let Token::Ident(s) | Token::QuotedIdent(s) = self.peek()
5163            && s.eq_ignore_ascii_case("recursive")
5164        {
5165            self.advance();
5166            recursive = true;
5167        }
5168        let mut ctes = Vec::new();
5169        loop {
5170            let name = self.expect_ident_like()?;
5171            // v4.22: optional column-name list — `WITH t(a,b,c) AS ...`.
5172            // PG uses these to rename the body's output columns; we
5173            // do the same below by overriding `columns[i].name`.
5174            let column_overrides: Vec<String> = if matches!(self.peek(), Token::LParen) {
5175                self.advance();
5176                let mut names = Vec::new();
5177                loop {
5178                    names.push(self.expect_ident_like()?);
5179                    if matches!(self.peek(), Token::Comma) {
5180                        self.advance();
5181                        continue;
5182                    }
5183                    break;
5184                }
5185                if !matches!(self.peek(), Token::RParen) {
5186                    return Err(self.err(format!(
5187                        "expected ')' to close CTE column list, got {:?}",
5188                        self.peek()
5189                    )));
5190                }
5191                self.advance();
5192                names
5193            } else {
5194                Vec::new()
5195            };
5196            // AS is a reserved Token::As (used by SELECT-item / FROM
5197            // aliasing) — handle it specially rather than as a bare
5198            // ident.
5199            if !matches!(self.peek(), Token::As) {
5200                return Err(self.err(format!(
5201                    "expected AS after CTE name {name:?}, got {:?}",
5202                    self.peek()
5203                )));
5204            }
5205            self.advance();
5206            if !matches!(self.peek(), Token::LParen) {
5207                return Err(self.err(format!(
5208                    "expected '(' after AS in WITH clause, got {:?}",
5209                    self.peek()
5210                )));
5211            }
5212            self.advance();
5213            if !matches!(self.peek(), Token::Select) {
5214                return Err(self.err(format!("WITH body must be a SELECT, got {:?}", self.peek())));
5215            }
5216            let inner = self.parse_select_stmt()?;
5217            if !matches!(self.peek(), Token::RParen) {
5218                return Err(self.err(format!(
5219                    "expected ')' after CTE body, got {:?}",
5220                    self.peek()
5221                )));
5222            }
5223            self.advance();
5224            let Statement::Select(body) = inner else {
5225                unreachable!("parse_select_stmt returns Select")
5226            };
5227            ctes.push(crate::ast::Cte {
5228                name,
5229                body,
5230                recursive,
5231                column_overrides,
5232            });
5233            if matches!(self.peek(), Token::Comma) {
5234                self.advance();
5235                continue;
5236            }
5237            break;
5238        }
5239        // The body SELECT follows. Must start with SELECT.
5240        if !matches!(self.peek(), Token::Select) {
5241            return Err(self.err(format!(
5242                "expected SELECT after WITH clause, got {:?}",
5243                self.peek()
5244            )));
5245        }
5246        let body_stmt = self.parse_select_stmt()?;
5247        let Statement::Select(mut body) = body_stmt else {
5248            unreachable!()
5249        };
5250        body.ctes = ctes;
5251        Ok(Statement::Select(body))
5252    }
5253
5254    /// v4.10: parse `EXISTS (SELECT ...)`. Caller (`parse_atom`)
5255    /// already consumed the leading `EXISTS` ident via
5256    /// `self.advance()`.
5257    /// v7.13.0 — parse the rest of a `CASE … END` expression after
5258    /// the leading `CASE` ident has been consumed (mailrs round-5
5259    /// G9). Supports both the searched form
5260    /// (`CASE WHEN cond THEN val …`) and the simple form
5261    /// (`CASE operand WHEN val THEN val …`).
5262    fn parse_case_atom(&mut self) -> Result<Expr, ParseError> {
5263        // Disambiguate searched vs simple form: if the next token
5264        // is `WHEN`, we're in the searched form. Otherwise the
5265        // intervening expression is the operand.
5266        let operand = if matches!(self.peek(), Token::Ident(s) if s.eq_ignore_ascii_case("when")) {
5267            None
5268        } else {
5269            Some(Box::new(self.parse_expr(0)?))
5270        };
5271        let mut branches: Vec<(Expr, Expr)> = Vec::new();
5272        loop {
5273            match self.peek() {
5274                Token::Ident(s) if s.eq_ignore_ascii_case("when") => {
5275                    self.advance();
5276                    let cond = self.parse_expr(0)?;
5277                    match self.peek() {
5278                        Token::Ident(t) if t.eq_ignore_ascii_case("then") => {
5279                            self.advance();
5280                        }
5281                        other => {
5282                            return Err(self.err(alloc::format!(
5283                                "expected THEN after CASE WHEN <expr>, got {other:?}"
5284                            )));
5285                        }
5286                    }
5287                    let value = self.parse_expr(0)?;
5288                    branches.push((cond, value));
5289                }
5290                _ => break,
5291            }
5292        }
5293        if branches.is_empty() {
5294            return Err(self.err("CASE requires at least one WHEN … THEN … branch".into()));
5295        }
5296        let else_branch = if matches!(self.peek(), Token::Ident(s) if s.eq_ignore_ascii_case("else"))
5297        {
5298            self.advance();
5299            Some(Box::new(self.parse_expr(0)?))
5300        } else {
5301            None
5302        };
5303        match self.peek() {
5304            Token::Ident(s) if s.eq_ignore_ascii_case("end") => {
5305                self.advance();
5306            }
5307            other => {
5308                return Err(self.err(alloc::format!(
5309                    "expected END to close CASE expression, got {other:?}"
5310                )));
5311            }
5312        }
5313        Ok(Expr::Case {
5314            operand,
5315            branches,
5316            else_branch,
5317        })
5318    }
5319
5320    fn parse_exists_atom(&mut self, negated: bool) -> Result<Expr, ParseError> {
5321        if !matches!(self.peek(), Token::LParen) {
5322            return Err(self.err(format!("expected '(' after EXISTS, got {:?}", self.peek())));
5323        }
5324        self.advance();
5325        let inner = self.parse_select_stmt()?;
5326        if !matches!(self.peek(), Token::RParen) {
5327            return Err(self.err(format!(
5328                "expected ')' after EXISTS-subquery, got {:?}",
5329                self.peek()
5330            )));
5331        }
5332        self.advance();
5333        let Statement::Select(s) = inner else {
5334            unreachable!("parse_select_stmt returns Select")
5335        };
5336        Ok(Expr::Exists {
5337            subquery: Box::new(s),
5338            negated,
5339        })
5340    }
5341
5342    fn parse_in_tail(&mut self, expr: Expr, negated: bool) -> Result<Expr, ParseError> {
5343        self.advance(); // IN
5344        if !matches!(self.peek(), Token::LParen) {
5345            return Err(self.err(format!("expected '(' after IN, got {:?}", self.peek())));
5346        }
5347        self.advance();
5348        // v4.10: `IN (SELECT ...)` — subquery branch.
5349        if matches!(self.peek(), Token::Select) {
5350            let inner = self.parse_select_stmt()?;
5351            if !matches!(self.peek(), Token::RParen) {
5352                return Err(self.err(format!(
5353                    "expected ')' after IN-subquery, got {:?}",
5354                    self.peek()
5355                )));
5356            }
5357            self.advance();
5358            let Statement::Select(s) = inner else {
5359                unreachable!("parse_select_stmt always returns Statement::Select")
5360            };
5361            return Ok(Expr::InSubquery {
5362                expr: Box::new(expr),
5363                subquery: Box::new(s),
5364                negated,
5365            });
5366        }
5367        let mut elements = Vec::new();
5368        if !matches!(self.peek(), Token::RParen) {
5369            loop {
5370                elements.push(self.parse_expr(0)?);
5371                match self.peek() {
5372                    Token::Comma => {
5373                        self.advance();
5374                    }
5375                    Token::RParen => break,
5376                    other => {
5377                        return Err(
5378                            self.err(format!("expected ',' or ')' in IN list, got {other:?}"))
5379                        );
5380                    }
5381                }
5382            }
5383        }
5384        self.advance(); // ')'
5385        let target = Box::new(expr);
5386        let combined = if elements.is_empty() {
5387            Expr::Literal(Literal::Bool(false))
5388        } else {
5389            let mut iter = elements.into_iter();
5390            let first = iter.next().unwrap();
5391            let mut acc = Expr::Binary {
5392                lhs: target.clone(),
5393                op: BinOp::Eq,
5394                rhs: Box::new(first),
5395            };
5396            for elt in iter {
5397                acc = Expr::Binary {
5398                    lhs: Box::new(acc),
5399                    op: BinOp::Or,
5400                    rhs: Box::new(Expr::Binary {
5401                        lhs: target.clone(),
5402                        op: BinOp::Eq,
5403                        rhs: Box::new(elt),
5404                    }),
5405                };
5406            }
5407            acc
5408        };
5409        Ok(maybe_not(combined, negated))
5410    }
5411
5412    /// Parse a pgvector array literal `[ x1, x2, ... ]`. The opening `[` is
5413    /// already consumed by the caller. Elements must be numeric literals
5414    /// (with optional unary `-`); any compound expression is rejected at
5415    /// parse time so the runtime never needs to evaluate inside a vector.
5416    /// `EXTRACT(<field> FROM <source>)`. The dispatching `parse_atom`
5417    /// has already consumed the `EXTRACT` token before calling us —
5418    /// we pick up at the opening `(`.
5419    fn parse_extract_atom(&mut self) -> Result<Expr, ParseError> {
5420        if !matches!(self.peek(), Token::LParen) {
5421            return Err(self.err(format!("expected '(' after EXTRACT, got {:?}", self.peek())));
5422        }
5423        self.advance();
5424        let field_name = self.expect_ident_like()?;
5425        let field = match field_name.to_ascii_lowercase().as_str() {
5426            "year" => ExtractField::Year,
5427            "month" => ExtractField::Month,
5428            "day" => ExtractField::Day,
5429            "hour" => ExtractField::Hour,
5430            "minute" => ExtractField::Minute,
5431            "second" => ExtractField::Second,
5432            "microsecond" | "microseconds" => ExtractField::Microsecond,
5433            other => {
5434                return Err(self.err(format!(
5435                    "unknown EXTRACT field {other:?}; \
5436                     supported: YEAR, MONTH, DAY, HOUR, MINUTE, SECOND, MICROSECOND"
5437                )));
5438            }
5439        };
5440        if !matches!(self.peek(), Token::From) {
5441            return Err(self.err(format!(
5442                "expected FROM after EXTRACT field, got {:?}",
5443                self.peek()
5444            )));
5445        }
5446        self.advance();
5447        let source = self.parse_expr(0)?;
5448        if !matches!(self.peek(), Token::RParen) {
5449            return Err(self.err(format!(
5450                "expected ')' to close EXTRACT, got {:?}",
5451                self.peek()
5452            )));
5453        }
5454        self.advance();
5455        Ok(Expr::Extract {
5456            field,
5457            source: Box::new(source),
5458        })
5459    }
5460
5461    /// `INTERVAL '<n> <unit> [<n> <unit> ...]'` — the `INTERVAL` keyword
5462    /// is already consumed; we expect a single string literal next and
5463    /// resolve it into `Literal::Interval` at parse time so the engine
5464    /// never has to re-tokenise inside the string.
5465    fn parse_interval_atom(&mut self) -> Result<Expr, ParseError> {
5466        let tok = self.advance();
5467        let Token::String(text) = tok else {
5468            return Err(self.err(format!(
5469                "expected string literal after INTERVAL, got {tok:?}"
5470            )));
5471        };
5472        let (months, micros) = parse_interval_text(&text).ok_or_else(|| ParseError {
5473            message: format!(
5474                "cannot parse INTERVAL {text:?}; \
5475                     expected `<n> <unit> [<n> <unit> ...]` with units \
5476                     microsecond[s], millisecond[s], second[s], minute[s], \
5477                     hour[s], day[s], week[s], month[s], year[s]"
5478            ),
5479            token_pos: self.pos.saturating_sub(1),
5480        })?;
5481        Ok(Expr::Literal(Literal::Interval {
5482            months,
5483            micros,
5484            text,
5485        }))
5486    }
5487
5488    fn parse_vector_literal_body(&mut self) -> Result<Expr, ParseError> {
5489        let mut elems = Vec::new();
5490        if matches!(self.peek(), Token::RBracket) {
5491            self.advance();
5492            return Ok(Expr::Literal(Literal::Vector(elems)));
5493        }
5494        loop {
5495            let e = self.parse_expr(0)?;
5496            let x = extract_numeric_literal(&e).ok_or_else(|| ParseError {
5497                message: format!("vector element must be a numeric literal, got {e:?}"),
5498                token_pos: self.pos,
5499            })?;
5500            elems.push(x);
5501            match self.peek() {
5502                Token::Comma => {
5503                    self.advance();
5504                }
5505                Token::RBracket => {
5506                    self.advance();
5507                    break;
5508                }
5509                other => {
5510                    return Err(self.err(format!("expected ',' or ']' in vector, got {other:?}")));
5511                }
5512            }
5513        }
5514        Ok(Expr::Literal(Literal::Vector(elems)))
5515    }
5516
5517    /// Atom that started with an identifier: could be `t.col`, `col`, or
5518    /// `func(arg, ...)`. Detect each shape by looking at the next token.
5519    /// v4.12: parse `(PARTITION BY expr, ... ORDER BY expr [DESC]
5520    /// [, ...])`. Caller has already consumed `OVER`. Either clause
5521    /// is optional; an empty `()` is also legal (PG semantics).
5522    /// v6.4.2 — consume an optional `IGNORE NULLS` / `RESPECT NULLS`
5523    /// modifier between `name(args)` and `OVER (...)`. Default is
5524    /// `Respect`. Unrecognised idents leave the stream unchanged.
5525    fn parse_null_treatment_modifier(&mut self) -> NullTreatment {
5526        let Token::Ident(s) = self.peek().clone() else {
5527            return NullTreatment::Respect;
5528        };
5529        let is_ignore = s.eq_ignore_ascii_case("ignore");
5530        let is_respect = s.eq_ignore_ascii_case("respect");
5531        if !is_ignore && !is_respect {
5532            return NullTreatment::Respect;
5533        }
5534        // Lookahead for NULLS — only consume both tokens together.
5535        // pos+1 must hold a "nulls" ident.
5536        if self.pos + 1 < self.tokens.len()
5537            && let Token::Ident(s2) = &self.tokens[self.pos + 1]
5538            && s2.eq_ignore_ascii_case("nulls")
5539        {
5540            self.advance();
5541            self.advance();
5542            return if is_ignore {
5543                NullTreatment::Ignore
5544            } else {
5545                NullTreatment::Respect
5546            };
5547        }
5548        NullTreatment::Respect
5549    }
5550
5551    /// No frame clause is supported.
5552    #[allow(clippy::type_complexity)] // (partitions, ordered-keys-with-desc) is the natural shape
5553    fn parse_over_clause(
5554        &mut self,
5555    ) -> Result<(Vec<Expr>, Vec<(Expr, bool)>, Option<WindowFrame>), ParseError> {
5556        if !matches!(self.peek(), Token::LParen) {
5557            return Err(self.err(format!("expected '(' after OVER, got {:?}", self.peek())));
5558        }
5559        self.advance();
5560        let mut partition_by = Vec::new();
5561        let mut order_by = Vec::new();
5562        // PARTITION BY ?
5563        if let Token::Ident(s) | Token::QuotedIdent(s) = self.peek()
5564            && s.eq_ignore_ascii_case("partition")
5565        {
5566            self.advance();
5567            if !matches!(self.peek(), Token::By) {
5568                return Err(self.err(format!(
5569                    "expected BY after PARTITION, got {:?}",
5570                    self.peek()
5571                )));
5572            }
5573            self.advance();
5574            loop {
5575                partition_by.push(self.parse_expr(0)?);
5576                if matches!(self.peek(), Token::Comma) {
5577                    self.advance();
5578                    continue;
5579                }
5580                break;
5581            }
5582        }
5583        // ORDER BY ?
5584        if matches!(self.peek(), Token::Order) {
5585            self.advance();
5586            if !matches!(self.peek(), Token::By) {
5587                return Err(self.err(format!("expected BY after ORDER, got {:?}", self.peek())));
5588            }
5589            self.advance();
5590            loop {
5591                let e = self.parse_expr(0)?;
5592                let desc = if matches!(self.peek(), Token::Desc) {
5593                    self.advance();
5594                    true
5595                } else if matches!(self.peek(), Token::Asc) {
5596                    self.advance();
5597                    false
5598                } else {
5599                    false
5600                };
5601                order_by.push((e, desc));
5602                if matches!(self.peek(), Token::Comma) {
5603                    self.advance();
5604                    continue;
5605                }
5606                break;
5607            }
5608        }
5609        // v4.20: optional explicit frame, `ROWS ...` / `RANGE ...`.
5610        // Both keywords come through the lexer as identifiers; match
5611        // case-insensitively.
5612        let mut frame: Option<WindowFrame> = None;
5613        if let Token::Ident(s) | Token::QuotedIdent(s) = self.peek() {
5614            let kind = if s.eq_ignore_ascii_case("rows") {
5615                Some(FrameKind::Rows)
5616            } else if s.eq_ignore_ascii_case("range") {
5617                Some(FrameKind::Range)
5618            } else {
5619                None
5620            };
5621            if let Some(kind) = kind {
5622                self.advance();
5623                frame = Some(self.parse_frame_tail(kind)?);
5624            }
5625        }
5626        if !matches!(self.peek(), Token::RParen) {
5627            return Err(self.err(format!(
5628                "expected ')' to close OVER clause, got {:?}",
5629                self.peek()
5630            )));
5631        }
5632        self.advance();
5633        Ok((partition_by, order_by, frame))
5634    }
5635
5636    /// v4.20: parse the tail of an explicit frame, given the `ROWS`
5637    /// or `RANGE` keyword was just consumed. Accepts both
5638    /// `BETWEEN <bound> AND <bound>` and the single-bound shorthand
5639    /// (`ROWS UNBOUNDED PRECEDING`, `ROWS 5 PRECEDING`, etc.) which
5640    /// PG normalises to `BETWEEN <bound> AND CURRENT ROW`.
5641    fn parse_frame_tail(&mut self, kind: FrameKind) -> Result<WindowFrame, ParseError> {
5642        if matches!(self.peek(), Token::Between) {
5643            self.advance();
5644            let start = self.parse_frame_bound()?;
5645            if !matches!(self.peek(), Token::And) {
5646                return Err(self.err(format!("expected AND in frame spec, got {:?}", self.peek())));
5647            }
5648            self.advance();
5649            let end = self.parse_frame_bound()?;
5650            Ok(WindowFrame {
5651                kind,
5652                start,
5653                end: Some(end),
5654            })
5655        } else {
5656            let start = self.parse_frame_bound()?;
5657            Ok(WindowFrame {
5658                kind,
5659                start,
5660                end: None,
5661            })
5662        }
5663    }
5664
5665    /// Parse one frame bound: `UNBOUNDED PRECEDING`, `<n> PRECEDING`,
5666    /// `CURRENT ROW`, `<n> FOLLOWING`, `UNBOUNDED FOLLOWING`.
5667    fn parse_frame_bound(&mut self) -> Result<FrameBound, ParseError> {
5668        // Number-led: "<n> PRECEDING" / "<n> FOLLOWING".
5669        if let Token::Integer(n) = *self.peek() {
5670            self.advance();
5671            let n: u64 = u64::try_from(n).map_err(|_| {
5672                self.err(format!(
5673                    "invalid frame offset {n} — expected non-negative integer"
5674                ))
5675            })?;
5676            let dir = self.expect_ident_like()?;
5677            return if dir.eq_ignore_ascii_case("preceding") {
5678                Ok(FrameBound::OffsetPreceding(n))
5679            } else if dir.eq_ignore_ascii_case("following") {
5680                Ok(FrameBound::OffsetFollowing(n))
5681            } else {
5682                Err(self.err(format!(
5683                    "expected PRECEDING or FOLLOWING after offset, got {dir:?}"
5684                )))
5685            };
5686        }
5687        let first = self.expect_ident_like()?;
5688        if first.eq_ignore_ascii_case("unbounded") {
5689            let dir = self.expect_ident_like()?;
5690            return if dir.eq_ignore_ascii_case("preceding") {
5691                Ok(FrameBound::UnboundedPreceding)
5692            } else if dir.eq_ignore_ascii_case("following") {
5693                Ok(FrameBound::UnboundedFollowing)
5694            } else {
5695                Err(self.err(format!(
5696                    "expected PRECEDING or FOLLOWING after UNBOUNDED, got {dir:?}"
5697                )))
5698            };
5699        }
5700        if first.eq_ignore_ascii_case("current") {
5701            let row = self.expect_ident_like()?;
5702            if !row.eq_ignore_ascii_case("row") {
5703                return Err(self.err(format!("expected ROW after CURRENT, got {row:?}")));
5704            }
5705            return Ok(FrameBound::CurrentRow);
5706        }
5707        Err(self.err(format!(
5708            "expected frame bound (UNBOUNDED/CURRENT/<n>), got {first:?}"
5709        )))
5710    }
5711
5712    fn finish_ident_atom(&mut self, first: String) -> Result<Expr, ParseError> {
5713        if matches!(self.peek(), Token::Dot) {
5714            self.advance();
5715            let name = self.expect_ident_like()?;
5716            // v7.14.0 — schema-qualified function call
5717            // `<schema>.<fn>(args)`. PG dumps emit
5718            // `pg_catalog.set_config(...)` in the preamble. SPG
5719            // is single-namespace: drop the schema prefix and
5720            // route the dispatch on the bare function name.
5721            if matches!(self.peek(), Token::LParen) {
5722                return self.finish_ident_atom(name);
5723            }
5724            return Ok(Expr::Column(ColumnName {
5725                qualifier: Some(first),
5726                name,
5727            }));
5728        }
5729        if matches!(self.peek(), Token::LParen) {
5730            self.advance();
5731            // `COUNT(*)` — special-cased here because `*` isn't a normal
5732            // expression token. Lower-case match on `first` since the lexer
5733            // folds identifiers.
5734            if first.eq_ignore_ascii_case("count") && matches!(self.peek(), Token::Star) {
5735                self.advance();
5736                if !matches!(self.peek(), Token::RParen) {
5737                    return Err(self.err(format!(
5738                        "expected ')' after COUNT(*), got {:?}",
5739                        self.peek()
5740                    )));
5741                }
5742                self.advance();
5743                // v4.12: COUNT(*) OVER (...) — same window tail.
5744                let null_treatment = self.parse_null_treatment_modifier();
5745                if let Token::Ident(s) | Token::QuotedIdent(s) = self.peek()
5746                    && s.eq_ignore_ascii_case("over")
5747                {
5748                    self.advance();
5749                    let (partition_by, order_by, frame) = self.parse_over_clause()?;
5750                    return Ok(Expr::WindowFunction {
5751                        name: "count_star".into(),
5752                        args: Vec::new(),
5753                        partition_by,
5754                        order_by,
5755                        frame,
5756                        null_treatment,
5757                    });
5758                }
5759                return Ok(Expr::FunctionCall {
5760                    name: "count_star".into(),
5761                    args: Vec::new(),
5762                });
5763            }
5764            // Function call. PG-style: zero-or-more comma-separated args.
5765            let mut args = Vec::new();
5766            if !matches!(self.peek(), Token::RParen) {
5767                loop {
5768                    args.push(self.parse_expr(0)?);
5769                    match self.peek() {
5770                        Token::Comma => {
5771                            self.advance();
5772                        }
5773                        Token::RParen => break,
5774                        other => {
5775                            return Err(self.err(format!(
5776                                "expected ',' or ')' in function args, got {other:?}"
5777                            )));
5778                        }
5779                    }
5780                }
5781            }
5782            self.advance(); // consume ')'
5783            // v4.12: window-function tail — `name(args) OVER (...)`.
5784            // Promotes the just-parsed FunctionCall into a
5785            // WindowFunction node carrying partition + order.
5786            // v6.4.2: also accepts `name(args) IGNORE NULLS OVER (...)`
5787            // / `RESPECT NULLS OVER (...)` between the closing paren
5788            // and `OVER`.
5789            let null_treatment = self.parse_null_treatment_modifier();
5790            if let Token::Ident(s) | Token::QuotedIdent(s) = self.peek()
5791                && s.eq_ignore_ascii_case("over")
5792            {
5793                self.advance();
5794                let (partition_by, order_by, frame) = self.parse_over_clause()?;
5795                return Ok(Expr::WindowFunction {
5796                    name: first,
5797                    args,
5798                    partition_by,
5799                    order_by,
5800                    frame,
5801                    null_treatment,
5802                });
5803            }
5804            return Ok(Expr::FunctionCall { name: first, args });
5805        }
5806        // v7.9.20 — SQL-standard parenless keyword expressions
5807        // (PG treats these as functions called without parens).
5808        // Resolve to a synthetic FunctionCall so the engine's
5809        // eval path reuses the existing function-call routing.
5810        // mailrs G3.
5811        let lc = first.to_ascii_lowercase();
5812        if matches!(
5813            lc.as_str(),
5814            "current_date" | "current_time" | "current_timestamp" | "localtimestamp" | "localtime"
5815        ) {
5816            return Ok(Expr::FunctionCall {
5817                name: lc,
5818                args: Vec::new(),
5819            });
5820        }
5821        Ok(Expr::Column(ColumnName {
5822            qualifier: None,
5823            name: first,
5824        }))
5825    }
5826}
5827
5828/// v6.8.2 — walk an expression tree and return the first column
5829/// reference's bare name. Used by `parse_create_index_stmt_after_create`
5830/// to derive `CreateIndexStatement.column` from an expression
5831/// key (so downstream planner code resolving a primary column
5832/// position keeps working with expression indexes). Returns
5833/// `None` when the expression has no column ref at all — caller
5834/// surfaces that as a parse error.
5835fn extract_first_column(expr: &Expr) -> Option<String> {
5836    match expr {
5837        Expr::Column(cn) => Some(cn.name.clone()),
5838        Expr::FunctionCall { args, .. } => args.iter().find_map(extract_first_column),
5839        Expr::Binary { lhs, rhs, .. } => {
5840            extract_first_column(lhs).or_else(|| extract_first_column(rhs))
5841        }
5842        Expr::Unary { expr: e, .. } => extract_first_column(e),
5843        _ => None,
5844    }
5845}
5846
5847fn maybe_not(expr: Expr, negated: bool) -> Expr {
5848    if negated {
5849        Expr::Unary {
5850            op: UnOp::Not,
5851            expr: Box::new(expr),
5852        }
5853    } else {
5854        expr
5855    }
5856}
5857
5858fn binop_from(tok: &Token) -> Option<(BinOp, u8)> {
5859    let pair = match tok {
5860        Token::Or => (BinOp::Or, 1),
5861        Token::And => (BinOp::And, 2),
5862        Token::Eq => (BinOp::Eq, 4),
5863        Token::NotEq => (BinOp::NotEq, 4),
5864        Token::Lt => (BinOp::Lt, 4),
5865        Token::LtEq => (BinOp::LtEq, 4),
5866        Token::Gt => (BinOp::Gt, 4),
5867        Token::GtEq => (BinOp::GtEq, 4),
5868        // pgvector distance ops all sit on the same rung — tighter than
5869        // comparisons (4) so `col <-> v < threshold` parses correctly.
5870        Token::L2Distance => (BinOp::L2Distance, 5),
5871        Token::InnerProduct => (BinOp::InnerProduct, 5),
5872        Token::CosineDistance => (BinOp::CosineDistance, 5),
5873        Token::Plus => (BinOp::Add, 6),
5874        Token::Minus => (BinOp::Sub, 6),
5875        // `||` sits beside `+`/`-` (matches PG conceptually — concat groups
5876        // by the same level as binary additive arithmetic).
5877        Token::Concat => (BinOp::Concat, 6),
5878        Token::Star => (BinOp::Mul, 7),
5879        Token::Slash => (BinOp::Div, 7),
5880        // v4.14: JSON path ops bind tighter than comparisons (4)
5881        // and additive (6) so `doc->'k' = 'v'` parses correctly.
5882        // Same rung as the multiplicative ops.
5883        Token::JsonGet => (BinOp::JsonGet, 7),
5884        Token::JsonGetText => (BinOp::JsonGetText, 7),
5885        Token::JsonGetPath => (BinOp::JsonGetPath, 7),
5886        Token::JsonGetPathText => (BinOp::JsonGetPathText, 7),
5887        Token::JsonContains => (BinOp::JsonContains, 7),
5888        // v7.12.2 — `@@` binds at the comparison rung (looser than
5889        // arithmetic, tighter than AND / OR). PG places `@@` at
5890        // the same precedence as `=` / `<`, so we follow.
5891        Token::TsMatch => (BinOp::TsMatch, 4),
5892        _ => return None,
5893    };
5894    Some(pair)
5895}
5896
5897#[allow(clippy::cast_possible_truncation, clippy::cast_precision_loss)]
5898// `as f32` here is intentional: vector elements widen / narrow into f32 on
5899// purpose. i64 → f32 loses precision past 2^24, f64 → f32 loses precision
5900// past ~15 decimal digits — both are acceptable for a fixed-precision
5901// pgvector column.
5902fn extract_numeric_literal(e: &Expr) -> Option<f32> {
5903    match e {
5904        Expr::Literal(Literal::Integer(n)) => Some(*n as f32),
5905        Expr::Literal(Literal::Float(x)) => Some(*x as f32),
5906        Expr::Unary {
5907            op: UnOp::Neg,
5908            expr,
5909        } => extract_numeric_literal(expr).map(|x| -x),
5910        _ => None,
5911    }
5912}
5913
/// Parse the text inside `INTERVAL '...'` into `(months, micros)`.
///
/// The input is a whitespace-separated sequence of `<n> <unit>` pairs;
/// `<n>` is a signed integer. `None` is returned when the input is
/// empty, has a dangling token, contains an unparseable number or an
/// unknown unit, or when any accumulation overflows.
///
/// Recognised units (case-insensitive, optional trailing `s`):
/// `microsecond`, `millisecond`, `second`, `minute`, `hour`, `day`, `week`,
/// `month`, `year`. `week` widens to 7 days; `year` widens to 12 months.
pub fn parse_interval_text(s: &str) -> Option<(i32, i64)> {
    // Which accumulator a unit feeds, and how many base units
    // (microseconds or months) one of it is worth.
    enum Scale {
        Micros(i64),
        Months(i32),
    }

    let words: Vec<&str> = s.split_whitespace().collect();
    let pairs = words.chunks_exact(2);
    // A leftover word means a number without a unit (or vice versa).
    if words.is_empty() || !pairs.remainder().is_empty() {
        return None;
    }

    let mut total_months: i32 = 0;
    let mut total_micros: i64 = 0;
    for pair in pairs {
        let count: i64 = pair[0].parse().ok()?;
        let lowered = pair[1].to_ascii_lowercase();
        let scale = match lowered.strip_suffix('s').unwrap_or(&lowered) {
            "microsecond" => Scale::Micros(1),
            "millisecond" => Scale::Micros(1_000),
            "second" => Scale::Micros(1_000_000),
            "minute" => Scale::Micros(60_000_000),
            "hour" => Scale::Micros(3_600_000_000),
            "day" => Scale::Micros(86_400_000_000),
            "week" => Scale::Micros(604_800_000_000),
            "month" => Scale::Months(1),
            "year" => Scale::Months(12),
            _ => return None,
        };
        match scale {
            Scale::Micros(factor) => {
                total_micros = total_micros.checked_add(count.checked_mul(factor)?)?;
            }
            Scale::Months(factor) => {
                let count32 = i32::try_from(count).ok()?;
                total_months = total_months.checked_add(count32.checked_mul(factor)?)?;
            }
        }
    }
    Some((total_months, total_micros))
}
5955
5956/// v7.12.4 — map a bare type-name identifier (the form that
5957/// appears in a function arg list or RETURNS clause) to a
5958/// [`ColumnTypeName`]. Returns `None` for unknown / extension
5959/// types so the caller can preserve them as
5960/// [`FunctionArgType::Raw`] / [`FunctionReturn::Other`].
5961///
5962/// Subset of the full column-type grammar — we deliberately
5963/// don't parse parameterised forms (`VARCHAR(n)`, `NUMERIC(p,s)`)
5964/// here because function-arg types in v7.12.4 are mostly the
5965/// bare form (`text`, `int`, `bytea`, …).
5966fn map_type_ident_to_column_type_name(ident: &str) -> Option<ColumnTypeName> {
5967    Some(match ident.to_ascii_lowercase().as_str() {
5968        "smallint" | "tinyint" => ColumnTypeName::SmallInt,
5969        "int" | "integer" | "mediumint" => ColumnTypeName::Int,
5970        "bigint" => ColumnTypeName::BigInt,
5971        "float" | "double" | "real" => ColumnTypeName::Float,
5972        "text" => ColumnTypeName::Text,
5973        "bool" | "boolean" => ColumnTypeName::Bool,
5974        "date" => ColumnTypeName::Date,
5975        "timestamp" | "datetime" => ColumnTypeName::Timestamp,
5976        "timestamptz" => ColumnTypeName::Timestamptz,
5977        "json" => ColumnTypeName::Json,
5978        "jsonb" => ColumnTypeName::Jsonb,
5979        "bytea" | "bytes" => ColumnTypeName::Bytes,
5980        "tsvector" => ColumnTypeName::TsVector,
5981        "tsquery" => ColumnTypeName::TsQuery,
5982        _ => return None,
5983    })
5984}
5985
/// v7.12.4 — public alias for [`parse_plpgsql_body`], re-exported
/// from the crate root as `spg_sql::parse_function_body`.
///
/// Parses a PL/pgSQL function body (the bytes between
/// `$$ ... $$`) and returns the parsed `BEGIN ... END;` block.
///
/// v7.12.4 grammar (strict subset — IF / LOOP / DECLARE / RAISE
/// / embedded SQL land in v7.12.5+):
///
/// ```text
///   body          := [ws] block [ws]
///   block         := BEGIN stmt ( ; stmt )* [ ; ] END [ ; ]
///   stmt          := assign | return
///   assign        := assign_target := expr
///   assign_target := ( NEW | OLD ) . ident | ident
///   return        := RETURN ( NEW | OLD | NULL | expr )
/// ```
///
/// `expr` is parsed by recursing into the regular `Parser` — so a
/// PL/pgSQL `NEW.search_vector := to_tsvector('english',
/// NEW.subject || ' ' || NEW.sender)` body shape works without
/// the body parser knowing what `to_tsvector` is.
///
/// # Errors
///
/// Propagates the [`ParseError`] returned by [`parse_plpgsql_body`]
/// unchanged. Errors here cause the caller to fall back to
/// `FunctionBody::Raw` — keeping the CREATE FUNCTION DDL itself
/// successful, but the executor will refuse to invoke the
/// function with an "unparseable body" error.
pub fn parse_function_body(body: &str) -> Result<PlPgSqlBlock, ParseError> {
    // Pure delegation: the private parser does all the work.
    parse_plpgsql_body(body)
}
6015
6016fn parse_plpgsql_body(body: &str) -> Result<PlPgSqlBlock, ParseError> {
6017    // Use the regular lexer on the body text. The trailing
6018    // `END;` may or may not have a semicolon; the lexer treats
6019    // both forms identically.
6020    let tokens = lexer::tokenize(body).map_err(|e| ParseError {
6021        message: alloc::format!("plpgsql body lex error: {e}"),
6022        token_pos: 0,
6023    })?;
6024    let mut parser = Parser::new(tokens);
6025    parser.parse_plpgsql_block()
6026}
6027
6028#[cfg(test)]
6029mod tests {
6030    use super::*;
6031    use alloc::string::ToString;
6032
6033    fn parse(s: &str) -> Statement {
6034        parse_statement(s).expect("parse ok")
6035    }
6036
6037    fn lit_int(n: i64) -> Expr {
6038        Expr::Literal(Literal::Integer(n))
6039    }
6040
6041    fn col(name: &str) -> Expr {
6042        Expr::Column(ColumnName {
6043            qualifier: None,
6044            name: name.into(),
6045        })
6046    }
6047
6048    #[test]
6049    fn select_single_integer() {
6050        let s = parse("SELECT 1");
6051        let Statement::Select(s) = s else {
6052            panic!("expected SELECT")
6053        };
6054        assert_eq!(s.items.len(), 1);
6055        assert!(s.from.is_none());
6056        assert!(s.where_.is_none());
6057    }
6058
6059    #[test]
6060    fn select_multiple_literal_kinds() {
6061        let s = parse("SELECT 1, 'hi', NULL, TRUE, 1.5");
6062        let Statement::Select(s) = s else {
6063            panic!("expected SELECT")
6064        };
6065        assert_eq!(s.items.len(), 5);
6066    }
6067
6068    #[test]
6069    fn select_wildcard_from_table() {
6070        let s = parse("SELECT * FROM users");
6071        let Statement::Select(s) = s else {
6072            panic!("expected SELECT")
6073        };
6074        assert!(matches!(s.items[..], [SelectItem::Wildcard]));
6075        assert_eq!(s.from.as_ref().unwrap().primary.name, "users");
6076    }
6077
6078    #[test]
6079    fn select_with_table_alias() {
6080        let s = parse("SELECT * FROM users AS u");
6081        let Statement::Select(s) = s else {
6082            panic!("expected SELECT")
6083        };
6084        let t = &s.from.as_ref().unwrap().primary;
6085        assert_eq!(t.name, "users");
6086        assert_eq!(t.alias.as_deref(), Some("u"));
6087    }
6088
6089    #[test]
6090    fn select_with_where_eq() {
6091        let s = parse("SELECT a FROM t WHERE a = 1");
6092        let Statement::Select(s) = s else {
6093            panic!("expected SELECT")
6094        };
6095        let w = s.where_.unwrap();
6096        assert_eq!(
6097            w,
6098            Expr::Binary {
6099                lhs: Box::new(col("a")),
6100                op: BinOp::Eq,
6101                rhs: Box::new(lit_int(1)),
6102            }
6103        );
6104    }
6105
6106    #[test]
6107    fn arithmetic_precedence() {
6108        let s = parse("SELECT 1 + 2 * 3");
6109        let Statement::Select(s) = s else {
6110            panic!("expected SELECT")
6111        };
6112        let SelectItem::Expr { expr, .. } = &s.items[0] else {
6113            panic!("wildcard?")
6114        };
6115        assert_eq!(
6116            expr,
6117            &Expr::Binary {
6118                lhs: Box::new(lit_int(1)),
6119                op: BinOp::Add,
6120                rhs: Box::new(Expr::Binary {
6121                    lhs: Box::new(lit_int(2)),
6122                    op: BinOp::Mul,
6123                    rhs: Box::new(lit_int(3)),
6124                }),
6125            }
6126        );
6127    }
6128
6129    #[test]
6130    fn parentheses_override_precedence() {
6131        let s = parse("SELECT (1 + 2) * 3");
6132        let Statement::Select(s) = s else {
6133            panic!("expected SELECT")
6134        };
6135        let SelectItem::Expr { expr, .. } = &s.items[0] else {
6136            panic!()
6137        };
6138        assert_eq!(
6139            expr,
6140            &Expr::Binary {
6141                lhs: Box::new(Expr::Binary {
6142                    lhs: Box::new(lit_int(1)),
6143                    op: BinOp::Add,
6144                    rhs: Box::new(lit_int(2)),
6145                }),
6146                op: BinOp::Mul,
6147                rhs: Box::new(lit_int(3)),
6148            }
6149        );
6150    }
6151
6152    #[test]
6153    fn not_binds_below_comparison() {
6154        // `NOT a = 1` should parse as `NOT (a = 1)`.
6155        let s = parse("SELECT NOT a = 1 FROM t");
6156        let Statement::Select(s) = s else {
6157            panic!("expected SELECT")
6158        };
6159        let SelectItem::Expr { expr, .. } = &s.items[0] else {
6160            panic!()
6161        };
6162        assert_eq!(
6163            expr,
6164            &Expr::Unary {
6165                op: UnOp::Not,
6166                expr: Box::new(Expr::Binary {
6167                    lhs: Box::new(col("a")),
6168                    op: BinOp::Eq,
6169                    rhs: Box::new(lit_int(1)),
6170                }),
6171            }
6172        );
6173    }
6174
6175    #[test]
6176    fn unary_minus_binds_above_multiplication() {
6177        // `-a * 2` should be `(-a) * 2`.
6178        let s = parse("SELECT -a * 2 FROM t");
6179        let Statement::Select(s) = s else {
6180            panic!("expected SELECT")
6181        };
6182        let SelectItem::Expr { expr, .. } = &s.items[0] else {
6183            panic!()
6184        };
6185        assert_eq!(
6186            expr,
6187            &Expr::Binary {
6188                lhs: Box::new(Expr::Unary {
6189                    op: UnOp::Neg,
6190                    expr: Box::new(col("a")),
6191                }),
6192                op: BinOp::Mul,
6193                rhs: Box::new(lit_int(2)),
6194            }
6195        );
6196    }
6197
6198    #[test]
6199    fn qualified_column() {
6200        let s = parse("SELECT t.col FROM t");
6201        let Statement::Select(s) = s else {
6202            panic!("expected SELECT")
6203        };
6204        let SelectItem::Expr { expr, .. } = &s.items[0] else {
6205            panic!()
6206        };
6207        assert_eq!(
6208            expr,
6209            &Expr::Column(ColumnName {
6210                qualifier: Some("t".into()),
6211                name: "col".into()
6212            })
6213        );
6214    }
6215
6216    #[test]
6217    fn select_item_alias_with_as() {
6218        let s = parse("SELECT a AS y FROM t");
6219        let Statement::Select(s) = s else {
6220            panic!("expected SELECT")
6221        };
6222        let SelectItem::Expr { alias, .. } = &s.items[0] else {
6223            panic!()
6224        };
6225        assert_eq!(alias.as_deref(), Some("y"));
6226    }
6227
6228    #[test]
6229    fn trailing_semicolon_accepted() {
6230        let s = parse("SELECT 1;");
6231        let Statement::Select(s) = s else {
6232            panic!("expected SELECT")
6233        };
6234        assert_eq!(s.items.len(), 1);
6235    }
6236
6237    #[test]
6238    fn boolean_chain_with_and_or_not() {
6239        // (NOT a) OR (b AND (NOT c))
6240        let s = parse("SELECT NOT a OR b AND NOT c FROM t");
6241        let Statement::Select(s) = s else {
6242            panic!("expected SELECT")
6243        };
6244        let SelectItem::Expr { expr, .. } = &s.items[0] else {
6245            panic!()
6246        };
6247        let expected = Expr::Binary {
6248            lhs: Box::new(Expr::Unary {
6249                op: UnOp::Not,
6250                expr: Box::new(col("a")),
6251            }),
6252            op: BinOp::Or,
6253            rhs: Box::new(Expr::Binary {
6254                lhs: Box::new(col("b")),
6255                op: BinOp::And,
6256                rhs: Box::new(Expr::Unary {
6257                    op: UnOp::Not,
6258                    expr: Box::new(col("c")),
6259                }),
6260            }),
6261        };
6262        assert_eq!(expr, &expected);
6263    }
6264
6265    #[test]
6266    fn empty_input_errors() {
6267        let err = parse_statement("").unwrap_err();
6268        assert!(err.message.contains("SELECT"));
6269    }
6270
6271    #[test]
6272    fn unmatched_paren_errors() {
6273        assert!(parse_statement("SELECT (1 + 2").is_err());
6274    }
6275
6276    #[test]
6277    fn display_round_trip_simple_select() {
6278        let original = parse("SELECT a + 1 FROM t WHERE a > 0");
6279        let text = original.to_string();
6280        let again = parse_statement(&text).expect("re-parse");
6281        assert_eq!(original, again);
6282    }
6283
6284    // --- CREATE TABLE & INSERT (v0.3) ---------------------------------------
6285
6286    #[test]
6287    fn create_table_single_column() {
6288        let s = parse("CREATE TABLE foo (a INT)");
6289        let Statement::CreateTable(c) = s else {
6290            panic!("expected CreateTable")
6291        };
6292        assert_eq!(c.name, "foo");
6293        assert_eq!(c.columns.len(), 1);
6294        assert_eq!(c.columns[0].name, "a");
6295        assert_eq!(c.columns[0].ty, ColumnTypeName::Int);
6296        assert!(c.columns[0].nullable);
6297    }
6298
6299    #[test]
6300    fn create_table_multi_column_with_not_null_mix() {
6301        let s = parse("CREATE TABLE u (id INT NOT NULL, name TEXT, score FLOAT NOT NULL, ok BOOL)");
6302        let Statement::CreateTable(c) = s else {
6303            panic!()
6304        };
6305        assert_eq!(c.columns.len(), 4);
6306        assert_eq!(c.columns[0].ty, ColumnTypeName::Int);
6307        assert!(!c.columns[0].nullable);
6308        assert_eq!(c.columns[1].ty, ColumnTypeName::Text);
6309        assert!(c.columns[1].nullable);
6310        assert_eq!(c.columns[2].ty, ColumnTypeName::Float);
6311        assert!(!c.columns[2].nullable);
6312        assert_eq!(c.columns[3].ty, ColumnTypeName::Bool);
6313    }
6314
6315    #[test]
6316    fn create_table_bigint_supported() {
6317        let s = parse("CREATE TABLE accounts (id BIGINT NOT NULL)");
6318        let Statement::CreateTable(c) = s else {
6319            panic!()
6320        };
6321        assert_eq!(c.columns[0].ty, ColumnTypeName::BigInt);
6322    }
6323
6324    #[test]
6325    fn create_table_vector_default_is_f32() {
6326        let s = parse("CREATE TABLE t (v VECTOR(128))");
6327        let Statement::CreateTable(c) = s else {
6328            panic!()
6329        };
6330        assert_eq!(
6331            c.columns[0].ty,
6332            ColumnTypeName::Vector {
6333                dim: 128,
6334                encoding: VecEncoding::F32,
6335            },
6336        );
6337    }
6338
6339    #[test]
6340    fn create_table_vector_using_sq8() {
6341        // v6.0.1: `USING SQ8` selects scalar-quantised encoding.
6342        // Case-insensitive on both `USING` and the encoding name.
6343        for sql in [
6344            "CREATE TABLE t (v VECTOR(128) USING SQ8)",
6345            "CREATE TABLE t (v VECTOR(128) using sq8)",
6346        ] {
6347            let s = parse(sql);
6348            let Statement::CreateTable(c) = s else {
6349                panic!()
6350            };
6351            assert_eq!(
6352                c.columns[0].ty,
6353                ColumnTypeName::Vector {
6354                    dim: 128,
6355                    encoding: VecEncoding::Sq8,
6356                },
6357                "{sql}",
6358            );
6359        }
6360    }
6361
6362    #[test]
6363    fn create_table_vector_using_unknown_errors() {
6364        let err = parse_statement("CREATE TABLE t (v VECTOR(8) USING PQ8)").unwrap_err();
6365        assert!(
6366            err.message.contains("unknown vector encoding"),
6367            "got: {}",
6368            err.message
6369        );
6370    }
6371
6372    #[test]
6373    fn vector_using_sq8_display_roundtrips() {
6374        // The Display impl must produce text that re-parses to the
6375        // same AST. Guard for the v6.0.1 `USING SQ8` suffix.
6376        let s = parse("CREATE TABLE t (v VECTOR(64) USING SQ8)");
6377        let Statement::CreateTable(c) = s else {
6378            panic!()
6379        };
6380        assert_eq!(c.columns[0].ty.to_string(), "VECTOR(64) USING SQ8");
6381    }
6382
6383    #[test]
6384    fn parser_recognises_placeholders() {
6385        use crate::ast::{Expr, SelectItem, Statement};
6386        // $N in expression position parses as Expr::Placeholder(N).
6387        let s = parse("SELECT $1, $2 + 1 FROM t WHERE x = $3");
6388        let Statement::Select(sel) = s else { panic!() };
6389        assert!(matches!(
6390            sel.items[0],
6391            SelectItem::Expr {
6392                expr: Expr::Placeholder(1),
6393                alias: None
6394            }
6395        ));
6396        // $2 + 1
6397        let SelectItem::Expr {
6398            expr: Expr::Binary { lhs, rhs, .. },
6399            ..
6400        } = &sel.items[1]
6401        else {
6402            panic!()
6403        };
6404        assert!(matches!(**lhs, Expr::Placeholder(2)));
6405        assert!(matches!(**rhs, Expr::Literal(Literal::Integer(1))));
6406        // WHERE x = $3
6407        let Some(Expr::Binary { rhs, .. }) = sel.where_.as_ref() else {
6408            panic!()
6409        };
6410        assert!(matches!(**rhs, Expr::Placeholder(3)));
6411    }
6412
6413    #[test]
6414    fn parser_rejects_dollar_zero() {
6415        // $0 is not valid in PG; the lexer rejects it.
6416        assert!(parse_statement("SELECT $0").is_err());
6417    }
6418
6419    #[test]
6420    fn placeholder_display_roundtrips() {
6421        // The Display impl must produce text that re-lexes to the
6422        // same Placeholder token.
6423        let s = parse("SELECT $42 FROM t");
6424        let printed = s.to_string();
6425        assert!(printed.contains("$42"));
6426        let again = parse(&printed);
6427        assert_eq!(s, again);
6428    }
6429
6430    #[test]
6431    fn alter_index_rebuild_bare() {
6432        use crate::ast::{AlterIndexTarget, Statement};
6433        let s = parse("ALTER INDEX my_idx REBUILD");
6434        let Statement::AlterIndex(a) = s else {
6435            panic!("expected AlterIndex, got {s:?}")
6436        };
6437        assert_eq!(a.name, "my_idx");
6438        assert_eq!(a.target, AlterIndexTarget::Rebuild { encoding: None });
6439    }
6440
6441    #[test]
6442    fn alter_index_rebuild_with_encoding() {
6443        use crate::ast::{AlterIndexTarget, Statement};
6444        for (sql, want) in [
6445            (
6446                "ALTER INDEX my_idx REBUILD WITH (encoding = F32)",
6447                VecEncoding::F32,
6448            ),
6449            (
6450                "ALTER INDEX my_idx REBUILD WITH (encoding = sq8)",
6451                VecEncoding::Sq8,
6452            ),
6453            (
6454                "ALTER INDEX my_idx REBUILD WITH (encoding = HALF)",
6455                VecEncoding::F16,
6456            ),
6457        ] {
6458            let s = parse(sql);
6459            let Statement::AlterIndex(a) = s else {
6460                panic!("{sql}: expected AlterIndex")
6461            };
6462            assert_eq!(a.name, "my_idx");
6463            assert_eq!(
6464                a.target,
6465                AlterIndexTarget::Rebuild {
6466                    encoding: Some(want)
6467                },
6468                "{sql}"
6469            );
6470        }
6471    }
6472
6473    #[test]
6474    fn alter_index_rebuild_unknown_encoding_errors() {
6475        let err = parse_statement("ALTER INDEX my_idx REBUILD WITH (encoding = PQ8)").unwrap_err();
6476        assert!(
6477            err.message.contains("unknown vector encoding"),
6478            "got: {}",
6479            err.message
6480        );
6481    }
6482
6483    #[test]
6484    fn alter_index_rebuild_display_roundtrips() {
6485        for (input, want) in [
6486            ("ALTER INDEX my_idx REBUILD", "ALTER INDEX my_idx REBUILD"),
6487            (
6488                "ALTER INDEX my_idx REBUILD WITH (encoding = SQ8)",
6489                "ALTER INDEX my_idx REBUILD WITH (encoding = SQ8)",
6490            ),
6491            (
6492                "ALTER INDEX my_idx REBUILD WITH (encoding = HALF)",
6493                "ALTER INDEX my_idx REBUILD WITH (encoding = HALF)",
6494            ),
6495        ] {
6496            let s = parse(input);
6497            assert_eq!(s.to_string(), want);
6498        }
6499    }
6500
6501    #[test]
6502    fn create_table_unknown_type_errors() {
6503        // v4.9: JSON is now real; pick an actually unsupported keyword
6504        // (XML never landed and isn't planned).
6505        let err = parse_statement("CREATE TABLE x (a xml)").unwrap_err();
6506        assert!(err.message.contains("unsupported column type"));
6507    }
6508
6509    #[test]
6510    fn create_table_missing_table_keyword_errors() {
6511        assert!(parse_statement("CREATE x (a INT)").is_err());
6512    }
6513
6514    #[test]
6515    fn insert_single_value() {
6516        let s = parse("INSERT INTO foo VALUES (42)");
6517        let Statement::Insert(i) = s else {
6518            panic!("expected Insert")
6519        };
6520        assert_eq!(i.table, "foo");
6521        assert_eq!(i.rows.len(), 1);
6522        assert_eq!(i.rows[0].len(), 1);
6523        assert!(matches!(i.rows[0][0], Expr::Literal(Literal::Integer(42))));
6524    }
6525
6526    #[test]
6527    fn insert_multi_value_with_mixed_literals() {
6528        let s = parse("INSERT INTO foo VALUES (1, 'hi', 3.14, TRUE, NULL)");
6529        let Statement::Insert(i) = s else { panic!() };
6530        assert_eq!(i.rows.len(), 1);
6531        assert_eq!(i.rows[0].len(), 5);
6532    }
6533
6534    #[test]
6535    fn insert_missing_into_errors() {
6536        assert!(parse_statement("INSERT foo VALUES (1)").is_err());
6537    }
6538
6539    #[test]
6540    fn create_table_round_trip() {
6541        let original =
6542            parse("CREATE TABLE foo (id BIGINT NOT NULL, label TEXT, score FLOAT NOT NULL)");
6543        let text = original.to_string();
6544        let again = parse_statement(&text).expect("re-parse");
6545        assert_eq!(original, again);
6546    }
6547
6548    #[test]
6549    fn insert_round_trip_with_negation_and_string() {
6550        let original = parse("INSERT INTO t VALUES (-1, 'it''s', NULL)");
6551        let text = original.to_string();
6552        let again = parse_statement(&text).expect("re-parse");
6553        assert_eq!(original, again);
6554    }
6555
6556    #[test]
6557    fn unknown_keyword_at_statement_start_errors() {
6558        // v4.4: UPDATE is real SQL now. Use a fabricated keyword so
6559        // the top-level dispatch still has no branch to take.
6560        let err = parse_statement("FROBNICATE foo SET x = 1").unwrap_err();
6561        assert!(err.message.contains("expected SELECT"));
6562    }
6563
6564    // --- v0.8 CREATE INDEX --------------------------------------------------
6565
6566    #[test]
6567    fn create_index_basic() {
6568        let s = parse("CREATE INDEX idx_id ON users (id)");
6569        let Statement::CreateIndex(c) = s else {
6570            panic!("expected CreateIndex")
6571        };
6572        assert_eq!(c.name, "idx_id");
6573        assert_eq!(c.table, "users");
6574        assert_eq!(c.column, "id");
6575    }
6576
6577    #[test]
6578    fn create_index_missing_on_errors() {
6579        assert!(parse_statement("CREATE INDEX foo users (id)").is_err());
6580    }
6581
6582    #[test]
6583    fn create_index_missing_paren_errors() {
6584        assert!(parse_statement("CREATE INDEX foo ON users id").is_err());
6585    }
6586
6587    #[test]
6588    fn create_index_round_trip() {
6589        let original = parse("CREATE INDEX by_name ON users (name)");
6590        let again = parse_statement(&original.to_string()).unwrap();
6591        assert_eq!(original, again);
6592    }
6593
6594    // --- v7.9.29 CREATE UNIQUE INDEX [WHERE pred] (mailrs K1) -------------
6595
6596    #[test]
6597    fn create_unique_index_basic() {
6598        let s = parse("CREATE UNIQUE INDEX uq_x ON t (a)");
6599        let Statement::CreateIndex(c) = s else {
6600            panic!("expected CreateIndex");
6601        };
6602        assert!(c.is_unique);
6603        assert_eq!(c.column, "a");
6604        assert!(c.partial_predicate.is_none());
6605    }
6606
6607    #[test]
6608    fn create_unique_index_partial() {
6609        // mailrs's email_templates "one default per user" shape.
6610        let s = parse(
6611            "CREATE UNIQUE INDEX idx_email_templates_user_default \
6612             ON email_templates (user_address) WHERE is_default = true",
6613        );
6614        let Statement::CreateIndex(c) = s else {
6615            panic!("expected CreateIndex");
6616        };
6617        assert!(c.is_unique);
6618        assert_eq!(c.table, "email_templates");
6619        assert_eq!(c.column, "user_address");
6620        assert!(c.partial_predicate.is_some());
6621    }
6622
6623    #[test]
6624    fn create_unique_index_composite_with_predicate() {
6625        // mailrs's calendar_events instance: composite columns.
6626        let s = parse(
6627            "CREATE UNIQUE INDEX uq_calendar_events_instance \
6628             ON calendar_events (calendar_id, uid, recurrence_id) \
6629             WHERE recurrence_id IS NOT NULL",
6630        );
6631        let Statement::CreateIndex(c) = s else {
6632            panic!("expected CreateIndex");
6633        };
6634        assert!(c.is_unique);
6635        assert_eq!(c.column, "calendar_id");
6636        assert_eq!(
6637            c.extra_columns,
6638            vec!["uid".to_string(), "recurrence_id".to_string()]
6639        );
6640        assert!(c.partial_predicate.is_some());
6641    }
6642
6643    #[test]
6644    fn create_unique_index_using_btree_ok() {
6645        let s = parse("CREATE UNIQUE INDEX uq_x ON t USING btree (a)");
6646        assert!(matches!(s, Statement::CreateIndex(ref c) if c.is_unique));
6647    }
6648
6649    #[test]
6650    fn create_unique_index_using_hnsw_rejected() {
6651        let err =
6652            parse_statement("CREATE UNIQUE INDEX uq_v ON t USING hnsw (embedding)").unwrap_err();
6653        assert!(err.message.contains("UNIQUE"), "{}", err.message);
6654    }
6655
6656    #[test]
6657    fn create_unique_index_round_trip() {
6658        let original = parse(
6659            "CREATE UNIQUE INDEX uq_calendar_events_master \
6660             ON calendar_events (calendar_id, uid) WHERE recurrence_id IS NULL",
6661        );
6662        let again = parse_statement(&original.to_string()).unwrap();
6663        assert_eq!(original, again);
6664    }
6665
6666    #[test]
6667    fn create_unique_without_index_errors() {
6668        let err = parse_statement("CREATE UNIQUE TABLE t (a INT)").unwrap_err();
6669        assert!(err.message.contains("INDEX"), "{}", err.message);
6670    }
6671
6672    // --- v7.10.4 BYTES / BYTEA column type (Epic 1) ----------------------
6673
6674    #[test]
6675    fn create_table_bytea_column() {
6676        let s = parse("CREATE TABLE t (id INT NOT NULL, payload BYTEA NOT NULL)");
6677        let Statement::CreateTable(c) = s else {
6678            panic!("expected CreateTable");
6679        };
6680        assert_eq!(c.columns.len(), 2);
6681        assert_eq!(c.columns[1].ty, ColumnTypeName::Bytes);
6682        assert!(!c.columns[1].nullable);
6683    }
6684
6685    #[test]
6686    fn create_table_bytes_alias_column() {
6687        let s = parse("CREATE TABLE t (blob BYTES)");
6688        let Statement::CreateTable(c) = s else {
6689            panic!("expected CreateTable");
6690        };
6691        assert_eq!(c.columns[0].ty, ColumnTypeName::Bytes);
6692    }
6693
6694    #[test]
6695    fn bytea_round_trip_display() {
6696        let original = parse("CREATE TABLE t (a BYTEA NOT NULL)");
6697        let again = parse_statement(&original.to_string()).unwrap();
6698        assert_eq!(original, again);
6699    }
6700
6701    // --- v0.9 transactions -------------------------------------------------
6702
6703    #[test]
6704    fn begin_commit_rollback_parse_as_unit_variants() {
6705        assert_eq!(parse("BEGIN"), Statement::Begin);
6706        assert_eq!(parse("COMMIT"), Statement::Commit);
6707        assert_eq!(parse("ROLLBACK"), Statement::Rollback);
6708        // Trailing semicolons accepted too.
6709        assert_eq!(parse("BEGIN;"), Statement::Begin);
6710    }
6711
6712    // --- v1.2: pgvector distance ops + ::vector cast --------------------
6713
6714    #[test]
6715    fn inner_product_binop_parses() {
6716        let s = parse("SELECT v <#> [1.0, 2.0] FROM t");
6717        let Statement::Select(s) = s else { panic!() };
6718        let SelectItem::Expr { expr, .. } = &s.items[0] else {
6719            panic!()
6720        };
6721        assert!(matches!(
6722            expr,
6723            Expr::Binary {
6724                op: BinOp::InnerProduct,
6725                ..
6726            }
6727        ));
6728    }
6729
6730    #[test]
6731    fn cosine_distance_binop_parses() {
6732        let s = parse("SELECT v <=> [1.0, 2.0] FROM t");
6733        let Statement::Select(s) = s else { panic!() };
6734        let SelectItem::Expr { expr, .. } = &s.items[0] else {
6735            panic!()
6736        };
6737        assert!(matches!(
6738            expr,
6739            Expr::Binary {
6740                op: BinOp::CosineDistance,
6741                ..
6742            }
6743        ));
6744    }
6745
6746    #[test]
6747    fn vector_cast_postfix_wraps_string_literal() {
6748        let s = parse("SELECT '[1,2,3]'::vector FROM t");
6749        let Statement::Select(s) = s else { panic!() };
6750        let SelectItem::Expr { expr, .. } = &s.items[0] else {
6751            panic!()
6752        };
6753        assert!(matches!(
6754            expr,
6755            Expr::Cast {
6756                target: CastTarget::Vector,
6757                ..
6758            }
6759        ));
6760    }
6761
6762    #[test]
6763    fn unsupported_cast_target_errors() {
6764        // `::numeric` isn't in the v1.3 cast target set.
6765        let err = parse_statement("SELECT 1::numeric FROM t").unwrap_err();
6766        assert!(err.message.contains("unsupported cast target"));
6767    }
6768
6769    #[test]
6770    fn tx_statements_round_trip() {
6771        for q in ["BEGIN", "COMMIT", "ROLLBACK"] {
6772            let original = parse(q);
6773            let again = parse_statement(&original.to_string()).unwrap();
6774            assert_eq!(original, again);
6775        }
6776    }
6777
6778    #[test]
6779    fn interval_text_parsing_units() {
6780        // Single unit.
6781        assert_eq!(parse_interval_text("1 day"), Some((0, 86_400_000_000)));
6782        assert_eq!(parse_interval_text("1 second"), Some((0, 1_000_000)));
6783        assert_eq!(parse_interval_text("1 month"), Some((1, 0)));
6784        assert_eq!(parse_interval_text("2 years"), Some((24, 0)));
6785        // Compound spans accumulate.
6786        assert_eq!(parse_interval_text("1 year 6 months"), Some((18, 0)));
6787        assert_eq!(
6788            parse_interval_text("1 day 2 hours"),
6789            Some((0, 86_400_000_000 + 7_200_000_000))
6790        );
6791        // Negative numbers carry through.
6792        assert_eq!(parse_interval_text("-1 day"), Some((0, -86_400_000_000)));
6793        // Bad shapes return None.
6794        assert_eq!(parse_interval_text(""), None);
6795        assert_eq!(parse_interval_text("garbage"), None);
6796        assert_eq!(parse_interval_text("1 fortnight"), None);
6797        assert_eq!(parse_interval_text("1"), None);
6798    }
6799
6800    #[test]
6801    fn interval_literal_roundtrips_via_display() {
6802        let parsed = parse("SELECT INTERVAL '1 day 2 hours'");
6803        let s = parsed.to_string();
6804        // Display preserves the original text verbatim.
6805        assert!(s.contains("INTERVAL '1 day 2 hours'"), "got: {s}");
6806        // And re-parsing yields a structurally equal statement.
6807        let again = parse_statement(&s).unwrap();
6808        assert_eq!(parsed, again);
6809    }
6810
6811    // ── v6.1.2: CREATE / DROP PUBLICATION ────────────────────
6812
6813    #[test]
6814    fn parser_recognises_create_publication_bare() {
6815        let s = parse("CREATE PUBLICATION pub_a");
6816        let Statement::CreatePublication(p) = s else {
6817            panic!("expected CreatePublication, got {s:?}")
6818        };
6819        assert_eq!(p.name, "pub_a");
6820        assert_eq!(p.scope, PublicationScope::AllTables);
6821    }
6822
6823    #[test]
6824    fn parser_recognises_create_publication_for_all_tables() {
6825        let s = parse("CREATE PUBLICATION pub_a FOR ALL TABLES");
6826        let Statement::CreatePublication(p) = s else {
6827            panic!("expected CreatePublication, got {s:?}")
6828        };
6829        assert_eq!(p.name, "pub_a");
6830        assert_eq!(p.scope, PublicationScope::AllTables);
6831    }
6832
6833    #[test]
6834    fn parser_recognises_drop_publication() {
6835        let s = parse("DROP PUBLICATION pub_a");
6836        let Statement::DropPublication(name) = s else {
6837            panic!("expected DropPublication, got {s:?}")
6838        };
6839        assert_eq!(name, "pub_a");
6840    }
6841
6842    #[test]
6843    fn parser_recognises_for_table_list() {
6844        let s = parse("CREATE PUBLICATION pub_a FOR TABLE t1, t2, t3");
6845        let Statement::CreatePublication(p) = s else {
6846            panic!("expected CreatePublication, got {s:?}")
6847        };
6848        assert_eq!(p.name, "pub_a");
6849        let PublicationScope::ForTables(ts) = p.scope else {
6850            panic!("expected ForTables scope")
6851        };
6852        assert_eq!(ts, alloc::vec!["t1", "t2", "t3"]);
6853    }
6854
6855    #[test]
6856    fn parser_recognises_for_tables_plural() {
6857        // PG 19 accepts both `FOR TABLE` and `FOR TABLES` — match.
6858        let s = parse("CREATE PUBLICATION pub_a FOR TABLES t1, t2");
6859        let Statement::CreatePublication(p) = s else {
6860            panic!("expected CreatePublication, got {s:?}")
6861        };
6862        let PublicationScope::ForTables(ts) = p.scope else {
6863            panic!("expected ForTables")
6864        };
6865        assert_eq!(ts, alloc::vec!["t1", "t2"]);
6866    }
6867
6868    #[test]
6869    fn parser_recognises_for_all_tables_except_list() {
6870        let s = parse("CREATE PUBLICATION p FOR ALL TABLES EXCEPT t1, t2");
6871        let Statement::CreatePublication(p) = s else {
6872            panic!()
6873        };
6874        let PublicationScope::AllTablesExcept(ts) = p.scope else {
6875            panic!("expected AllTablesExcept")
6876        };
6877        assert_eq!(ts, alloc::vec!["t1", "t2"]);
6878    }
6879
6880    #[test]
6881    fn parser_rejects_for_table_with_empty_list() {
6882        // `FOR TABLE` with nothing after is a parse error.
6883        let err = parse_statement("CREATE PUBLICATION p FOR TABLE")
6884            .expect_err("must error on empty list");
6885        // No specific message asserted — the call falls through to
6886        // expect_ident_like which yields "expected identifier, got …".
6887        assert!(!err.message.is_empty());
6888    }
6889
6890    #[test]
6891    fn parser_recognises_show_publications() {
6892        // v6.1.3 — SHOW PUBLICATIONS lands here. PUBLICATIONS is a
6893        // bare ident in this position, NOT a reserved keyword.
6894        let s = parse("SHOW PUBLICATIONS");
6895        assert!(matches!(s, Statement::ShowPublications));
6896    }
6897
6898    // ── v6.1.4: CREATE / DROP SUBSCRIPTION + SHOW SUBSCRIPTIONS ─
6899
6900    #[test]
6901    fn parser_recognises_create_subscription_single_publication() {
6902        let s = parse(
6903            "CREATE SUBSCRIPTION sub_a CONNECTION 'host=127.0.0.1 port=20002' PUBLICATION pub_a",
6904        );
6905        let Statement::CreateSubscription(c) = s else {
6906            panic!("expected CreateSubscription, got {s:?}")
6907        };
6908        assert_eq!(c.name, "sub_a");
6909        assert_eq!(c.conn_str, "host=127.0.0.1 port=20002");
6910        assert_eq!(c.publications, alloc::vec!["pub_a"]);
6911    }
6912
6913    #[test]
6914    fn parser_recognises_create_subscription_multi_publication() {
6915        let s = parse("CREATE SUBSCRIPTION sub_a CONNECTION 'host=h' PUBLICATION p1, p2, p3");
6916        let Statement::CreateSubscription(c) = s else {
6917            panic!()
6918        };
6919        assert_eq!(c.publications, alloc::vec!["p1", "p2", "p3"]);
6920    }
6921
6922    #[test]
6923    fn parser_rejects_create_subscription_missing_connection() {
6924        let err = parse_statement("CREATE SUBSCRIPTION s PUBLICATION p")
6925            .expect_err("must error on missing CONNECTION");
6926        assert!(err.message.contains("CONNECTION"), "got: {}", err.message);
6927    }
6928
6929    #[test]
6930    fn parser_rejects_create_subscription_missing_publication() {
6931        let err = parse_statement("CREATE SUBSCRIPTION s CONNECTION 'host=x'")
6932            .expect_err("must error on missing PUBLICATION");
6933        assert!(err.message.contains("PUBLICATION"), "got: {}", err.message);
6934    }
6935
6936    #[test]
6937    fn parser_recognises_drop_subscription() {
6938        let s = parse("DROP SUBSCRIPTION sub_a");
6939        let Statement::DropSubscription(name) = s else {
6940            panic!("expected DropSubscription, got {s:?}")
6941        };
6942        assert_eq!(name, "sub_a");
6943    }
6944
6945    #[test]
6946    fn parser_recognises_show_subscriptions() {
6947        let s = parse("SHOW SUBSCRIPTIONS");
6948        assert!(matches!(s, Statement::ShowSubscriptions));
6949    }
6950
6951    #[test]
6952    fn parser_recognises_wait_for_wal_position_no_timeout() {
6953        let s = parse("WAIT FOR WAL POSITION 12345");
6954        let Statement::WaitForWalPosition { pos, timeout_ms } = s else {
6955            panic!("expected WaitForWalPosition, got {s:?}")
6956        };
6957        assert_eq!(pos, 12345);
6958        assert!(timeout_ms.is_none());
6959    }
6960
6961    #[test]
6962    fn parser_recognises_wait_for_wal_position_with_timeout() {
6963        let s = parse("WAIT FOR WAL POSITION 67890 WITH TIMEOUT 5000");
6964        let Statement::WaitForWalPosition { pos, timeout_ms } = s else {
6965            panic!()
6966        };
6967        assert_eq!(pos, 67890);
6968        assert_eq!(timeout_ms, Some(5000));
6969    }
6970
6971    #[test]
6972    fn parser_rejects_wait_with_negative_position() {
6973        // The lexer treats `-` as a token; `expect_u64_literal`
6974        // only sees the Integer that follows, so the negative
6975        // arrives as a unary-minus expression at higher levels.
6976        // Bare `WAIT FOR WAL POSITION -1` thus surfaces as a
6977        // parse error one way or another.
6978        let err = parse_statement("WAIT FOR WAL POSITION -1").unwrap_err();
6979        assert!(!err.message.is_empty());
6980    }
6981
6982    #[test]
6983    fn parser_recognises_bare_analyze() {
6984        let s = parse("ANALYZE");
6985        assert!(matches!(s, Statement::Analyze(None)));
6986    }
6987
6988    #[test]
6989    fn parser_recognises_analyze_with_table() {
6990        let s = parse("ANALYZE users");
6991        let Statement::Analyze(Some(name)) = s else {
6992            panic!("expected Analyze, got {s:?}")
6993        };
6994        assert_eq!(name, "users");
6995    }
6996
6997    #[test]
6998    fn parser_recognises_analyze_with_quoted_table() {
6999        let s = parse("ANALYZE \"Mixed Case\"");
7000        let Statement::Analyze(Some(name)) = s else {
7001            panic!()
7002        };
7003        assert_eq!(name, "Mixed Case");
7004    }
7005
7006    #[test]
7007    fn parser_rejects_analyze_with_garbage_token() {
7008        let err = parse_statement("ANALYZE 42").expect_err("must error");
7009        assert!(!err.message.is_empty());
7010    }
7011
7012    #[test]
7013    fn analyze_display_roundtrips() {
7014        for sql in ["ANALYZE", "ANALYZE users"] {
7015            let s = parse(sql);
7016            let printed = s.to_string();
7017            let again = parse_statement(&printed)
7018                .unwrap_or_else(|e| panic!("re-parse failed for {printed:?}: {e}"));
7019            assert_eq!(s, again);
7020        }
7021    }
7022
7023    #[test]
7024    fn wait_for_display_roundtrips() {
7025        for sql in [
7026            "WAIT FOR WAL POSITION 12345",
7027            "WAIT FOR WAL POSITION 67890 WITH TIMEOUT 5000",
7028        ] {
7029            let s = parse(sql);
7030            let printed = s.to_string();
7031            let again = parse_statement(&printed)
7032                .unwrap_or_else(|e| panic!("re-parse failed for {printed:?}: {e}"));
7033            assert_eq!(s, again, "round-trip mismatch for {sql:?}");
7034        }
7035    }
7036
7037    #[test]
7038    fn subscription_ddl_display_roundtrips() {
7039        for sql in [
7040            "CREATE SUBSCRIPTION sub_a CONNECTION 'host=h port=20002' PUBLICATION pub_a",
7041            "CREATE SUBSCRIPTION sub_b CONNECTION 'host=h' PUBLICATION p1, p2",
7042            "DROP SUBSCRIPTION sub_a",
7043            "SHOW SUBSCRIPTIONS",
7044        ] {
7045            let s = parse(sql);
7046            let printed = s.to_string();
7047            let again = parse_statement(&printed)
7048                .unwrap_or_else(|e| panic!("re-parse failed for {printed:?}: {e}"));
7049            assert_eq!(s, again, "round-trip mismatch for {sql:?}");
7050        }
7051    }
7052
7053    #[test]
7054    fn parser_drop_dispatches_user_vs_publication() {
7055        // Pre-v6.1.2 DROP USER took the bare-ident path; v6.1.2
7056        // tokenises DROP. Both targets must still parse.
7057        let s = parse("DROP USER 'alice'");
7058        let Statement::DropUser(name) = s else {
7059            panic!("expected DropUser, got {s:?}")
7060        };
7061        assert_eq!(name, "alice");
7062        // And DROP PUBLICATION lands the new variant.
7063        let s = parse("DROP PUBLICATION p1");
7064        assert!(matches!(s, Statement::DropPublication(_)));
7065    }
7066
7067    #[test]
7068    fn publication_ddl_display_roundtrips() {
7069        // Every CREATE PUBLICATION variant must Display → parse →
7070        // same AST. v6.1.3 covers all three scope shapes.
7071        for sql in [
7072            "CREATE PUBLICATION pub_a",
7073            "CREATE PUBLICATION pub_a FOR ALL TABLES",
7074            "CREATE PUBLICATION pub_a FOR TABLE t1, t2",
7075            "CREATE PUBLICATION pub_a FOR ALL TABLES EXCEPT t1",
7076            "DROP PUBLICATION pub_a",
7077            "SHOW PUBLICATIONS",
7078        ] {
7079            let s = parse(sql);
7080            let printed = s.to_string();
7081            let again = parse_statement(&printed)
7082                .unwrap_or_else(|e| panic!("re-parse failed for {printed:?}: {e}"));
7083            assert_eq!(s, again, "round-trip mismatch for {sql:?}");
7084        }
7085    }
7086
7087    // --- v7.12.4: CREATE FUNCTION + CREATE TRIGGER + PL/pgSQL ---
7088
7089    #[test]
7090    fn create_function_returns_trigger_plpgsql_minimal() {
7091        let sql = "CREATE FUNCTION noop() RETURNS TRIGGER LANGUAGE plpgsql AS $$ BEGIN RETURN NEW; END; $$";
7092        let s = parse(sql);
7093        let Statement::CreateFunction(f) = s else {
7094            panic!("expected CreateFunction");
7095        };
7096        assert_eq!(f.name, "noop");
7097        assert!(!f.or_replace);
7098        assert!(f.args.is_empty());
7099        assert!(matches!(f.returns, FunctionReturn::Trigger));
7100        assert_eq!(f.language, "plpgsql");
7101        let FunctionBody::PlPgSql(block) = f.body else {
7102            panic!("expected PlPgSql body");
7103        };
7104        assert_eq!(block.statements.len(), 1);
7105        assert!(matches!(
7106            block.statements[0],
7107            PlPgSqlStmt::Return(ReturnTarget::New)
7108        ));
7109    }
7110
7111    #[test]
7112    fn create_function_or_replace_with_assignment() {
7113        // mailrs-shape trigger function: NEW.col := to_tsvector(...);
7114        // RETURN NEW.
7115        let sql = "CREATE OR REPLACE FUNCTION update_sv() RETURNS TRIGGER LANGUAGE plpgsql AS $$
7116BEGIN
7117  NEW.search_vector := to_tsvector('english', NEW.subject);
7118  RETURN NEW;
7119END;
7120$$";
7121        let s = parse(sql);
7122        let Statement::CreateFunction(f) = s else {
7123            panic!("expected CreateFunction");
7124        };
7125        assert!(f.or_replace);
7126        let FunctionBody::PlPgSql(block) = &f.body else {
7127            panic!("expected PlPgSql body");
7128        };
7129        assert_eq!(block.statements.len(), 2);
7130        // First statement: NEW.search_vector := to_tsvector(...)
7131        let PlPgSqlStmt::Assign { target, .. } = &block.statements[0] else {
7132            panic!("expected Assign as first stmt");
7133        };
7134        match target {
7135            AssignTarget::NewColumn(c) => assert_eq!(c, "search_vector"),
7136            other => panic!("expected NEW.col, got {other:?}"),
7137        }
7138        // Second statement: RETURN NEW
7139        assert!(matches!(
7140            block.statements[1],
7141            PlPgSqlStmt::Return(ReturnTarget::New)
7142        ));
7143    }
7144
7145    #[test]
7146    fn create_trigger_after_insert_or_update() {
7147        let sql = "CREATE TRIGGER tg AFTER INSERT OR UPDATE ON messages FOR EACH ROW EXECUTE FUNCTION update_sv()";
7148        let s = parse(sql);
7149        let Statement::CreateTrigger(t) = s else {
7150            panic!("expected CreateTrigger");
7151        };
7152        assert_eq!(t.name, "tg");
7153        assert_eq!(t.table, "messages");
7154        assert_eq!(t.timing, TriggerTiming::After);
7155        assert_eq!(t.events, vec![TriggerEvent::Insert, TriggerEvent::Update]);
7156        assert_eq!(t.for_each, TriggerForEach::Row);
7157        assert_eq!(t.function, "update_sv");
7158    }
7159
7160    #[test]
7161    fn create_trigger_before_delete_execute_procedure_alias() {
7162        // PG also accepts the legacy `EXECUTE PROCEDURE` spelling.
7163        let sql =
7164            "CREATE TRIGGER guard BEFORE DELETE ON t FOR EACH ROW EXECUTE PROCEDURE block_delete()";
7165        let s = parse(sql);
7166        let Statement::CreateTrigger(t) = s else {
7167            panic!("expected CreateTrigger");
7168        };
7169        assert_eq!(t.timing, TriggerTiming::Before);
7170        assert_eq!(t.events, vec![TriggerEvent::Delete]);
7171    }
7172
7173    #[test]
7174    fn drop_trigger_if_exists_round_trips() {
7175        // No parser support for DROP TRIGGER yet — added in v7.12.5
7176        // alongside the broader DROP …{IF EXISTS} cleanup. The
7177        // AST + Display impls are in place so we round-trip via
7178        // construction:
7179        let s = Statement::DropTrigger {
7180            name: "tg".into(),
7181            table: "messages".into(),
7182            if_exists: true,
7183        };
7184        assert_eq!(s.to_string(), "DROP TRIGGER IF EXISTS tg ON messages");
7185    }
7186
7187    #[test]
7188    fn trigger_ddl_display_roundtrips_through_parser() {
7189        // CREATE TRIGGER + its referenced CREATE FUNCTION must
7190        // Display → parse → same AST (modulo PL/pgSQL body
7191        // formatting which is parser-canonicalised).
7192        for sql in [
7193            "CREATE TRIGGER tg AFTER INSERT ON t FOR EACH ROW EXECUTE FUNCTION f()",
7194            "CREATE TRIGGER tg2 BEFORE UPDATE OR DELETE ON t FOR EACH ROW EXECUTE FUNCTION g()",
7195        ] {
7196            let s = parse(sql);
7197            let printed = s.to_string();
7198            let again = parse_statement(&printed)
7199                .unwrap_or_else(|e| panic!("re-parse failed for {printed:?}: {e}"));
7200            assert_eq!(s, again, "round-trip mismatch for {sql:?}");
7201        }
7202    }
7203}