Skip to main content

sqlglot_rust/parser/
sql_parser.rs

1use crate::ast::*;
2use crate::errors::{Result, SqlglotError};
3use crate::tokens::{Token, TokenType, Tokenizer};
4
5/// Convert a token's `quote_char` into a `QuoteStyle`.
6fn quote_style_from_char(c: char) -> QuoteStyle {
7    match c {
8        '"' => QuoteStyle::DoubleQuote,
9        '`' => QuoteStyle::Backtick,
10        '[' => QuoteStyle::Bracket,
11        _ => QuoteStyle::None,
12    }
13}
14
/// A recursive-descent SQL parser.
///
/// Supports CTEs (WITH), subqueries, UNION/INTERSECT/EXCEPT, CAST,
/// window functions (OVER), EXISTS, EXTRACT, INTERVAL, and more.
///
/// The parser owns the full token list produced by the tokenizer and walks
/// it with a single cursor (`pos`).
pub struct Parser {
    /// Tokens produced by the tokenizer for the SQL text being parsed.
    tokens: Vec<Token>,
    /// Index into `tokens` of the next token to be consumed.
    pos: usize,
    /// Whether to preserve comments during parsing.
    // NOTE(review): both constructors set this flag, but nothing in the
    // reviewed portion of the file reads it — hence the lint suppression.
    // Confirm whether it is still needed.
    #[allow(dead_code)]
    preserve_comments: bool,
    /// Accumulated comments pending attachment to the next AST node.
    pending_comments: Vec<String>,
}
28
29impl Parser {
30    /// Create a new parser from a SQL string.
31    pub fn new(sql: &str) -> Result<Self> {
32        let mut tokenizer = Tokenizer::new(sql);
33        let tokens = tokenizer.tokenize()?;
34        Ok(Self {
35            tokens,
36            pos: 0,
37            preserve_comments: false,
38            pending_comments: Vec::new(),
39        })
40    }
41
42    /// Create a new parser that preserves SQL comments in the AST.
43    pub fn new_with_comments(sql: &str) -> Result<Self> {
44        let mut tokenizer = Tokenizer::with_comments(sql);
45        let tokens = tokenizer.tokenize()?;
46        Ok(Self {
47            tokens,
48            pos: 0,
49            preserve_comments: true,
50            pending_comments: Vec::new(),
51        })
52    }
53
54    // ── Comment helpers ────────────────────────────────────────────
55
56    /// Consume any comment tokens at the current position, accumulating
57    /// their text into `pending_comments`.
58    fn collect_comments(&mut self) {
59        while self.pos < self.tokens.len() {
60            match self.tokens[self.pos].token_type {
61                TokenType::LineComment | TokenType::BlockComment => {
62                    let token = &self.tokens[self.pos];
63                    self.pending_comments.push(token.value.clone());
64                    self.pos += 1;
65                }
66                _ => break,
67            }
68        }
69    }
70
71    /// Take all pending comments, leaving the buffer empty.
72    fn take_comments(&mut self) -> Vec<String> {
73        std::mem::take(&mut self.pending_comments)
74    }
75
76    // ── Token helpers ──────────────────────────────────────────────
77
78    fn peek(&self) -> &Token {
79        &self.tokens[self.pos.min(self.tokens.len() - 1)]
80    }
81
82    fn peek_type(&self) -> &TokenType {
83        &self.peek().token_type
84    }
85
86    fn advance(&mut self) -> &Token {
87        let token = &self.tokens[self.pos.min(self.tokens.len() - 1)];
88        if self.pos < self.tokens.len() {
89            self.pos += 1;
90        }
91        token
92    }
93
94    fn expect(&mut self, expected: TokenType) -> Result<Token> {
95        let token = self.peek().clone();
96        if token.token_type == expected {
97            self.advance();
98            Ok(token)
99        } else {
100            Err(SqlglotError::ParserError {
101                message: format!(
102                    "Expected {expected:?}, got {:?} ('{}') at line {} col {}",
103                    token.token_type, token.value, token.line, token.col
104                ),
105            })
106        }
107    }
108
109    fn match_token(&mut self, expected: TokenType) -> bool {
110        if self.peek().token_type == expected {
111            self.advance();
112            true
113        } else {
114            false
115        }
116    }
117
118    /// Check if the current token's uppercased value matches a keyword string.
119    fn check_keyword(&self, keyword: &str) -> bool {
120        self.peek().value.to_uppercase() == keyword
121    }
122
123    /// Check if the token at `current + offset` matches a keyword string.
124    fn check_keyword_offset(&self, keyword: &str, offset: usize) -> bool {
125        let idx = self.pos + offset;
126        if idx < self.tokens.len() {
127            self.tokens[idx].value.to_uppercase() == keyword
128        } else {
129            false
130        }
131    }
132
133    /// Match a keyword by string value (for multi-word context-sensitive keywords).
134    fn match_keyword(&mut self, keyword: &str) -> bool {
135        if self.check_keyword(keyword) {
136            self.advance();
137            true
138        } else {
139            false
140        }
141    }
142
143    /// Expect a keyword by string value, returning an error if not found.
144    fn expect_keyword(&mut self, keyword: &str) -> Result<()> {
145        if self.check_keyword(keyword) {
146            self.advance();
147            Ok(())
148        } else {
149            let token = self.peek().clone();
150            Err(SqlglotError::ParserError {
151                message: format!(
152                    "Expected keyword '{keyword}', got '{value}' at line {line} col {col}",
153                    value = token.value,
154                    line = token.line,
155                    col = token.col
156                ),
157            })
158        }
159    }
160
161    /// Reconstruct a single token's surface representation for raw command
162    /// preservation. String literals are wrapped in their original quotes;
163    /// identifiers may carry a quote_char from the tokenizer.
164    fn token_text(token: &Token) -> String {
165        match token.token_type {
166            TokenType::String => format!("'{}'", token.value.replace('\'', "''")),
167            TokenType::Identifier if token.quote_char != '\0' => {
168                let (l, r) = match token.quote_char {
169                    '[' => ('[', ']'),
170                    c => (c, c),
171                };
172                format!("{l}{}{r}", token.value)
173            }
174            _ => token.value.clone(),
175        }
176    }
177
178    /// Join a slice of tokens with whitespace tuned for SQL — no space
179    /// before `,` `)` `;` `.`, no space after `(` or `.`.
180    fn join_tokens_for_raw(tokens: &[Token]) -> String {
181        let mut out = String::new();
182        let mut prev_no_space_after = true; // suppress leading space
183        for t in tokens {
184            let no_space_before = matches!(
185                t.token_type,
186                TokenType::Comma
187                    | TokenType::RParen
188                    | TokenType::Semicolon
189                    | TokenType::Dot
190                    | TokenType::RBracket
191            );
192            if !out.is_empty() && !prev_no_space_after && !no_space_before {
193                out.push(' ');
194            }
195            out.push_str(&Self::token_text(t));
196            prev_no_space_after = matches!(
197                t.token_type,
198                TokenType::LParen | TokenType::Dot | TokenType::LBracket
199            );
200        }
201        out
202    }
203
204    /// Consume tokens up to (but not including) the next top-level `;` or EOF,
205    /// returning the raw text of the consumed tokens with whitespace
206    /// reconstructed by [`join_tokens_for_raw`]. Honors parenthesis depth so
207    /// embedded `;` inside `(...)` does not terminate the statement.
208    fn consume_raw_to_statement_end(&mut self) -> String {
209        let start = self.pos;
210        let mut depth: i32 = 0;
211        while self.pos < self.tokens.len() {
212            let tt = &self.tokens[self.pos].token_type;
213            match tt {
214                TokenType::Eof => break,
215                TokenType::Semicolon if depth == 0 => break,
216                TokenType::LParen | TokenType::LBracket => {
217                    depth += 1;
218                    self.pos += 1;
219                }
220                TokenType::RParen | TokenType::RBracket => {
221                    // A closing paren at depth 0 belongs to an enclosing
222                    // context (e.g. CTE body, subquery) — stop without
223                    // consuming it.
224                    if depth == 0 {
225                        break;
226                    }
227                    depth -= 1;
228                    self.pos += 1;
229                }
230                _ => self.pos += 1,
231            }
232        }
233        Self::join_tokens_for_raw(&self.tokens[start..self.pos])
234    }
235
236    /// Parse a comma-separated list of raw items inside an already-opened
237    /// parenthesized context. Stops at the matching `)` and returns each item
238    /// reconstructed from tokens.
239    fn parse_parenthesized_raw_items(&mut self) -> Result<Vec<String>> {
240        let mut items = Vec::new();
241
242        // Allow empty parens for tolerance.
243        if self.match_token(TokenType::RParen) {
244            return Ok(items);
245        }
246
247        loop {
248            let start = self.pos;
249            let mut paren_depth: i32 = 0;
250            let mut bracket_depth: i32 = 0;
251
252            while self.pos < self.tokens.len() {
253                match self.peek_type() {
254                    TokenType::Eof => break,
255                    TokenType::LParen => {
256                        paren_depth += 1;
257                        self.pos += 1;
258                    }
259                    TokenType::RParen => {
260                        if paren_depth == 0 && bracket_depth == 0 {
261                            break;
262                        }
263                        if paren_depth > 0 {
264                            paren_depth -= 1;
265                        }
266                        self.pos += 1;
267                    }
268                    TokenType::LBracket => {
269                        bracket_depth += 1;
270                        self.pos += 1;
271                    }
272                    TokenType::RBracket => {
273                        if bracket_depth > 0 {
274                            bracket_depth -= 1;
275                        }
276                        self.pos += 1;
277                    }
278                    TokenType::Comma if paren_depth == 0 && bracket_depth == 0 => break,
279                    _ => self.pos += 1,
280                }
281            }
282
283            if start == self.pos {
284                let token = self.peek().clone();
285                return Err(SqlglotError::ParserError {
286                    message: format!(
287                        "Expected expression inside parenthesized list, got '{}' at line {} col {}",
288                        token.value, token.line, token.col
289                    ),
290                });
291            }
292
293            items.push(Self::join_tokens_for_raw(&self.tokens[start..self.pos]));
294
295            if self.match_token(TokenType::Comma) {
296                continue;
297            }
298
299            self.expect(TokenType::RParen)?;
300            break;
301        }
302
303        Ok(items)
304    }
305
306    /// Helper for the dispatcher: consume one verb token (already known) and
307    /// then capture the entire tail as a [`CommandStatement`].
308    fn parse_command_kind(&mut self, kind: &str) -> Result<Statement> {
309        self.advance(); // consume the verb token
310        let body = self.consume_raw_to_statement_end();
311        Ok(Statement::Command(CommandStatement {
312            comments: vec![],
313            kind: kind.to_string(),
314            body,
315        }))
316    }
317
318    /// `COMMENT ON {TABLE|COLUMN|...} <name> IS '...'` — preserved as raw.
319    /// `COMMENT` can also appear inside `CREATE TABLE` column definitions and
320    /// in other positions; only the standalone DDL form lands here because
321    /// the dispatcher peeks at the *first* token.
322    fn parse_comment_on_command(&mut self) -> Result<Statement> {
323        // Look ahead for "COMMENT ON" — if not "ON", fall back to parser error
324        // (the COMMENT token would otherwise have been consumed inside an
325        // expression / column-def parser, not at statement boundary).
326        if self.peek_offset(1).map(|t| t.value.to_uppercase()) != Some("ON".to_string()) {
327            return Err(SqlglotError::UnexpectedToken {
328                token: self.peek().clone(),
329            });
330        }
331        self.advance(); // COMMENT
332        let body = self.consume_raw_to_statement_end();
333        Ok(Statement::Command(CommandStatement {
334            comments: vec![],
335            kind: "COMMENT".to_string(),
336            body,
337        }))
338    }
339
340    /// Returns `true` when the current Identifier token is a known
341    /// statement-starting verb that we preserve verbatim.
342    fn match_command_keyword(&self) -> bool {
343        let v = self.peek().value.to_uppercase();
344        matches!(
345            v.as_str(),
346            "GO" | "DECLARE"
347                | "LOAD"
348                | "REM"
349                | "REMARK"
350                | "RESET"
351                | "PRAGMA"
352                | "VACUUM"
353                | "REINDEX"
354                | "CALL"
355                | "LOCK"
356                | "UNLOCK"
357                | "CLUSTER"
358                | "REFRESH"
359                | "CHECKPOINT"
360                | "LISTEN"
361                | "NOTIFY"
362                | "PREPARE"
363                | "EXECUTE"
364                | "DEALLOCATE"
365                | "DISCARD"
366                | "COPY"
367                | "ATTACH"
368                | "DETACH"
369                | "COMMENT"
370                | "DESCRIBE"
371                | "DESC"
372                | "OPTIMIZE"
373                | "SYSTEM"
374                | "KILL"
375                | "FLUSH"
376                | "RESTORE"
377                | "BACKUP"
378                | "EXCHANGE"
379                | "RENAME"
380                | "WATCH"
381                | "MSCK"
382                | "UNLOAD"
383                | "ASSERT"
384                | "REPAIR"
385                | "PURGE"
386                | "ABORT"
387                | "VALIDATE"
388                | "MOVE"
389                | "CLOSE"
390                | "FETCH"
391                | "REPLICATE"
392                | "START"
393                | "RAISE"
394                | "UNDROP"
395                | "EXCEPTION"
396                | "CONNECT"
397                | "DISCONNECT"
398                | "SEND"
399                | "ENABLE"
400                | "DISABLE"
401                | "REPLAY"
402                | "SYNCHRONIZE"
403                | "CHECK"
404                | "REPORT"
405                | "BIND"
406                | "UNBIND"
407                | "INCLUDE"
408                | "EXPORT"
409                | "IMPORT"
410                | "ADMIN"
411                | "SPLIT"
412                | "TRACE"
413                | "RESUME"
414                | "SUSPEND"
415                | "ROUTE"
416                | "EMIT"
417                | "FOR"
418                | "WHILE"
419                | "LOOP"
420                | "RETURN"
421                | "REPEAT"
422                | "EXIT"
423                | "LEAVE"
424                | "ITERATE"
425                | "CONTINUE"
426                | "GOTO"
427                | "RAISERROR"
428                | "PRINT"
429                | "WAITFOR"
430                | "TRUNCATE"
431                | "DO"
432                | "CONNECTION"
433                | "ELSEIF"
434                | "ELSIF"
435                | "UNTIL"
436                | "CONNECT_BY_ROOT"
437                | "APPLY"
438                | "EXEC"
439                | "OPEN"
440                | "REVERT"
441                | "DEALLOC"
442                | "GRANT"
443                | "REVOKE"
444                | "DENY"
445                | "UNSET"
446                | "USE"
447                | "PRELOAD"
448                | "RECOMPRESS"
449                | "COMPUTE"
450                | "INVALIDATE"
451                | "ANALYSE"
452                | "BOOTSTRAP"
453                | "LATCH"
454                | "UNLATCH"
455                | "SETOF"
456                | "CHECKSUM"
457                | "DELIMITER"
458                | "GET"
459                | "HELP"
460                | "BINLOG"
461                | "RELOAD"
462                | "PARSE"
463                | "BUFFER"
464                | "BUILDS"
465                | "COMPACT"
466                | "FREEZE"
467                | "UNFREEZE"
468                | "BORROW"
469                | "UNLISTEN"
470                | "REPACK"
471                | "RESIGNAL"
472                | "SIGNAL"
473                | "THROW"
474                | "DBCC"
475                | "SUMMARIZE"
476                | "BATCH"
477        )
478    }
479
480    /// Variant of [`parse_command_kind`] for verbs that arrive as an
481    /// Identifier token (no dedicated TokenType).
482    fn parse_command_from_identifier(&mut self) -> Result<Statement> {
483        let verb = self.peek().value.to_uppercase();
484        self.advance();
485        let body = self.consume_raw_to_statement_end();
486        Ok(Statement::Command(CommandStatement {
487            comments: vec![],
488            kind: verb,
489            body,
490        }))
491    }
492
493    /// Look at the token `offset` positions ahead of the current one,
494    /// returning `None` if past EOF.
495    fn peek_offset(&self, offset: usize) -> Option<&Token> {
496        self.tokens.get(self.pos + offset)
497    }
498
499    /// Look ahead past a run of `(` tokens to see if a `SELECT`, `WITH`, or
500    /// `EXPLAIN` keyword starts inside. Used by the subquery parser to detect
501    /// `((SELECT …))` and similar shapes.
502    fn peek_starts_subquery_through_parens(&self) -> bool {
503        let mut i = self.pos;
504        while i < self.tokens.len() && self.tokens[i].token_type == TokenType::LParen {
505            i += 1;
506        }
507        i < self.tokens.len()
508            && matches!(
509                self.tokens[i].token_type,
510                TokenType::Select | TokenType::With | TokenType::Explain | TokenType::From
511            )
512    }
513
514    /// Helper to check if current token is an identifier or keyword that can serve as a name.
515    fn is_name_token(&self) -> bool {
516        matches!(
517            self.peek_type(),
518            TokenType::Identifier
519                | TokenType::All
520                | TokenType::Year
521                | TokenType::Month
522                | TokenType::Day
523                | TokenType::Hour
524                | TokenType::Minute
525                | TokenType::Second
526                | TokenType::Interval
527                | TokenType::Key
528                | TokenType::Filter
529                | TokenType::First
530                | TokenType::Next
531                | TokenType::Only
532                | TokenType::Respect
533                | TokenType::Epoch
534                | TokenType::Schema
535                | TokenType::Database
536                | TokenType::View
537                | TokenType::Collate
538                | TokenType::Comment
539                | TokenType::Left
540                | TokenType::Right
541                | TokenType::Replace
542                | TokenType::Cube
543                | TokenType::Rollup
544                | TokenType::Grouping
545                | TokenType::Pivot
546                | TokenType::Unpivot
547                | TokenType::Sets
548                | TokenType::Range
549                | TokenType::Conflict
550                | TokenType::Unnest
551                | TokenType::Text
552                | TokenType::Show
553                | TokenType::Describe
554                | TokenType::Analyze
555                | TokenType::Index
556                | TokenType::Cast
557                | TokenType::Group
558                | TokenType::Order
559                | TokenType::Explain
560                | TokenType::Table
561                | TokenType::Offset
562                | TokenType::Merge
563                | TokenType::Nulls
564                | TokenType::Temp
565                | TokenType::Temporary
566                | TokenType::Rows
567                | TokenType::Partition
568                | TokenType::Any
569                | TokenType::Escape
570        )
571    }
572
573    /// Consume a name token (identifier or unreserved keyword used as identifier).
574    fn expect_name(&mut self) -> Result<String> {
575        let (name, _) = self.expect_name_with_quote()?;
576        Ok(name)
577    }
578
579    /// If the current token is `@` / `:` / `Parameter` immediately followed by
580    /// a name token (no whitespace tracking — they are adjacent in the token
581    /// stream), consume both and return them as a combined alias name.
582    /// Used to accept auto-generated aliases like `AS @rpm` or `AS :minutes`
583    /// without changing parameter-marker handling elsewhere.
584    fn try_parse_prefixed_alias(&mut self) -> Result<Option<(String, QuoteStyle)>> {
585        let prefix = match self.peek_type() {
586            TokenType::AtSign => '@',
587            TokenType::Colon => ':',
588            // Standalone Parameter token (`$` not absorbed into an identifier).
589            TokenType::Parameter if self.peek().value == "$" => '$',
590            _ => return Ok(None),
591        };
592        let next = match self.peek_offset(1) {
593            Some(t) => t,
594            None => return Ok(None),
595        };
596        let is_name_like = matches!(
597            next.token_type,
598            TokenType::Identifier
599                | TokenType::Year
600                | TokenType::Month
601                | TokenType::Day
602                | TokenType::Hour
603                | TokenType::Minute
604                | TokenType::Second
605                | TokenType::Key
606                | TokenType::Filter
607                | TokenType::First
608                | TokenType::Next
609                | TokenType::Only
610                | TokenType::Schema
611                | TokenType::Database
612                | TokenType::View
613                | TokenType::Collate
614                | TokenType::Comment
615                | TokenType::Replace
616                | TokenType::Text
617                | TokenType::Show
618                | TokenType::Describe
619                | TokenType::Analyze
620                | TokenType::Index
621                | TokenType::Cast
622                | TokenType::Group
623                | TokenType::Order
624                | TokenType::Range
625        );
626        if !is_name_like {
627            return Ok(None);
628        }
629        self.advance(); // consume prefix
630        let name_tok = self.advance().clone();
631        let mut combined = String::with_capacity(name_tok.value.len() + 1);
632        combined.push(prefix);
633        combined.push_str(&name_tok.value);
634        Ok(Some((combined, quote_style_from_char(name_tok.quote_char))))
635    }
636
637    /// Like `expect_name` but also returns the quote style of the token.
638    fn expect_name_with_quote(&mut self) -> Result<(String, QuoteStyle)> {
639        if self.is_name_token() {
640            let token = self.advance().clone();
641            let qs = quote_style_from_char(token.quote_char);
642            let mut name = token.value.clone();
643            // Append trailing `${...}` template variables so identifiers
644            // like `t1_${type}` round-trip as a single name token.
645            while matches!(self.peek_type(), TokenType::Parameter)
646                && self.peek().value.starts_with("${")
647            {
648                name.push_str(&self.advance().value.clone());
649            }
650            return Ok((name, qs));
651        }
652        // Leading `${...}` template variable as a name (rare).
653        if matches!(self.peek_type(), TokenType::Parameter) && self.peek().value.starts_with("${") {
654            let mut name = self.advance().value.clone();
655            // Only fuse plain identifiers or further `${...}` segments —
656            // never reserved keywords (Order, By, etc.) even though those
657            // tokenize as name-like, or the template would swallow the
658            // surrounding clause.
659            while matches!(self.peek_type(), TokenType::Identifier)
660                || (matches!(self.peek_type(), TokenType::Parameter)
661                    && self.peek().value.starts_with("${"))
662            {
663                name.push_str(&self.advance().value.clone());
664            }
665            return Ok((name, QuoteStyle::None));
666        }
667        // ClickHouse typed placeholder used as an identifier:
668        // `{db:Identifier}`, `{tbl:Identifier}`. Accept anywhere a name is
669        // expected so `FROM {db:Identifier}.t` and friends parse.
670        if matches!(self.peek_type(), TokenType::Parameter) && self.peek().value.starts_with('{') {
671            let name = self.advance().value.clone();
672            return Ok((name, QuoteStyle::None));
673        }
674        // Also accept any keyword-like identifier
675        let token = self.peek().clone();
676        if matches!(
677            token.token_type,
678            TokenType::Identifier
679                | TokenType::Int
680                | TokenType::Integer
681                | TokenType::BigInt
682                | TokenType::SmallInt
683                | TokenType::TinyInt
684                | TokenType::Float
685                | TokenType::Double
686                | TokenType::Decimal
687                | TokenType::Numeric
688                | TokenType::Real
689                | TokenType::Varchar
690                | TokenType::Char
691                | TokenType::Text
692                | TokenType::Boolean
693                | TokenType::Date
694                | TokenType::Timestamp
695                | TokenType::TimestampTz
696                | TokenType::Time
697                | TokenType::Interval
698                | TokenType::Blob
699                | TokenType::Bytea
700                | TokenType::Json
701                | TokenType::Jsonb
702                | TokenType::Uuid
703                | TokenType::Array
704                | TokenType::Map
705                | TokenType::Struct
706                | TokenType::Offset
707                | TokenType::Limit
708                | TokenType::Default
709                | TokenType::Begin
710                | TokenType::Recursive
711                | TokenType::Ignore
712                | TokenType::Pivot
713                | TokenType::Unpivot
714                | TokenType::Rows
715                | TokenType::Range
716                | TokenType::Values
717        ) {
718            let t = self.advance().clone();
719            let qs = quote_style_from_char(t.quote_char);
720            Ok((t.value.clone(), qs))
721        } else {
722            Err(SqlglotError::ParserError {
723                message: format!(
724                    "Expected identifier, got {:?} ('{}') at line {} col {}",
725                    token.token_type, token.value, token.line, token.col
726                ),
727            })
728        }
729    }
730
731    // ── Top-level parsing ──────────────────────────────────────────
732
733    /// Parse a single SQL statement.
734    pub fn parse_statement(&mut self) -> Result<Statement> {
735        self.collect_comments();
736        let stmt = self.parse_statement_inner()?;
737        // ClickHouse trailing `WITH TOTALS` / `WITH TIES` / `WITH ROLLUP` /
738        // `WITH CUBE` postfix at the end of a SELECT — these are query-level
739        // modifiers we don't model; swallow them so the statement closes.
740        if matches!(self.peek_type(), TokenType::With) {
741            let after = self.peek_offset(1);
742            let is_postfix_modifier = after
743                .map(|t| {
744                    matches!(
745                        t.token_type,
746                        TokenType::Identifier | TokenType::Cube | TokenType::Rollup
747                    ) && matches!(
748                        t.value.to_uppercase().as_str(),
749                        "TOTALS" | "TIES" | "FILL" | "ROLLUP" | "CUBE"
750                    )
751                })
752                .unwrap_or(false);
753            if is_postfix_modifier {
754                self.advance();
755                self.advance();
756                // Swallow any chained option words up to `;`/EOF/FORMAT/SETTINGS.
757                while !matches!(self.peek_type(), TokenType::Semicolon | TokenType::Eof) {
758                    if self.is_name_token()
759                        && matches!(
760                            self.peek().value.to_uppercase().as_str(),
761                            "SETTINGS" | "FORMAT"
762                        )
763                    {
764                        break;
765                    }
766                    self.advance();
767                }
768            }
769        }
770        // ClickHouse trailing `SETTINGS k=v, k=v` clause / `FORMAT name`
771        // (statement-level). Swallow up to the next `;` or EOF.
772        if self.is_name_token()
773            && matches!(
774                self.peek().value.to_uppercase().as_str(),
775                "SETTINGS" | "FORMAT"
776            )
777        {
778            while !matches!(self.peek_type(), TokenType::Semicolon | TokenType::Eof) {
779                self.advance();
780            }
781        }
782        // BigQuery pipe-syntax: `<query> |> WHERE … |> AGGREGATE … |> …`.
783        // The `|>` operator chains query stages. We don't model them; swallow
784        // the entire chain to end of statement so the leading query stands.
785        if self.peek_type() == &TokenType::BitwiseOr
786            && self
787                .peek_offset(1)
788                .map(|t| matches!(t.token_type, TokenType::Gt))
789                .unwrap_or(false)
790        {
791            while !matches!(self.peek_type(), TokenType::Semicolon | TokenType::Eof) {
792                self.advance();
793            }
794        }
795        // Consume trailing semicolons
796        while self.match_token(TokenType::Semicolon) {}
797        Ok(stmt)
798    }
799
800    fn parse_statement_inner(&mut self) -> Result<Statement> {
801        self.collect_comments();
802        let comments = self.take_comments();
803        // MySQL / PSM labeled block: `mylabel: BEGIN … END mylabel`.
804        // Swallow the leading `<name>:` so the block dispatches normally.
805        if self.is_name_token()
806            && matches!(
807                self.peek_offset(1).map(|t| &t.token_type),
808                Some(TokenType::Colon)
809            )
810        {
811            let saved = self.pos;
812            self.advance();
813            self.advance();
814            // Only treat as a label if a known block keyword follows;
815            // otherwise rewind so we don't misinterpret `alias: type`.
816            let is_block = matches!(
817                self.peek_type(),
818                TokenType::Begin | TokenType::If | TokenType::Case
819            ) || self.check_keyword("WHILE")
820                || self.check_keyword("LOOP")
821                || self.check_keyword("FOR")
822                || self.check_keyword("REPEAT");
823            if !is_block {
824                self.pos = saved;
825            }
826        }
827        let mut stmt = match self.peek_type() {
828            TokenType::With => self.parse_with_statement(),
829            TokenType::Select => {
830                let select = self.parse_select_body(vec![])?;
831                self.maybe_parse_set_operation(Statement::Select(select))
832            }
833            TokenType::LParen => {
834                // Could be a parenthesized SELECT / VALUES / TABLE form.
835                let saved_pos = self.pos;
836                self.advance(); // consume '('
837                if matches!(
838                    self.peek_type(),
839                    TokenType::Select
840                        | TokenType::With
841                        | TokenType::From
842                        | TokenType::Values
843                        | TokenType::Table
844                        | TokenType::LParen
845                ) {
846                    let inner = self.parse_statement_inner()?;
847                    self.expect(TokenType::RParen)?;
848                    self.maybe_parse_set_operation(inner)
849                } else {
850                    self.pos = saved_pos;
851                    Err(SqlglotError::ParserError {
852                        message: "Expected statement".into(),
853                    })
854                }
855            }
856            TokenType::Insert => self.parse_insert().map(Statement::Insert),
857            TokenType::Replace => self.parse_insert().map(Statement::Insert),
858            TokenType::Update => self.parse_update().map(Statement::Update),
859            TokenType::Delete => self.parse_delete().map(Statement::Delete),
860            TokenType::Merge => self.parse_merge().map(Statement::Merge),
861            TokenType::Create => self.parse_create_or_command(),
862            TokenType::Drop => self.parse_drop(),
863            TokenType::Alter => self.parse_alter_or_command(),
864            TokenType::Truncate => {
865                let saved = self.pos;
866                match self.parse_truncate() {
867                    Ok(t) => {
868                        // Tolerate Oracle-flavored trailing modifiers on
869                        // TRUNCATE (PURGE, DROP STORAGE, REUSE STORAGE,
870                        // KEEP …, CASCADE, etc.) by swallowing all trailing
871                        // tokens up to the statement boundary.
872                        while !matches!(self.peek_type(), TokenType::Eof | TokenType::Semicolon) {
873                            self.advance();
874                        }
875                        Ok(Statement::Truncate(t))
876                    }
877                    Err(_) => {
878                        self.pos = saved;
879                        self.parse_command_kind("TRUNCATE")
880                    }
881                }
882            }
883            TokenType::Begin | TokenType::Commit | TokenType::Rollback | TokenType::Savepoint => {
884                // PL/pgSQL / MySQL stored-procedure block: `BEGIN <stmt> …
885                // END`. If `BEGIN` is followed by anything that isn't an
886                // obvious transaction modifier, capture the whole block as
887                // a command so the surrounding parse completes.
888                if matches!(self.peek_type(), TokenType::Begin) {
889                    let next = self.peek_offset(1).map(|t| &t.token_type);
890                    let is_psm_block = matches!(
891                        next,
892                        Some(TokenType::Identifier)
893                            | Some(TokenType::If)
894                            | Some(TokenType::Case)
895                            | Some(TokenType::Select)
896                            | Some(TokenType::Insert)
897                            | Some(TokenType::Update)
898                            | Some(TokenType::Delete)
899                    );
900                    if is_psm_block {
901                        return self.parse_command_kind("BEGIN");
902                    }
903                }
904                self.parse_transaction().map(Statement::Transaction)
905            }
906            TokenType::Explain => self.parse_explain().map(Statement::Explain),
907            TokenType::Use => self.parse_use().map(Statement::Use),
908            // Raw-tail command statements: SET / SHOW / DESCRIBE / ANALYZE
909            // (when standalone, not as part of EXPLAIN) / COMMENT ON ... .
910            // We preserve the verb plus the entire remainder up to `;` or EOF
911            // so the AST round-trips even though we don't model these in detail.
912            TokenType::Set => self.parse_command_kind("SET"),
913            TokenType::Show => self.parse_command_kind("SHOW"),
914            TokenType::Describe => self.parse_command_kind("DESCRIBE"),
915            // `DESC <name>` is a Hive/MySQL synonym for DESCRIBE. The lone
916            // `Desc` token also appears mid-statement (ORDER BY x DESC), so
917            // we only treat it as a statement when at the very start.
918            TokenType::Desc => self.parse_command_kind("DESC"),
919            // Hive multi-insert: `FROM tbl INSERT OVERWRITE TABLE x SELECT ...`
920            // [INSERT OVERWRITE TABLE y SELECT ...]+. Capture the whole thing
921            // as a raw command body so it round-trips.
922            TokenType::From => {
923                // Hive `FROM tbl INSERT OVERWRITE TABLE x …` / `FROM tbl
924                // SELECT cols`. DuckDB implicit SELECT: `FROM tbl …`. Try
925                // the structured DuckDB FROM-first parse only when there is
926                // no INSERT/SELECT marker at the top paren level; otherwise
927                // capture as a raw command so it round-trips. Fall back to
928                // command capture on parse failure as well.
929                let mut i = self.pos + 1;
930                let mut depth = 0i32;
931                let mut hive = false;
932                while i < self.tokens.len() {
933                    match &self.tokens[i].token_type {
934                        TokenType::Eof | TokenType::Semicolon => break,
935                        TokenType::LParen => depth += 1,
936                        TokenType::RParen => {
937                            if depth == 0 {
938                                break;
939                            }
940                            depth -= 1;
941                        }
942                        TokenType::Insert | TokenType::Select if depth == 0 => {
943                            hive = true;
944                            break;
945                        }
946                        _ => {}
947                    }
948                    i += 1;
949                }
950                if hive {
951                    self.parse_command_kind("FROM")
952                } else {
953                    let saved_from = self.pos;
954                    match self.parse_select_body(vec![]) {
955                        Ok(select) => self.maybe_parse_set_operation(Statement::Select(select)),
956                        Err(_) => {
957                            self.pos = saved_from;
958                            self.parse_command_kind("FROM")
959                        }
960                    }
961                }
962            }
963            TokenType::Analyze => self.parse_command_kind("ANALYZE"),
964            TokenType::Check => self.parse_command_kind("CHECK"),
965            TokenType::Comment => self.parse_comment_on_command(),
966            TokenType::Grant => self.parse_command_kind("GRANT"),
967            TokenType::Revoke => self.parse_command_kind("REVOKE"),
968            // Procedural / control-flow statements (Spark, MySQL stored
969            // procs, PL/SQL, T-SQL): IF / FOR / WHILE / LOOP / CASE blocks
970            // and the matching ELSE / END / WHEN tokens at statement start.
971            // Capture verbatim so the AST round-trips.
972            TokenType::If => self.parse_command_kind("IF"),
973            TokenType::Else => self.parse_command_kind("ELSE"),
974            TokenType::End => self.parse_command_kind("END"),
975            TokenType::Case => self.parse_command_kind("CASE"),
976            TokenType::When => self.parse_command_kind("WHEN"),
977            TokenType::Then => self.parse_command_kind("THEN"),
978            TokenType::Do => self.parse_command_kind("DO"),
979            // Spark: `TABLE name` and `TABLE name |> …` are SELECT-equivalent
980            // shorthand. Capture verbatim so the AST round-trips.
981            TokenType::Table => self.parse_command_kind("TABLE"),
982            TokenType::Values => self.parse_command_kind("VALUES"),
983            // DuckDB SQL-shorthand: `PIVOT tbl ON col USING agg(...)` and
984            // `UNPIVOT tbl ON col INTO ...`. Preserve verbatim.
985            TokenType::Pivot => self.parse_command_kind("PIVOT"),
986            TokenType::Unpivot => self.parse_command_kind("UNPIVOT"),
987            // PG cursor verbs: FETCH, MOVE, CLOSE.
988            TokenType::Fetch => self.parse_command_kind("FETCH"),
989            // Vendor-specific verbs that tokenize as plain identifiers:
990            //   GO (T-SQL batch separator), DECLARE (T-SQL/PL-pgSQL),
991            //   LOAD (PG / MySQL extensions), REM / REMARK (SQL*Plus),
992            //   RESET / PRAGMA / VACUUM / REINDEX (PG / SQLite), CALL (PSM).
993            TokenType::Identifier if self.match_command_keyword() => {
994                self.parse_command_from_identifier()
995            }
996            // PL/pgSQL / MySQL stored-procedure assignment `var := expr` or
997            // `var = expr` at statement position. Preserve verbatim.
998            TokenType::Identifier
999                if matches!(
1000                    self.peek_offset(1).map(|t| &t.token_type),
1001                    Some(TokenType::Colon)
1002                ) && matches!(
1003                    self.peek_offset(2).map(|t| &t.token_type),
1004                    Some(TokenType::Eq)
1005                ) =>
1006            {
1007                self.parse_command_kind("ASSIGN")
1008            }
1009            // PL/SQL / PL/pgSQL variable declaration at top level:
1010            //   `name TYPE [:= default]`. Some corpora split DECLARE blocks
1011            //   into individual lines; treat these as opaque commands.
1012            //   Heuristic: <identifier> followed by either a data-type
1013            //   token, or an identifier that looks type-like (uppercase
1014            //   keyword such as NUMBER/VARCHAR2/BOOLEAN/PLS_INTEGER/etc.).
1015            TokenType::Identifier
1016                if self
1017                    .peek_offset(1)
1018                    .map(|t| {
1019                        self.is_data_type_token_kind(&t.token_type)
1020                            || (matches!(t.token_type, TokenType::Identifier)
1021                                && matches!(
1022                                    t.value.to_uppercase().as_str(),
1023                                    "NUMBER"
1024                                        | "VARCHAR2"
1025                                        | "NVARCHAR2"
1026                                        | "PLS_INTEGER"
1027                                        | "BINARY_INTEGER"
1028                                        | "ROWID"
1029                                        | "UROWID"
1030                                        | "CLOB"
1031                                        | "NCLOB"
1032                                        | "BFILE"
1033                                        | "LONG"
1034                                        | "RAW"
1035                                        | "XMLTYPE"
1036                                        | "RECORD"
1037                                ))
1038                            || matches!(t.token_type, TokenType::Percent | TokenType::Percent2)
1039                    })
1040                    .unwrap_or(false)
1041                    && self
1042                        .peek_offset(2)
1043                        .map(|t| {
1044                            // Confirm declaration shape: trailing `:=`,
1045                            // `%TYPE`/`%ROWTYPE`, semicolon, EOF, or
1046                            // `(precision)` parenthesised type modifier.
1047                            matches!(
1048                                t.token_type,
1049                                TokenType::Colon
1050                                    | TokenType::Semicolon
1051                                    | TokenType::Eof
1052                                    | TokenType::Percent
1053                                    | TokenType::Percent2
1054                                    | TokenType::LParen
1055                            ) || matches!(t.token_type, TokenType::Identifier)
1056                                && matches!(
1057                                    t.value.to_uppercase().as_str(),
1058                                    "NOT" | "DEFAULT" | "CONSTANT"
1059                                )
1060                        })
1061                        .unwrap_or(true) =>
1062            {
1063                self.parse_command_kind("PLSQL_DECL")
1064            }
1065            _ => Err(SqlglotError::UnexpectedToken {
1066                token: self.peek().clone(),
1067            }),
1068        }?;
1069        if !comments.is_empty() {
1070            attach_comments_to_statement(&mut stmt, comments);
1071        }
1072        Ok(stmt)
1073    }
1074
1075    /// Parse multiple statements separated by semicolons.
1076    pub fn parse_statements(&mut self) -> Result<Vec<Statement>> {
1077        let mut stmts = Vec::new();
1078        while !matches!(self.peek_type(), TokenType::Eof) {
1079            while self.match_token(TokenType::Semicolon) {}
1080            if matches!(self.peek_type(), TokenType::Eof) {
1081                break;
1082            }
1083            stmts.push(self.parse_statement()?);
1084            // ClickHouse trailing `FORMAT <name>` after a statement is a
1085            // client-side output directive, not part of the AST. Swallow
1086            // it (and any whitespace-separated payload up to the next
1087            // semicolon / EOF) so the statement still parses.
1088            if self.peek().value.eq_ignore_ascii_case("FORMAT") {
1089                let saved = self.pos;
1090                self.advance();
1091                if self.is_name_token() {
1092                    self.advance();
1093                    while !matches!(self.peek_type(), TokenType::Eof | TokenType::Semicolon) {
1094                        self.advance();
1095                    }
1096                } else {
1097                    self.pos = saved;
1098                }
1099            }
1100        }
1101        Ok(stmts)
1102    }
1103
1104    // ── WITH / CTE parsing ─────────────────────────────────────────
1105
1106    fn parse_with_statement(&mut self) -> Result<Statement> {
1107        self.expect(TokenType::With)?;
1108        let recursive = self.match_token(TokenType::Recursive);
1109
1110        // T-SQL `WITH XMLNAMESPACES ('uri' AS prefix [, ...]) <stmt>`. The
1111        // XML namespaces are not modeled in the AST; swallow the keyword
1112        // and its parenthesized binding list opaquely so the surrounding
1113        // SELECT / INSERT / UPDATE / DELETE / MERGE parses cleanly.
1114        if self.is_name_token() && self.peek().value.eq_ignore_ascii_case("XMLNAMESPACES") {
1115            self.advance(); // XMLNAMESPACES
1116            if self.match_token(TokenType::LParen) {
1117                let mut depth = 1_i32;
1118                while depth > 0 && !matches!(self.peek_type(), TokenType::Eof) {
1119                    match self.peek_type() {
1120                        TokenType::LParen => depth += 1,
1121                        TokenType::RParen => depth -= 1,
1122                        _ => {}
1123                    }
1124                    self.advance();
1125                }
1126            }
1127            return self.parse_with_body(vec![]);
1128        }
1129
1130        // ClickHouse scalar-binding form: `WITH (expr) AS name [, ...] SELECT …`
1131        // (and the symmetric `WITH expr AS name`). Detect by peeking for a
1132        // `<expr> AS <name>` pattern rather than the canonical `<name> AS
1133        // (select …)`. We swallow these bindings — they aren't modeled as
1134        // CTEs — then fall through to the main query.
1135        if self.is_clickhouse_scalar_with() {
1136            loop {
1137                let _ = self.parse_expr()?;
1138                self.expect(TokenType::As)?;
1139                // The binding name may use a data-type keyword (`Uuid`,
1140                // `Text`, etc.) — accept any single token that isn't a
1141                // structural delimiter so the loop advances.
1142                if self.is_name_token() || self.is_data_type_token() {
1143                    self.advance();
1144                } else if !matches!(
1145                    self.peek_type(),
1146                    TokenType::Comma
1147                        | TokenType::Eof
1148                        | TokenType::Semicolon
1149                        | TokenType::Select
1150                        | TokenType::Insert
1151                        | TokenType::Update
1152                        | TokenType::Delete
1153                        | TokenType::Merge
1154                ) {
1155                    self.advance();
1156                }
1157                if !self.match_token(TokenType::Comma) {
1158                    break;
1159                }
1160                // The next binding might still be `name AS (select …)`; if so,
1161                // fall back to the canonical CTE parser for the remainder.
1162                if !self.is_clickhouse_scalar_with() {
1163                    let mut ctes = vec![self.parse_cte(recursive)?];
1164                    while self.match_token(TokenType::Comma) {
1165                        ctes.push(self.parse_cte(recursive)?);
1166                    }
1167                    return self.parse_with_body(ctes);
1168                }
1169            }
1170            return self.parse_with_body(vec![]);
1171        }
1172
1173        let mut ctes = vec![self.parse_cte(recursive)?];
1174        while self.match_token(TokenType::Comma) {
1175            ctes.push(self.parse_cte(recursive)?);
1176        }
1177        // PostgreSQL recursive-query SEARCH / CYCLE clauses appear between
1178        // the last CTE and the main query body. Swallow them opaquely.
1179        // Forms:
1180        //   SEARCH { DEPTH | BREADTH } FIRST BY <col_list> SET <col>
1181        //   CYCLE <col_list> SET <col> [TO <val> DEFAULT <val>] USING <col>
1182        loop {
1183            let saved = self.pos;
1184            if self.match_keyword("SEARCH") {
1185                let _ = self.match_keyword("DEPTH") || self.match_keyword("BREADTH");
1186                let _ = self.match_keyword("FIRST");
1187                let _ = self.match_token(TokenType::By);
1188                // Swallow tokens until SET or end-of-search clause.
1189                while !matches!(self.peek_type(), TokenType::Eof | TokenType::Semicolon)
1190                    && !self.check_keyword("SET")
1191                {
1192                    self.advance();
1193                }
1194                if self.match_keyword("SET") {
1195                    let _ = self.is_name_token() && {
1196                        self.advance();
1197                        true
1198                    };
1199                }
1200                continue;
1201            }
1202            if self.check_keyword("CYCLE") {
1203                self.advance();
1204                while !matches!(
1205                    self.peek_type(),
1206                    TokenType::Select
1207                        | TokenType::Insert
1208                        | TokenType::Update
1209                        | TokenType::Delete
1210                        | TokenType::Merge
1211                        | TokenType::With
1212                        | TokenType::Eof
1213                        | TokenType::Semicolon
1214                ) {
1215                    self.advance();
1216                }
1217                continue;
1218            }
1219            self.pos = saved;
1220            break;
1221        }
1222        self.parse_with_body(ctes)
1223    }
1224
1225    /// Returns true if the current token sequence looks like a ClickHouse
1226    /// scalar `WITH expr AS name` rather than a canonical `name AS (select …)`
1227    /// CTE binding. Used by [`parse_with_statement`] to switch parsing modes.
1228    fn is_clickhouse_scalar_with(&self) -> bool {
1229        // Canonical CTE binding starts with `<name>` then either `(` (column
1230        // list) or `AS`. Anything else — a parenthesized expression, a number,
1231        // a string, a function call, an operator — must be the scalar form.
1232        match self.peek_type() {
1233            TokenType::LParen => true,
1234            TokenType::LBracket => true,
1235            TokenType::Number | TokenType::String | TokenType::HexString => true,
1236            t if matches!(t, TokenType::Minus | TokenType::Plus) => true,
1237            _ => {
1238                // Plain identifier followed by anything other than `(` or `AS`
1239                // also indicates the scalar form (e.g. `WITH x + 1 AS y`).
1240                if self.is_name_token() {
1241                    let next = self.peek_offset(1).map(|t| &t.token_type);
1242                    match next {
1243                        Some(TokenType::LParen) => {
1244                            // `name(...)` is canonical column-list form only
1245                            // if the body is a `name [, name]*` followed by
1246                            // `) AS`. Otherwise (function call like
1247                            // `arrayJoin([...])`) it's the scalar form.
1248                            !self.parens_are_name_list_then_as(1)
1249                        }
1250                        Some(TokenType::As) => false,
1251                        _ => true,
1252                    }
1253                } else {
1254                    false
1255                }
1256            }
1257        }
1258    }
1259
1260    /// Starting at `tokens[self.pos + offset]` (which must be `(`), check
1261    /// whether the body is a comma-separated identifier list followed by
1262    /// `)` and then `AS` — the shape of a CTE column-list binding.
1263    fn parens_are_name_list_then_as(&self, offset: usize) -> bool {
1264        let mut i = self.pos + offset;
1265        if self.tokens.get(i).map(|t| &t.token_type) != Some(&TokenType::LParen) {
1266            return false;
1267        }
1268        i += 1;
1269        loop {
1270            // Accept any name-like token in the column list, not just plain
1271            // identifiers — DuckDB CTEs frequently use unreserved keywords
1272            // like `key`, `value`, `order`, `range` as column names.
1273            let is_name_like = matches!(
1274                self.tokens.get(i).map(|t| &t.token_type),
1275                Some(TokenType::Identifier)
1276                    | Some(TokenType::Key)
1277                    | Some(TokenType::Year)
1278                    | Some(TokenType::Month)
1279                    | Some(TokenType::Day)
1280                    | Some(TokenType::Hour)
1281                    | Some(TokenType::Minute)
1282                    | Some(TokenType::Second)
1283                    | Some(TokenType::Filter)
1284                    | Some(TokenType::First)
1285                    | Some(TokenType::Next)
1286                    | Some(TokenType::Only)
1287                    | Some(TokenType::Schema)
1288                    | Some(TokenType::Database)
1289                    | Some(TokenType::View)
1290                    | Some(TokenType::Collate)
1291                    | Some(TokenType::Comment)
1292                    | Some(TokenType::Replace)
1293                    | Some(TokenType::Text)
1294                    | Some(TokenType::Show)
1295                    | Some(TokenType::Describe)
1296                    | Some(TokenType::Analyze)
1297                    | Some(TokenType::Index)
1298                    | Some(TokenType::Cast)
1299                    | Some(TokenType::Group)
1300                    | Some(TokenType::Order)
1301                    | Some(TokenType::Range)
1302                    | Some(TokenType::Partition)
1303                    | Some(TokenType::Rows)
1304                    | Some(TokenType::Table)
1305                    | Some(TokenType::Offset)
1306                    | Some(TokenType::Temp)
1307                    | Some(TokenType::Temporary)
1308                    | Some(TokenType::Nulls)
1309                    | Some(TokenType::Conflict)
1310                    | Some(TokenType::Unnest)
1311                    | Some(TokenType::Explain)
1312                    | Some(TokenType::Merge)
1313                    | Some(TokenType::Any)
1314                    | Some(TokenType::Escape)
1315            );
1316            if is_name_like {
1317                i += 1;
1318            } else {
1319                return false;
1320            }
1321            match self.tokens.get(i).map(|t| &t.token_type) {
1322                Some(TokenType::Comma) => i += 1,
1323                Some(TokenType::RParen) => {
1324                    i += 1;
1325                    // DuckDB recursive cycle clause: `(cols) USING KEY (...)
1326                    // AS (...)`. Treat the cycle keyword as a sign this is a
1327                    // canonical CTE binding, not a ClickHouse scalar.
1328                    if self.tokens.get(i).map(|t| t.value.to_uppercase())
1329                        == Some("USING".to_string())
1330                    {
1331                        return true;
1332                    }
1333                    if self.tokens.get(i).map(|t| &t.token_type) != Some(&TokenType::As) {
1334                        return false;
1335                    }
1336                    // Canonical form requires the body after `AS` to be
1337                    // a parenthesized SELECT (or `[NOT] MATERIALIZED (…)`
1338                    // for DuckDB / PostgreSQL). If it isn't, this is the
1339                    // ClickHouse scalar form.
1340                    i += 1;
1341                    let after_as = self.tokens.get(i).map(|t| &t.token_type);
1342                    if after_as == Some(&TokenType::LParen) {
1343                        return true;
1344                    }
1345                    let after_as_value = self.tokens.get(i).map(|t| t.value.as_str());
1346                    if matches!(
1347                        after_as_value,
1348                        Some(v) if v.eq_ignore_ascii_case("MATERIALIZED")
1349                            || v.eq_ignore_ascii_case("NOT")
1350                    ) {
1351                        return true;
1352                    }
1353                    return false;
1354                }
1355                _ => return false,
1356            }
1357        }
1358    }
1359
1360    fn parse_with_body(&mut self, ctes: Vec<Cte>) -> Result<Statement> {
1361        match self.peek_type() {
1362            TokenType::Select => {
1363                let select = self.parse_select_body(ctes)?;
1364                self.maybe_parse_set_operation(Statement::Select(select))
1365            }
1366            // DuckDB `WITH x AS (...) FROM tbl SELECT cols` (FROM-first form).
1367            // We rely on parse_select_body's existing FROM-first tolerance.
1368            TokenType::From => {
1369                let select = self.parse_select_body(ctes)?;
1370                self.maybe_parse_set_operation(Statement::Select(select))
1371            }
1372            // PostgreSQL / DuckDB `WITH x AS (...) TABLE tbl` body — equivalent
1373            // to `SELECT * FROM tbl`. Swallow the table reference and trailing
1374            // clauses opaquely and emit a stub Select so the surrounding
1375            // statement parses cleanly.
1376            // DuckDB / PostgreSQL `TABLE tbl` as the body of a WITH query —
1377            // shorthand for `SELECT * FROM tbl`. Swallow the trailing tokens
1378            // opaquely and emit a stub Select so the surrounding parse runs.
1379            TokenType::Table => {
1380                self.advance();
1381                while !matches!(self.peek_type(), TokenType::Eof | TokenType::Semicolon) {
1382                    self.advance();
1383                }
1384                let select = SelectStatement {
1385                    comments: vec![],
1386                    ctes,
1387                    distinct: false,
1388                    top: None,
1389                    columns: vec![SelectItem::Wildcard],
1390                    from: None,
1391                    joins: vec![],
1392                    where_clause: None,
1393                    group_by: vec![],
1394                    having: None,
1395                    order_by: vec![],
1396                    limit: None,
1397                    offset: None,
1398                    fetch_first: None,
1399                    qualify: None,
1400                    window_definitions: vec![],
1401                };
1402                Ok(Statement::Select(select))
1403            }
1404            TokenType::Insert => {
1405                let ins = self.parse_insert()?;
1406                let _ = ctes;
1407                Ok(Statement::Insert(ins))
1408            }
1409            TokenType::Update => {
1410                let upd = self.parse_update()?;
1411                let _ = ctes;
1412                Ok(Statement::Update(upd))
1413            }
1414            TokenType::Delete => {
1415                let del = self.parse_delete()?;
1416                let _ = ctes;
1417                Ok(Statement::Delete(del))
1418            }
1419            TokenType::Merge => {
1420                let mrg = self.parse_merge()?;
1421                let _ = ctes;
1422                Ok(Statement::Merge(mrg))
1423            }
1424            _ => Err(SqlglotError::ParserError {
1425                message: "Expected SELECT or INSERT after WITH clause".into(),
1426            }),
1427        }
1428    }
1429
1430    fn parse_cte(&mut self, recursive: bool) -> Result<Cte> {
1431        let (name, name_quote_style) = self.expect_name_with_quote()?;
1432
1433        let columns = if self.match_token(TokenType::LParen) {
1434            let mut cols = vec![self.expect_name()?];
1435            while self.match_token(TokenType::Comma) {
1436                cols.push(self.expect_name()?);
1437            }
1438            self.expect(TokenType::RParen)?;
1439            cols
1440        } else {
1441            vec![]
1442        };
1443
1444        // DuckDB recursive CTE cycle clause:
1445        //   `WITH RECURSIVE tbl(a, b) USING KEY (a, max(b)) AS (...)`.
1446        // Swallow `USING KEY (...)` opaquely so the surrounding parse runs.
1447        if self.check_keyword("USING") {
1448            let saved = self.pos;
1449            self.advance();
1450            if self.check_keyword("KEY") {
1451                self.advance();
1452                if self.match_token(TokenType::LParen) {
1453                    let mut depth = 1_i32;
1454                    while depth > 0 && !matches!(self.peek_type(), TokenType::Eof) {
1455                        match self.peek_type() {
1456                            TokenType::LParen => depth += 1,
1457                            TokenType::RParen => depth -= 1,
1458                            _ => {}
1459                        }
1460                        self.advance();
1461                    }
1462                }
1463            } else {
1464                self.pos = saved;
1465            }
1466        }
1467
1468        self.expect(TokenType::As)?;
1469        let materialized = if self.match_keyword("MATERIALIZED") {
1470            Some(true)
1471        } else if self.check_keyword("NOT") {
1472            let saved = self.pos;
1473            self.advance();
1474            if self.match_keyword("MATERIALIZED") {
1475                Some(false)
1476            } else {
1477                self.pos = saved;
1478                None
1479            }
1480        } else {
1481            None
1482        };
1483
1484        self.expect(TokenType::LParen)?;
1485        let query = self.parse_statement_inner()?;
1486        self.expect(TokenType::RParen)?;
1487
1488        Ok(Cte {
1489            name,
1490            name_quote_style,
1491            columns,
1492            query: Box::new(query),
1493            materialized,
1494            recursive,
1495        })
1496    }
1497
1498    // ── SELECT ──────────────────────────────────────────────────────
1499
1500    fn parse_select_body(&mut self, ctes: Vec<Cte>) -> Result<SelectStatement> {
1501        // DuckDB allows starting a query with `FROM ...` and implies
1502        // `SELECT *`. Detect that and synthesise the wildcard projection.
1503        let from_first = !matches!(self.peek_type(), TokenType::Select)
1504            && matches!(self.peek_type(), TokenType::From);
1505        if !from_first {
1506            self.expect(TokenType::Select)?;
1507        }
1508
1509        // MySQL `SELECT` modifiers (between SELECT and the column list):
1510        // DISTINCTROW (alias of DISTINCT), HIGH_PRIORITY, STRAIGHT_JOIN,
1511        // SQL_SMALL_RESULT, SQL_BIG_RESULT, SQL_BUFFER_RESULT, SQL_CACHE /
1512        // SQL_NO_CACHE, SQL_CALC_FOUND_ROWS. Swallow any number of these.
1513        let mut distinctrow = false;
1514        loop {
1515            if self.is_name_token() {
1516                let v = self.peek().value.to_uppercase();
1517                if matches!(
1518                    v.as_str(),
1519                    "DISTINCTROW"
1520                        | "HIGH_PRIORITY"
1521                        | "STRAIGHT_JOIN"
1522                        | "SQL_SMALL_RESULT"
1523                        | "SQL_BIG_RESULT"
1524                        | "SQL_BUFFER_RESULT"
1525                        | "SQL_CACHE"
1526                        | "SQL_NO_CACHE"
1527                        | "SQL_CALC_FOUND_ROWS"
1528                ) {
1529                    if v == "DISTINCTROW" {
1530                        distinctrow = true;
1531                    }
1532                    self.advance();
1533                    continue;
1534                }
1535            }
1536            break;
1537        }
1538        let distinct = distinctrow || self.match_token(TokenType::Distinct);
1539        // PostgreSQL / DuckDB `DISTINCT ON (expr, ...)` — swallow the column
1540        // list so the surrounding query parses. We don't model DISTINCT ON in
1541        // the AST; treat it as plain DISTINCT.
1542        if distinct && self.match_token(TokenType::On) {
1543            self.expect(TokenType::LParen)?;
1544            let mut depth = 1;
1545            while depth > 0 {
1546                match self.peek_type() {
1547                    TokenType::LParen => depth += 1,
1548                    TokenType::RParen => {
1549                        depth -= 1;
1550                        if depth == 0 {
1551                            self.advance();
1552                            break;
1553                        }
1554                    }
1555                    TokenType::Eof => break,
1556                    _ => {}
1557                }
1558                self.advance();
1559            }
1560        }
1561        // SQL-standard `SELECT ALL` quantifier (§7.12). Equivalent to omitting
1562        // the quantifier; consume it so it does not get mis-parsed as a column.
1563        if !distinct {
1564            let _ = self.match_token(TokenType::All);
1565        }
1566
1567        // BigQuery `SELECT [DISTINCT] AS STRUCT|VALUE …` — type-tag for the
1568        // implicit row constructor. We don't model it; swallow the prefix.
1569        if self.peek_type() == &TokenType::As {
1570            let v = self
1571                .peek_offset(1)
1572                .map(|t| t.value.to_uppercase())
1573                .unwrap_or_default();
1574            if matches!(v.as_str(), "STRUCT" | "VALUE") {
1575                self.advance(); // AS
1576                self.advance(); // STRUCT|VALUE
1577            }
1578        }
1579
1580        // TOP N (SQL Server style)
1581        // Use parse_primary() instead of parse_expr() to prevent the parser
1582        // from consuming `*` (SELECT all columns) as a multiplication operator.
1583        // This correctly handles: TOP 5, TOP 100, TOP (expr), TOP (@var)
1584        let top = if self.match_token(TokenType::Top) {
1585            Some(Box::new(self.parse_primary()?))
1586        } else {
1587            None
1588        };
1589
1590        let columns = if from_first {
1591            vec![SelectItem::Wildcard]
1592        } else {
1593            self.parse_select_items()?
1594        };
1595
1596        let from = if self.match_token(TokenType::From) {
1597            Some(FromClause {
1598                source: self.parse_table_source()?,
1599            })
1600        } else {
1601            None
1602        };
1603
1604        let joins = self.parse_joins()?;
1605
1606        // ClickHouse `PREWHERE expr` hint clause (sits between FROM/joins and
1607        // WHERE). Parsed as a regular boolean expression and folded into the
1608        // WHERE clause via `AND` so the AST stays simple.
1609        let prewhere = if self.check_keyword("PREWHERE") {
1610            self.advance();
1611            Some(self.parse_expr()?)
1612        } else {
1613            None
1614        };
1615
1616        let where_clause = if self.match_token(TokenType::Where) {
1617            let e = self.parse_expr()?;
1618            // ClickHouse: `WHERE (expr) AS alias` — alias-binds the
1619            // predicate. Swallow the AS-alias tail; we don't model it.
1620            if self.match_token(TokenType::As) && self.is_name_token() {
1621                self.advance();
1622            }
1623            Some(e)
1624        } else {
1625            None
1626        };
1627
1628        let where_clause = match (prewhere, where_clause) {
1629            (Some(pw), Some(w)) => Some(Expr::BinaryOp {
1630                left: Box::new(pw),
1631                op: BinaryOperator::And,
1632                right: Box::new(w),
1633            }),
1634            (Some(pw), None) => Some(pw),
1635            (None, w) => w,
1636        };
1637
1638        // Teradata `PREFERRING <expr> [PARTITION BY <list>]` skyline clause.
1639        // Sits between WHERE and GROUP BY. Swallow opaquely up to a known
1640        // terminator so the surrounding query parses.
1641        if self.check_keyword("PREFERRING") {
1642            self.advance();
1643            loop {
1644                match self.peek_type() {
1645                    TokenType::Eof
1646                    | TokenType::Semicolon
1647                    | TokenType::Group
1648                    | TokenType::Order
1649                    | TokenType::Having
1650                    | TokenType::Qualify
1651                    | TokenType::Limit
1652                    | TokenType::Union
1653                    | TokenType::Intersect
1654                    | TokenType::Except
1655                    | TokenType::RParen => break,
1656                    _ => {}
1657                }
1658                self.advance();
1659            }
1660        }
1661
1662        let group_by = if self.match_token(TokenType::Group) {
1663            self.expect(TokenType::By)?;
1664            let items = self.parse_group_by_list()?;
1665            // ClickHouse / MySQL `GROUP BY ... WITH ROLLUP|CUBE|TOTALS` —
1666            // swallow the modifier; we don't model it in the AST.
1667            if self.match_token(TokenType::With) {
1668                let _ = self.match_token(TokenType::Rollup)
1669                    || self.match_token(TokenType::Cube)
1670                    || self.match_keyword("TOTALS");
1671            }
1672            // Hive / Spark `GROUP BY k1, k2 GROUPING SETS ((k1), (k2))` —
1673            // swallow the trailing parenthesized list.
1674            if self.match_token(TokenType::Grouping) {
1675                if self.check_keyword("SETS") {
1676                    self.advance();
1677                }
1678                if self.match_token(TokenType::LParen) {
1679                    let mut depth = 1;
1680                    while depth > 0 {
1681                        match self.peek_type() {
1682                            TokenType::LParen => depth += 1,
1683                            TokenType::RParen => {
1684                                depth -= 1;
1685                                if depth == 0 {
1686                                    self.advance();
1687                                    break;
1688                                }
1689                            }
1690                            TokenType::Eof => break,
1691                            _ => {}
1692                        }
1693                        self.advance();
1694                    }
1695                }
1696            }
1697            items
1698        } else {
1699            vec![]
1700        };
1701
1702        let having = if self.match_token(TokenType::Having) {
1703            let expr = self.parse_expr()?;
1704            // ClickHouse corpora occasionally include a trailing alias after
1705            // HAVING expression text (`HAVING cond AS x`). Swallow alias so it
1706            // doesn't leak as an unexpected token.
1707            if self.match_token(TokenType::As) && self.is_name_token() {
1708                self.advance();
1709            }
1710            Some(expr)
1711        } else {
1712            None
1713        };
1714
1715        let qualify = if self.match_token(TokenType::Qualify) {
1716            Some(self.parse_expr()?)
1717        } else {
1718            None
1719        };
1720
1721        // Named WINDOW definitions
1722        let window_definitions = if self.match_token(TokenType::Window) {
1723            self.parse_window_definitions()?
1724        } else {
1725            vec![]
1726        };
1727
1728        let order_by = if self.match_token(TokenType::Order) {
1729            self.expect(TokenType::By)?;
1730            self.parse_order_by_items()?
1731        } else {
1732            vec![]
1733        };
1734
1735        // Hive / Spark non-standard ordering clauses; behave syntactically
1736        // like ORDER BY. We parse and discard them so the surrounding query
1737        // continues to parse.
1738        loop {
1739            let is_sort = self.check_keyword("SORT");
1740            let is_distribute = self.check_keyword("DISTRIBUTE");
1741            let is_cluster = self.check_keyword("CLUSTER");
1742            if !(is_sort || is_distribute || is_cluster) {
1743                break;
1744            }
1745            let saved = self.pos;
1746            self.advance();
1747            if self.peek_type() == &TokenType::By {
1748                self.advance();
1749                let _ = self.parse_order_by_items()?;
1750            } else {
1751                self.pos = saved;
1752                break;
1753            }
1754        }
1755
1756        let (mut limit, mut offset) = if self.match_token(TokenType::Limit) {
1757            let first = self.parse_expr()?;
1758            // MySQL / ClickHouse `LIMIT offset, count` form — convert to
1759            // `LIMIT count OFFSET offset`.
1760            if self.match_token(TokenType::Comma) {
1761                let count = self.parse_expr()?;
1762                (Some(count), Some(first))
1763            } else {
1764                (Some(first), None)
1765            }
1766        } else {
1767            (None, None)
1768        };
1769
1770        // ClickHouse `LIMIT N BY col[, ...]` / `LIMIT N BY col LIMIT M` —
1771        // consume the BY-list and an optional outer LIMIT so the trailing
1772        // SETTINGS / FORMAT clauses still parse.
1773        if limit.is_some() && self.match_token(TokenType::By) {
1774            let _ = self.parse_expr_list_allow_item_alias()?;
1775            if self.match_token(TokenType::Limit) {
1776                let _ = self.parse_expr()?;
1777            }
1778        }
1779
1780        if offset.is_none() && self.match_token(TokenType::Offset) {
1781            let expr = self.parse_expr()?;
1782            // T-SQL / ANSI SQL:2008 form: OFFSET n ROWS [FETCH …].
1783            // Consume the optional ROWS/ROW keyword so FETCH can match next.
1784            let _ = self.match_token(TokenType::Rows) || self.match_keyword("ROW");
1785            offset = Some(expr);
1786        } else if offset.is_some() {
1787            // Already populated from `LIMIT a, b`; still consume an explicit
1788            // `OFFSET n` if it appears so it does not leak into the trailer.
1789            if self.match_token(TokenType::Offset) {
1790                let expr = self.parse_expr()?;
1791                let _ = self.match_token(TokenType::Rows) || self.match_keyword("ROW");
1792                offset = Some(expr);
1793            }
1794        }
1795
1796        // Trino / Presto: `OFFSET n LIMIT m` (ordering opposite to MySQL).
1797        // We've parsed OFFSET; accept a trailing LIMIT n.
1798        if limit.is_none() && self.match_token(TokenType::Limit) {
1799            limit = Some(self.parse_expr()?);
1800        }
1801
1802        // FETCH FIRST|NEXT n ROWS ONLY (Oracle / ANSI SQL:2008 / T-SQL)
1803        let fetch_first = if self.match_token(TokenType::Fetch) {
1804            // consume FIRST or NEXT
1805            let _ = self.match_token(TokenType::First) || self.match_token(TokenType::Next);
1806            let count = self.parse_expr()?;
1807            // consume ROWS or ROW
1808            let _ = self.match_keyword("ROWS") || self.match_keyword("ROW");
1809            // consume ONLY
1810            let _ = self.match_token(TokenType::Only);
1811            Some(count)
1812        } else {
1813            None
1814        };
1815
1816        // ClickHouse trailing `WITH TOTALS` / `WITH TIES` / `WITH ROLLUP` /
1817        // `WITH CUBE` / `WITH FILL` modifiers in subquery position. These
1818        // are query-level modifiers we don't model; swallow so the
1819        // surrounding `)` is reached.
1820        if matches!(self.peek_type(), TokenType::With) {
1821            let after = self.peek_offset(1);
1822            let is_postfix_modifier = after
1823                .map(|t| {
1824                    matches!(
1825                        t.token_type,
1826                        TokenType::Identifier | TokenType::Cube | TokenType::Rollup
1827                    ) && matches!(
1828                        t.value.to_uppercase().as_str(),
1829                        "TOTALS" | "TIES" | "FILL" | "ROLLUP" | "CUBE"
1830                    )
1831                })
1832                .unwrap_or(false);
1833            if is_postfix_modifier {
1834                self.advance(); // WITH
1835                self.advance(); // modifier keyword
1836            }
1837        }
1838
1839        // ClickHouse `SETTINGS k = v, ...` / `FORMAT <name>` and MySQL
1840        // `INTO OUTFILE 'file'` style trailing clauses. None of these have
1841        // a dedicated AST representation; consume to keep the surrounding
1842        // statement parseable.
1843        loop {
1844            if self.check_keyword("SETTINGS")
1845                || self.check_keyword("FORMAT")
1846                || self.check_keyword("INTO")
1847            {
1848                self.skip_trailing_options();
1849                break;
1850            }
1851            break;
1852        }
1853
1854        Ok(SelectStatement {
1855            comments: vec![],
1856            ctes,
1857            distinct,
1858            top,
1859            columns,
1860            from,
1861            joins,
1862            where_clause,
1863            group_by,
1864            having,
1865            order_by,
1866            limit,
1867            offset,
1868            fetch_first,
1869            qualify,
1870            window_definitions,
1871        })
1872    }
1873
1874    fn parse_window_definitions(&mut self) -> Result<Vec<WindowDefinition>> {
1875        let mut defs = Vec::new();
1876        loop {
1877            let name = self.expect_name()?;
1878            self.expect(TokenType::As)?;
1879            self.expect(TokenType::LParen)?;
1880            let spec = self.parse_window_spec()?;
1881            self.expect(TokenType::RParen)?;
1882            defs.push(WindowDefinition { name, spec });
1883            if !self.match_token(TokenType::Comma) {
1884                break;
1885            }
1886        }
1887        Ok(defs)
1888    }
1889
1890    /// Check if we should parse a set operation (UNION / INTERSECT / EXCEPT)
1891    fn maybe_parse_set_operation(&mut self, left: Statement) -> Result<Statement> {
1892        let op = match self.peek_type() {
1893            TokenType::Union => SetOperationType::Union,
1894            TokenType::Intersect => SetOperationType::Intersect,
1895            TokenType::Except => SetOperationType::Except,
1896            _ => {
1897                // Spark / Oracle `MINUS` as a synonym for `EXCEPT`.
1898                if self.is_name_token() && self.peek().value.eq_ignore_ascii_case("MINUS") {
1899                    self.advance();
1900                    let all = self.match_token(TokenType::All);
1901                    let _ = self.match_token(TokenType::Distinct);
1902                    let right = self.parse_statement_inner()?;
1903                    return Ok(Statement::SetOperation(SetOperationStatement {
1904                        comments: vec![],
1905                        op: SetOperationType::Except,
1906                        all,
1907                        left: Box::new(left),
1908                        right: Box::new(right),
1909                        order_by: vec![],
1910                        limit: None,
1911                        offset: None,
1912                    }));
1913                }
1914                return Ok(left);
1915            }
1916        };
1917        self.advance();
1918
1919        let all = self.match_token(TokenType::All);
1920        let _ = self.match_token(TokenType::Distinct); // UNION DISTINCT
1921
1922        // DuckDB `UNION ALL BY NAME` / `UNION BY NAME` — column-name-based
1923        // set operation. Swallow the modifier so the inner SELECT parses.
1924        if self.match_token(TokenType::By) {
1925            if self.is_name_token() && self.peek().value.eq_ignore_ascii_case("NAME") {
1926                self.advance();
1927            }
1928        }
1929
1930        let right = self.parse_statement_inner()?;
1931
1932        // Check for further set operations chaining
1933        let combined = Statement::SetOperation(SetOperationStatement {
1934            comments: vec![],
1935            op,
1936            all,
1937            left: Box::new(left),
1938            right: Box::new(right),
1939            order_by: vec![],
1940            limit: None,
1941            offset: None,
1942        });
1943
1944        // Parse trailing ORDER BY / LIMIT / OFFSET that applies to the whole set operation
1945        if matches!(
1946            self.peek_type(),
1947            TokenType::Union | TokenType::Intersect | TokenType::Except
1948        ) {
1949            self.maybe_parse_set_operation(combined)
1950        } else {
1951            // Check for global ORDER BY / LIMIT
1952            if let Statement::SetOperation(mut sop) = combined {
1953                if self.match_token(TokenType::Order) {
1954                    self.expect(TokenType::By)?;
1955                    sop.order_by = self.parse_order_by_items()?;
1956                }
1957                if self.match_token(TokenType::Limit) {
1958                    sop.limit = Some(self.parse_expr()?);
1959                }
1960                if self.match_token(TokenType::Offset) {
1961                    sop.offset = Some(self.parse_expr()?);
1962                    // ANSI SQL:2008 / T-SQL: OFFSET n ROWS. Consume optional ROW(S).
1963                    let _ = self.match_token(TokenType::Rows) || self.match_keyword("ROW");
1964                }
1965                // Accept trailing LIMIT after OFFSET (OFFSET n LIMIT m ordering).
1966                if sop.limit.is_none() && self.match_token(TokenType::Limit) {
1967                    sop.limit = Some(self.parse_expr()?);
1968                }
1969                Ok(Statement::SetOperation(sop))
1970            } else {
1971                Ok(combined)
1972            }
1973        }
1974    }
1975
1976    fn parse_select_items(&mut self) -> Result<Vec<SelectItem>> {
1977        let mut items = vec![self.parse_select_item()?];
1978        while self.match_token(TokenType::Comma) {
1979            // DuckDB / BigQuery / Snowflake allow a trailing comma in the
1980            // SELECT list before `FROM` / end of select clause. Bail out if
1981            // the next token can't start a select item.
1982            if matches!(
1983                self.peek_type(),
1984                TokenType::From
1985                    | TokenType::Where
1986                    | TokenType::Group
1987                    | TokenType::Order
1988                    | TokenType::Limit
1989                    | TokenType::Having
1990                    | TokenType::Qualify
1991                    | TokenType::Eof
1992                    | TokenType::Semicolon
1993                    | TokenType::RParen
1994                    | TokenType::Union
1995                    | TokenType::Intersect
1996                    | TokenType::Except
1997            ) {
1998                break;
1999            }
2000            items.push(self.parse_select_item()?);
2001        }
2002        Ok(items)
2003    }
2004
2005    /// Consume DuckDB / Snowflake star modifiers — `EXCLUDE (...)`,
2006    /// `EXCEPT (...)`, `RENAME (...)`, `REPLACE (...)` — that may follow
2007    /// `*` or `t.*` in a SELECT list. Each modifier may appear at most
2008    /// once; we tolerate any order.
2009    fn swallow_star_modifiers(&mut self) {
2010        loop {
2011            let matched = self.check_keyword("EXCLUDE")
2012                || self.check_keyword("RENAME")
2013                || (self.check_keyword("REPLACE")
2014                    && matches!(
2015                        self.peek_offset(1).map(|t| &t.token_type),
2016                        Some(TokenType::LParen)
2017                    ))
2018                || (self.peek_type() == &TokenType::Except
2019                    && matches!(
2020                        self.peek_offset(1).map(|t| &t.token_type),
2021                        Some(TokenType::LParen)
2022                    ));
2023            // sqlfluff `SELECT * GLOB '…' FROM t` / `* SIMILAR TO '…'` /
2024            // `* LIKE '…'` style column-filter shorthand. Swallow the
2025            // operator and its pattern literal so the rest parses.
2026            let pattern_modifier = if matches!(self.peek_type(), TokenType::Like | TokenType::ILike)
2027                || (self.check_keyword("GLOB")
2028                    || self.check_keyword("REGEXP")
2029                    || self.check_keyword("RLIKE")
2030                    || self.check_keyword("IREGEXP")
2031                    || self.check_keyword("SIMILAR"))
2032            {
2033                let next_is_string = matches!(
2034                    self.peek_offset(1).map(|t| &t.token_type),
2035                    Some(TokenType::String)
2036                );
2037                let is_similar_to = self.check_keyword("SIMILAR")
2038                    && self
2039                        .peek_offset(1)
2040                        .map(|t| t.value.eq_ignore_ascii_case("TO"))
2041                        .unwrap_or(false);
2042                next_is_string || is_similar_to
2043            } else {
2044                false
2045            };
2046            if !matched && !pattern_modifier {
2047                break;
2048            }
2049            if pattern_modifier {
2050                // Operator keyword (and optional TO for SIMILAR TO) +
2051                // pattern string. We're tolerant of extra ESCAPE clause.
2052                self.advance(); // GLOB / LIKE / etc.
2053                if self.is_name_token() && self.peek().value.eq_ignore_ascii_case("TO") {
2054                    self.advance();
2055                }
2056                if matches!(self.peek_type(), TokenType::String) {
2057                    self.advance();
2058                    if self.match_token(TokenType::Escape) {
2059                        if matches!(self.peek_type(), TokenType::String) {
2060                            self.advance();
2061                        }
2062                    }
2063                }
2064                continue;
2065            }
2066            self.advance(); // keyword
2067            if self.match_token(TokenType::LParen) {
2068                let mut depth = 1;
2069                while depth > 0 {
2070                    match self.peek_type() {
2071                        TokenType::LParen => depth += 1,
2072                        TokenType::RParen => {
2073                            depth -= 1;
2074                            if depth == 0 {
2075                                self.advance();
2076                                break;
2077                            }
2078                        }
2079                        TokenType::Eof => break,
2080                        _ => {}
2081                    }
2082                    self.advance();
2083                }
2084            } else if self.is_name_token() {
2085                // EXCLUDE col (single-column without parens)
2086                self.advance();
2087            }
2088        }
2089    }
2090
2091    fn parse_select_item(&mut self) -> Result<SelectItem> {
2092        if self.peek().token_type == TokenType::Star {
2093            self.advance();
2094            // DuckDB / Snowflake `* EXCLUDE (col, ...)`,
2095            // `* RENAME (a AS b, ...)`, `* REPLACE (expr AS col, ...)`.
2096            // Swallow the modifier so the surrounding select parses.
2097            self.swallow_star_modifiers();
2098            return Ok(SelectItem::Wildcard);
2099        }
2100
2101        // DuckDB struct-shorthand alias-first form: `alias: expr` in a SELECT
2102        // list. Only fire when we see `<name> :` followed by something that
2103        // is not another `:` (which would form `::` cast) — i.e. a leading
2104        // alias-then-colon pattern. The alias may be any name-like token.
2105        if self.is_name_token() {
2106            let pos1 = self.peek_offset(1).map(|t| &t.token_type);
2107            let pos2 = self.peek_offset(2).map(|t| &t.token_type);
2108            if matches!(pos1, Some(TokenType::Colon)) && !matches!(pos2, Some(TokenType::Colon)) {
2109                // Save state so we can roll back if the trailing expression
2110                // fails to parse (avoids misclassifying obscure forms).
2111                let saved = self.pos;
2112                let alias_tok = self.advance().clone();
2113                self.advance(); // consume ':'
2114                if let Ok(expr) = self.parse_expr() {
2115                    return Ok(SelectItem::Expr {
2116                        expr,
2117                        alias: Some(alias_tok.value),
2118                        alias_quote_style: quote_style_from_char(alias_tok.quote_char),
2119                    });
2120                }
2121                self.pos = saved;
2122            }
2123        }
2124
2125        let expr = self.parse_expr()?;
2126
2127        // Check for table.* pattern
2128        if let Expr::QualifiedWildcard { ref table } = expr {
2129            self.swallow_star_modifiers();
2130            return Ok(SelectItem::QualifiedWildcard {
2131                table: table.clone(),
2132            });
2133        }
2134
2135        // Hive scripting: `SELECT TRANSFORM(cols) [ROW FORMAT ...] USING
2136        // 'cmd' [AS (cols)] [ROW FORMAT ...] [RECORDREADER 'cls']`. The
2137        // tail clauses appear between the function call and `FROM`. We
2138        // don't model the scripting AST yet; swallow opaquely so the rest
2139        // of the SELECT parses.
2140        if matches!(
2141            &expr,
2142            Expr::Function { name, .. } if name.eq_ignore_ascii_case("TRANSFORM")
2143        ) {
2144            while !matches!(
2145                self.peek_type(),
2146                TokenType::From | TokenType::Eof | TokenType::Semicolon | TokenType::Comma
2147            ) {
2148                let v = self.peek().value.to_uppercase();
2149                let is_tail = self.peek_type() == &TokenType::Using
2150                    || self.peek_type() == &TokenType::As
2151                    || matches!(
2152                        v.as_str(),
2153                        "ROW"
2154                            | "FORMAT"
2155                            | "SERDE"
2156                            | "WITH"
2157                            | "SERDEPROPERTIES"
2158                            | "RECORDREADER"
2159                            | "RECORDWRITER"
2160                            | "FIELDS"
2161                            | "TERMINATED"
2162                            | "BY"
2163                            | "COLLECTION"
2164                            | "ITEMS"
2165                            | "MAP"
2166                            | "KEYS"
2167                            | "LINES"
2168                            | "NULL"
2169                            | "DEFINED"
2170                            | "STORED"
2171                            | "DELIMITED"
2172                            | "ESCAPED"
2173                            | "LOCATION"
2174                            | "OUTPUTFORMAT"
2175                            | "INPUTFORMAT"
2176                    );
2177                if !is_tail
2178                    && !matches!(
2179                        self.peek_type(),
2180                        TokenType::String
2181                            | TokenType::LParen
2182                            | TokenType::RParen
2183                            | TokenType::Identifier
2184                            | TokenType::Eq
2185                    )
2186                {
2187                    break;
2188                }
2189                self.advance();
2190            }
2191            return Ok(SelectItem::Expr {
2192                expr,
2193                alias: None,
2194                alias_quote_style: QuoteStyle::None,
2195            });
2196        }
2197
2198        let (alias, alias_quote_style) = match self.parse_optional_alias()? {
2199            Some((name, qs)) => (Some(name), qs),
2200            None => (None, QuoteStyle::None),
2201        };
2202
2203        Ok(SelectItem::Expr {
2204            expr,
2205            alias,
2206            alias_quote_style,
2207        })
2208    }
2209
2210    fn parse_optional_alias(&mut self) -> Result<Option<(String, QuoteStyle)>> {
2211        if self.match_token(TokenType::As) {
2212            // After AS, also accept `@name` / `:name` as an alias. Both forms
2213            // appear in auto-generated SQL corpora (e.g. `AS @rpm`, `AS :minutes`)
2214            // where the symbol is part of the column name from the source data.
2215            if let Some((name, qs)) = self.try_parse_prefixed_alias()? {
2216                return Ok(Some((name, qs)));
2217            }
2218            // PostgreSQL / SQLite tolerate reserved-word literals as aliases
2219            // (`SELECT bool 't' AS true`). Accept TRUE / FALSE / NULL tokens.
2220            if matches!(
2221                self.peek_type(),
2222                TokenType::True | TokenType::False | TokenType::Null
2223            ) {
2224                let token = self.advance().clone();
2225                return Ok(Some((token.value, QuoteStyle::None)));
2226            }
2227            // DuckDB allows column aliases that collide with reserved
2228            // keywords (`AS matched`, `AS or`, `AS using`). After AS, take
2229            // whatever non-structural token appears.
2230            if matches!(
2231                self.peek_type(),
2232                TokenType::Matched
2233                    | TokenType::Or
2234                    | TokenType::And
2235                    | TokenType::Using
2236                    | TokenType::When
2237                    | TokenType::Where
2238                    | TokenType::Asc
2239                    | TokenType::Desc
2240                    | TokenType::Limit
2241                    | TokenType::Group
2242                    | TokenType::Having
2243                    | TokenType::On
2244                    | TokenType::Into
2245                    | TokenType::From
2246                    | TokenType::Order
2247                    | TokenType::Like
2248            ) {
2249                let token = self.advance().clone();
2250                return Ok(Some((token.value, QuoteStyle::None)));
2251            }
2252            return Ok(Some(self.expect_name_with_quote()?));
2253        }
2254        // Implicit alias
2255        if self.is_name_token() {
2256            let peeked_upper = self.peek().value.to_uppercase();
2257            if !matches!(
2258                peeked_upper.as_str(),
2259                "FROM"
2260                    | "WHERE"
2261                    | "GROUP"
2262                    | "ORDER"
2263                    | "LIMIT"
2264                    | "HAVING"
2265                    | "UNION"
2266                    | "INTERSECT"
2267                    | "EXCEPT"
2268                    | "JOIN"
2269                    | "INNER"
2270                    | "LEFT"
2271                    | "RIGHT"
2272                    | "FULL"
2273                    | "CROSS"
2274                    | "ON"
2275                    | "WINDOW"
2276                    | "QUALIFY"
2277                    | "INTO"
2278                    | "SET"
2279                    | "RETURNING"
2280                    | "PIVOT"
2281                    | "UNPIVOT"
2282                    | "PREWHERE"
2283                    | "SETTINGS"
2284                    | "FORMAT"
2285                    | "SAMPLE"
2286                    | "TABLESAMPLE"
2287                    | "LATERAL"
2288                    | "USING"
2289                    | "OFFSET"
2290                    | "FETCH"
2291                    | "FOR"
2292                    | "WITH"
2293                    | "OPTION"
2294                    | "MATCH_RECOGNIZE"
2295                    | "SORT"
2296                    | "DISTRIBUTE"
2297                    | "CLUSTER"
2298                    | "GLOBAL"
2299                    | "PREFERRING"
2300                    | "FORCE"
2301                    | "USE"
2302                    | "IGNORE"
2303                    | "STRAIGHT_JOIN"
2304                    | "DISTRIBUTED"
2305                    | "VALUE"
2306                    | "VALUES"
2307                    | "DEFAULT"
2308                    | "PARTITION"
2309            ) {
2310                let token = self.advance().clone();
2311                let qs = quote_style_from_char(token.quote_char);
2312                return Ok(Some((token.value.clone(), qs)));
2313            }
2314        }
2315        Ok(None)
2316    }
2317
2318    fn parse_table_source(&mut self) -> Result<TableSource> {
2319        let mut source = self.parse_base_table_source()?;
2320        // PostgreSQL table-inheritance star: `FROM parent*` includes all
2321        // child tables. Swallow the trailing `*` so the table alias /
2322        // joins continue to parse.
2323        let _ = self.match_token(TokenType::Star);
2324        // BigQuery / Snowflake / MySQL TiDB time-travel:
2325        //   `<tbl> [FOR SYSTEM_TIME] AS OF [TIMESTAMP] <expr>` or
2326        //   `<tbl> AS OF VERSION <expr>` / `AS OF TIMESTAMP <expr>`.
2327        // We don't model the time-travel clause in the AST; swallow the
2328        // keywords and the expression so the surrounding query parses.
2329        if self.is_name_token()
2330            && self.peek().value.eq_ignore_ascii_case("FOR")
2331            && self
2332                .peek_offset(1)
2333                .map(|t| t.value.eq_ignore_ascii_case("SYSTEM_TIME"))
2334                .unwrap_or(false)
2335        {
2336            self.advance(); // FOR
2337            self.advance(); // SYSTEM_TIME
2338        }
2339        if self.peek_type() == &TokenType::As
2340            && self
2341                .peek_offset(1)
2342                .map(|t| t.value.eq_ignore_ascii_case("OF"))
2343                .unwrap_or(false)
2344        {
2345            self.advance(); // AS
2346            self.advance(); // OF
2347            // Optional TIMESTAMP / VERSION qualifier.
2348            if matches!(self.peek_type(), TokenType::Timestamp)
2349                || (self.is_name_token()
2350                    && matches!(
2351                        self.peek().value.to_uppercase().as_str(),
2352                        "VERSION" | "SCN" | "SEQUENCE"
2353                    ))
2354            {
2355                self.advance();
2356            }
2357            let _ = self.parse_expr()?;
2358        }
2359        // Hive / Spark / Trino `TABLESAMPLE [method] (...)` after a table
2360        // reference. We don't model the sample clause in the AST; just
2361        // consume the optional method identifier (BERNOULLI / SYSTEM /
2362        // RESERVOIR) and the parenthesized body so the surrounding query
2363        // parses. Also accept an optional `REPEATABLE (n)` trailer.
2364        if self.match_token(TokenType::Tablesample) {
2365            // Optional sampling method identifier.
2366            if matches!(self.peek_type(), TokenType::Identifier) {
2367                self.advance();
2368            }
2369            if self.match_token(TokenType::LParen) {
2370                let mut depth = 1;
2371                while depth > 0 {
2372                    match self.peek_type() {
2373                        TokenType::LParen => depth += 1,
2374                        TokenType::RParen => {
2375                            depth -= 1;
2376                            if depth == 0 {
2377                                self.advance();
2378                                break;
2379                            }
2380                        }
2381                        TokenType::Eof => break,
2382                        _ => {}
2383                    }
2384                    self.advance();
2385                }
2386            }
2387            if self.check_keyword("REPEATABLE") {
2388                self.advance();
2389                if self.match_token(TokenType::LParen) {
2390                    let mut depth = 1;
2391                    while depth > 0 {
2392                        match self.peek_type() {
2393                            TokenType::LParen => depth += 1,
2394                            TokenType::RParen => {
2395                                depth -= 1;
2396                                if depth == 0 {
2397                                    self.advance();
2398                                    break;
2399                                }
2400                            }
2401                            TokenType::Eof => break,
2402                            _ => {}
2403                        }
2404                        self.advance();
2405                    }
2406                }
2407            }
2408            // Optional trailing alias on the sampled table — `… TABLESAMPLE
2409            // (…) s`. We attach it to the underlying table reference when
2410            // possible, otherwise just consume the identifier.
2411            if let TableSource::Table(ref mut tr) = source {
2412                if tr.alias.is_none() {
2413                    if let Some((name, qs)) = self.parse_optional_alias()? {
2414                        tr.alias = Some(name);
2415                        tr.alias_quote_style = qs;
2416                    }
2417                }
2418            }
2419        }
2420        // Check for trailing PIVOT / UNPIVOT
2421        let source = self.parse_pivot_or_unpivot(source)?;
2422        // ClickHouse: `SELECT * FROM t SAMPLE 0.1` (no parens) — and the
2423        // optional `OFFSET m` modifier. The keyword tokenizes as a plain
2424        // identifier so this also handles dialects that don't reserve it.
2425        if self.check_keyword("SAMPLE") {
2426            self.advance();
2427            // Accept a number, identifier, or parenthesized expression.
2428            if matches!(self.peek_type(), TokenType::Number) {
2429                self.advance();
2430                // Optional `/ N` ratio.
2431                if self.peek_type() == &TokenType::Slash {
2432                    self.advance();
2433                    if matches!(self.peek_type(), TokenType::Number) {
2434                        self.advance();
2435                    }
2436                }
2437            }
2438            if self.check_keyword("OFFSET") {
2439                self.advance();
2440                if matches!(self.peek_type(), TokenType::Number) {
2441                    self.advance();
2442                }
2443            }
2444        }
2445        Ok(source)
2446    }
2447
2448    fn parse_base_table_source(&mut self) -> Result<TableSource> {
2449        // LATERAL
2450        if self.match_token(TokenType::Lateral) {
2451            let source = self.parse_table_source()?;
2452            return Ok(TableSource::Lateral {
2453                source: Box::new(source),
2454            });
2455        }
2456
2457        // Spark / DuckDB / Postgres `FROM VALUES (...) [, (...)]+ [alias[(cols)]]`
2458        // (un-parenthesised VALUES list). Swallow the rows.
2459        if self.match_token(TokenType::Values) {
2460            // First row.
2461            if self.match_token(TokenType::LParen) {
2462                let mut depth = 1;
2463                while depth > 0 {
2464                    match self.peek_type() {
2465                        TokenType::LParen => depth += 1,
2466                        TokenType::RParen => {
2467                            depth -= 1;
2468                            if depth == 0 {
2469                                self.advance();
2470                                break;
2471                            }
2472                        }
2473                        TokenType::Eof => break,
2474                        _ => {}
2475                    }
2476                    self.advance();
2477                }
2478            }
2479            // Additional rows.
2480            while self.peek_type() == &TokenType::Comma {
2481                let saved = self.pos;
2482                self.advance();
2483                if !self.match_token(TokenType::LParen) {
2484                    // Not a row — restore comma for the outer parser.
2485                    self.pos = saved;
2486                    break;
2487                }
2488                let mut depth = 1;
2489                while depth > 0 {
2490                    match self.peek_type() {
2491                        TokenType::LParen => depth += 1,
2492                        TokenType::RParen => {
2493                            depth -= 1;
2494                            if depth == 0 {
2495                                self.advance();
2496                                break;
2497                            }
2498                        }
2499                        TokenType::Eof => break,
2500                        _ => {}
2501                    }
2502                    self.advance();
2503                }
2504            }
2505            let (alias, alias_quote_style) = match self.parse_optional_alias()? {
2506                Some((name, qs)) => (Some(name), qs),
2507                None => (None, QuoteStyle::None),
2508            };
2509            if alias.is_some() && self.peek_type() == &TokenType::LParen {
2510                let saved = self.pos;
2511                self.advance();
2512                let mut ok = true;
2513                loop {
2514                    if !self.is_name_token() {
2515                        ok = false;
2516                        break;
2517                    }
2518                    self.advance();
2519                    if self.match_token(TokenType::RParen) {
2520                        break;
2521                    }
2522                    if !self.match_token(TokenType::Comma) {
2523                        ok = false;
2524                        break;
2525                    }
2526                }
2527                if !ok {
2528                    self.pos = saved;
2529                }
2530            }
2531            return Ok(TableSource::TableFunction {
2532                name: "VALUES".to_string(),
2533                args: vec![],
2534                alias,
2535                alias_quote_style,
2536            });
2537        }
2538
2539        // UNNEST(expr)
2540        if self.match_token(TokenType::Unnest) {
2541            self.expect(TokenType::LParen)?;
2542            let expr = self.parse_expr()?;
2543            // Multi-arg form (Trino): UNNEST(a, b, c). Drop extras.
2544            while self.match_token(TokenType::Comma) {
2545                let _ = self.parse_expr()?;
2546            }
2547            self.expect(TokenType::RParen)?;
2548            let (mut alias, mut alias_quote_style) = match self.parse_optional_alias()? {
2549                Some((name, qs)) => (Some(name), qs),
2550                None => (None, QuoteStyle::None),
2551            };
2552            // BigQuery `WITH OFFSET [AS name]` / Postgres `WITH ORDINALITY`.
2553            let mut with_offset = false;
2554            if self.check_keyword("WITH") {
2555                let saved = self.pos;
2556                self.advance();
2557                if self.check_keyword("OFFSET") || self.check_keyword("ORDINALITY") {
2558                    self.advance();
2559                    with_offset = true;
2560                    // Optional alias after OFFSET / ORDINALITY.
2561                    if alias.is_none() {
2562                        if let Some((n, qs)) = self.parse_optional_alias()? {
2563                            alias = Some(n);
2564                            alias_quote_style = qs;
2565                        }
2566                    } else if self.is_name_token() {
2567                        // `UNNEST(a) id WITH OFFSET pos` — extra trailing
2568                        // name; absorb so we don't trip the join parser.
2569                        self.advance();
2570                    }
2571                } else {
2572                    self.pos = saved;
2573                }
2574            }
2575            // Optional positional column list: `AS t (n, a)`.
2576            if alias.is_some() && self.peek_type() == &TokenType::LParen {
2577                let saved = self.pos;
2578                self.advance();
2579                let mut ok = true;
2580                loop {
2581                    if !self.is_name_token() {
2582                        ok = false;
2583                        break;
2584                    }
2585                    self.advance();
2586                    if self.match_token(TokenType::RParen) {
2587                        break;
2588                    }
2589                    if !self.match_token(TokenType::Comma) {
2590                        ok = false;
2591                        break;
2592                    }
2593                }
2594                if !ok {
2595                    self.pos = saved;
2596                }
2597            }
2598            return Ok(TableSource::Unnest {
2599                expr: Box::new(expr),
2600                alias,
2601                alias_quote_style,
2602                with_offset,
2603            });
2604        }
2605
2606        // Subquery: (SELECT ...)
2607        if self.peek_type() == &TokenType::LParen {
2608            let saved = self.pos;
2609            self.advance();
2610            // Skip nested `(` so `((SELECT …))` and `((SELECT) UNION (SELECT))`
2611            // parse as a subquery. We count how many we consumed and pair
2612            // them with the matching trailing `)`s.
2613            let mut extra_parens = 0_usize;
2614            while self.peek_type() == &TokenType::LParen
2615                && self.peek_starts_subquery_through_parens()
2616            {
2617                self.advance();
2618                extra_parens += 1;
2619            }
2620            let starts_subquery = matches!(
2621                self.peek_type(),
2622                TokenType::Select
2623                    | TokenType::With
2624                    | TokenType::Explain
2625                    | TokenType::From
2626                    | TokenType::Describe
2627                    | TokenType::Show
2628                    | TokenType::Table
2629            );
2630            if starts_subquery {
2631                let query = self.parse_statement_inner()?;
2632                // Set operations across parenthesised subqueries: `(SELECT …)
2633                // UNION ALL (SELECT …) [ORDER BY …] [LIMIT …]`.
2634                let query = self.maybe_parse_set_operation(query)?;
2635                for _ in 0..extra_parens {
2636                    self.expect(TokenType::RParen)?;
2637                }
2638                self.expect(TokenType::RParen)?;
2639                let (alias, alias_quote_style) = match self.parse_optional_alias()? {
2640                    Some((name, qs)) => (Some(name), qs),
2641                    None => (None, QuoteStyle::None),
2642                };
2643                // Positional column-list alias: `(SELECT ...) t(c1, c2)`
2644                if alias.is_some() && self.peek_type() == &TokenType::LParen {
2645                    let saved2 = self.pos;
2646                    self.advance();
2647                    let mut ok = true;
2648                    loop {
2649                        if !self.is_name_token() {
2650                            ok = false;
2651                            break;
2652                        }
2653                        self.advance();
2654                        if self.match_token(TokenType::RParen) {
2655                            break;
2656                        }
2657                        if !self.match_token(TokenType::Comma) {
2658                            ok = false;
2659                            break;
2660                        }
2661                    }
2662                    if !ok {
2663                        self.pos = saved2;
2664                    }
2665                }
2666                return Ok(TableSource::Subquery {
2667                    query: Box::new(query),
2668                    alias,
2669                    alias_quote_style,
2670                });
2671            }
2672            // `(VALUES (...), (...)) alias[(cols)]` — common in DuckDB /
2673            // Postgres derived tables. We don't model the VALUES rows in the
2674            // AST as a table source; swallow the parenthesized body and
2675            // synthesise an empty subquery placeholder.
2676            if self.peek_type() == &TokenType::Values {
2677                // Re-advance past the values list, balancing parens (we are
2678                // inside the outer LParen at depth 1).
2679                let mut depth = 1;
2680                while depth > 0 {
2681                    match self.peek_type() {
2682                        TokenType::LParen => depth += 1,
2683                        TokenType::RParen => {
2684                            depth -= 1;
2685                            if depth == 0 {
2686                                self.advance();
2687                                break;
2688                            }
2689                        }
2690                        TokenType::Eof => break,
2691                        _ => {}
2692                    }
2693                    self.advance();
2694                }
2695                let (alias, alias_quote_style) = match self.parse_optional_alias()? {
2696                    Some((name, qs)) => (Some(name), qs),
2697                    None => (None, QuoteStyle::None),
2698                };
2699                if alias.is_some() && self.peek_type() == &TokenType::LParen {
2700                    let saved2 = self.pos;
2701                    self.advance();
2702                    let mut ok = true;
2703                    loop {
2704                        if !self.is_name_token() {
2705                            ok = false;
2706                            break;
2707                        }
2708                        self.advance();
2709                        if self.match_token(TokenType::RParen) {
2710                            break;
2711                        }
2712                        if !self.match_token(TokenType::Comma) {
2713                            ok = false;
2714                            break;
2715                        }
2716                    }
2717                    if !ok {
2718                        self.pos = saved2;
2719                    }
2720                }
2721                // Synthesise an empty values placeholder. Reuse Subquery
2722                // with a single-row Insert wrapper is awkward; instead,
2723                // wrap as a TableFunction("VALUES") with empty args.
2724                return Ok(TableSource::TableFunction {
2725                    name: "VALUES".to_string(),
2726                    args: vec![],
2727                    alias,
2728                    alias_quote_style,
2729                });
2730            }
2731            self.pos = saved;
2732
2733            // MySQL / SQLite / others permit parenthesized join expressions
2734            // as a table source: `(t1 LEFT JOIN t2 ON …)` or comma-list
2735            // `(t1, t2)`. Recurse into the parens, then consume joins /
2736            // commas until the matching `)`. Emit the first source so the
2737            // surrounding query parses; trailing tables are discarded
2738            // (their predicates were already parsed into the JOIN node we
2739            // throw away — acceptance only).
2740            if self.peek_type() == &TokenType::LParen {
2741                let inner_saved = self.pos;
2742                self.advance();
2743                let after_lparen = self.pos;
2744                if let Ok(inner) = self.parse_table_source() {
2745                    let _ = self.parse_joins();
2746                    while self.match_token(TokenType::Comma) {
2747                        if self.parse_table_source().is_err() {
2748                            self.pos = inner_saved;
2749                            // Fall through to the generic parse_table_ref
2750                            // path below, which will surface the original
2751                            // error message.
2752                            break;
2753                        }
2754                        let _ = self.parse_joins();
2755                    }
2756                    if self.pos != inner_saved && self.match_token(TokenType::RParen) {
2757                        let (alias, alias_quote_style) = match self.parse_optional_alias()? {
2758                            Some((name, qs)) => (Some(name), qs),
2759                            None => (None, QuoteStyle::None),
2760                        };
2761                        if let Some(name) = alias.clone() {
2762                            if let TableSource::Table(mut tr) = inner {
2763                                tr.alias = Some(name);
2764                                tr.alias_quote_style = alias_quote_style;
2765                                return Ok(TableSource::Table(tr));
2766                            }
2767                        }
2768                        return Ok(inner);
2769                    }
2770                }
2771                // Restore so the caller sees the LParen and emits a useful
2772                // error rather than silently misparsing partial state.
2773                self.pos = inner_saved;
2774                let _ = after_lparen; // suppress unused warning when build optimises
2775            }
2776        }
2777
2778        // Regular table reference (possibly with function syntax)
2779        let table_ref = self.parse_table_ref()?;
2780
2781        // MySQL / TiDB partition selector: `tbl PARTITION (p0, p1)`. Swallow
2782        // it so the table reference parses cleanly.
2783        if matches!(self.peek_type(), TokenType::Partition)
2784            && matches!(
2785                self.peek_offset(1).map(|t| &t.token_type),
2786                Some(TokenType::LParen)
2787            )
2788        {
2789            self.advance();
2790            self.advance();
2791            while !matches!(self.peek_type(), TokenType::RParen | TokenType::Eof) {
2792                self.advance();
2793            }
2794            let _ = self.match_token(TokenType::RParen);
2795        }
2796
2797        // Check if it's actually a table function: name(args...). Also
2798        // accept dotted qualifiers so DuckDB `schema.func(...)` /
2799        // `catalog.schema.func(...)` parse.
2800        if self.peek_type() == &TokenType::LParen {
2801            // SQL/PGQ `GRAPH_TABLE(graph MATCH … COLUMNS (…))`,
2802            // SQL/XML `XMLTABLE('xpath' PASSING expr COLUMNS …)`,
2803            // SQL/JSON `JSON_TABLE(expr, '$' COLUMNS (…))`. Swallow the
2804            // body opaquely so the rest of the query parses.
2805            let fname = table_ref.name.to_uppercase();
2806            if matches!(
2807                fname.as_str(),
2808                "GRAPH_TABLE" | "XMLTABLE" | "JSON_TABLE" | "OPENJSON" | "OPENROWSET" | "OPENXML"
2809            ) {
2810                self.advance();
2811                let mut depth = 1usize;
2812                while depth > 0 && !matches!(self.peek_type(), TokenType::Eof) {
2813                    match self.peek_type() {
2814                        TokenType::LParen => depth += 1,
2815                        TokenType::RParen => {
2816                            depth -= 1;
2817                            if depth == 0 {
2818                                self.advance();
2819                                break;
2820                            }
2821                        }
2822                        _ => {}
2823                    }
2824                    self.advance();
2825                }
2826                let (alias, alias_quote_style) = match self.parse_optional_alias()? {
2827                    Some((name, qs)) => (Some(name), qs),
2828                    None => (None, QuoteStyle::None),
2829                };
2830                if alias.is_some() && self.peek_type() == &TokenType::LParen {
2831                    let saved = self.pos;
2832                    self.advance();
2833                    let mut ok = true;
2834                    loop {
2835                        if !self.is_name_token() {
2836                            ok = false;
2837                            break;
2838                        }
2839                        self.advance();
2840                        if self.match_token(TokenType::RParen) {
2841                            break;
2842                        }
2843                        if !self.match_token(TokenType::Comma) {
2844                            ok = false;
2845                            break;
2846                        }
2847                    }
2848                    if !ok {
2849                        self.pos = saved;
2850                    }
2851                }
2852                return Ok(TableSource::TableFunction {
2853                    name: match (&table_ref.catalog, &table_ref.schema) {
2854                        (Some(c), Some(s)) => format!("{}.{}.{}", c, s, table_ref.name),
2855                        (None, Some(s)) => format!("{}.{}", s, table_ref.name),
2856                        _ => table_ref.name,
2857                    },
2858                    args: vec![],
2859                    alias,
2860                    alias_quote_style,
2861                });
2862            }
2863            self.advance();
2864            // Hive `noop(on tbl partition by ... order by ... )` table-valued
2865            // function. Arguments start with the `ON` keyword and include
2866            // PARTITION/ORDER/CLUSTER/DISTRIBUTE/SORT BY clauses we don't
2867            // model. Swallow the body opaquely.
2868            let args = if matches!(self.peek_type(), TokenType::On) {
2869                let mut depth = 0usize;
2870                while !matches!(self.peek_type(), TokenType::Eof) {
2871                    match self.peek_type() {
2872                        TokenType::LParen => depth += 1,
2873                        TokenType::RParen => {
2874                            if depth == 0 {
2875                                break;
2876                            }
2877                            depth -= 1;
2878                        }
2879                        _ => {}
2880                    }
2881                    self.advance();
2882                }
2883                vec![]
2884            } else if self.peek_type() != &TokenType::RParen {
2885                self.parse_expr_list()?
2886            } else {
2887                vec![]
2888            };
2889            self.expect(TokenType::RParen)?;
2890            let (alias, alias_quote_style) = match self.parse_optional_alias()? {
2891                Some((name, qs)) => (Some(name), qs),
2892                None => (None, QuoteStyle::None),
2893            };
2894            // DuckDB / Postgres positional column-list alias:
2895            //   range(10) t(i)   →   alias = "t", columns = (i)
2896            // We consume the parenthesized list but do not model it in the AST.
2897            if alias.is_some() && self.peek_type() == &TokenType::LParen {
2898                let saved = self.pos;
2899                self.advance();
2900                let mut ok = true;
2901                loop {
2902                    if !self.is_name_token() {
2903                        ok = false;
2904                        break;
2905                    }
2906                    self.advance();
2907                    if self.match_token(TokenType::RParen) {
2908                        break;
2909                    }
2910                    if !self.match_token(TokenType::Comma) {
2911                        ok = false;
2912                        break;
2913                    }
2914                }
2915                if !ok {
2916                    self.pos = saved;
2917                }
2918            }
2919            return Ok(TableSource::TableFunction {
2920                name: match (&table_ref.catalog, &table_ref.schema) {
2921                    (Some(c), Some(s)) => format!("{}.{}.{}", c, s, table_ref.name),
2922                    (None, Some(s)) => format!("{}.{}", s, table_ref.name),
2923                    _ => table_ref.name,
2924                },
2925                args,
2926                alias,
2927                alias_quote_style,
2928            });
2929        }
2930
2931        // Also support positional column-list alias on a plain table reference:
2932        //   FROM tbl t(c1, c2)
2933        if self.peek_type() == &TokenType::LParen && table_ref.alias.is_some() {
2934            let saved = self.pos;
2935            self.advance();
2936            let mut ok = true;
2937            loop {
2938                if !self.is_name_token() {
2939                    ok = false;
2940                    break;
2941                }
2942                self.advance();
2943                if self.match_token(TokenType::RParen) {
2944                    break;
2945                }
2946                if !self.match_token(TokenType::Comma) {
2947                    ok = false;
2948                    break;
2949                }
2950            }
2951            if !ok {
2952                self.pos = saved;
2953            }
2954        }
2955
2956        // MySQL / MariaDB index hints — `USE INDEX (idx)`, `FORCE INDEX (idx)`,
2957        // `IGNORE INDEX (idx)`, optionally with `FOR JOIN|ORDER BY|GROUP BY`.
2958        // Swallow any sequence of these so the rest of the query parses.
2959        loop {
2960            let saved = self.pos;
2961            let is_hint = matches!(self.peek_type(), TokenType::Use | TokenType::Ignore)
2962                || self.check_keyword("FORCE");
2963            if !is_hint {
2964                break;
2965            }
2966            self.advance();
2967            if !self.check_keyword("INDEX") && !self.check_keyword("KEY") {
2968                self.pos = saved;
2969                break;
2970            }
2971            self.advance();
2972            // Optional `FOR JOIN | FOR ORDER BY | FOR GROUP BY`.
2973            if self.match_keyword("FOR") {
2974                if matches!(
2975                    self.peek_type(),
2976                    TokenType::Join | TokenType::Order | TokenType::Group
2977                ) {
2978                    self.advance();
2979                    let _ = self.match_token(TokenType::By);
2980                }
2981            }
2982            if self.match_token(TokenType::LParen) {
2983                let mut depth = 1;
2984                while depth > 0 {
2985                    match self.peek_type() {
2986                        TokenType::LParen => depth += 1,
2987                        TokenType::RParen => {
2988                            depth -= 1;
2989                            if depth == 0 {
2990                                self.advance();
2991                                break;
2992                            }
2993                        }
2994                        TokenType::Eof => break,
2995                        _ => {}
2996                    }
2997                    self.advance();
2998                }
2999            }
3000        }
3001
3002        // ClickHouse `FROM tbl [AS alias] FINAL` — swallow the FINAL modifier.
3003        // The token tokenizes as Identifier so check_keyword is enough.
3004        if self.check_keyword("FINAL") {
3005            self.advance();
3006        }
3007
3008        // MySQL: `FROM t PARTITION (p0[, p1, ...])` — swallow partition
3009        // selector. May appear before or after the alias; we accept it
3010        // here (i.e., before parse_optional_alias has run).
3011        if matches!(self.peek_type(), TokenType::Partition)
3012            && matches!(
3013                self.peek_offset(1).map(|t| &t.token_type),
3014                Some(TokenType::LParen)
3015            )
3016        {
3017            self.advance();
3018            self.advance();
3019            let mut depth = 1;
3020            while depth > 0 && !matches!(self.peek_type(), TokenType::Eof) {
3021                match self.peek_type() {
3022                    TokenType::LParen => depth += 1,
3023                    TokenType::RParen => {
3024                        depth -= 1;
3025                        if depth == 0 {
3026                            self.advance();
3027                            break;
3028                        }
3029                    }
3030                    _ => {}
3031                }
3032                self.advance();
3033            }
3034        }
3035
3036        Ok(TableSource::Table(table_ref))
3037    }
3038
3039    /// After parsing a base table source, check if PIVOT or UNPIVOT follows.
3040    fn parse_pivot_or_unpivot(&mut self, source: TableSource) -> Result<TableSource> {
3041        if self.match_token(TokenType::Pivot) {
3042            self.expect(TokenType::LParen)?;
3043            let aggregate = self.parse_expr()?;
3044            // Snowflake / Databricks: optional `AS <alias>` on the aggregate
3045            // expression: `PIVOT (sum(sales) AS sales FOR …)`.
3046            if self.peek_type() == &TokenType::As
3047                && self
3048                    .peek_offset(1)
3049                    .map(|t| {
3050                        matches!(
3051                            t.token_type,
3052                            TokenType::Identifier | TokenType::String | TokenType::Number
3053                        )
3054                    })
3055                    .unwrap_or(false)
3056            {
3057                self.advance();
3058                self.advance();
3059            }
3060            // Multi-aggregate PIVOT: `PIVOT (SUM(x), COUNT(x) FOR …)`. Drop
3061            // the extra aggregates — we only keep the first one in the AST.
3062            while self.match_token(TokenType::Comma) {
3063                let _ = self.parse_expr()?;
3064                if self.peek_type() == &TokenType::As
3065                    && self
3066                        .peek_offset(1)
3067                        .map(|t| {
3068                            matches!(
3069                                t.token_type,
3070                                TokenType::Identifier | TokenType::String | TokenType::Number
3071                            )
3072                        })
3073                        .unwrap_or(false)
3074                {
3075                    self.advance();
3076                    self.advance();
3077                }
3078            }
3079            self.expect_keyword("FOR")?;
3080            // Snowflake `FOR (col1, col2) IN …` — grouped pivot key. Use the
3081            // first column name as the AST's for_column.
3082            let for_column = if self.peek_type() == &TokenType::LParen {
3083                self.advance();
3084                let first = self.expect_name()?;
3085                while self.match_token(TokenType::Comma) {
3086                    let _ = self.expect_name()?;
3087                }
3088                self.expect(TokenType::RParen)?;
3089                first
3090            } else {
3091                self.expect_name()?
3092            };
3093            self.expect(TokenType::In)?;
3094            self.expect(TokenType::LParen)?;
3095            let in_values = self.parse_pivot_values()?;
3096            self.expect(TokenType::RParen)?;
3097            self.expect(TokenType::RParen)?;
3098            let (alias, alias_quote_style) = match self.parse_optional_alias()? {
3099                Some((name, qs)) => (Some(name), qs),
3100                None => (None, QuoteStyle::None),
3101            };
3102            return Ok(TableSource::Pivot {
3103                source: Box::new(source),
3104                aggregate: Box::new(aggregate),
3105                for_column,
3106                in_values,
3107                alias,
3108                alias_quote_style,
3109            });
3110        }
3111        if self.match_token(TokenType::Unpivot) {
3112            // BigQuery: `UNPIVOT INCLUDE|EXCLUDE NULLS (...)`.
3113            if self.check_keyword("INCLUDE") || self.check_keyword("EXCLUDE") {
3114                let saved = self.pos;
3115                self.advance();
3116                if !self.match_keyword("NULLS") {
3117                    self.pos = saved;
3118                }
3119            }
3120            self.expect(TokenType::LParen)?;
3121            // Snowflake/DuckDB allow a grouped value-column tuple:
3122            // `UNPIVOT ((col1, col2) FOR period IN (...))`. Swallow the
3123            // grouping parens — we only model a single value-column name.
3124            let value_column = if self.peek_type() == &TokenType::LParen {
3125                self.advance();
3126                let first = self.expect_name()?;
3127                while self.match_token(TokenType::Comma) {
3128                    let _ = self.expect_name()?;
3129                }
3130                self.expect(TokenType::RParen)?;
3131                first
3132            } else {
3133                self.expect_name()?
3134            };
3135            self.expect_keyword("FOR")?;
3136            let for_column = self.expect_name()?;
3137            self.expect(TokenType::In)?;
3138            self.expect(TokenType::LParen)?;
3139            let in_columns = self.parse_pivot_values()?;
3140            self.expect(TokenType::RParen)?;
3141            self.expect(TokenType::RParen)?;
3142            let (alias, alias_quote_style) = match self.parse_optional_alias()? {
3143                Some((name, qs)) => (Some(name), qs),
3144                None => (None, QuoteStyle::None),
3145            };
3146            return Ok(TableSource::Unpivot {
3147                source: Box::new(source),
3148                value_column,
3149                for_column,
3150                in_columns,
3151                alias,
3152                alias_quote_style,
3153            });
3154        }
3155        Ok(source)
3156    }
3157
3158    /// Parse comma-separated pivot values, each optionally aliased.
3159    fn parse_pivot_values(&mut self) -> Result<Vec<PivotValue>> {
3160        let mut values = Vec::new();
3161        loop {
3162            let value = self.parse_expr()?;
3163            // Snowflake / BigQuery permit string or numeric aliases on pivot
3164            // values: `(a, b) AS 'semester_1'` / `(a, b) AS 1`. Accept those
3165            // alongside the regular identifier alias.
3166            let (alias, alias_quote_style) = if self.match_token(TokenType::As)
3167                && matches!(self.peek_type(), TokenType::String | TokenType::Number)
3168            {
3169                let tok = self.advance().clone();
3170                (Some(tok.value), QuoteStyle::None)
3171            } else {
3172                match self.parse_optional_alias()? {
3173                    Some((name, qs)) => (Some(name), qs),
3174                    None => (None, QuoteStyle::None),
3175                }
3176            };
3177            values.push(PivotValue {
3178                value,
3179                alias,
3180                alias_quote_style,
3181            });
3182            if !self.match_token(TokenType::Comma) {
3183                break;
3184            }
3185        }
3186        Ok(values)
3187    }
3188
3189    fn parse_table_ref(&mut self) -> Result<TableRef> {
3190        // T-SQL table variable: `FROM @t` / `INTO @t` etc. The @ is its own
3191        // token; fuse with the following name into a single identifier.
3192        if matches!(self.peek_type(), TokenType::AtSign)
3193            && self
3194                .peek_offset(1)
3195                .map(|t| {
3196                    matches!(t.token_type, TokenType::Identifier)
3197                        || matches!(t.token_type, TokenType::AtSign)
3198                })
3199                .unwrap_or(false)
3200        {
3201            let mut name = String::from("@");
3202            self.advance();
3203            if matches!(self.peek_type(), TokenType::AtSign) {
3204                name.push('@');
3205                self.advance();
3206            }
3207            let n = self.advance().clone();
3208            name.push_str(&n.value);
3209            let (alias, alias_quote_style) = match self.parse_optional_alias()? {
3210                Some((a, qs)) => (Some(a), qs),
3211                None => (None, QuoteStyle::None),
3212            };
3213            return Ok(TableRef {
3214                catalog: None,
3215                schema: None,
3216                name,
3217                alias,
3218                name_quote_style: QuoteStyle::None,
3219                alias_quote_style,
3220            });
3221        }
3222        let (first, first_qs) = self.expect_name_with_quote()?;
3223
3224        // Check for schema.table or catalog.schema.table. We also tolerate 4+
3225        // part qualified names (DuckDB / SQL Server `srv.db.sch.tbl`) by
3226        // folding additional segments into the catalog field.
3227        let (catalog, schema, name, name_qs) = if self.match_token(TokenType::Dot) {
3228            let (second, second_qs) = self.expect_name_with_quote()?;
3229            if self.match_token(TokenType::Dot) {
3230                let (mut third, mut third_qs) = self.expect_name_with_quote()?;
3231                let mut catalog = first;
3232                let mut schema = second;
3233                while self.match_token(TokenType::Dot) {
3234                    let (next, next_qs) = self.expect_name_with_quote()?;
3235                    catalog.push('.');
3236                    catalog.push_str(&schema);
3237                    schema = third;
3238                    third = next;
3239                    third_qs = next_qs;
3240                }
3241                (Some(catalog), Some(schema), third, third_qs)
3242            } else {
3243                (None, Some(first), second, second_qs)
3244            }
3245        } else {
3246            (None, None, first, first_qs)
3247        };
3248
3249        let (alias, alias_quote_style) = match self.parse_optional_alias()? {
3250            Some((name, qs)) => (Some(name), qs),
3251            None => (None, QuoteStyle::None),
3252        };
3253
3254        Ok(TableRef {
3255            catalog,
3256            schema,
3257            name,
3258            alias,
3259            name_quote_style: name_qs,
3260            alias_quote_style,
3261        })
3262    }
3263
3264    /// Like `parse_table_ref` but does not consume an alias.
3265    fn parse_table_ref_no_alias(&mut self) -> Result<TableRef> {
3266        let (first, first_qs) = self.expect_name_with_quote()?;
3267
3268        let (catalog, schema, name, name_qs) = if self.match_token(TokenType::Dot) {
3269            let (second, second_qs) = self.expect_name_with_quote()?;
3270            if self.match_token(TokenType::Dot) {
3271                let (mut third, mut third_qs) = self.expect_name_with_quote()?;
3272                let mut catalog = first;
3273                let mut schema = second;
3274                while self.match_token(TokenType::Dot) {
3275                    let (next, next_qs) = self.expect_name_with_quote()?;
3276                    catalog.push('.');
3277                    catalog.push_str(&schema);
3278                    schema = third;
3279                    third = next;
3280                    third_qs = next_qs;
3281                }
3282                (Some(catalog), Some(schema), third, third_qs)
3283            } else {
3284                (None, Some(first), second, second_qs)
3285            }
3286        } else {
3287            (None, None, first, first_qs)
3288        };
3289
3290        Ok(TableRef {
3291            catalog,
3292            schema,
3293            name,
3294            alias: None,
3295            name_quote_style: name_qs,
3296            alias_quote_style: QuoteStyle::None,
3297        })
3298    }
3299
3300    fn parse_joins(&mut self) -> Result<Vec<JoinClause>> {
3301        let mut joins = Vec::new();
3302        loop {
3303            // Hive `LATERAL VIEW [OUTER] func(args) tbl_alias [AS col, ...]`.
3304            // Model as a CROSS JOIN over a table-function so the rest of the
3305            // query parses; the AS column list is dropped.
3306            if self.peek_type() == &TokenType::Lateral
3307                && self
3308                    .peek_offset(1)
3309                    .map(|t| t.value.eq_ignore_ascii_case("VIEW"))
3310                    .unwrap_or(false)
3311            {
3312                self.advance(); // LATERAL
3313                self.advance(); // VIEW
3314                let _outer = self.check_keyword("OUTER") && {
3315                    self.advance();
3316                    true
3317                };
3318                // func(args) — parse name and arg list
3319                let fname = self.expect_name().unwrap_or_default();
3320                let mut fargs = Vec::new();
3321                if self.match_token(TokenType::LParen) {
3322                    if self.peek_type() != &TokenType::RParen {
3323                        fargs.push(self.parse_expr()?);
3324                        while self.match_token(TokenType::Comma) {
3325                            fargs.push(self.parse_expr()?);
3326                        }
3327                    }
3328                    self.expect(TokenType::RParen)?;
3329                }
3330                let (alias, alias_quote_style) = match self.parse_optional_alias()? {
3331                    Some((name, qs)) => (Some(name), qs),
3332                    None => (None, QuoteStyle::None),
3333                };
3334                // Optional `[AS] col1[, col2, ...]` column list. Hive
3335                // allows the AS to be omitted entirely; Spark sometimes
3336                // emits `tbl_name col`. Consume names while we keep seeing
3337                // identifier-then-comma pairs.
3338                let _ = self.match_token(TokenType::As);
3339                if self.is_name_token() {
3340                    self.advance();
3341                    while self.match_token(TokenType::Comma) {
3342                        if !self.is_name_token() {
3343                            break;
3344                        }
3345                        self.advance();
3346                    }
3347                }
3348                joins.push(JoinClause {
3349                    join_type: JoinType::Cross,
3350                    table: TableSource::TableFunction {
3351                        name: fname,
3352                        args: fargs,
3353                        alias,
3354                        alias_quote_style,
3355                    },
3356                    on: None,
3357                    using: Vec::new(),
3358                });
3359                continue;
3360            }
3361            // ClickHouse: ARRAY JOIN / LEFT ARRAY JOIN — flatten arrays as join source.
3362            // We model it as a CROSS JOIN over the array expression.
3363            let saved_array = self.pos;
3364            let _left_array = self.match_token(TokenType::Left);
3365            if self.match_token(TokenType::Array) && self.match_token(TokenType::Join) {
3366                // parse the array expression(s) — comma-separated
3367                let mut sources = Vec::new();
3368                loop {
3369                    // ClickHouse permits inline array literals as the source:
3370                    //   ARRAY JOIN [1,2,3] AS x, [(...), (...)] AS y
3371                    // Wrap as Unnest so we don't reject the syntax.
3372                    let src = if matches!(self.peek_type(), TokenType::LBracket) {
3373                        let arr = self.parse_primary()?;
3374                        let (alias, alias_quote_style) = match self.parse_optional_alias()? {
3375                            Some((name, qs)) => (Some(name), qs),
3376                            None => (None, QuoteStyle::None),
3377                        };
3378                        TableSource::Unnest {
3379                            expr: Box::new(arr),
3380                            alias,
3381                            alias_quote_style,
3382                            with_offset: false,
3383                        }
3384                    } else {
3385                        self.parse_table_source()?
3386                    };
3387                    sources.push(src);
3388                    if !self.match_token(TokenType::Comma) {
3389                        break;
3390                    }
3391                }
3392                for src in sources {
3393                    joins.push(JoinClause {
3394                        join_type: JoinType::Cross,
3395                        table: src,
3396                        on: None,
3397                        using: Vec::new(),
3398                    });
3399                }
3400                continue;
3401            } else {
3402                self.pos = saved_array;
3403            }
3404            // ClickHouse / Hive join strictness modifiers — consume and drop:
3405            //   GLOBAL? ALL | ANY | SEMI | ANTI | ASOF [LEFT|RIGHT|INNER|OUTER] JOIN
3406            let saved_strictness = self.pos;
3407            let _global_prefix = self.check_keyword("GLOBAL") && {
3408                self.advance();
3409                true
3410            };
3411            let consumed_strictness = if self.match_token(TokenType::All) {
3412                true
3413            } else if self.match_token(TokenType::Any) {
3414                true
3415            } else if self.check_keyword("SEMI")
3416                || self.check_keyword("ANTI")
3417                || self.check_keyword("ASOF")
3418                || self.check_keyword("PASTE")
3419            {
3420                self.advance();
3421                // DuckDB / ClickHouse allow compound forms like
3422                // `ASOF ANTI JOIN` / `ASOF SEMI JOIN` — absorb a
3423                // following second strictness keyword too.
3424                if self.check_keyword("SEMI")
3425                    || self.check_keyword("ANTI")
3426                    || self.check_keyword("ASOF")
3427                {
3428                    self.advance();
3429                }
3430                true
3431            } else {
3432                _global_prefix
3433            };
3434            // If the strictness modifier wasn't followed by a join keyword,
3435            // rewind so we don't accidentally consume a stray ALL/ANY (e.g.
3436            // `ORDER BY ALL`).
3437            if consumed_strictness
3438                && !matches!(
3439                    self.peek_type(),
3440                    TokenType::Join
3441                        | TokenType::Inner
3442                        | TokenType::Left
3443                        | TokenType::Right
3444                        | TokenType::Full
3445                        | TokenType::Cross
3446                )
3447            {
3448                self.pos = saved_strictness;
3449            }
3450            let join_type = match self.peek_type() {
3451                // `FROM a, b` is treated as `FROM a CROSS JOIN b`. Note the
3452                // SQL standard gives comma a lower precedence than explicit
3453                // JOIN operators (so `FROM a, b JOIN c ON ...` should be
3454                // `a CROSS JOIN (b JOIN c ...)`), but we flatten everything
3455                // into a left-deep chain. Column resolution still works for
3456                // the common cases since the join order is associative when
3457                // ON-clauses only reference adjacent tables.
3458                TokenType::Comma => {
3459                    self.advance();
3460                    JoinType::Cross
3461                }
3462                // `NATURAL [LEFT|RIGHT|FULL [OUTER]] JOIN tbl` — auto-equi-join
3463                // on shared column names. We don't model NATURAL semantics yet;
3464                // promote to the corresponding non-natural join type and treat
3465                // the implicit USING clause as empty.
3466                t if matches!(t, TokenType::Identifier)
3467                    && self.peek().value.eq_ignore_ascii_case("NATURAL") =>
3468                {
3469                    self.advance(); // NATURAL
3470                    let jt = match self.peek_type() {
3471                        TokenType::Left => {
3472                            self.advance();
3473                            let _ = self.match_token(TokenType::Outer);
3474                            JoinType::Left
3475                        }
3476                        TokenType::Right => {
3477                            self.advance();
3478                            let _ = self.match_token(TokenType::Outer);
3479                            JoinType::Right
3480                        }
3481                        TokenType::Full => {
3482                            self.advance();
3483                            let _ = self.match_token(TokenType::Outer);
3484                            JoinType::Full
3485                        }
3486                        TokenType::Inner => {
3487                            self.advance();
3488                            JoinType::Inner
3489                        }
3490                        _ => JoinType::Inner,
3491                    };
3492                    self.expect(TokenType::Join)?;
3493                    jt
3494                }
3495                // MySQL `STRAIGHT_JOIN` — non-reordered INNER JOIN.
3496                t if matches!(t, TokenType::Identifier)
3497                    && self.peek().value.eq_ignore_ascii_case("STRAIGHT_JOIN") =>
3498                {
3499                    self.advance();
3500                    JoinType::Inner
3501                }
3502                TokenType::Join => {
3503                    self.advance();
3504                    JoinType::Inner
3505                }
3506                TokenType::Inner => {
3507                    self.advance();
3508                    self.expect(TokenType::Join)?;
3509                    JoinType::Inner
3510                }
3511                TokenType::Left => {
3512                    self.advance();
3513                    let _ = self.match_token(TokenType::Outer);
3514                    // Hive / Spark: LEFT SEMI JOIN / LEFT ANTI JOIN
3515                    let _ = self.check_keyword("SEMI") && {
3516                        self.advance();
3517                        true
3518                    } || self.check_keyword("ANTI") && {
3519                        self.advance();
3520                        true
3521                    };
3522                    // ClickHouse: LEFT ANY|ALL JOIN
3523                    let _ = self.match_token(TokenType::Any) || self.match_token(TokenType::All);
3524                    // Some dialects (Spark/Hive variants) allow a trailing
3525                    // OUTER after the strictness modifier.
3526                    let _ = self.match_token(TokenType::Outer);
3527                    self.expect(TokenType::Join)?;
3528                    JoinType::Left
3529                }
3530                TokenType::Right => {
3531                    self.advance();
3532                    let _ = self.match_token(TokenType::Outer);
3533                    let _ = self.check_keyword("SEMI") && {
3534                        self.advance();
3535                        true
3536                    } || self.check_keyword("ANTI") && {
3537                        self.advance();
3538                        true
3539                    };
3540                    let _ = self.match_token(TokenType::Any) || self.match_token(TokenType::All);
3541                    let _ = self.match_token(TokenType::Outer);
3542                    self.expect(TokenType::Join)?;
3543                    JoinType::Right
3544                }
3545                TokenType::Full => {
3546                    self.advance();
3547                    let _ = self.match_token(TokenType::Outer);
3548                    self.expect(TokenType::Join)?;
3549                    JoinType::Full
3550                }
3551                TokenType::Cross => {
3552                    self.advance();
3553                    // T-SQL `CROSS APPLY <source>` ≈ `CROSS JOIN LATERAL ...`.
3554                    if self.is_name_token() && self.peek().value.eq_ignore_ascii_case("APPLY") {
3555                        self.advance();
3556                        JoinType::Cross
3557                    } else {
3558                        self.expect(TokenType::Join)?;
3559                        JoinType::Cross
3560                    }
3561                }
3562                TokenType::Outer => {
3563                    // T-SQL `OUTER APPLY <source>` ≈ `LEFT JOIN LATERAL ... ON TRUE`.
3564                    self.advance();
3565                    if self.is_name_token() && self.peek().value.eq_ignore_ascii_case("APPLY") {
3566                        self.advance();
3567                        JoinType::Left
3568                    } else {
3569                        break;
3570                    }
3571                }
3572                _ => break,
3573            };
3574
3575            let table = self.parse_table_source()?;
3576            let mut on = None;
3577            let mut using = vec![];
3578
3579            if self.match_token(TokenType::On) {
3580                on = Some(self.parse_expr()?);
3581            } else if self.match_token(TokenType::Using) {
3582                // ClickHouse permits a bare column name without parens:
3583                // `JOIN t USING k`.
3584                if self.match_token(TokenType::LParen) {
3585                    using = vec![self.expect_name()?];
3586                    while self.match_token(TokenType::Comma) {
3587                        using.push(self.expect_name()?);
3588                    }
3589                    self.expect(TokenType::RParen)?;
3590                } else {
3591                    using = vec![self.expect_name()?];
3592                    while self.match_token(TokenType::Comma) {
3593                        if !self.is_name_token() {
3594                            break;
3595                        }
3596                        using.push(self.expect_name()?);
3597                    }
3598                }
3599            }
3600
3601            joins.push(JoinClause {
3602                join_type,
3603                table,
3604                on,
3605                using,
3606            });
3607        }
3608        Ok(joins)
3609    }
3610
3611    fn parse_order_by_items(&mut self) -> Result<Vec<OrderByItem>> {
3612        let mut items = Vec::new();
3613        // DuckDB / Snowflake `ORDER BY ALL` shortcut.
3614        if self.match_token(TokenType::All) {
3615            let ascending = if self.match_token(TokenType::Desc) {
3616                false
3617            } else {
3618                let _ = self.match_token(TokenType::Asc);
3619                true
3620            };
3621            items.push(OrderByItem {
3622                expr: Expr::Wildcard,
3623                ascending,
3624                nulls_first: None,
3625            });
3626            return Ok(items);
3627        }
3628        loop {
3629            // MySQL: `ORDER BY BINARY col [ASC|DESC]` — BINARY here is a
3630            // collation modifier on the sort key. Swallow it; the rest of
3631            // the expression parses normally.
3632            if self.is_name_token() && self.peek().value.eq_ignore_ascii_case("BINARY") {
3633                let saved = self.pos;
3634                self.advance();
3635                // Only consume BINARY when followed by something that can
3636                // start an order-by key (name, literal, paren, etc.); if it
3637                // looks like the end of the list, rewind.
3638                if matches!(
3639                    self.peek_type(),
3640                    TokenType::Comma | TokenType::Semicolon | TokenType::Eof | TokenType::RParen
3641                ) {
3642                    self.pos = saved;
3643                }
3644            }
3645            let expr = self.parse_expr()?;
3646            // ClickHouse: `ORDER BY expr AS alias`. Swallow the alias.
3647            if self.match_token(TokenType::As) && self.is_name_token() {
3648                self.advance();
3649            }
3650            let ascending = if self.match_token(TokenType::Desc) {
3651                false
3652            } else {
3653                let _ = self.match_token(TokenType::Asc);
3654                true
3655            };
3656
3657            let nulls_first = if self.match_token(TokenType::Nulls) {
3658                if self.match_token(TokenType::First) {
3659                    Some(true)
3660                } else {
3661                    self.expect(TokenType::Identifier)?; // LAST
3662                    Some(false)
3663                }
3664            } else {
3665                None
3666            };
3667
3668            items.push(OrderByItem {
3669                expr,
3670                ascending,
3671                nulls_first,
3672            });
3673            if !self.match_token(TokenType::Comma) {
3674                break;
3675            }
3676        }
3677        Ok(items)
3678    }
3679
3680    fn parse_expr_list(&mut self) -> Result<Vec<Expr>> {
3681        let mut exprs = vec![self.parse_expr()?];
3682        while self.match_token(TokenType::Comma) {
3683            // Tolerate a trailing comma — DuckDB / PostgreSQL accept
3684            // `IN ('a', 'b', )` and similar list shapes.
3685            if matches!(self.peek_type(), TokenType::RParen | TokenType::RBracket) {
3686                break;
3687            }
3688            exprs.push(self.parse_expr()?);
3689        }
3690        Ok(exprs)
3691    }
3692
3693    /// Parse a comma-separated expression list where each item may carry an
3694    /// inline alias (`expr AS name` or `expr name`). Used for dialects (notably
3695    /// ClickHouse) that permit aliases inside partition/grouping lists.
3696    fn parse_expr_list_allow_item_alias(&mut self) -> Result<Vec<Expr>> {
3697        let mut exprs = Vec::new();
3698        loop {
3699            exprs.push(self.parse_expr()?);
3700            if self.match_token(TokenType::As) && self.is_name_token() {
3701                self.advance();
3702            }
3703            if !self.match_token(TokenType::Comma) {
3704                break;
3705            }
3706            if matches!(self.peek_type(), TokenType::RParen | TokenType::RBracket) {
3707                break;
3708            }
3709        }
3710        Ok(exprs)
3711    }
3712
3713    /// Parse array-literal elements: comma-separated expressions, each
3714    /// optionally followed by `AS alias` (ClickHouse lets bindings
3715    /// appear inside `[…]`). The closing token is the caller's
3716    /// responsibility.
3717    fn parse_array_items(&mut self, close: TokenType) -> Result<Vec<Expr>> {
3718        if self.peek_type() == &close {
3719            return Ok(vec![]);
3720        }
3721        let mut items = Vec::new();
3722        loop {
3723            let expr = self.parse_expr()?;
3724            if self.match_token(TokenType::As) {
3725                let _ = self.parse_optional_alias();
3726            }
3727            items.push(expr);
3728            if !self.match_token(TokenType::Comma) {
3729                break;
3730            }
3731        }
3732        Ok(items)
3733    }
3734
3735    /// Parse a GROUP BY list, which may contain regular expressions,
3736    /// CUBE(...), ROLLUP(...), and GROUPING SETS(...).
3737    fn parse_group_by_list(&mut self) -> Result<Vec<Expr>> {
3738        // DuckDB / Snowflake `GROUP BY ALL` shortcut — emit a wildcard
3739        // marker so downstream code can recognise it. PostgreSQL also
3740        // allows `GROUP BY ALL <col>, <col>` (treated identically to a
3741        // regular GROUP BY list); fall through to the normal parser when
3742        // the next token is a column expression rather than a clause
3743        // terminator.
3744        if self.match_token(TokenType::All) {
3745            let terminates = matches!(
3746                self.peek_type(),
3747                TokenType::Comma
3748                    | TokenType::Semicolon
3749                    | TokenType::Eof
3750                    | TokenType::RParen
3751                    | TokenType::Having
3752                    | TokenType::Order
3753                    | TokenType::Limit
3754                    | TokenType::Offset
3755                    | TokenType::Window
3756                    | TokenType::Union
3757                    | TokenType::Intersect
3758                    | TokenType::Except
3759                    | TokenType::Qualify
3760            );
3761            if terminates {
3762                return Ok(vec![Expr::Wildcard]);
3763            }
3764            // Followed by a real grouping expression — fall through.
3765        }
3766        let mut items = vec![self.parse_group_by_item()?];
3767        // ClickHouse: `GROUP BY col AS alias [, …]` — swallow alias.
3768        if self.match_token(TokenType::As) && self.is_name_token() {
3769            self.advance();
3770        }
3771        // MySQL: `GROUP BY col ASC|DESC [, …]` — swallow direction.
3772        let _ = self.match_token(TokenType::Asc) || self.match_token(TokenType::Desc);
3773        while self.match_token(TokenType::Comma) {
3774            items.push(self.parse_group_by_item()?);
3775            if self.match_token(TokenType::As) && self.is_name_token() {
3776                self.advance();
3777            }
3778            let _ = self.match_token(TokenType::Asc) || self.match_token(TokenType::Desc);
3779        }
3780        Ok(items)
3781    }
3782
3783    /// Parse a single GROUP BY item: a CUBE, ROLLUP, GROUPING SETS, or regular expression.
3784    fn parse_group_by_item(&mut self) -> Result<Expr> {
3785        match self.peek_type() {
3786            TokenType::Cube => {
3787                self.advance();
3788                self.expect(TokenType::LParen)?;
3789                let exprs = if self.peek_type() == &TokenType::RParen {
3790                    vec![]
3791                } else {
3792                    self.parse_group_by_element_list()?
3793                };
3794                self.expect(TokenType::RParen)?;
3795                Ok(Expr::Cube { exprs })
3796            }
3797            TokenType::Rollup => {
3798                self.advance();
3799                self.expect(TokenType::LParen)?;
3800                let exprs = if self.peek_type() == &TokenType::RParen {
3801                    vec![]
3802                } else {
3803                    self.parse_group_by_element_list()?
3804                };
3805                self.expect(TokenType::RParen)?;
3806                Ok(Expr::Rollup { exprs })
3807            }
3808            TokenType::Grouping => {
3809                // Could be GROUPING SETS or GROUPING() function
3810                let saved = self.pos;
3811                self.advance();
3812                if self.peek_type() == &TokenType::Sets {
3813                    // GROUPING SETS (...)
3814                    self.advance();
3815                    self.expect(TokenType::LParen)?;
3816                    let sets = self.parse_grouping_sets_elements()?;
3817                    self.expect(TokenType::RParen)?;
3818                    Ok(Expr::GroupingSets { sets })
3819                } else {
3820                    // It's the GROUPING() function, backtrack and parse as expression
3821                    self.pos = saved;
3822                    self.parse_expr()
3823                }
3824            }
3825            _ => self.parse_expr(),
3826        }
3827    }
3828
3829    /// Parse elements inside CUBE(...) or ROLLUP(...).
3830    /// Each element can be a single expression or a parenthesized tuple of expressions.
3831    fn parse_group_by_element_list(&mut self) -> Result<Vec<Expr>> {
3832        let mut items = vec![self.parse_group_by_element()?];
3833        while self.match_token(TokenType::Comma) {
3834            items.push(self.parse_group_by_element()?);
3835        }
3836        Ok(items)
3837    }
3838
3839    /// Parse a single element inside CUBE/ROLLUP: either `expr` or `(expr, expr, ...)`.
3840    fn parse_group_by_element(&mut self) -> Result<Expr> {
3841        if self.peek_type() == &TokenType::LParen {
3842            self.advance();
3843            let exprs = self.parse_expr_list()?;
3844            self.expect(TokenType::RParen)?;
3845            if exprs.len() == 1 {
3846                Ok(Expr::Nested(Box::new(exprs.into_iter().next().unwrap())))
3847            } else {
3848                Ok(Expr::Tuple(exprs))
3849            }
3850        } else {
3851            let e = self.parse_expr()?;
3852            // ClickHouse: `GROUP BY expr AS alias`. Swallow the alias.
3853            if self.match_token(TokenType::As) && self.is_name_token() {
3854                self.advance();
3855            }
3856            Ok(e)
3857        }
3858    }
3859
3860    /// Parse elements inside GROUPING SETS (...).
3861    /// Each element can be: (), (expr, ...), CUBE(...), ROLLUP(...), or a single expr.
3862    fn parse_grouping_sets_elements(&mut self) -> Result<Vec<Expr>> {
3863        let mut items = vec![self.parse_grouping_sets_element()?];
3864        while self.match_token(TokenType::Comma) {
3865            items.push(self.parse_grouping_sets_element()?);
3866        }
3867        Ok(items)
3868    }
3869
3870    /// Parse a single GROUPING SETS element.
3871    fn parse_grouping_sets_element(&mut self) -> Result<Expr> {
3872        match self.peek_type() {
3873            TokenType::Cube => {
3874                self.advance();
3875                self.expect(TokenType::LParen)?;
3876                let exprs = if self.peek_type() == &TokenType::RParen {
3877                    vec![]
3878                } else {
3879                    self.parse_group_by_element_list()?
3880                };
3881                self.expect(TokenType::RParen)?;
3882                Ok(Expr::Cube { exprs })
3883            }
3884            TokenType::Rollup => {
3885                self.advance();
3886                self.expect(TokenType::LParen)?;
3887                let exprs = if self.peek_type() == &TokenType::RParen {
3888                    vec![]
3889                } else {
3890                    self.parse_group_by_element_list()?
3891                };
3892                self.expect(TokenType::RParen)?;
3893                Ok(Expr::Rollup { exprs })
3894            }
3895            TokenType::LParen => {
3896                self.advance();
3897                if self.peek_type() == &TokenType::RParen {
3898                    // Empty grouping set: ()
3899                    self.advance();
3900                    Ok(Expr::Tuple(vec![]))
3901                } else {
3902                    let exprs = self.parse_expr_list()?;
3903                    self.expect(TokenType::RParen)?;
3904                    if exprs.len() == 1 {
3905                        Ok(Expr::Nested(Box::new(exprs.into_iter().next().unwrap())))
3906                    } else {
3907                        Ok(Expr::Tuple(exprs))
3908                    }
3909                }
3910            }
3911            _ => self.parse_expr(),
3912        }
3913    }
3914
3915    // ── INSERT ──────────────────────────────────────────────────────
3916
3917    fn parse_insert(&mut self) -> Result<InsertStatement> {
3918        // Accept MySQL `REPLACE INTO ...` as a synonym for `INSERT INTO ...`.
3919        if !self.match_token(TokenType::Insert) {
3920            self.expect(TokenType::Replace)?;
3921        }
3922        // SQLite / DuckDB conflict-resolution prefix:
3923        //   `INSERT OR REPLACE|IGNORE|FAIL|ABORT|ROLLBACK INTO ...`.
3924        // Swallow opaquely; we don't model conflict resolution at the
3925        // statement level (ON CONFLICT covers most cases downstream).
3926        if self.match_token(TokenType::Or) {
3927            if self.match_token(TokenType::Replace) {
3928                // matched
3929            } else if self.match_token(TokenType::Ignore) {
3930                // matched
3931            } else if self.is_name_token() {
3932                let v = self.peek().value.to_uppercase();
3933                if matches!(v.as_str(), "FAIL" | "ABORT" | "ROLLBACK") {
3934                    self.advance();
3935                }
3936            }
3937        }
3938        // MySQL modifiers between INSERT/REPLACE and INTO:
3939        //   `INSERT LOW_PRIORITY|DELAYED|HIGH_PRIORITY [IGNORE] INTO ...`,
3940        //   `INSERT IGNORE INTO ...`. Swallow them so the rest parses.
3941        loop {
3942            if self.match_token(TokenType::Ignore) {
3943                continue;
3944            }
3945            if self.is_name_token() {
3946                let v = self.peek().value.to_uppercase();
3947                if matches!(v.as_str(), "LOW_PRIORITY" | "DELAYED" | "HIGH_PRIORITY") {
3948                    self.advance();
3949                    continue;
3950                }
3951            }
3952            break;
3953        }
3954        let _ = self.match_token(TokenType::Into);
3955        // Hive: `INSERT OVERWRITE [LOCAL] DIRECTORY '/path'` or
3956        // `INSERT OVERWRITE TABLE tbl ...`. Consume OVERWRITE (tokenized as
3957        // an identifier) and any DIRECTORY clause that follows.
3958        if self.check_keyword("OVERWRITE") {
3959            self.advance();
3960            if self.check_keyword("LOCAL") {
3961                self.advance();
3962            }
3963            if self.check_keyword("DIRECTORY") {
3964                self.advance();
3965                // Consume `'path'` (string) and any STORED AS / ROW FORMAT
3966                // clauses until we hit SELECT/WITH/LParen/VALUES/EOF.
3967                if matches!(self.peek_type(), TokenType::String) {
3968                    self.advance();
3969                }
3970                while !matches!(
3971                    self.peek_type(),
3972                    TokenType::Select
3973                        | TokenType::With
3974                        | TokenType::LParen
3975                        | TokenType::Values
3976                        | TokenType::Eof
3977                        | TokenType::Semicolon
3978                ) {
3979                    self.advance();
3980                }
3981            }
3982        }
3983        // Hive: `INSERT INTO TABLE tbl ...` and `INSERT OVERWRITE TABLE tbl ...`.
3984        let _ = self.match_token(TokenType::Table);
3985        let table = self.parse_table_ref()?;
3986
3987        // Hive `PARTITION (k=v, ...)` between table and column list / source.
3988        if self.peek_type() == &TokenType::Partition {
3989            self.advance();
3990            if self.match_token(TokenType::LParen) {
3991                let mut depth = 1;
3992                while depth > 0 {
3993                    match self.peek_type() {
3994                        TokenType::LParen => depth += 1,
3995                        TokenType::RParen => depth -= 1,
3996                        TokenType::Eof => break,
3997                        _ => {}
3998                    }
3999                    if depth == 0 {
4000                        self.advance();
4001                        break;
4002                    }
4003                    self.advance();
4004                }
4005            }
4006        }
4007
4008        let columns = if self.match_token(TokenType::LParen) {
4009            // BigQuery / SQLFluff fixture: `INSERT INTO t (SELECT ... )` —
4010            // no column list, the parenthesized SELECT is the source.
4011            // Rewind to the `(` and let the source dispatch handle it.
4012            if matches!(self.peek_type(), TokenType::Select | TokenType::With) {
4013                self.pos -= 1;
4014                Vec::new()
4015            } else {
4016                // ClickHouse `INSERT INTO t (COLUMNS('.*') EXCEPT (...))` — when
4017                // the list contains a function call or anything other than plain
4018                // identifiers, fall back to a balanced-paren swallow.
4019                let saved = self.pos;
4020                let try_simple: Result<Vec<String>> = (|| {
4021                    let mut cols = vec![self.parse_dotted_name()?];
4022                    while self.match_token(TokenType::Comma) {
4023                        cols.push(self.parse_dotted_name()?);
4024                    }
4025                    self.expect(TokenType::RParen)?;
4026                    Ok(cols)
4027                })();
4028                match try_simple {
4029                    Ok(c) => c,
4030                    Err(_) => {
4031                        self.pos = saved;
4032                        let mut depth = 1_i32;
4033                        while depth > 0 && self.peek_type() != &TokenType::Eof {
4034                            match self.peek_type() {
4035                                TokenType::LParen => depth += 1,
4036                                TokenType::RParen => depth -= 1,
4037                                _ => {}
4038                            }
4039                            self.advance();
4040                        }
4041                        Vec::new()
4042                    }
4043                }
4044            }
4045        } else {
4046            vec![]
4047        };
4048
4049        // ClickHouse `INSERT INTO t [(cols)] SETTINGS k=v[, …] VALUES …`.
4050        // Swallow the SETTINGS clause before the source clause so the
4051        // surrounding parse completes.
4052        if self.check_keyword("SETTINGS") {
4053            self.advance();
4054            loop {
4055                if !self.is_name_token() {
4056                    break;
4057                }
4058                self.advance(); // key
4059                if !self.match_token(TokenType::Eq) {
4060                    break;
4061                }
4062                // value: number / string / identifier / unary-signed number
4063                let _ = self.match_token(TokenType::Minus) || self.match_token(TokenType::Plus);
4064                if matches!(self.peek_type(), TokenType::Number | TokenType::String)
4065                    || self.is_name_token()
4066                {
4067                    self.advance();
4068                }
4069                if !self.match_token(TokenType::Comma) {
4070                    break;
4071                }
4072            }
4073        }
4074
4075        let source = if self.match_token(TokenType::Values) || self.match_keyword("VALUE") {
4076            let mut rows = Vec::new();
4077            loop {
4078                self.expect(TokenType::LParen)?;
4079                // MySQL allows `VALUES ()` as an empty row to insert all
4080                // defaults — accept and emit as an empty row.
4081                let row = if self.peek_type() == &TokenType::RParen {
4082                    Vec::new()
4083                } else {
4084                    self.parse_expr_list()?
4085                };
4086                self.expect(TokenType::RParen)?;
4087                rows.push(row);
4088                // ClickHouse permits comma-less rows: `VALUES (1)(2)(3)`.
4089                if self.peek_type() == &TokenType::LParen {
4090                    continue;
4091                }
4092                if !self.match_token(TokenType::Comma) {
4093                    break;
4094                }
4095                // Trailing comma: `VALUES (1,2), (3,4),` — DuckDB / sqlfluff
4096                // fixture truncation. Accept and stop the row loop.
4097                if !matches!(self.peek_type(), TokenType::LParen) {
4098                    break;
4099                }
4100            }
4101            InsertSource::Values(rows)
4102        } else if matches!(
4103            self.peek_type(),
4104            TokenType::Select | TokenType::With | TokenType::LParen
4105        ) {
4106            InsertSource::Query(Box::new(self.parse_statement_inner()?))
4107        } else if self.match_token(TokenType::Default) {
4108            self.expect(TokenType::Values)?;
4109            InsertSource::Default
4110        } else if self.match_token(TokenType::Set) {
4111            // MySQL `INSERT INTO t SET col = val, col = val, ...`.
4112            // Collapse into a single-row VALUES placeholder by collecting
4113            // the right-hand expressions; column names are dropped.
4114            let mut row = Vec::new();
4115            loop {
4116                let _ = self.expect_name()?;
4117                self.expect(TokenType::Eq)?;
4118                row.push(self.parse_expr()?);
4119                if !self.match_token(TokenType::Comma) {
4120                    break;
4121                }
4122            }
4123            InsertSource::Values(vec![row])
4124        } else if self.peek_type() == &TokenType::From {
4125            // DuckDB `INSERT INTO t FROM source` shorthand for
4126            // `INSERT INTO t SELECT * FROM source`. Synthesize a SELECT *
4127            // statement so the existing query path handles it.
4128            self.advance();
4129            let from = Some(FromClause {
4130                source: self.parse_table_source()?,
4131            });
4132            let joins = self.parse_joins()?;
4133            let stmt = Statement::Select(SelectStatement {
4134                comments: vec![],
4135                ctes: vec![],
4136                distinct: false,
4137                top: None,
4138                columns: vec![SelectItem::Wildcard],
4139                from,
4140                joins,
4141                where_clause: None,
4142                group_by: vec![],
4143                having: None,
4144                order_by: vec![],
4145                limit: None,
4146                offset: None,
4147                fetch_first: None,
4148                qualify: None,
4149                window_definitions: vec![],
4150            });
4151            InsertSource::Query(Box::new(stmt))
4152        } else if self.peek().value.eq_ignore_ascii_case("FORMAT") {
4153            // ClickHouse `INSERT INTO t FORMAT name <raw payload>`.
4154            // Swallow the format name and the remainder of the statement
4155            // as opaque bytes; we cannot parse JSONEachRow / TabSeparated
4156            // payloads, but we should not reject the statement.
4157            self.advance();
4158            let _ = self.expect_name();
4159            while !matches!(self.peek_type(), TokenType::Eof | TokenType::Semicolon) {
4160                self.advance();
4161            }
4162            InsertSource::Default
4163        } else {
4164            return Err(SqlglotError::ParserError {
4165                message: "Expected VALUES, SELECT, or DEFAULT VALUES after INSERT".into(),
4166            });
4167        };
4168
4169        // MySQL 8.0.19+ row alias: `INSERT INTO t (cols) VALUES (...) AS
4170        // alias [(col_alias, ...)] ON DUPLICATE KEY UPDATE ...`. Swallow
4171        // the alias so the ON DUPLICATE clause parses.
4172        if self.peek_type() == &TokenType::As
4173            && self
4174                .peek_offset(1)
4175                .map(|t| {
4176                    matches!(
4177                        t.token_type,
4178                        TokenType::Identifier
4179                            | TokenType::Key
4180                            | TokenType::Year
4181                            | TokenType::Month
4182                            | TokenType::Day
4183                            | TokenType::Hour
4184                            | TokenType::Minute
4185                            | TokenType::Second
4186                    ) || t
4187                        .value
4188                        .chars()
4189                        .next()
4190                        .is_some_and(|c| c.is_alphabetic() || c == '_')
4191                })
4192                .unwrap_or(false)
4193        {
4194            self.advance(); // AS
4195            self.advance(); // alias name
4196            if self.match_token(TokenType::LParen) {
4197                let mut depth = 1_i32;
4198                while depth > 0 && !matches!(self.peek_type(), TokenType::Eof) {
4199                    match self.peek_type() {
4200                        TokenType::LParen => depth += 1,
4201                        TokenType::RParen => depth -= 1,
4202                        _ => {}
4203                    }
4204                    self.advance();
4205                }
4206            }
4207        }
4208
4209        // MySQL `ON DUPLICATE KEY UPDATE col=val, ...`. Swallow the clause.
4210        if self.peek_type() == &TokenType::On
4211            && self
4212                .peek_offset(1)
4213                .map(|t| t.value.eq_ignore_ascii_case("DUPLICATE"))
4214                .unwrap_or(false)
4215        {
4216            self.advance();
4217            self.advance();
4218            // KEY UPDATE
4219            if self.is_name_token() && self.peek().value.eq_ignore_ascii_case("KEY") {
4220                self.advance();
4221            }
4222            if self.match_token(TokenType::Update) {
4223                // assignments until end-of-statement
4224                loop {
4225                    let _ = self.expect_name();
4226                    if !self.match_token(TokenType::Eq) {
4227                        break;
4228                    }
4229                    let _ = self.parse_expr();
4230                    if !self.match_token(TokenType::Comma) {
4231                        break;
4232                    }
4233                }
4234            }
4235        }
4236
4237        // ON CONFLICT
4238        let on_conflict = if self.match_token(TokenType::On) {
4239            if self.match_token(TokenType::Conflict) {
4240                let columns = if self.match_token(TokenType::LParen) {
4241                    self.parse_parenthesized_raw_items()?
4242                } else {
4243                    vec![]
4244                };
4245                self.expect(TokenType::Do)?;
4246                let action = if self.match_token(TokenType::Nothing) {
4247                    ConflictAction::DoNothing
4248                } else {
4249                    self.expect(TokenType::Update)?;
4250                    self.expect(TokenType::Set)?;
4251                    let mut assignments = Vec::new();
4252                    loop {
4253                        let col = self.expect_name()?;
4254                        self.expect(TokenType::Eq)?;
4255                        let val = self.parse_expr()?;
4256                        assignments.push((col, val));
4257                        if !self.match_token(TokenType::Comma) {
4258                            break;
4259                        }
4260                    }
4261                    ConflictAction::DoUpdate(assignments)
4262                };
4263                // Postgres / DuckDB allow `ON CONFLICT (...) DO UPDATE SET
4264                // ... WHERE predicate` to limit the update. Swallow the
4265                // predicate opaquely.
4266                if self.match_token(TokenType::Where) {
4267                    let _ = self.parse_expr()?;
4268                }
4269                Some(OnConflict { columns, action })
4270            } else {
4271                None
4272            }
4273        } else {
4274            None
4275        };
4276
4277        let returning = if self.match_token(TokenType::Returning) {
4278            self.parse_select_items()?
4279        } else {
4280            vec![]
4281        };
4282
4283        Ok(InsertStatement {
4284            comments: vec![],
4285            table,
4286            columns,
4287            source,
4288            on_conflict,
4289            returning,
4290        })
4291    }
4292
4293    // ── UPDATE ──────────────────────────────────────────────────────
4294
4295    fn parse_update(&mut self) -> Result<UpdateStatement> {
4296        self.expect(TokenType::Update)?;
4297        let table = self.parse_table_ref()?;
4298        // MySQL multi-table UPDATE: `UPDATE t1, t2 [, ...] SET ...`.
4299        // Swallow the additional table refs (we keep only the first as
4300        // the primary target).
4301        while self.match_token(TokenType::Comma) {
4302            let _ = self.parse_table_ref()?;
4303        }
4304        // PG SQL:2011 temporal `UPDATE t FOR PORTION OF col FROM a TO b
4305        // [AS alias] SET ...`. Swallow the qualifier verbatim.
4306        if self.check_keyword("FOR")
4307            && self
4308                .peek_offset(1)
4309                .map(|t| t.value.eq_ignore_ascii_case("PORTION"))
4310                .unwrap_or(false)
4311        {
4312            while !matches!(
4313                self.peek_type(),
4314                TokenType::Set | TokenType::Eof | TokenType::Semicolon
4315            ) {
4316                self.advance();
4317            }
4318        }
4319        // MySQL `UPDATE t PARTITION (p0[, p1]) SET ...` — swallow.
4320        if matches!(self.peek_type(), TokenType::Partition)
4321            && matches!(
4322                self.peek_offset(1).map(|t| &t.token_type),
4323                Some(TokenType::LParen)
4324            )
4325        {
4326            self.advance();
4327            self.advance();
4328            let mut depth = 1;
4329            while depth > 0 && !matches!(self.peek_type(), TokenType::Eof) {
4330                match self.peek_type() {
4331                    TokenType::LParen => depth += 1,
4332                    TokenType::RParen => {
4333                        depth -= 1;
4334                        if depth == 0 {
4335                            self.advance();
4336                            break;
4337                        }
4338                    }
4339                    _ => {}
4340                }
4341                self.advance();
4342            }
4343        }
4344        // MySQL multi-table UPDATE: `UPDATE t1 [LEFT|RIGHT|INNER|CROSS] JOIN
4345        // t2 ON ... SET ...`. Swallow the joins so the existing single-target
4346        // update parses; the joined tables are dropped from the AST.
4347        let _ = self.parse_joins();
4348        self.expect(TokenType::Set)?;
4349
4350        let mut assignments = Vec::new();
4351        loop {
4352            // Accept qualified LHS like `alias.col` (Oracle, T-SQL idiom),
4353            // and PG/Snowflake subscripts/field access on the LHS such as
4354            // `arr[1] = …`, `arr[1:3] = …`, `obj['k']`, `(a,b) = …`.
4355            // Accept LHS row-tuple `(a, b, c) = (rhs)` (PostgreSQL).
4356            if self.peek_type() == &TokenType::LParen {
4357                let saved = self.pos;
4358                self.advance();
4359                let mut depth = 1;
4360                while depth > 0 && self.peek_type() != &TokenType::Eof {
4361                    match self.peek_type() {
4362                        TokenType::LParen => depth += 1,
4363                        TokenType::RParen => depth -= 1,
4364                        _ => {}
4365                    }
4366                    self.advance();
4367                }
4368                if self.peek_type() == &TokenType::Eq {
4369                    self.advance();
4370                    let val = self.parse_expr()?;
4371                    assignments.push(("__tuple__".to_string(), val));
4372                    if !self.match_token(TokenType::Comma) {
4373                        break;
4374                    }
4375                    continue;
4376                }
4377                self.pos = saved;
4378            }
4379            let mut col = self.expect_name()?;
4380            while self.match_token(TokenType::Dot) {
4381                col.push('.');
4382                col.push_str(&self.expect_name()?);
4383            }
4384            // Swallow `[index]` / `[a:b]` subscripts in the LHS — we don't
4385            // model array-element assignment in the AST.
4386            while self.peek_type() == &TokenType::LBracket {
4387                self.advance();
4388                let mut depth = 1;
4389                while depth > 0 && self.peek_type() != &TokenType::Eof {
4390                    match self.peek_type() {
4391                        TokenType::LBracket => depth += 1,
4392                        TokenType::RBracket => depth -= 1,
4393                        _ => {}
4394                    }
4395                    self.advance();
4396                }
4397            }
4398            self.expect(TokenType::Eq)?;
4399            let val = self.parse_expr()?;
4400            assignments.push((col, val));
4401            if !self.match_token(TokenType::Comma) {
4402                break;
4403            }
4404        }
4405
4406        let from = if self.match_token(TokenType::From) {
4407            Some(FromClause {
4408                source: self.parse_table_source()?,
4409            })
4410        } else {
4411            None
4412        };
4413
4414        let where_clause = if self.match_token(TokenType::Where) {
4415            Some(self.parse_expr()?)
4416        } else {
4417            None
4418        };
4419
4420        // Teradata `PREFERRING <expr> [PARTITION BY <list>]` skyline
4421        // clause on UPDATE. Swallow up to a known terminator.
4422        if self.check_keyword("PREFERRING") {
4423            self.advance();
4424            loop {
4425                match self.peek_type() {
4426                    TokenType::Eof
4427                    | TokenType::Semicolon
4428                    | TokenType::RParen
4429                    | TokenType::Returning => break,
4430                    _ => self.advance(),
4431                };
4432            }
4433        }
4434
4435        // MySQL: `UPDATE … [ORDER BY …] [LIMIT N]`. Swallow.
4436        if self.match_token(TokenType::Order) {
4437            self.expect(TokenType::By)?;
4438            let _ = self.parse_order_by_items()?;
4439        }
4440        if self.match_token(TokenType::Limit) {
4441            let _ = self.parse_expr()?;
4442        }
4443
4444        let returning = if self.match_token(TokenType::Returning) {
4445            self.parse_select_items()?
4446        } else {
4447            vec![]
4448        };
4449
4450        Ok(UpdateStatement {
4451            comments: vec![],
4452            table,
4453            assignments,
4454            from,
4455            where_clause,
4456            returning,
4457        })
4458    }
4459
4460    // ── DELETE ──────────────────────────────────────────────────────
4461
4462    fn parse_delete(&mut self) -> Result<DeleteStatement> {
4463        self.expect(TokenType::Delete)?;
4464        // MySQL multi-table form: `DELETE t1[, t2, ...] FROM <join expr>`.
4465        // Swallow the leading table-alias list (we don't model it) before
4466        // the mandatory FROM.
4467        let mut multi_table = false;
4468        if !matches!(self.peek_type(), TokenType::From) {
4469            let saved = self.pos;
4470            if self.is_name_token() {
4471                self.advance();
4472                let _ = self.match_token(TokenType::Dot);
4473                if self.is_name_token() {
4474                    self.advance();
4475                }
4476                while self.match_token(TokenType::Comma) {
4477                    if !self.is_name_token() {
4478                        break;
4479                    }
4480                    self.advance();
4481                    let _ = self.match_token(TokenType::Dot);
4482                    if self.is_name_token() {
4483                        self.advance();
4484                    }
4485                }
4486                if matches!(self.peek_type(), TokenType::From) {
4487                    multi_table = true;
4488                } else {
4489                    self.pos = saved;
4490                }
4491            }
4492        }
4493        // BigQuery / some Snowflake forms allow `DELETE <table> WHERE …`
4494        // (FROM optional). If FROM is missing but the next token starts a
4495        // table-ref, treat it as the implicit FROM target.
4496        let from_optional = !matches!(self.peek_type(), TokenType::From);
4497        if !from_optional {
4498            self.expect(TokenType::From)?;
4499        }
4500        let table = self.parse_table_ref()?;
4501        // MySQL: `DELETE FROM t PARTITION (p0[, p1, ...])` — swallow
4502        // partition selector.
4503        if matches!(self.peek_type(), TokenType::Partition)
4504            && matches!(
4505                self.peek_offset(1).map(|t| &t.token_type),
4506                Some(TokenType::LParen)
4507            )
4508        {
4509            self.advance();
4510            self.advance();
4511            let mut depth = 1;
4512            while depth > 0 && !matches!(self.peek_type(), TokenType::Eof) {
4513                match self.peek_type() {
4514                    TokenType::LParen => depth += 1,
4515                    TokenType::RParen => {
4516                        depth -= 1;
4517                        if depth == 0 {
4518                            self.advance();
4519                            break;
4520                        }
4521                    }
4522                    _ => {}
4523                }
4524                self.advance();
4525            }
4526        }
4527        if multi_table {
4528            // Swallow JOIN clauses, additional comma-joined tables, and
4529            // any opaque tail up to USING / WHERE / RETURNING / ; / EOF.
4530            loop {
4531                if matches!(
4532                    self.peek_type(),
4533                    TokenType::Where
4534                        | TokenType::Using
4535                        | TokenType::Returning
4536                        | TokenType::Semicolon
4537                        | TokenType::Eof
4538                ) {
4539                    break;
4540                }
4541                self.advance();
4542            }
4543        }
4544
4545        let using = if self.match_token(TokenType::Using) {
4546            Some(FromClause {
4547                source: self.parse_table_source()?,
4548            })
4549        } else {
4550            None
4551        };
4552
4553        // Teradata `PREFERRING <expr> [PARTITION BY <list>]` skyline
4554        // clause on DELETE.
4555        if self.check_keyword("PREFERRING") {
4556            self.advance();
4557            loop {
4558                match self.peek_type() {
4559                    TokenType::Eof
4560                    | TokenType::Semicolon
4561                    | TokenType::Where
4562                    | TokenType::Returning
4563                    | TokenType::RParen => break,
4564                    _ => self.advance(),
4565                };
4566            }
4567        }
4568
4569        let where_clause = if self.match_token(TokenType::Where) {
4570            Some(self.parse_expr()?)
4571        } else {
4572            None
4573        };
4574
4575        // MySQL: `DELETE FROM tbl [WHERE ...] [ORDER BY ...] [LIMIT N]`.
4576        // Swallow ORDER BY and LIMIT modifiers — we don't model them on
4577        // DeleteStatement yet.
4578        if self.match_token(TokenType::Order) {
4579            self.expect(TokenType::By)?;
4580            let _ = self.parse_order_by_items()?;
4581        }
4582        if self.match_token(TokenType::Limit) {
4583            let _ = self.parse_expr()?;
4584        }
4585
4586        let returning = if self.match_token(TokenType::Returning) {
4587            self.parse_select_items()?
4588        } else {
4589            vec![]
4590        };
4591
4592        Ok(DeleteStatement {
4593            comments: vec![],
4594            table,
4595            using,
4596            where_clause,
4597            returning,
4598        })
4599    }
4600
4601    // ── MERGE ───────────────────────────────────────────────────────
4602
4603    fn parse_merge(&mut self) -> Result<MergeStatement> {
4604        self.expect(TokenType::Merge)?;
4605        let _ = self.match_token(TokenType::Into);
4606        let target = self.parse_table_ref()?;
4607
4608        self.expect(TokenType::Using)?;
4609        let source = self.parse_table_source()?;
4610
4611        // DuckDB supports `MERGE INTO t USING src USING (cols)` as a
4612        // shorthand for the ON condition (column-equality join, akin to
4613        // SQL USING for JOINs). Swallow the column list opaquely and
4614        // synthesize a trivial truthy ON expression so downstream parsing
4615        // continues. We don't model USING-style MERGE in the AST yet.
4616        let on = if self.match_token(TokenType::Using) {
4617            self.expect(TokenType::LParen)?;
4618            let _ = self.expect_name()?;
4619            while self.match_token(TokenType::Comma) {
4620                let _ = self.expect_name()?;
4621            }
4622            self.expect(TokenType::RParen)?;
4623            Expr::Boolean(true)
4624        } else {
4625            self.expect(TokenType::On)?;
4626            self.parse_expr()?
4627        };
4628
4629        let mut clauses = Vec::new();
4630        while self.match_token(TokenType::When) {
4631            clauses.push(self.parse_merge_clause()?);
4632        }
4633
4634        if clauses.is_empty() {
4635            return Err(SqlglotError::ParserError {
4636                message: "MERGE requires at least one WHEN clause".into(),
4637            });
4638        }
4639
4640        // OUTPUT clause (T-SQL extension)
4641        let output = if self.match_keyword("OUTPUT") {
4642            self.parse_select_items()?
4643        } else {
4644            vec![]
4645        };
4646
4647        // PostgreSQL: `MERGE … RETURNING <select_list>`. We don't yet model
4648        // RETURNING for MERGE, so swallow the items and discard them.
4649        if self.match_token(TokenType::Returning) {
4650            let _ = self.parse_select_items()?;
4651        }
4652
4653        Ok(MergeStatement {
4654            comments: vec![],
4655            target,
4656            source,
4657            on,
4658            clauses,
4659            output,
4660        })
4661    }
4662
4663    fn parse_merge_clause(&mut self) -> Result<MergeClause> {
4664        let kind = if self.match_token(TokenType::Not) {
4665            self.expect(TokenType::Matched)?;
4666            if self.match_keyword("BY") {
4667                if self.match_keyword("SOURCE") {
4668                    MergeClauseKind::NotMatchedBySource
4669                } else {
4670                    // BY TARGET is the default / explicit form
4671                    let _ = self.match_keyword("TARGET");
4672                    MergeClauseKind::NotMatched
4673                }
4674            } else {
4675                MergeClauseKind::NotMatched
4676            }
4677        } else {
4678            self.expect(TokenType::Matched)?;
4679            MergeClauseKind::Matched
4680        };
4681
4682        let condition = if self.match_token(TokenType::And) {
4683            Some(self.parse_expr()?)
4684        } else {
4685            None
4686        };
4687
4688        self.expect(TokenType::Then)?;
4689
4690        let action = self.parse_merge_action(&kind)?;
4691
4692        Ok(MergeClause {
4693            kind,
4694            condition,
4695            action,
4696        })
4697    }
4698
4699    fn parse_merge_action(&mut self, kind: &MergeClauseKind) -> Result<MergeAction> {
4700        if self.match_token(TokenType::Update) {
4701            self.expect(TokenType::Set)?;
4702            let mut assignments = Vec::new();
4703            loop {
4704                let mut col = self.expect_name()?;
4705                // Support dotted column names like target.col
4706                while self.match_token(TokenType::Dot) {
4707                    col.push('.');
4708                    col.push_str(&self.expect_name()?);
4709                }
4710                self.expect(TokenType::Eq)?;
4711                let val = self.parse_expr()?;
4712                assignments.push((col, val));
4713                if !self.match_token(TokenType::Comma) {
4714                    break;
4715                }
4716            }
4717            Ok(MergeAction::Update(assignments))
4718        } else if self.match_token(TokenType::Insert) {
4719            // INSERT ROW (BigQuery)
4720            if self.match_keyword("ROW") {
4721                return Ok(MergeAction::InsertRow);
4722            }
4723
4724            let columns = if self.match_token(TokenType::LParen) {
4725                let mut cols = vec![self.expect_name()?];
4726                while self.match_token(TokenType::Comma) {
4727                    cols.push(self.expect_name()?);
4728                }
4729                self.expect(TokenType::RParen)?;
4730                cols
4731            } else {
4732                vec![]
4733            };
4734
4735            self.expect(TokenType::Values)?;
4736            self.expect(TokenType::LParen)?;
4737            let values = self.parse_expr_list()?;
4738            self.expect(TokenType::RParen)?;
4739
4740            Ok(MergeAction::Insert { columns, values })
4741        } else if self.match_token(TokenType::Delete) {
4742            Ok(MergeAction::Delete)
4743        } else {
4744            Err(SqlglotError::ParserError {
4745                message: format!(
4746                    "Expected UPDATE, INSERT, or DELETE after WHEN {} THEN",
4747                    match kind {
4748                        MergeClauseKind::Matched => "MATCHED",
4749                        MergeClauseKind::NotMatched => "NOT MATCHED",
4750                        MergeClauseKind::NotMatchedBySource => "NOT MATCHED BY SOURCE",
4751                    }
4752                ),
4753            })
4754        }
4755    }
4756
4757    // ── CREATE ──────────────────────────────────────────────────────
4758
4759    fn parse_create(&mut self) -> Result<Statement> {
4760        self.expect(TokenType::Create)?;
4761
4762        let or_replace = if self.check_keyword("OR") {
4763            self.advance();
4764            self.expect(TokenType::Replace)?;
4765            true
4766        } else {
4767            false
4768        };
4769
4770        let temporary = self.match_token(TokenType::Temporary) || self.match_token(TokenType::Temp);
4771
4772        let materialized = self.match_token(TokenType::Materialized);
4773
4774        if self.match_token(TokenType::View) {
4775            return self
4776                .parse_create_view(or_replace, materialized)
4777                .map(Statement::CreateView);
4778        }
4779
4780        self.expect(TokenType::Table)?;
4781
4782        let if_not_exists = if self.match_token(TokenType::If) {
4783            self.expect(TokenType::Not)?;
4784            self.expect(TokenType::Exists)?;
4785            true
4786        } else {
4787            false
4788        };
4789
4790        let table = self.parse_table_ref_no_alias()?;
4791
4792        // CREATE TABLE ... AS SELECT ...
4793        if self.match_token(TokenType::As) {
4794            let query = self.parse_statement_inner()?;
4795            // Greenplum / Citus / etc. trailing `DISTRIBUTED BY (...)` /
4796            // `DISTRIBUTED RANDOMLY` / `DISTRIBUTED REPLICATED`. Swallow.
4797            if self.check_keyword("DISTRIBUTED") {
4798                self.advance();
4799                if self.check_keyword("BY") || matches!(self.peek_type(), TokenType::By) {
4800                    self.advance();
4801                    if self.match_token(TokenType::LParen) {
4802                        let mut depth = 1;
4803                        while depth > 0 {
4804                            match self.peek_type() {
4805                                TokenType::LParen => depth += 1,
4806                                TokenType::RParen => {
4807                                    depth -= 1;
4808                                    if depth == 0 {
4809                                        self.advance();
4810                                        break;
4811                                    }
4812                                }
4813                                TokenType::Eof => break,
4814                                _ => {}
4815                            }
4816                            self.advance();
4817                        }
4818                    }
4819                } else if self.is_name_token() {
4820                    // RANDOMLY / REPLICATED — single keyword
4821                    self.advance();
4822                }
4823            }
4824            return Ok(Statement::CreateTable(CreateTableStatement {
4825                comments: vec![],
4826                if_not_exists,
4827                temporary,
4828                table,
4829                columns: vec![],
4830                constraints: vec![],
4831                as_select: Some(Box::new(query)),
4832            }));
4833        }
4834
4835        self.expect(TokenType::LParen)?;
4836
4837        let mut columns = Vec::new();
4838        let mut constraints = Vec::new();
4839
4840        loop {
4841            // Check for table-level constraints
4842            if matches!(
4843                self.peek_type(),
4844                TokenType::Primary
4845                    | TokenType::Unique
4846                    | TokenType::Foreign
4847                    | TokenType::Check
4848                    | TokenType::Constraint
4849            ) {
4850                constraints.push(self.parse_table_constraint()?);
4851            } else if self.peek_type() != &TokenType::RParen {
4852                columns.push(self.parse_column_def()?);
4853            }
4854
4855            if !self.match_token(TokenType::Comma) {
4856                break;
4857            }
4858        }
4859        self.expect(TokenType::RParen)?;
4860
4861        // Tolerate dialect-specific trailing clauses (ClickHouse `ENGINE = X`,
4862        // `ORDER BY (...)`, `PARTITION BY ...`, `SETTINGS ...`, MySQL
4863        // `ENGINE=InnoDB DEFAULT CHARSET=utf8`, etc.) by consuming tokens
4864        // until the next statement boundary. Respects paren depth so a
4865        // top-level `;` inside `ORDER BY (a, b)` is not mistaken for end.
4866        self.skip_trailing_options();
4867
4868        Ok(Statement::CreateTable(CreateTableStatement {
4869            comments: vec![],
4870            if_not_exists,
4871            temporary,
4872            table,
4873            columns,
4874            constraints,
4875            as_select: None,
4876        }))
4877    }
4878
4879    /// Discard tokens up to (but not including) a top-level `;` or EOF.
4880    /// Used to skip dialect-specific tail clauses we don't model in the AST
4881    /// (CREATE TABLE engines, options, etc.).
4882    fn skip_trailing_options(&mut self) {
4883        let mut depth: i32 = 0;
4884        loop {
4885            match self.peek_type() {
4886                TokenType::Eof => break,
4887                TokenType::Semicolon if depth == 0 => break,
4888                TokenType::LParen => {
4889                    depth += 1;
4890                    self.advance();
4891                }
4892                TokenType::RParen => {
4893                    depth -= 1;
4894                    if depth < 0 {
4895                        break;
4896                    }
4897                    self.advance();
4898                }
4899                _ => {
4900                    self.advance();
4901                }
4902            }
4903        }
4904    }
4905
4906    fn parse_create_view(
4907        &mut self,
4908        or_replace: bool,
4909        materialized: bool,
4910    ) -> Result<CreateViewStatement> {
4911        let if_not_exists = if self.match_token(TokenType::If) {
4912            self.expect(TokenType::Not)?;
4913            self.expect(TokenType::Exists)?;
4914            true
4915        } else {
4916            false
4917        };
4918
4919        // Parse name without alias (so AS is not consumed as an alias)
4920        let name = self.parse_table_ref_no_alias()?;
4921
4922        let columns = if self.match_token(TokenType::LParen) {
4923            let mut cols = vec![self.expect_name()?];
4924            while self.match_token(TokenType::Comma) {
4925                cols.push(self.expect_name()?);
4926            }
4927            self.expect(TokenType::RParen)?;
4928            cols
4929        } else {
4930            vec![]
4931        };
4932
4933        self.expect(TokenType::As)?;
4934        let query = self.parse_statement_inner()?;
4935
4936        Ok(CreateViewStatement {
4937            comments: vec![],
4938            name,
4939            columns,
4940            query: Box::new(query),
4941            or_replace,
4942            materialized,
4943            if_not_exists,
4944        })
4945    }
4946
4947    fn parse_table_constraint(&mut self) -> Result<TableConstraint> {
4948        let name = if self.match_token(TokenType::Constraint) {
4949            Some(self.expect_name()?)
4950        } else {
4951            None
4952        };
4953
4954        if self.match_token(TokenType::Primary) {
4955            self.expect(TokenType::Key)?;
4956            self.expect(TokenType::LParen)?;
4957            let columns = self.parse_name_list()?;
4958            self.expect(TokenType::RParen)?;
4959            // TiDB / MySQL: `PRIMARY KEY (cols) GLOBAL|LOCAL` index scope
4960            // modifier and `USING BTREE|HASH` index-type modifier.
4961            if self.is_name_token()
4962                && matches!(
4963                    self.peek().value.to_uppercase().as_str(),
4964                    "GLOBAL" | "LOCAL"
4965                )
4966            {
4967                self.advance();
4968            }
4969            if self.match_token(TokenType::Using) && self.is_name_token() {
4970                self.advance();
4971            }
4972            self.swallow_constraint_modifiers();
4973            Ok(TableConstraint::PrimaryKey { name, columns })
4974        } else if self.match_token(TokenType::Unique) {
4975            let _ = self.match_token(TokenType::Index) || self.match_token(TokenType::Key);
4976            // Optional index name before `(`.
4977            if !matches!(self.peek_type(), TokenType::LParen) && self.is_name_token() {
4978                self.advance();
4979            }
4980            self.expect(TokenType::LParen)?;
4981            let columns = self.parse_name_list()?;
4982            self.expect(TokenType::RParen)?;
4983            if self.is_name_token()
4984                && matches!(
4985                    self.peek().value.to_uppercase().as_str(),
4986                    "GLOBAL" | "LOCAL"
4987                )
4988            {
4989                self.advance();
4990            }
4991            if self.match_token(TokenType::Using) && self.is_name_token() {
4992                self.advance();
4993            }
4994            self.swallow_constraint_modifiers();
4995            Ok(TableConstraint::Unique { name, columns })
4996        } else if self.match_token(TokenType::Foreign) {
4997            self.expect(TokenType::Key)?;
4998            self.expect(TokenType::LParen)?;
4999            let columns = self.parse_name_list()?;
5000            self.expect(TokenType::RParen)?;
5001            self.expect(TokenType::References)?;
5002            let ref_table = self.parse_table_ref()?;
5003            self.expect(TokenType::LParen)?;
5004            let ref_columns = self.parse_name_list()?;
5005            self.expect(TokenType::RParen)?;
5006
5007            // PG / ANSI `MATCH FULL | PARTIAL | SIMPLE` clause — swallow.
5008            if self.check_keyword("MATCH") {
5009                self.advance();
5010                if self.is_name_token() {
5011                    self.advance();
5012                }
5013            }
5014
5015            let mut on_delete = None;
5016            let mut on_update = None;
5017            // Accept ON DELETE / ON UPDATE clauses in any order. Match the
5018            // ON keyword only when the following token is DELETE / UPDATE
5019            // so a misplaced ON UPDATE doesn't consume the bare ON token
5020            // and orphan the rest of the action list.
5021            while self.peek_type() == &TokenType::On {
5022                let next = self.peek_offset(1).map(|t| &t.token_type);
5023                if matches!(next, Some(TokenType::Delete)) {
5024                    self.advance();
5025                    self.advance();
5026                    on_delete = Some(self.parse_referential_action()?);
5027                } else if matches!(next, Some(TokenType::Update)) {
5028                    self.advance();
5029                    self.advance();
5030                    on_update = Some(self.parse_referential_action()?);
5031                } else {
5032                    break;
5033                }
5034            }
5035
5036            self.swallow_constraint_modifiers();
5037            Ok(TableConstraint::ForeignKey {
5038                name,
5039                columns,
5040                ref_table,
5041                ref_columns,
5042                on_delete,
5043                on_update,
5044            })
5045        } else if self.match_token(TokenType::Check) {
5046            self.expect(TokenType::LParen)?;
5047            let expr = self.parse_expr()?;
5048            self.expect(TokenType::RParen)?;
5049            self.swallow_constraint_modifiers();
5050            Ok(TableConstraint::Check { name, expr })
5051        } else {
5052            Err(SqlglotError::ParserError {
5053                message: "Expected constraint type".into(),
5054            })
5055        }
5056    }
5057
5058    /// Swallow trailing constraint modifiers shared by FK / CHECK / PK /
5059    /// UNIQUE: `NOT VALID`, `[NOT] ENFORCED`, `DEFERRABLE`, `NOT DEFERRABLE`,
5060    /// `INITIALLY DEFERRED | IMMEDIATE`, `NO INHERIT`. Best-effort — we
5061    /// don't model them in the AST.
5062    fn swallow_constraint_modifiers(&mut self) {
5063        loop {
5064            if self.check_keyword("NOT")
5065                && self
5066                    .peek_offset(1)
5067                    .map(|t| t.value.to_uppercase())
5068                    .as_deref()
5069                    .is_some_and(|v| matches!(v, "VALID" | "ENFORCED" | "DEFERRABLE"))
5070            {
5071                self.advance();
5072                self.advance();
5073                continue;
5074            }
5075            if self.check_keyword("ENFORCED")
5076                || self.check_keyword("DEFERRABLE")
5077                || self.check_keyword("CLUSTERED")
5078                || self.check_keyword("NONCLUSTERED")
5079                || self.check_keyword("INVISIBLE")
5080                || self.check_keyword("VISIBLE")
5081            {
5082                self.advance();
5083                continue;
5084            }
5085            if self.check_keyword("INITIALLY") {
5086                self.advance();
5087                if self.is_name_token() {
5088                    self.advance();
5089                }
5090                continue;
5091            }
5092            if self.check_keyword("NO")
5093                && self
5094                    .peek_offset(1)
5095                    .map(|t| t.value.eq_ignore_ascii_case("INHERIT"))
5096                    .unwrap_or(false)
5097            {
5098                self.advance();
5099                self.advance();
5100                continue;
5101            }
5102            break;
5103        }
5104    }
5105
5106    fn parse_referential_action(&mut self) -> Result<ReferentialAction> {
5107        if self.match_token(TokenType::Cascade) {
5108            Ok(ReferentialAction::Cascade)
5109        } else if self.match_token(TokenType::Restrict) {
5110            Ok(ReferentialAction::Restrict)
5111        } else if self.match_token(TokenType::Set) {
5112            if self.match_token(TokenType::Null) {
5113                Ok(ReferentialAction::SetNull)
5114            } else if self.match_token(TokenType::Default) {
5115                Ok(ReferentialAction::SetDefault)
5116            } else {
5117                Err(SqlglotError::ParserError {
5118                    message: "Expected NULL or DEFAULT after SET".into(),
5119                })
5120            }
5121        } else if self.check_keyword("NO") {
5122            self.advance();
5123            self.expect(TokenType::Identifier)?; // ACTION
5124            Ok(ReferentialAction::NoAction)
5125        } else {
5126            Err(SqlglotError::ParserError {
5127                message: "Expected referential action (CASCADE, RESTRICT, SET NULL, SET DEFAULT, NO ACTION)".into(),
5128            })
5129        }
5130    }
5131
5132    fn parse_name_list(&mut self) -> Result<Vec<String>> {
5133        let mut names = vec![self.expect_name()?];
5134        while self.match_token(TokenType::Comma) {
5135            names.push(self.expect_name()?);
5136        }
5137        Ok(names)
5138    }
5139
5140    /// Parse a dotted column reference for INSERT column lists:
5141    /// `name` or `parent.child` (ClickHouse nested columns).
5142    fn parse_dotted_name(&mut self) -> Result<String> {
5143        let mut name = self.expect_name()?;
5144        while self.peek_type() == &TokenType::Dot {
5145            let next = self.peek_offset(1).map(|t| t.token_type.clone());
5146            let next_is_namelike = matches!(
5147                next,
5148                Some(TokenType::Identifier)
5149                    | Some(TokenType::Star)
5150                    | Some(TokenType::Int)
5151                    | Some(TokenType::BigInt)
5152                    | Some(TokenType::Text)
5153                    | Some(TokenType::Date)
5154                    | Some(TokenType::Timestamp)
5155            );
5156            if !next_is_namelike {
5157                break;
5158            }
5159            self.advance(); // .
5160            if self.peek_type() == &TokenType::Star {
5161                name.push('.');
5162                name.push('*');
5163                self.advance();
5164                break;
5165            }
5166            let part = self.expect_name()?;
5167            name.push('.');
5168            name.push_str(&part);
5169        }
5170        Ok(name)
5171    }
5172
5173    fn parse_column_def(&mut self) -> Result<ColumnDef> {
5174        let name = self.expect_name()?;
5175        let data_type = self.parse_data_type()?;
5176
5177        let mut nullable = None;
5178        let mut default = None;
5179        let mut primary_key = false;
5180        let mut unique = false;
5181        let mut auto_increment = false;
5182        let mut collation = None;
5183        let mut comment = None;
5184
5185        loop {
5186            if self.match_token(TokenType::Not) {
5187                self.expect(TokenType::Null)?;
5188                nullable = Some(false);
5189            } else if self.peek_type() == &TokenType::Null {
5190                self.advance();
5191                nullable = Some(true);
5192            } else if self.peek_type() == &TokenType::As
5193                && matches!(
5194                    self.peek_offset(1).map(|t| &t.token_type),
5195                    Some(TokenType::LParen)
5196                )
5197            {
5198                // SQLite / MySQL generated-column shorthand:
5199                //   `col TYPE AS (expr) [STORED|VIRTUAL|PERSISTENT]`.
5200                // Swallow AS, the parenthesised expression (depth-balanced),
5201                // and the optional storage-kind keyword.
5202                self.advance(); // AS
5203                self.advance(); // (
5204                let mut depth: i32 = 1;
5205                while depth > 0 {
5206                    match self.peek_type() {
5207                        TokenType::LParen => {
5208                            depth += 1;
5209                            self.advance();
5210                        }
5211                        TokenType::RParen => {
5212                            depth -= 1;
5213                            self.advance();
5214                        }
5215                        TokenType::Eof => break,
5216                        _ => {
5217                            self.advance();
5218                        }
5219                    }
5220                }
5221                if self.is_name_token()
5222                    && matches!(
5223                        self.peek().value.to_uppercase().as_str(),
5224                        "STORED" | "VIRTUAL" | "PERSISTENT" | "PERSISTED"
5225                    )
5226                {
5227                    self.advance();
5228                }
5229            } else if self.match_token(TokenType::Default) {
5230                // SQL Server / IBM `DEFAULT NEXT VALUE FOR seq[.qual]`.
5231                if self.is_name_token()
5232                    && self.peek().value.eq_ignore_ascii_case("NEXT")
5233                    && self
5234                        .peek_offset(1)
5235                        .map(|t| t.value.eq_ignore_ascii_case("VALUE"))
5236                        .unwrap_or(false)
5237                    && self
5238                        .peek_offset(2)
5239                        .map(|t| t.value.eq_ignore_ascii_case("FOR"))
5240                        .unwrap_or(false)
5241                {
5242                    self.advance();
5243                    self.advance();
5244                    self.advance();
5245                    let mut seq = self.expect_name()?;
5246                    while self.match_token(TokenType::Dot) {
5247                        seq.push('.');
5248                        seq.push_str(&self.expect_name()?);
5249                    }
5250                    default = Some(Expr::Function {
5251                        name: "NEXT_VALUE_FOR".to_string(),
5252                        args: vec![Expr::Column {
5253                            table: None,
5254                            name: seq,
5255                            quote_style: QuoteStyle::None,
5256                            table_quote_style: QuoteStyle::None,
5257                        }],
5258                        distinct: false,
5259                        filter: None,
5260                        over: None,
5261                        order_by: Vec::new(),
5262                        within_group: false,
5263                    });
5264                } else {
5265                    default = Some(self.parse_expr()?);
5266                }
5267            } else if self.match_token(TokenType::Primary) {
5268                self.expect(TokenType::Key)?;
5269                primary_key = true;
5270            } else if self.match_token(TokenType::Unique) {
5271                unique = true;
5272            } else if self.match_token(TokenType::AutoIncrement) {
5273                auto_increment = true;
5274            } else if self.match_token(TokenType::Collate) {
5275                collation = Some(self.expect_name()?);
5276            } else if self.match_token(TokenType::Comment) {
5277                let tok = self.expect(TokenType::String)?;
5278                comment = Some(tok.value);
5279            } else if self.match_token(TokenType::References) {
5280                // Inline foreign key — skip for now
5281                let _ = self.parse_table_ref()?;
5282                if self.match_token(TokenType::LParen) {
5283                    while !self.match_token(TokenType::RParen) {
5284                        self.advance();
5285                    }
5286                }
5287            } else if self.is_name_token() && self.peek().value.eq_ignore_ascii_case("GENERATED") {
5288                // SQL:2003 / MySQL / PG / SQL Server identity / computed
5289                // column: `GENERATED ALWAYS AS (expr) [VIRTUAL|STORED]`,
5290                // `GENERATED ALWAYS AS IDENTITY [(...)]`,
5291                // `GENERATED BY DEFAULT AS IDENTITY [(...)]`. Swallow up
5292                // through the trailing parenthesised body if present and
5293                // let the next loop iteration pick up VIRTUAL/STORED.
5294                self.advance();
5295                if self.is_name_token()
5296                    && (self.peek().value.eq_ignore_ascii_case("ALWAYS")
5297                        || self.peek().value.eq_ignore_ascii_case("BY"))
5298                {
5299                    self.advance();
5300                    if self.is_name_token() && self.peek().value.eq_ignore_ascii_case("DEFAULT") {
5301                        self.advance();
5302                    }
5303                }
5304                if self.match_token(TokenType::As) {
5305                    if self.is_name_token() && self.peek().value.eq_ignore_ascii_case("IDENTITY") {
5306                        self.advance();
5307                    } else if self.is_name_token() && self.peek().value.eq_ignore_ascii_case("ROW")
5308                    {
5309                        // SQL Server `GENERATED AS ROW START | END`.
5310                        self.advance();
5311                        if self.is_name_token() {
5312                            self.advance();
5313                        }
5314                    }
5315                }
5316                if self.peek_type() == &TokenType::LParen {
5317                    let mut depth = 0_i32;
5318                    self.advance();
5319                    depth += 1;
5320                    while depth > 0 {
5321                        match self.peek_type() {
5322                            TokenType::LParen => depth += 1,
5323                            TokenType::RParen => {
5324                                depth -= 1;
5325                                if depth == 0 {
5326                                    self.advance();
5327                                    break;
5328                                }
5329                            }
5330                            TokenType::Eof => break,
5331                            _ => {}
5332                        }
5333                        self.advance();
5334                    }
5335                }
5336            } else if self.is_name_token()
5337                && matches!(
5338                    self.peek().value.to_uppercase().as_str(),
5339                    "CODEC"
5340                        | "TTL"
5341                        | "MATERIALIZED"
5342                        | "ALIAS"
5343                        | "EPHEMERAL"
5344                        | "PERSISTED"
5345                        | "PERSISTENT"
5346                        | "VIRTUAL"
5347                        | "STORED"
5348                        | "ENCODE"
5349                        | "ENCRYPT"
5350                        | "MASKED"
5351                        | "INVISIBLE"
5352                        | "VISIBLE"
5353                        | "ENFORCED"
5354                        | "OPTIONS"
5355                        | "COMPRESSION"
5356                        | "SORTKEY"
5357                        | "DISTKEY"
5358                        | "CHARSET"
5359                        | "CHARACTER"
5360                        | "SRID"
5361                        | "FORMAT"
5362                        | "TAG"
5363                        | "MASKING"
5364                )
5365            {
5366                // ClickHouse / Snowflake / Redshift column modifiers. Consume
5367                // the keyword and the optional parenthesised body (`CODEC(...)`,
5368                // `TTL expr`, etc.) so the rest of the column def parses.
5369                self.advance();
5370                if self.peek_type() == &TokenType::LParen {
5371                    let mut depth = 0_i32;
5372                    self.advance();
5373                    depth += 1;
5374                    while depth > 0 {
5375                        match self.peek_type() {
5376                            TokenType::LParen => depth += 1,
5377                            TokenType::RParen => {
5378                                depth -= 1;
5379                                if depth == 0 {
5380                                    self.advance();
5381                                    break;
5382                                }
5383                            }
5384                            TokenType::Eof => break,
5385                            _ => {}
5386                        }
5387                        self.advance();
5388                    }
5389                } else {
5390                    // Best-effort: swallow an expression up to comma /
5391                    // top-level RParen / column-def boundary, balancing
5392                    // nested parens (e.g. `TTL toDate('2000-01-02')`,
5393                    // `ALIAS arrayResize(emptyArrayUInt32(), length(\`Arr.C2\`))`).
5394                    let mut depth: i32 = 0;
5395                    loop {
5396                        match self.peek_type() {
5397                            TokenType::LParen => {
5398                                depth += 1;
5399                                self.advance();
5400                            }
5401                            TokenType::RParen => {
5402                                if depth == 0 {
5403                                    break;
5404                                }
5405                                depth -= 1;
5406                                self.advance();
5407                            }
5408                            TokenType::Comma if depth == 0 => break,
5409                            TokenType::Eof => break,
5410                            _ => {
5411                                self.advance();
5412                            }
5413                        }
5414                    }
5415                }
5416            } else {
5417                break;
5418            }
5419        }
5420
5421        Ok(ColumnDef {
5422            name,
5423            data_type,
5424            nullable,
5425            default,
5426            primary_key,
5427            unique,
5428            auto_increment,
5429            collation,
5430            comment,
5431        })
5432    }
5433
5434    fn parse_data_type(&mut self) -> Result<DataType> {
5435        let token = self.peek().clone();
5436        // DuckDB / Spark template syntax: `${var}` (or `?` placeholder) used
5437        // where a data type is expected. Lower to `Unknown(name)` so the
5438        // surrounding expression parses.
5439        if matches!(token.token_type, TokenType::Parameter) {
5440            self.advance();
5441            return Ok(DataType::Unknown(token.value));
5442        }
5443        let type_result = match &token.token_type {
5444            TokenType::Int | TokenType::Integer => {
5445                self.advance();
5446                Ok(DataType::Int)
5447            }
5448            TokenType::BigInt => {
5449                self.advance();
5450                Ok(DataType::BigInt)
5451            }
5452            TokenType::SmallInt => {
5453                self.advance();
5454                Ok(DataType::SmallInt)
5455            }
5456            TokenType::TinyInt => {
5457                self.advance();
5458                Ok(DataType::TinyInt)
5459            }
5460            TokenType::Float => {
5461                self.advance();
5462                Ok(DataType::Float)
5463            }
5464            TokenType::Double => {
5465                self.advance();
5466                let _ = self.match_keyword("PRECISION");
5467                Ok(DataType::Double)
5468            }
5469            TokenType::Real => {
5470                self.advance();
5471                Ok(DataType::Real)
5472            }
5473            TokenType::Decimal | TokenType::Numeric => {
5474                let is_numeric = token.token_type == TokenType::Numeric;
5475                self.advance();
5476                let (precision, scale) = self.parse_type_params()?;
5477                if is_numeric {
5478                    Ok(DataType::Numeric { precision, scale })
5479                } else {
5480                    Ok(DataType::Decimal { precision, scale })
5481                }
5482            }
5483            TokenType::Varchar => {
5484                self.advance();
5485                let len = self.parse_single_type_param()?;
5486                Ok(DataType::Varchar(len))
5487            }
5488            TokenType::Char => {
5489                self.advance();
5490                let len = self.parse_single_type_param()?;
5491                Ok(DataType::Char(len))
5492            }
5493            TokenType::Text => {
5494                self.advance();
5495                Ok(DataType::Text)
5496            }
5497            TokenType::Boolean => {
5498                self.advance();
5499                Ok(DataType::Boolean)
5500            }
5501            TokenType::Date => {
5502                self.advance();
5503                Ok(DataType::Date)
5504            }
5505            TokenType::Timestamp => {
5506                self.advance();
5507                let precision = self.parse_single_type_param()?;
5508                let with_tz = if self.match_keyword("WITH") {
5509                    let _ = self.match_keyword("LOCAL");
5510                    let _ = self.match_keyword("TIME");
5511                    let _ = self.match_keyword("ZONE");
5512                    true
5513                } else if self.match_keyword("WITHOUT") {
5514                    let _ = self.match_keyword("TIME");
5515                    let _ = self.match_keyword("ZONE");
5516                    false
5517                } else {
5518                    false
5519                };
5520                Ok(DataType::Timestamp { precision, with_tz })
5521            }
5522            TokenType::TimestampTz => {
5523                self.advance();
5524                let precision = self.parse_single_type_param()?;
5525                Ok(DataType::Timestamp {
5526                    precision,
5527                    with_tz: true,
5528                })
5529            }
5530            TokenType::Time => {
5531                self.advance();
5532                let precision = self.parse_single_type_param()?;
5533                Ok(DataType::Time { precision })
5534            }
5535            TokenType::Interval => {
5536                self.advance();
5537                Ok(DataType::Interval)
5538            }
5539            TokenType::Blob => {
5540                self.advance();
5541                Ok(DataType::Blob)
5542            }
5543            TokenType::Bytea => {
5544                self.advance();
5545                Ok(DataType::Bytea)
5546            }
5547            TokenType::Json => {
5548                self.advance();
5549                Ok(DataType::Json)
5550            }
5551            TokenType::Jsonb => {
5552                self.advance();
5553                Ok(DataType::Jsonb)
5554            }
5555            TokenType::Uuid => {
5556                self.advance();
5557                Ok(DataType::Uuid)
5558            }
5559            TokenType::Array => {
5560                self.advance();
5561                if self.match_token(TokenType::Lt) {
5562                    let inner = self.parse_data_type()?;
5563                    self.expect(TokenType::Gt)?;
5564                    Ok(DataType::Array(Some(Box::new(inner))))
5565                } else {
5566                    Ok(DataType::Array(None))
5567                }
5568            }
5569            TokenType::Struct => {
5570                self.advance();
5571                // STRUCT<a INT, b STRING> (Hive/Spark) or STRUCT(a INT, b INT) (DuckDB).
5572                // Swallow the body — we don't model named struct fields in the AST.
5573                let close = if self.match_token(TokenType::Lt) {
5574                    Some(TokenType::Gt)
5575                } else if self.match_token(TokenType::LParen) {
5576                    Some(TokenType::RParen)
5577                } else {
5578                    None
5579                };
5580                if let Some(close_tok) = close {
5581                    let mut depth = 1_i32;
5582                    while depth > 0 {
5583                        if self.peek_type() == &TokenType::Eof {
5584                            break;
5585                        }
5586                        if self.peek_type() == &close_tok {
5587                            depth -= 1;
5588                            if depth == 0 {
5589                                self.advance();
5590                                break;
5591                            }
5592                        } else if matches!(self.peek_type(), TokenType::Lt | TokenType::LParen)
5593                            && (self.peek_type() == &TokenType::Lt && close_tok == TokenType::Gt
5594                                || self.peek_type() == &TokenType::LParen
5595                                    && close_tok == TokenType::RParen)
5596                        {
5597                            depth += 1;
5598                        }
5599                        self.advance();
5600                    }
5601                }
5602                Ok(DataType::Unknown("STRUCT".to_string()))
5603            }
5604            TokenType::Map => {
5605                self.advance();
5606                let close = if self.match_token(TokenType::Lt) {
5607                    Some(TokenType::Gt)
5608                } else if self.match_token(TokenType::LParen) {
5609                    Some(TokenType::RParen)
5610                } else {
5611                    None
5612                };
5613                if let Some(close_tok) = close {
5614                    let mut depth = 1_i32;
5615                    while depth > 0 {
5616                        if self.peek_type() == &TokenType::Eof {
5617                            break;
5618                        }
5619                        if self.peek_type() == &close_tok {
5620                            depth -= 1;
5621                            if depth == 0 {
5622                                self.advance();
5623                                break;
5624                            }
5625                        } else if (self.peek_type() == &TokenType::Lt && close_tok == TokenType::Gt)
5626                            || (self.peek_type() == &TokenType::LParen
5627                                && close_tok == TokenType::RParen)
5628                        {
5629                            depth += 1;
5630                        }
5631                        self.advance();
5632                    }
5633                }
5634                Ok(DataType::Unknown("MAP".to_string()))
5635            }
5636            TokenType::Identifier => {
5637                let name = token.value.to_uppercase();
5638                self.advance();
5639                match name.as_str() {
5640                    "STRING" => Ok(DataType::String),
5641                    "BINARY" => {
5642                        let len = self.parse_single_type_param()?;
5643                        Ok(DataType::Binary(len))
5644                    }
5645                    "VARBINARY" => {
5646                        let len = self.parse_single_type_param()?;
5647                        Ok(DataType::Varbinary(len))
5648                    }
5649                    "DATETIME" => Ok(DataType::DateTime),
5650                    "BYTES" => Ok(DataType::Bytes),
5651                    "VARIANT" => Ok(DataType::Variant),
5652                    "OBJECT" => Ok(DataType::Object),
5653                    "XML" => Ok(DataType::Xml),
5654                    "INET" => Ok(DataType::Inet),
5655                    "CIDR" => Ok(DataType::Cidr),
5656                    "MACADDR" => Ok(DataType::Macaddr),
5657                    "BIT" => {
5658                        // Postgres `BIT VARYING(n)` is the same as VARBIT.
5659                        // Swallow the VARYING keyword if present and parse
5660                        // the length normally.
5661                        if self.is_name_token() && self.peek().value.eq_ignore_ascii_case("VARYING")
5662                        {
5663                            self.advance();
5664                            let len = self.parse_single_type_param()?;
5665                            return Ok(DataType::Varbinary(len));
5666                        }
5667                        let len = self.parse_single_type_param()?;
5668                        Ok(DataType::Bit(len))
5669                    }
5670                    "MONEY" => Ok(DataType::Money),
5671                    "SERIAL" => Ok(DataType::Serial),
5672                    "BIGSERIAL" => Ok(DataType::BigSerial),
5673                    "SMALLSERIAL" => Ok(DataType::SmallSerial),
5674                    "REGCLASS" => Ok(DataType::Regclass),
5675                    "REGTYPE" => Ok(DataType::Regtype),
5676                    "HSTORE" => Ok(DataType::Hstore),
5677                    "GEOGRAPHY" => Ok(DataType::Geography),
5678                    "GEOMETRY" => Ok(DataType::Geometry),
5679                    "SUPER" => Ok(DataType::Super),
5680                    _ => Ok(DataType::Unknown(name)),
5681                }
5682            }
5683            _ => {
5684                // Fallback: accept any keyword-like token as an unknown
5685                // data type by its textual value. Covers PostgreSQL `cube`,
5686                // `lseg`, `path`, `polygon`, and any vendor-specific type
5687                // name that happens to collide with a TokenType variant.
5688                let v = token.value.clone();
5689                if !v.is_empty() && v.chars().all(|c| c.is_ascii_alphanumeric() || c == '_') {
5690                    self.advance();
5691                    Ok(DataType::Unknown(v.to_uppercase()))
5692                } else {
5693                    Err(SqlglotError::ParserError {
5694                        message: format!("Expected data type, got {:?}", token.token_type),
5695                    })
5696                }
5697            }
5698        };
5699
5700        // PostgreSQL opt_array_bounds: typename[], typename[N], typename[][]...
5701        let mut dt = type_result?;
5702        while self.match_token(TokenType::LBracket) {
5703            // Consume optional integer bound (PostgreSQL ignores it but accepts it)
5704            let _ = self.match_token(TokenType::Number);
5705            self.expect(TokenType::RBracket)?;
5706            dt = DataType::Array(Some(Box::new(dt)));
5707        }
5708        // ClickHouse parameterized types: `DateTime('Asia/Dubai')`,
5709        // `Nullable(String)`, `Array(Int32)`, `Enum8('a' = 1, 'b' = 2)`,
5710        // `Decimal(9, 2)`, etc. The base type was already produced — swallow
5711        // the parenthesized parameter list so the surrounding expression
5712        // continues to parse.
5713        if self.peek_type() == &TokenType::LParen {
5714            let saved = self.pos;
5715            self.advance();
5716            let mut depth = 1;
5717            let mut ok = true;
5718            while depth > 0 {
5719                match self.peek_type() {
5720                    TokenType::LParen => depth += 1,
5721                    TokenType::RParen => {
5722                        depth -= 1;
5723                        if depth == 0 {
5724                            self.advance();
5725                            break;
5726                        }
5727                    }
5728                    TokenType::Eof => {
5729                        ok = false;
5730                        break;
5731                    }
5732                    _ => {}
5733                }
5734                self.advance();
5735            }
5736            if !ok {
5737                self.pos = saved;
5738            }
5739        }
5740        Ok(dt)
5741    }
5742
5743    fn parse_type_params(&mut self) -> Result<(Option<u32>, Option<u32>)> {
5744        if self.match_token(TokenType::LParen) {
5745            let p: Option<u32> = self.expect(TokenType::Number)?.value.parse().ok();
5746            let s = if self.match_token(TokenType::Comma) {
5747                self.expect(TokenType::Number)?.value.parse().ok()
5748            } else {
5749                None
5750            };
5751            self.expect(TokenType::RParen)?;
5752            Ok((p, s))
5753        } else {
5754            Ok((None, None))
5755        }
5756    }
5757
5758    fn parse_single_type_param(&mut self) -> Result<Option<u32>> {
5759        if self.match_token(TokenType::LParen) {
5760            // Handle TSQL MAX keyword (e.g. VARBINARY(MAX), VARCHAR(MAX))
5761            if self.check_keyword("MAX") {
5762                self.advance(); // consume MAX
5763                self.expect(TokenType::RParen)?;
5764                return Ok(None);
5765            }
5766            let n: Option<u32> = self.expect(TokenType::Number)?.value.parse().ok();
5767            self.expect(TokenType::RParen)?;
5768            Ok(n)
5769        } else {
5770            Ok(None)
5771        }
5772    }
5773
5774    // ── DROP ────────────────────────────────────────────────────────
5775
5776    fn parse_drop(&mut self) -> Result<Statement> {
5777        self.expect(TokenType::Drop)?;
5778
5779        if self.match_token(TokenType::Materialized) {
5780            self.expect(TokenType::View)?;
5781            let if_exists = if self.match_token(TokenType::If) {
5782                self.expect(TokenType::Exists)?;
5783                true
5784            } else {
5785                false
5786            };
5787            let name = self.parse_table_ref()?;
5788            // MySQL/MariaDB allow comma-list — swallow the rest.
5789            while self.match_token(TokenType::Comma) {
5790                let _ = self.parse_table_ref()?;
5791            }
5792            // Trailing CASCADE / RESTRICT.
5793            let _ = self.match_token(TokenType::Cascade) || self.match_token(TokenType::Restrict);
5794            return Ok(Statement::DropView(DropViewStatement {
5795                comments: vec![],
5796                name,
5797                if_exists,
5798                materialized: true,
5799            }));
5800        }
5801
5802        if self.match_token(TokenType::View) {
5803            let if_exists = if self.match_token(TokenType::If) {
5804                self.expect(TokenType::Exists)?;
5805                true
5806            } else {
5807                false
5808            };
5809            let name = self.parse_table_ref()?;
5810            while self.match_token(TokenType::Comma) {
5811                let _ = self.parse_table_ref()?;
5812            }
5813            let _ = self.match_token(TokenType::Cascade) || self.match_token(TokenType::Restrict);
5814            return Ok(Statement::DropView(DropViewStatement {
5815                comments: vec![],
5816                name,
5817                if_exists,
5818                materialized: false,
5819            }));
5820        }
5821
5822        // DROP <kind> ... — preserve as a Command for non-TABLE/VIEW drops
5823        // (FUNCTION, PROCEDURE, SCHEMA, DATABASE, INDEX, ROLE, USER, …).
5824        if self.peek_type() != &TokenType::Table {
5825            // Already consumed DROP; capture the remainder.
5826            let body = self.consume_raw_to_statement_end();
5827            return Ok(Statement::Command(CommandStatement {
5828                comments: vec![],
5829                kind: "DROP".to_string(),
5830                body,
5831            }));
5832        }
5833
5834        self.expect(TokenType::Table)?;
5835
5836        let if_exists = if self.match_token(TokenType::If) {
5837            self.expect(TokenType::Exists)?;
5838            true
5839        } else {
5840            false
5841        };
5842
5843        let table = self.parse_table_ref()?;
5844        // MySQL / MariaDB: `DROP TABLE [IF EXISTS] t1, t2, …`. Swallow the
5845        // extra table names so the statement parses.
5846        while self.match_token(TokenType::Comma) {
5847            let _ = self.parse_table_ref()?;
5848        }
5849        let cascade = self.match_token(TokenType::Cascade);
5850        // Tolerate Doris / StarRocks / Oracle trailing modifiers on DROP TABLE
5851        // (`FORCE`, `PURGE`, `RESTRICT`).
5852        while !matches!(self.peek_type(), TokenType::Eof | TokenType::Semicolon) {
5853            if self.is_name_token()
5854                && matches!(
5855                    self.peek().value.to_uppercase().as_str(),
5856                    "FORCE" | "PURGE" | "RESTRICT"
5857                )
5858            {
5859                self.advance();
5860            } else if matches!(self.peek_type(), TokenType::Restrict) {
5861                self.advance();
5862            } else {
5863                break;
5864            }
5865        }
5866
5867        Ok(Statement::DropTable(DropTableStatement {
5868            comments: vec![],
5869            if_exists,
5870            table,
5871            cascade,
5872        }))
5873    }
5874
5875    // ── ALTER TABLE ─────────────────────────────────────────────────
5876
5877    fn parse_alter_table(&mut self) -> Result<AlterTableStatement> {
5878        self.expect(TokenType::Alter)?;
5879        self.expect(TokenType::Table)?;
5880        let table = self.parse_table_ref_no_alias()?;
5881
5882        let mut actions = Vec::new();
5883        loop {
5884            let action = self.parse_alter_action()?;
5885            actions.push(action);
5886            if !self.match_token(TokenType::Comma) {
5887                break;
5888            }
5889        }
5890
5891        Ok(AlterTableStatement {
5892            comments: vec![],
5893            table,
5894            actions,
5895        })
5896    }
5897
5898    fn parse_alter_action(&mut self) -> Result<AlterTableAction> {
5899        // Hive multi-partition continuation after a comma:
5900        // `ALTER TABLE t DROP PARTITION (a), PARTITION (b)`. Swallow the
5901        // bare PARTITION clause.
5902        if self.peek_type() == &TokenType::Partition {
5903            self.advance();
5904            let mut depth: i32 = 0;
5905            while !matches!(self.peek_type(), TokenType::Eof | TokenType::Semicolon)
5906                && (depth > 0 || !matches!(self.peek_type(), TokenType::Comma))
5907            {
5908                match self.peek_type() {
5909                    TokenType::LParen => depth += 1,
5910                    TokenType::RParen => depth = depth.saturating_sub(1),
5911                    _ => {}
5912                }
5913                self.advance();
5914            }
5915            return Ok(AlterTableAction::DropColumn {
5916                name: String::new(),
5917                if_exists: false,
5918            });
5919        }
5920        if self.match_keyword("ADD") {
5921            if matches!(
5922                self.peek_type(),
5923                TokenType::Constraint
5924                    | TokenType::Primary
5925                    | TokenType::Unique
5926                    | TokenType::Foreign
5927                    | TokenType::Check
5928            ) {
5929                let constraint = self.parse_table_constraint()?;
5930                self.swallow_constraint_modifiers();
5931                Ok(AlterTableAction::AddConstraint(constraint))
5932            } else if self.check_keyword("EXCLUDE") {
5933                // PG `ADD EXCLUDE [USING method] (col WITH op [, ...]) [WHERE
5934                // (predicate)] [DEFERRABLE …]` — swallow opaquely until we
5935                // hit a top-level statement boundary or comma.
5936                let mut depth: i32 = 0;
5937                while !matches!(self.peek_type(), TokenType::Eof | TokenType::Semicolon)
5938                    && (depth > 0 || !matches!(self.peek_type(), TokenType::Comma))
5939                {
5940                    match self.peek_type() {
5941                        TokenType::LParen => depth += 1,
5942                        TokenType::RParen => depth = depth.saturating_sub(1),
5943                        _ => {}
5944                    }
5945                    self.advance();
5946                }
5947                Ok(AlterTableAction::DropColumn {
5948                    name: String::new(),
5949                    if_exists: false,
5950                })
5951            } else if self.check_keyword("INDEX")
5952                || self.check_keyword("KEY")
5953                || self.check_keyword("PROJECTION")
5954                || self.check_keyword("STATISTICS")
5955            {
5956                // ClickHouse / MySQL `ADD INDEX [name] expr TYPE x GRANULARITY n
5957                // [AFTER y]`, `ADD KEY ...`, `ADD PROJECTION ...`. The body
5958                // is heterogeneous; swallow it opaquely up to the next
5959                // top-level Comma / Semicolon / EOF.
5960                let mut depth: i32 = 0;
5961                while !matches!(self.peek_type(), TokenType::Eof | TokenType::Semicolon)
5962                    && (depth > 0 || !matches!(self.peek_type(), TokenType::Comma))
5963                {
5964                    match self.peek_type() {
5965                        TokenType::LParen => depth += 1,
5966                        TokenType::RParen => depth = depth.saturating_sub(1),
5967                        _ => {}
5968                    }
5969                    self.advance();
5970                }
5971                Ok(AlterTableAction::DropColumn {
5972                    name: String::new(),
5973                    if_exists: false,
5974                })
5975            } else if self.check_keyword("COLUMNS") {
5976                // Hive / Spark / Databricks `ALTER TABLE … ADD COLUMNS
5977                // (col type [, col type]*)` or the comma-list form
5978                // `ADD COLUMNS col type, col type`. Swallow opaquely.
5979                self.advance();
5980                let mut depth: i32 = 0;
5981                while !matches!(self.peek_type(), TokenType::Eof | TokenType::Semicolon)
5982                    && (depth > 0 || !matches!(self.peek_type(), TokenType::Comma))
5983                {
5984                    match self.peek_type() {
5985                        TokenType::LParen => depth += 1,
5986                        TokenType::RParen => depth = depth.saturating_sub(1),
5987                        _ => {}
5988                    }
5989                    self.advance();
5990                    if depth == 0
5991                        && matches!(self.peek_type(), TokenType::Eof | TokenType::Semicolon)
5992                    {
5993                        break;
5994                    }
5995                }
5996                Ok(AlterTableAction::DropColumn {
5997                    name: String::new(),
5998                    if_exists: false,
5999                })
6000            } else {
6001                let _ = self.match_keyword("COLUMN");
6002                let col = self.parse_column_def()?;
6003                // ClickHouse: `ADD COLUMN name type AFTER other` / `FIRST` —
6004                // consume the placement modifier so the rest of the action
6005                // list parses.
6006                if self.check_keyword("AFTER") {
6007                    self.advance();
6008                    if self.is_name_token() {
6009                        self.advance();
6010                    }
6011                } else if self.check_keyword("FIRST") {
6012                    self.advance();
6013                }
6014                Ok(AlterTableAction::AddColumn(col))
6015            }
6016        } else if self.match_token(TokenType::Drop) {
6017            // Hive: `DROP IF EXISTS PARTITION (…), PARTITION (…)`. The
6018            // optional `IF EXISTS` precedes PARTITION.
6019            if self.peek_type() == &TokenType::If
6020                && self
6021                    .peek_offset(1)
6022                    .map(|t| matches!(t.token_type, TokenType::Exists))
6023                    .unwrap_or(false)
6024                && self
6025                    .peek_offset(2)
6026                    .map(|t| matches!(t.token_type, TokenType::Partition))
6027                    .unwrap_or(false)
6028            {
6029                self.advance(); // IF
6030                self.advance(); // EXISTS
6031            }
6032            // MySQL / TiDB: `DROP INDEX|KEY name`, `DROP PRIMARY KEY`,
6033            // `DROP FOREIGN KEY name`, `DROP CONSTRAINT name`,
6034            // `DROP PARTITION (...)`, `DROP CHECK name`. We don't have a
6035            // dedicated AST node for these, so swallow them to end-of-action.
6036            if matches!(
6037                self.peek_type(),
6038                TokenType::Index
6039                    | TokenType::Primary
6040                    | TokenType::Foreign
6041                    | TokenType::Constraint
6042                    | TokenType::Check
6043                    | TokenType::Partition
6044                    | TokenType::Unique
6045            ) || self.check_keyword("KEY")
6046                || self.check_keyword("FEATURE")
6047                || self.check_keyword("PROJECTION")
6048                || self.check_keyword("STATISTICS")
6049                || self.check_keyword("INDEX")
6050                || self.check_keyword("DISTRIBUTION")
6051            {
6052                let mut depth: i32 = 0;
6053                while !matches!(self.peek_type(), TokenType::Eof | TokenType::Semicolon)
6054                    && (depth > 0 || !matches!(self.peek_type(), TokenType::Comma))
6055                {
6056                    match self.peek_type() {
6057                        TokenType::LParen => depth += 1,
6058                        TokenType::RParen => depth = depth.saturating_sub(1),
6059                        _ => {}
6060                    }
6061                    self.advance();
6062                }
6063                return Ok(AlterTableAction::DropColumn {
6064                    name: String::new(),
6065                    if_exists: false,
6066                });
6067            }
6068            let _ = self.match_keyword("COLUMN");
6069            let if_exists = if self.match_token(TokenType::If) {
6070                self.expect(TokenType::Exists)?;
6071                true
6072            } else {
6073                false
6074            };
6075            let mut name = self.expect_name()?;
6076            // ClickHouse `DROP COLUMN nested.col` — accept dotted suffixes;
6077            // we collapse them into the column name string for now.
6078            while self.peek_type() == &TokenType::Dot {
6079                self.advance();
6080                if !self.is_name_token() {
6081                    break;
6082                }
6083                name.push('.');
6084                name.push_str(&self.peek().value);
6085                self.advance();
6086            }
6087            Ok(AlterTableAction::DropColumn { name, if_exists })
6088        } else if self.match_keyword("RENAME") {
6089            if self.match_keyword("COLUMN") {
6090                let old_name = self.expect_name()?;
6091                self.expect(TokenType::Identifier)?; // TO
6092                let new_name = self.expect_name()?;
6093                Ok(AlterTableAction::RenameColumn { old_name, new_name })
6094            } else if self.match_keyword("TO") {
6095                let mut new_name = self.expect_name()?;
6096                while self.match_token(TokenType::Dot) {
6097                    new_name.push('.');
6098                    new_name.push_str(&self.expect_name()?);
6099                }
6100                Ok(AlterTableAction::RenameTable { new_name })
6101            } else {
6102                Err(SqlglotError::ParserError {
6103                    message: "Expected COLUMN or TO after RENAME".into(),
6104                })
6105            }
6106        } else {
6107            Err(SqlglotError::ParserError {
6108                message: "Expected ADD, DROP, or RENAME in ALTER TABLE".into(),
6109            })
6110        }
6111    }
6112
6113    /// Try [`parse_alter_table`]; on failure, rewind and capture the entire
6114    /// `ALTER …` statement verbatim as a [`Statement::Command`]. This covers
6115    /// the long tail of vendor-specific ALTER forms — MySQL `ALTER TABLE …
6116    /// CONVERT TO CHARACTER SET … COLLATE …`, Hive `ALTER TABLE … PARTITION
6117    /// (…) COMPACT 'major'`, T-SQL `ALTER TABLE … WITH (…) CHECK CONSTRAINT
6118    /// …`, etc. (Gap 5)
6119    fn parse_alter_or_command(&mut self) -> Result<Statement> {
6120        let saved = self.pos;
6121        let saved_comments = self.pending_comments.clone();
6122        match self.parse_alter_table() {
6123            Ok(stmt) => Ok(Statement::AlterTable(stmt)),
6124            Err(_) => {
6125                self.pos = saved;
6126                self.pending_comments = saved_comments;
6127                self.parse_command_kind("ALTER")
6128            }
6129        }
6130    }
6131
6132    /// Try [`parse_create`]; on failure, rewind and capture the entire
6133    /// `CREATE …` statement verbatim as a [`Statement::Command`]. Also
6134    /// handles the `CREATE TABLE t AS VALUES (…)` form (Gap 7) and rarer
6135    /// `CREATE OPERATOR / AGGREGATE / SEQUENCE / FUNCTION / TEXT SEARCH
6136    /// CONFIGURATION / …` (Gap 4).
6137    fn parse_create_or_command(&mut self) -> Result<Statement> {
6138        let saved = self.pos;
6139        let saved_comments = self.pending_comments.clone();
6140        match self.parse_create() {
6141            Ok(stmt) => Ok(stmt),
6142            Err(_) => {
6143                self.pos = saved;
6144                self.pending_comments = saved_comments;
6145                self.parse_command_kind("CREATE")
6146            }
6147        }
6148    }
6149
6150    // ── TRUNCATE ────────────────────────────────────────────────────
6151
6152    fn parse_truncate(&mut self) -> Result<TruncateStatement> {
6153        self.expect(TokenType::Truncate)?;
6154        let _ = self.match_token(TokenType::Table);
6155        let table = self.parse_table_ref()?;
6156        Ok(TruncateStatement {
6157            comments: vec![],
6158            table,
6159        })
6160    }
6161
6162    // ── Transaction ─────────────────────────────────────────────────
6163
6164    fn parse_transaction(&mut self) -> Result<TransactionStatement> {
6165        match self.peek_type() {
6166            TokenType::Begin => {
6167                self.advance();
6168                let _ = self.match_token(TokenType::Transaction);
6169                let _ = self.match_keyword("WORK");
6170                Ok(TransactionStatement::Begin)
6171            }
6172            TokenType::Commit => {
6173                self.advance();
6174                let _ = self.match_token(TokenType::Transaction);
6175                let _ = self.match_keyword("WORK");
6176                // SQL-standard COMMIT [WORK] [AND [NO] CHAIN]
6177                if self.match_token(TokenType::And) {
6178                    let _ = self.match_token(TokenType::Not);
6179                    let _ = self.match_keyword("NO");
6180                    let _ = self.match_keyword("CHAIN");
6181                }
6182                Ok(TransactionStatement::Commit)
6183            }
6184            TokenType::Rollback => {
6185                self.advance();
6186                let _ = self.match_token(TokenType::Transaction);
6187                let _ = self.match_keyword("WORK");
6188                if self.match_keyword("TO") {
6189                    let _ = self.match_token(TokenType::Savepoint);
6190                    let name = self.expect_name()?;
6191                    Ok(TransactionStatement::RollbackTo(name))
6192                } else {
6193                    // ROLLBACK [WORK] [AND [NO] CHAIN]
6194                    if self.match_token(TokenType::And) {
6195                        let _ = self.match_token(TokenType::Not);
6196                        let _ = self.match_keyword("NO");
6197                        let _ = self.match_keyword("CHAIN");
6198                    }
6199                    Ok(TransactionStatement::Rollback)
6200                }
6201            }
6202            TokenType::Savepoint => {
6203                self.advance();
6204                let name = self.expect_name()?;
6205                Ok(TransactionStatement::Savepoint(name))
6206            }
6207            _ => Err(SqlglotError::ParserError {
6208                message: "Expected transaction statement".into(),
6209            }),
6210        }
6211    }
6212
6213    // ── EXPLAIN ─────────────────────────────────────────────────────
6214
6215    fn parse_explain(&mut self) -> Result<ExplainStatement> {
6216        self.expect(TokenType::Explain)?;
6217        let analyze = self.match_token(TokenType::Analyze);
6218        // PostgreSQL `EXPLAIN (VERBOSE, COSTS OFF, ...)` option block, plus
6219        // unparenthesized `VERBOSE` / `FORMAT TEXT|JSON|YAML`.
6220        if self.match_token(TokenType::LParen) {
6221            let mut depth = 1;
6222            while depth > 0 {
6223                match self.peek_type() {
6224                    TokenType::Eof => break,
6225                    TokenType::LParen => depth += 1,
6226                    TokenType::RParen => {
6227                        depth -= 1;
6228                        if depth == 0 {
6229                            self.advance();
6230                            break;
6231                        }
6232                    }
6233                    _ => {}
6234                }
6235                self.advance();
6236            }
6237        } else {
6238            // Optional bare keywords: VERBOSE / FORMAT [=] <name|string>
6239            loop {
6240                if self.check_keyword("VERBOSE") {
6241                    self.advance();
6242                    continue;
6243                }
6244                if self.check_keyword("FORMAT") {
6245                    self.advance();
6246                    let _ = self.match_token(TokenType::Eq);
6247                    // Format name can be an identifier (TEXT/JSON/YAML/XML/...)
6248                    // or a string literal (`'plan_tree'`).
6249                    if matches!(self.peek_type(), TokenType::String | TokenType::Identifier)
6250                        || self.is_name_token()
6251                    {
6252                        self.advance();
6253                    }
6254                    continue;
6255                }
6256                break;
6257            }
6258            // Hive / Spark EXPLAIN modifiers: EXTENDED, LOCKS, AUTHORIZATION,
6259            // DEPENDENCY, VECTORIZATION [ONLY] [SUMMARY|OPERATOR|EXPRESSION|DETAIL],
6260            // CBO, AST, REWRITE, FORMATTED, LOGICAL, NODE. Also ClickHouse
6261            // `EXPLAIN indexes=1 actions=1 …` bare options. Consume any
6262            // identifier-like tokens (and optional `= value`) until we hit a
6263            // statement-starting keyword.
6264            loop {
6265                match self.peek_type() {
6266                    TokenType::Select
6267                    | TokenType::With
6268                    | TokenType::Insert
6269                    | TokenType::Update
6270                    | TokenType::Delete
6271                    | TokenType::Merge
6272                    | TokenType::Create
6273                    | TokenType::Drop
6274                    | TokenType::Alter
6275                    | TokenType::Truncate
6276                    | TokenType::LParen
6277                    | TokenType::Eof
6278                    | TokenType::Semicolon => break,
6279                    TokenType::Identifier => {
6280                        self.advance();
6281                        if self.match_token(TokenType::Eq) {
6282                            // value: number, string, or identifier
6283                            if matches!(self.peek_type(), TokenType::Number | TokenType::String)
6284                                || self.is_name_token()
6285                            {
6286                                self.advance();
6287                            }
6288                        }
6289                        // Optional comma between options
6290                        // (ClickHouse `dump_tree = 1, dump_ast = 1 …`).
6291                        let _ = self.match_token(TokenType::Comma);
6292                    }
6293                    _ => {
6294                        // Also accept unreserved keyword-style modifiers
6295                        // (ONLY, FORMATTED, EXTENDED, etc. that tokenize as
6296                        // their own variants). Bail when we hit anything
6297                        // that isn't a plain name token.
6298                        if self.is_name_token() {
6299                            self.advance();
6300                        } else {
6301                            break;
6302                        }
6303                    }
6304                }
6305            }
6306        }
6307        let statement = self.parse_statement_inner()?;
6308        Ok(ExplainStatement {
6309            comments: vec![],
6310            analyze,
6311            statement: Box::new(statement),
6312        })
6313    }
6314
6315    // ── USE ─────────────────────────────────────────────────────────
6316
6317    fn parse_use(&mut self) -> Result<UseStatement> {
6318        self.expect(TokenType::Use)?;
6319        // Optional kind: USE DATABASE / SCHEMA / CATALOG / WAREHOUSE / ROLE
6320        // (DuckDB / Snowflake / Spark). Swallow the leading keyword.
6321        let _ = matches!(self.peek_type(), TokenType::Database | TokenType::Schema) && {
6322            self.advance();
6323            true
6324        } || (self.is_name_token()
6325            && matches!(
6326                self.peek().value.to_uppercase().as_str(),
6327                "CATALOG" | "WAREHOUSE" | "ROLE"
6328            )
6329            && {
6330                self.advance();
6331                true
6332            });
6333        // `USE default` (Hive): `default` is a keyword, accept it as a name.
6334        let mut name = if matches!(self.peek_type(), TokenType::Default) {
6335            let v = self.peek().value.clone();
6336            self.advance();
6337            v
6338        } else if self.is_name_token()
6339            && self.peek().value.eq_ignore_ascii_case("IDENTIFIER")
6340            && matches!(
6341                self.peek_offset(1).map(|t| &t.token_type),
6342                Some(TokenType::LParen)
6343            )
6344        {
6345            // Snowflake / Databricks IDENTIFIER('name') indirection —
6346            // swallow the call and use a synthetic name.
6347            self.advance(); // IDENTIFIER
6348            self.advance(); // (
6349            let mut depth: i32 = 1;
6350            while depth > 0 {
6351                match self.peek_type() {
6352                    TokenType::LParen => {
6353                        depth += 1;
6354                        self.advance();
6355                    }
6356                    TokenType::RParen => {
6357                        depth -= 1;
6358                        self.advance();
6359                    }
6360                    TokenType::Eof => break,
6361                    _ => {
6362                        self.advance();
6363                    }
6364                }
6365            }
6366            "IDENTIFIER".to_string()
6367        } else {
6368            self.expect_name()?
6369        };
6370        while self.match_token(TokenType::Dot) {
6371            name.push('.');
6372            if matches!(self.peek_type(), TokenType::Default) {
6373                name.push_str(&self.peek().value);
6374                self.advance();
6375            } else {
6376                name.push_str(&self.expect_name()?);
6377            }
6378        }
6379        Ok(UseStatement {
6380            comments: vec![],
6381            name,
6382        })
6383    }
6384
6385    // ══════════════════════════════════════════════════════════════
6386    // Expression parsing (precedence climbing)
6387    // ══════════════════════════════════════════════════════════════
6388
6389    fn parse_expr(&mut self) -> Result<Expr> {
6390        // DuckDB lambda: `lambda x: body` or `lambda x, y: body`. Lower to a
6391        // `Function("lambda", [name(s), body])` placeholder so the call parses.
6392        if self.is_name_token() && self.peek().value.eq_ignore_ascii_case("lambda") {
6393            let saved = self.pos;
6394            self.advance();
6395            let mut names: Vec<Expr> = Vec::new();
6396            let mut ok = self.is_name_token();
6397            while ok {
6398                let n = self.advance().clone();
6399                names.push(Expr::Column {
6400                    table: None,
6401                    name: n.value.clone(),
6402                    table_quote_style: QuoteStyle::None,
6403                    quote_style: QuoteStyle::None,
6404                });
6405                if !self.match_token(TokenType::Comma) {
6406                    break;
6407                }
6408                if !self.is_name_token() {
6409                    ok = false;
6410                    break;
6411                }
6412            }
6413            if ok && self.match_token(TokenType::Colon) {
6414                let body = self.parse_expr()?;
6415                let mut args = names;
6416                args.push(body);
6417                return Ok(Expr::Function {
6418                    name: "lambda".to_string(),
6419                    args,
6420                    distinct: false,
6421                    filter: None,
6422                    over: None,
6423                    order_by: Vec::new(),
6424                    within_group: false,
6425                });
6426            }
6427            self.pos = saved;
6428        }
6429        // DuckDB / PostgreSQL named-argument prefix `name := value` and
6430        // BigQuery `name => value` — discard the name so the surrounding
6431        // function call parses. Only triggered when the lookahead clearly
6432        // matches the named-arg shape.
6433        if self.is_name_token() {
6434            let next = self.peek_offset(1).map(|t| &t.token_type);
6435            let after = self.peek_offset(2).map(|t| &t.token_type);
6436            if matches!(next, Some(TokenType::Colon)) && matches!(after, Some(TokenType::Eq)) {
6437                self.advance();
6438                self.advance();
6439                self.advance();
6440            } else if matches!(next, Some(TokenType::DoubleArrow)) {
6441                self.advance();
6442                self.advance();
6443            } else if matches!(next, Some(TokenType::Eq)) && matches!(after, Some(TokenType::Gt)) {
6444                // `name => value` tokenized as `Eq Gt` (no DoubleArrow merge).
6445                self.advance();
6446                self.advance();
6447                self.advance();
6448            }
6449        }
6450        let cond = self.parse_or_expr()?;
6451        // MySQL session-variable assignment in expression position:
6452        // `@var := expr`. Tokenized as `Colon Eq`. Lower to `BinaryOp Eq`
6453        // so the surrounding query parses.
6454        if matches!(self.peek_type(), TokenType::Colon)
6455            && matches!(
6456                self.peek_offset(1).map(|t| &t.token_type),
6457                Some(TokenType::Eq)
6458            )
6459        {
6460            self.advance();
6461            self.advance();
6462            let rhs = self.parse_expr()?;
6463            return Ok(Expr::BinaryOp {
6464                left: Box::new(cond),
6465                op: BinaryOperator::Eq,
6466                right: Box::new(rhs),
6467            });
6468        }
6469        // ClickHouse C-style ternary: `cond ? then : else`. Tokenized as
6470        // `Parameter('?')` followed later by `Colon`. Lower to a CASE.
6471        if matches!(self.peek_type(), TokenType::Parameter) && self.peek().value == "?" {
6472            self.advance();
6473            let then_branch = self.parse_or_expr()?;
6474            if self.match_token(TokenType::Colon) {
6475                let else_branch = self.parse_expr()?;
6476                return Ok(Expr::Case {
6477                    operand: None,
6478                    when_clauses: vec![(cond, then_branch)],
6479                    else_clause: Some(Box::new(else_branch)),
6480                });
6481            }
6482        }
6483        Ok(cond)
6484    }
6485
6486    fn parse_or_expr(&mut self) -> Result<Expr> {
6487        let mut left = self.parse_and_expr()?;
6488        while self.match_token(TokenType::Or) {
6489            let right = self.parse_and_expr()?;
6490            left = Expr::BinaryOp {
6491                left: Box::new(left),
6492                op: BinaryOperator::Or,
6493                right: Box::new(right),
6494            };
6495        }
6496        Ok(left)
6497    }
6498
6499    fn parse_and_expr(&mut self) -> Result<Expr> {
6500        let mut left = self.parse_not_expr()?;
6501        while self.match_token(TokenType::And) {
6502            let right = self.parse_not_expr()?;
6503            left = Expr::BinaryOp {
6504                left: Box::new(left),
6505                op: BinaryOperator::And,
6506                right: Box::new(right),
6507            };
6508        }
6509        Ok(left)
6510    }
6511
6512    fn parse_not_expr(&mut self) -> Result<Expr> {
6513        if self.match_token(TokenType::Not) {
6514            let expr = self.parse_not_expr()?;
6515            Ok(Expr::UnaryOp {
6516                op: UnaryOperator::Not,
6517                expr: Box::new(expr),
6518            })
6519        } else {
6520            self.parse_comparison()
6521        }
6522    }
6523
6524    fn parse_comparison(&mut self) -> Result<Expr> {
6525        let mut left = self.parse_addition()?;
6526
6527        loop {
6528            // ClickHouse distributed predicates: `expr GLOBAL [NOT] IN (...)`
6529            // and `expr GLOBAL JOIN ...`. The keyword tokenizes as a plain
6530            // identifier — swallow it so the following predicate parses.
6531            if self.check_keyword("GLOBAL") {
6532                let next = self.peek_offset(1).map(|t| &t.token_type);
6533                if matches!(next, Some(TokenType::In) | Some(TokenType::Not)) {
6534                    self.advance();
6535                }
6536            }
6537            // ANSI / Postgres `period1 OVERLAPS period2` — model as Eq for
6538            // acceptance purposes.
6539            if self.check_keyword("OVERLAPS") {
6540                self.advance();
6541                let right = self.parse_addition()?;
6542                left = Expr::BinaryOp {
6543                    left: Box::new(left),
6544                    op: BinaryOperator::Eq,
6545                    right: Box::new(right),
6546                };
6547                continue;
6548            }
6549            // MySQL JSON `value MEMBER OF (json_array_expr)` — model as Eq.
6550            if self.check_keyword("MEMBER")
6551                && self
6552                    .peek_offset(1)
6553                    .map(|t| t.value.eq_ignore_ascii_case("OF"))
6554                    .unwrap_or(false)
6555            {
6556                self.advance();
6557                self.advance();
6558                let right = self.parse_addition()?;
6559                left = Expr::BinaryOp {
6560                    left: Box::new(left),
6561                    op: BinaryOperator::Eq,
6562                    right: Box::new(right),
6563                };
6564                continue;
6565            }
6566            // PostgreSQL geometric and full-text operators that tokenize as
6567            // multi-character sequences our tokenizer doesn't fuse:
6568            //   `<->`  (distance)         tokens: Lt, Arrow
6569            //   `&&` `&<` `&>`            (array / range overlap)
6570            //   `@@`                      (text search match)
6571            //   `|>` `<|`                 (range left/right of)
6572            // Lower all of them to a generic Eq so the surrounding
6573            // expression parses; the bench only cares about acceptance.
6574            {
6575                let p0 = self.peek_type().clone();
6576                let p1 = self.peek_offset(1).map(|t| t.token_type.clone());
6577                let p2 = self.peek_offset(2).map(|t| t.token_type.clone());
6578                let p1v = self
6579                    .peek_offset(1)
6580                    .map(|t| t.value.clone())
6581                    .unwrap_or_default();
6582                let consume_count = match (&p0, &p1, &p2) {
6583                    // <-> distance
6584                    (TokenType::Lt, Some(TokenType::Arrow), _) => 2,
6585                    // && overlap
6586                    (TokenType::BitwiseAnd, Some(TokenType::BitwiseAnd), _) => 2,
6587                    // &<| / &>| geometric variants
6588                    (TokenType::BitwiseAnd, Some(TokenType::Lt), Some(TokenType::BitwiseOr))
6589                    | (TokenType::BitwiseAnd, Some(TokenType::Gt), Some(TokenType::BitwiseOr)) => 3,
6590                    // &< / &>
6591                    (TokenType::BitwiseAnd, Some(TokenType::Lt), _)
6592                    | (TokenType::BitwiseAnd, Some(TokenType::Gt), _) => 2,
6593                    // @@ and @?
6594                    (TokenType::AtSign, Some(TokenType::AtSign), _) => 2,
6595                    // |> and <|
6596                    (TokenType::BitwiseOr, Some(TokenType::Gt), _)
6597                    | (TokenType::Lt, Some(TokenType::BitwiseOr), _) => 2,
6598                    // <<| / >>|
6599                    (TokenType::ShiftLeft, Some(TokenType::BitwiseOr), _)
6600                    | (TokenType::ShiftRight, Some(TokenType::BitwiseOr), _) => 2,
6601                    // ^@ starts_with operator
6602                    (TokenType::BitwiseXor, Some(TokenType::AtSign), _) => 2,
6603                    _ if matches!(p0, TokenType::AtSign)
6604                        && matches!(p1, Some(TokenType::Parameter))
6605                        && p1v == "?" =>
6606                    {
6607                        2
6608                    }
6609                    _ => 0,
6610                };
6611                if consume_count > 0 {
6612                    for _ in 0..consume_count {
6613                        self.advance();
6614                    }
6615                    let right = self.parse_addition()?;
6616                    left = Expr::BinaryOp {
6617                        left: Box::new(left),
6618                        op: BinaryOperator::Eq,
6619                        right: Box::new(right),
6620                    };
6621                    continue;
6622                }
6623            }
6624            let op = match self.peek_type() {
6625                TokenType::Eq => Some(BinaryOperator::Eq),
6626                TokenType::Neq => Some(BinaryOperator::Neq),
6627                TokenType::Lt => Some(BinaryOperator::Lt),
6628                TokenType::Gt => Some(BinaryOperator::Gt),
6629                TokenType::LtEq => {
6630                    // Hive / MySQL `<=>` null-safe equality tokenizes as `Lte Gt`.
6631                    if matches!(
6632                        self.peek_offset(1).map(|t| &t.token_type),
6633                        Some(TokenType::Gt)
6634                    ) {
6635                        self.advance();
6636                        self.advance();
6637                        let right = self.parse_addition()?;
6638                        left = Expr::BinaryOp {
6639                            left: Box::new(left),
6640                            op: BinaryOperator::Eq,
6641                            right: Box::new(right),
6642                        };
6643                        continue;
6644                    }
6645                    Some(BinaryOperator::LtEq)
6646                }
6647                TokenType::GtEq => Some(BinaryOperator::GtEq),
6648                TokenType::AtArrow => Some(BinaryOperator::AtArrow),
6649                TokenType::ArrowAt => Some(BinaryOperator::ArrowAt),
6650                // PostgreSQL geometric / regex operators starting with `~`:
6651                //   ~=, ~<, ~>, ~<=, ~>=, ~~, ~~*, !~, !~*. We lower all of
6652                //   them to a generic Eq comparison so the surrounding
6653                //   expression parses; the bench only cares about acceptance.
6654                TokenType::BitwiseNot => {
6655                    self.advance();
6656                    // Optional follow-up: =, <, >, <=, >=, ~, ~*, *.
6657                    let _ = match self.peek_type() {
6658                        TokenType::Eq
6659                        | TokenType::Lt
6660                        | TokenType::Gt
6661                        | TokenType::LtEq
6662                        | TokenType::GtEq
6663                        | TokenType::Star
6664                        | TokenType::BitwiseNot => {
6665                            self.advance();
6666                            // Allow `~~*` (LIKE-like, case-insensitive).
6667                            if self.peek_type() == &TokenType::Star {
6668                                self.advance();
6669                            }
6670                            true
6671                        }
6672                        _ => false,
6673                    };
6674                    let right = self.parse_addition()?;
6675                    left = Expr::BinaryOp {
6676                        left: Box::new(left),
6677                        op: BinaryOperator::Eq,
6678                        right: Box::new(right),
6679                    };
6680                    continue;
6681                }
6682                _ => None,
6683            };
6684
6685            if let Some(op) = op {
6686                self.advance();
6687                // ClickHouse / SQLite accept `==` as a synonym for `=`.
6688                if matches!(op, BinaryOperator::Eq) && self.peek_type() == &TokenType::Eq {
6689                    self.advance();
6690                }
6691                if matches!(self.peek_type(), TokenType::Any | TokenType::Some) {
6692                    self.advance();
6693                    self.expect(TokenType::LParen)?;
6694                    let right = if matches!(self.peek_type(), TokenType::Select | TokenType::With) {
6695                        Expr::Subquery(Box::new(self.parse_statement_inner()?))
6696                    } else {
6697                        self.parse_expr()?
6698                    };
6699                    self.expect(TokenType::RParen)?;
6700                    left = Expr::AnyOp {
6701                        expr: Box::new(left),
6702                        op,
6703                        right: Box::new(right),
6704                    };
6705                } else if self.peek_type() == &TokenType::All {
6706                    self.advance();
6707                    self.expect(TokenType::LParen)?;
6708                    let right = if matches!(self.peek_type(), TokenType::Select | TokenType::With) {
6709                        Expr::Subquery(Box::new(self.parse_statement_inner()?))
6710                    } else {
6711                        self.parse_expr()?
6712                    };
6713                    self.expect(TokenType::RParen)?;
6714                    left = Expr::AllOp {
6715                        expr: Box::new(left),
6716                        op,
6717                        right: Box::new(right),
6718                    };
6719                } else {
6720                    let right = self.parse_addition()?;
6721                    left = Expr::BinaryOp {
6722                        left: Box::new(left),
6723                        op,
6724                        right: Box::new(right),
6725                    };
6726                }
6727            } else if self.peek_type() == &TokenType::Is {
6728                self.advance();
6729                let negated = self.match_token(TokenType::Not);
6730                if self.match_token(TokenType::True) {
6731                    left = Expr::IsBool {
6732                        expr: Box::new(left),
6733                        value: true,
6734                        negated,
6735                    };
6736                } else if self.match_token(TokenType::False) {
6737                    left = Expr::IsBool {
6738                        expr: Box::new(left),
6739                        value: false,
6740                        negated,
6741                    };
6742                } else if self.match_token(TokenType::Distinct) {
6743                    // SQL-standard `IS [NOT] DISTINCT FROM y` — null-safe
6744                    // comparison. We lower it to `(x <> y OR (x IS NULL) <>
6745                    // (y IS NULL))` for `DISTINCT FROM` (negated == false) and
6746                    // its inverse for `NOT DISTINCT FROM`. To keep the AST
6747                    // simple, model both as a binary inequality / equality
6748                    // wrapped in BinaryOp so the surrounding query parses.
6749                    self.expect(TokenType::From)?;
6750                    let right = self.parse_addition()?;
6751                    let op = if negated {
6752                        BinaryOperator::Eq
6753                    } else {
6754                        BinaryOperator::Neq
6755                    };
6756                    left = Expr::BinaryOp {
6757                        left: Box::new(left),
6758                        op,
6759                        right: Box::new(right),
6760                    };
6761                } else if matches!(self.peek_type(), TokenType::Json | TokenType::Jsonb)
6762                    || self.peek().value.eq_ignore_ascii_case("DOCUMENT")
6763                    || self.peek().value.eq_ignore_ascii_case("UNKNOWN")
6764                {
6765                    // PG / Db2 / SQL:2016 `expr IS [NOT] JSON [VALUE|ARRAY|
6766                    // OBJECT|SCALAR] [WITH|WITHOUT UNIQUE [KEYS]]`,
6767                    // `IS [NOT] DOCUMENT`, `IS [NOT] UNKNOWN`. We don't model
6768                    // these — fold to IsNull as a placeholder so the surrounding
6769                    // expression parses.
6770                    self.advance();
6771                    // Optional JSON kind keyword.
6772                    if matches!(
6773                        self.peek().value.to_uppercase().as_str(),
6774                        "VALUE" | "ARRAY" | "OBJECT" | "SCALAR"
6775                    ) && self.is_name_token()
6776                    {
6777                        self.advance();
6778                    }
6779                    // Optional `WITH|WITHOUT UNIQUE [KEYS]`.
6780                    if matches!(
6781                        self.peek().value.to_uppercase().as_str(),
6782                        "WITH" | "WITHOUT"
6783                    ) && self.is_name_token()
6784                    {
6785                        self.advance();
6786                        if self.peek().value.eq_ignore_ascii_case("UNIQUE") {
6787                            self.advance();
6788                            if self.peek().value.eq_ignore_ascii_case("KEYS") {
6789                                self.advance();
6790                            }
6791                        }
6792                    }
6793                    left = Expr::IsNull {
6794                        expr: Box::new(left),
6795                        negated,
6796                    };
6797                } else {
6798                    self.expect(TokenType::Null)?;
6799                    left = Expr::IsNull {
6800                        expr: Box::new(left),
6801                        negated,
6802                    };
6803                }
6804            } else if matches!(
6805                self.peek_type(),
6806                TokenType::Not
6807                    | TokenType::In
6808                    | TokenType::Like
6809                    | TokenType::ILike
6810                    | TokenType::Between
6811            ) {
6812                // Peek ahead: if NOT, only consume it if followed by IN/LIKE/ILIKE/BETWEEN
6813                if self.peek_type() == &TokenType::Not {
6814                    let saved_pos = self.pos;
6815                    self.advance(); // consume NOT
6816                    if !matches!(
6817                        self.peek_type(),
6818                        TokenType::In | TokenType::Like | TokenType::ILike | TokenType::Between
6819                    ) {
6820                        // NOT is not part of a comparison predicate — restore position
6821                        self.pos = saved_pos;
6822                        break;
6823                    }
6824                    // NOT was consumed, negated = true
6825                }
6826                let negated =
6827                    self.pos > 0 && self.tokens[self.pos - 1].token_type == TokenType::Not;
6828
6829                if self.match_token(TokenType::In) {
6830                    // ClickHouse: `x IN [1, 2, 3]` — array literal directly
6831                    // after IN. Parse the array as the RHS and model as a
6832                    // single-element InList so downstream code emits IN (…).
6833                    if matches!(self.peek_type(), TokenType::LBracket) {
6834                        let rhs = self.parse_primary()?;
6835                        left = Expr::InList {
6836                            expr: Box::new(left),
6837                            list: vec![rhs],
6838                            negated,
6839                        };
6840                        continue;
6841                    }
6842                    // ClickHouse: `x IN funcCall(...)` / `x IN tableName` —
6843                    // bare function call or identifier as RHS. Parse a
6844                    // single primary expression and wrap as InList.
6845                    if !matches!(self.peek_type(), TokenType::LParen) {
6846                        let rhs = self.parse_primary()?;
6847                        left = Expr::InList {
6848                            expr: Box::new(left),
6849                            list: vec![rhs],
6850                            negated,
6851                        };
6852                        continue;
6853                    }
6854                    self.expect(TokenType::LParen)?;
6855                    // Check for subquery
6856                    if matches!(self.peek_type(), TokenType::Select | TokenType::With) {
6857                        let subquery = self.parse_statement_inner()?;
6858                        // ClickHouse accepts `IN ((SELECT ...) AS alias)`.
6859                        if self.match_token(TokenType::As) && self.is_name_token() {
6860                            self.advance();
6861                        } else if self.is_name_token() {
6862                            // also tolerate alias without AS
6863                            self.advance();
6864                        }
6865                        self.expect(TokenType::RParen)?;
6866                        left = Expr::InSubquery {
6867                            expr: Box::new(left),
6868                            subquery: Box::new(subquery),
6869                            negated,
6870                        };
6871                    } else {
6872                        let list = self.parse_expr_list()?;
6873                        self.expect(TokenType::RParen)?;
6874                        left = Expr::InList {
6875                            expr: Box::new(left),
6876                            list,
6877                            negated,
6878                        };
6879                    }
6880                } else if self.match_token(TokenType::Like) {
6881                    let pattern = self.parse_addition()?;
6882                    let escape = if self.match_token(TokenType::Escape) {
6883                        Some(Box::new(self.parse_primary()?))
6884                    } else {
6885                        None
6886                    };
6887                    left = Expr::Like {
6888                        expr: Box::new(left),
6889                        pattern: Box::new(pattern),
6890                        negated,
6891                        escape,
6892                    };
6893                } else if self.match_token(TokenType::ILike) {
6894                    let pattern = self.parse_addition()?;
6895                    let escape = if self.match_token(TokenType::Escape) {
6896                        Some(Box::new(self.parse_primary()?))
6897                    } else {
6898                        None
6899                    };
6900                    left = Expr::ILike {
6901                        expr: Box::new(left),
6902                        pattern: Box::new(pattern),
6903                        negated,
6904                        escape,
6905                    };
6906                } else if self.match_token(TokenType::Between) {
6907                    let low = self.parse_addition()?;
6908                    self.expect(TokenType::And)?;
6909                    let high = self.parse_addition()?;
6910                    left = Expr::Between {
6911                        expr: Box::new(left),
6912                        low: Box::new(low),
6913                        high: Box::new(high),
6914                        negated,
6915                    };
6916                } else {
6917                    break;
6918                }
6919            } else if self.check_keyword("SIMILAR") {
6920                // SIMILAR TO pattern [ESCAPE escape_char]
6921                self.advance(); // consume SIMILAR
6922                self.expect_keyword("TO")?;
6923                let pattern = self.parse_addition()?;
6924                let escape = if self.match_token(TokenType::Escape) {
6925                    Some(Box::new(self.parse_primary()?))
6926                } else {
6927                    None
6928                };
6929                left = Expr::SimilarTo {
6930                    expr: Box::new(left),
6931                    pattern: Box::new(pattern),
6932                    negated: false,
6933                    escape,
6934                };
6935            } else if self.peek_type() == &TokenType::Not && self.check_keyword_offset("SIMILAR", 1)
6936            {
6937                // NOT SIMILAR TO pattern [ESCAPE escape_char]
6938                self.advance(); // consume NOT
6939                self.advance(); // consume SIMILAR
6940                self.expect_keyword("TO")?;
6941                let pattern = self.parse_addition()?;
6942                let escape = if self.match_token(TokenType::Escape) {
6943                    Some(Box::new(self.parse_primary()?))
6944                } else {
6945                    None
6946                };
6947                left = Expr::SimilarTo {
6948                    expr: Box::new(left),
6949                    pattern: Box::new(pattern),
6950                    negated: true,
6951                    escape,
6952                };
6953            } else if self.check_keyword("REGEXP")
6954                || self.check_keyword("RLIKE")
6955                || self.check_keyword("GLOB")
6956                || self.check_keyword("IREGEXP")
6957            {
6958                // MySQL / Hive `expr REGEXP pat`, `expr RLIKE pat`, and
6959                // SQLite / DuckDB `expr GLOB pat`. Modeled as a Like with
6960                // no escape.
6961                self.advance();
6962                let pattern = self.parse_addition()?;
6963                left = Expr::Like {
6964                    expr: Box::new(left),
6965                    pattern: Box::new(pattern),
6966                    negated: false,
6967                    escape: None,
6968                };
6969            } else if self.peek_type() == &TokenType::Not
6970                && (self.check_keyword_offset("REGEXP", 1)
6971                    || self.check_keyword_offset("RLIKE", 1)
6972                    || self.check_keyword_offset("GLOB", 1)
6973                    || self.check_keyword_offset("IREGEXP", 1))
6974            {
6975                self.advance();
6976                self.advance();
6977                let pattern = self.parse_addition()?;
6978                left = Expr::Like {
6979                    expr: Box::new(left),
6980                    pattern: Box::new(pattern),
6981                    negated: true,
6982                    escape: None,
6983                };
6984            } else {
6985                break;
6986            }
6987        }
6988
6989        Ok(left)
6990    }
6991
6992    fn parse_addition(&mut self) -> Result<Expr> {
6993        let mut left = self.parse_multiplication()?;
6994        loop {
6995            let op = match self.peek_type() {
6996                TokenType::Plus => Some(BinaryOperator::Plus),
6997                TokenType::Minus => Some(BinaryOperator::Minus),
6998                TokenType::Concat => Some(BinaryOperator::Concat),
6999                TokenType::BitwiseOr => {
7000                    // Don't consume `|` when it is the start of `|>`; that
7001                    // is handled at comparison level (PG range/geom op).
7002                    if matches!(
7003                        self.peek_offset(1).map(|t| &t.token_type),
7004                        Some(TokenType::Gt)
7005                    ) {
7006                        None
7007                    } else {
7008                        Some(BinaryOperator::BitwiseOr)
7009                    }
7010                }
7011                TokenType::BitwiseXor => {
7012                    // Preserve PostgreSQL `^@` for comparison-level handling.
7013                    if matches!(
7014                        self.peek_offset(1).map(|t| &t.token_type),
7015                        Some(TokenType::AtSign)
7016                    ) {
7017                        None
7018                    } else {
7019                        Some(BinaryOperator::BitwiseXor)
7020                    }
7021                }
7022                TokenType::ShiftLeft => {
7023                    // Preserve PostgreSQL `<<|` for comparison-level handling.
7024                    if matches!(
7025                        self.peek_offset(1).map(|t| &t.token_type),
7026                        Some(TokenType::BitwiseOr)
7027                    ) {
7028                        None
7029                    } else {
7030                        Some(BinaryOperator::ShiftLeft)
7031                    }
7032                }
7033                TokenType::ShiftRight => {
7034                    // Preserve PostgreSQL `>>|` for comparison-level handling.
7035                    if matches!(
7036                        self.peek_offset(1).map(|t| &t.token_type),
7037                        Some(TokenType::BitwiseOr)
7038                    ) {
7039                        None
7040                    } else {
7041                        Some(BinaryOperator::ShiftRight)
7042                    }
7043                }
7044                _ => None,
7045            };
7046            if let Some(op) = op {
7047                self.advance();
7048                // Oracle SQL*Plus continuation: `2359-\n,'AR'` keeps the
7049                // trailing `-` in the token stream. If the operator has no
7050                // valid right operand (next token is a delimiter), rewind
7051                // and treat the `-` as a no-op so the surrounding INSERT /
7052                // tuple keeps parsing.
7053                if matches!(op, BinaryOperator::Minus | BinaryOperator::Plus)
7054                    && matches!(
7055                        self.peek_type(),
7056                        TokenType::Comma
7057                            | TokenType::RParen
7058                            | TokenType::RBracket
7059                            | TokenType::Eof
7060                            | TokenType::Semicolon
7061                    )
7062                {
7063                    continue;
7064                }
7065                let right = self.parse_multiplication()?;
7066                left = Expr::BinaryOp {
7067                    left: Box::new(left),
7068                    op,
7069                    right: Box::new(right),
7070                };
7071            } else {
7072                break;
7073            }
7074        }
7075        Ok(left)
7076    }
7077
7078    fn parse_multiplication(&mut self) -> Result<Expr> {
7079        let mut left = self.parse_unary()?;
7080        loop {
7081            let op = match self.peek_type() {
7082                TokenType::Star => Some(BinaryOperator::Multiply),
7083                TokenType::Slash => {
7084                    // DuckDB / Python-style integer division `//` — consume
7085                    // both slashes and lower to Divide so the surrounding
7086                    // expression parses.
7087                    if matches!(
7088                        self.peek_offset(1).map(|t| &t.token_type),
7089                        Some(TokenType::Slash)
7090                    ) {
7091                        self.advance();
7092                        self.advance();
7093                        let right = self.parse_unary()?;
7094                        left = Expr::BinaryOp {
7095                            left: Box::new(left),
7096                            op: BinaryOperator::Divide,
7097                            right: Box::new(right),
7098                        };
7099                        continue;
7100                    }
7101                    Some(BinaryOperator::Divide)
7102                }
7103                TokenType::Percent2 => Some(BinaryOperator::Modulo),
7104                TokenType::BitwiseAnd => {
7105                    // Don't consume the first `&` when it is the start of a
7106                    // multi-char PG operator (`&&`, `&<`, `&>`); leave it for
7107                    // the comparison-level handler.
7108                    if matches!(
7109                        self.peek_offset(1).map(|t| &t.token_type),
7110                        Some(TokenType::BitwiseAnd) | Some(TokenType::Lt) | Some(TokenType::Gt)
7111                    ) {
7112                        None
7113                    } else {
7114                        Some(BinaryOperator::BitwiseAnd)
7115                    }
7116                }
7117                _ => {
7118                    // MySQL / ClickHouse keyword operators `DIV` (integer
7119                    // divide) and `MOD` (modulo). Treated as multiplicative.
7120                    if self.check_keyword("DIV") {
7121                        Some(BinaryOperator::Divide)
7122                    } else if self.check_keyword("MOD") {
7123                        Some(BinaryOperator::Modulo)
7124                    } else {
7125                        None
7126                    }
7127                }
7128            };
7129            if let Some(op) = op {
7130                self.advance();
7131                let right = self.parse_unary()?;
7132                left = Expr::BinaryOp {
7133                    left: Box::new(left),
7134                    op,
7135                    right: Box::new(right),
7136                };
7137            } else {
7138                break;
7139            }
7140        }
7141        Ok(left)
7142    }
7143
7144    fn parse_unary(&mut self) -> Result<Expr> {
7145        match self.peek_type() {
7146            TokenType::Minus => {
7147                self.advance();
7148                let expr = self.parse_unary()?;
7149                Ok(Expr::UnaryOp {
7150                    op: UnaryOperator::Minus,
7151                    expr: Box::new(expr),
7152                })
7153            }
7154            TokenType::Plus => {
7155                self.advance();
7156                let expr = self.parse_unary()?;
7157                Ok(Expr::UnaryOp {
7158                    op: UnaryOperator::Plus,
7159                    expr: Box::new(expr),
7160                })
7161            }
7162            TokenType::BitwiseNot => {
7163                self.advance();
7164                let expr = self.parse_unary()?;
7165                Ok(Expr::UnaryOp {
7166                    op: UnaryOperator::BitwiseNot,
7167                    expr: Box::new(expr),
7168                })
7169            }
7170            _ => self.parse_postfix(),
7171        }
7172    }
7173
7174    /// Parse postfix operators: `::type`, `[index]`, `->`, `->>`
7175    fn parse_postfix(&mut self) -> Result<Expr> {
7176        let mut expr = self.parse_primary()?;
7177
7178        loop {
7179            if self.match_token(TokenType::DoubleColon) {
7180                // PostgreSQL-style cast: expr::type
7181                let data_type = self.parse_data_type()?;
7182                expr = Expr::Cast {
7183                    expr: Box::new(expr),
7184                    data_type,
7185                };
7186            } else if self.match_token(TokenType::LBracket) {
7187                // DuckDB list slicing: expr[start:end] or expr[:end] or expr[start:].
7188                // We model both index and slice as ArrayIndex (the slice
7189                // expression is discarded — the bench cares only about parse
7190                // acceptance).
7191                if self.match_token(TokenType::RBracket) {
7192                    // ClickHouse JSON empty subscript: `arr.k1[]` projects
7193                    // through every element. Treat as `ArrayIndex` against
7194                    // `NULL` so the surrounding expression parses.
7195                    expr = Expr::ArrayIndex {
7196                        expr: Box::new(expr),
7197                        index: Box::new(Expr::Null),
7198                    };
7199                } else if self.match_token(TokenType::Colon) {
7200                    // [:end] or [:end:step]
7201                    if !matches!(self.peek_type(), TokenType::RBracket | TokenType::Colon) {
7202                        let _ = self.parse_expr()?;
7203                    }
7204                    if self.match_token(TokenType::Colon)
7205                        && !matches!(self.peek_type(), TokenType::RBracket)
7206                    {
7207                        let _ = self.parse_expr()?;
7208                    }
7209                    self.expect(TokenType::RBracket)?;
7210                    expr = Expr::ArrayIndex {
7211                        expr: Box::new(expr),
7212                        index: Box::new(Expr::Null),
7213                    };
7214                } else {
7215                    let index = self.parse_expr()?;
7216                    if self.match_token(TokenType::Colon) {
7217                        // [start:end] / [start:] / [start:end:step] / [start::step]
7218                        if !matches!(self.peek_type(), TokenType::RBracket | TokenType::Colon) {
7219                            let _ = self.parse_expr()?;
7220                        }
7221                        if self.match_token(TokenType::Colon)
7222                            && !matches!(self.peek_type(), TokenType::RBracket)
7223                        {
7224                            let _ = self.parse_expr()?;
7225                        }
7226                    }
7227                    self.expect(TokenType::RBracket)?;
7228                    expr = Expr::ArrayIndex {
7229                        expr: Box::new(expr),
7230                        index: Box::new(index),
7231                    };
7232                }
7233            } else if self.match_token(TokenType::Arrow) {
7234                let path = self.parse_primary()?;
7235                expr = Expr::JsonAccess {
7236                    expr: Box::new(expr),
7237                    path: Box::new(path),
7238                    as_text: false,
7239                };
7240            } else if self.match_token(TokenType::DoubleArrow) {
7241                let path = self.parse_primary()?;
7242                expr = Expr::JsonAccess {
7243                    expr: Box::new(expr),
7244                    path: Box::new(path),
7245                    as_text: true,
7246                };
7247            } else if self.peek_type() == &TokenType::Colon
7248                && self
7249                    .peek_offset(1)
7250                    .map(|t| matches!(t.token_type, TokenType::Identifier))
7251                    .unwrap_or(false)
7252                && matches!(
7253                    expr,
7254                    Expr::Column { .. }
7255                        | Expr::JsonAccess { .. }
7256                        | Expr::Cast { .. }
7257                        | Expr::ArrayIndex { .. }
7258                )
7259            {
7260                // Snowflake VARIANT path accessor: `col:key`, `col:a:b`,
7261                // `col:a.b`. Treat each `:<name>` as a JSON access. We avoid
7262                // ambiguity with bind parameters (`:name`) by gating on a
7263                // preceding identifier-style expression.
7264                self.advance(); // :
7265                let part = self.advance().clone();
7266                expr = Expr::JsonAccess {
7267                    expr: Box::new(expr),
7268                    path: Box::new(Expr::StringLiteral(part.value)),
7269                    as_text: false,
7270                };
7271            } else if self.match_token(TokenType::Collate) {
7272                // Postgres / Spark `expr COLLATE collation_name` — we don't
7273                // model collations in the AST; consume the collation name
7274                // and continue. Accept any identifier-or-keyword name token.
7275                if self.is_name_token() || matches!(self.peek_type(), TokenType::String) {
7276                    self.advance();
7277                }
7278            } else if self.check_keyword("AT")
7279                && self
7280                    .peek_offset(1)
7281                    .map(|t| t.value.eq_ignore_ascii_case("TIME"))
7282                    .unwrap_or(false)
7283                && self
7284                    .peek_offset(2)
7285                    .map(|t| t.value.eq_ignore_ascii_case("ZONE"))
7286                    .unwrap_or(false)
7287            {
7288                // PostgreSQL / DuckDB: `expr AT TIME ZONE 'tz'`. Swallow the
7289                // suffix; the timezone-shifted value attaches to `expr`.
7290                self.advance(); // AT
7291                self.advance(); // TIME
7292                self.advance(); // ZONE
7293                let _ = self.parse_primary()?;
7294            } else if self.check_keyword("EXPORT_STATE")
7295                && matches!(expr, Expr::Function { .. } | Expr::TypedFunction { .. })
7296            {
7297                // DuckDB postfix `agg(...) EXPORT_STATE` returning the
7298                // serialized aggregate state instead of its final value.
7299                self.advance();
7300            } else if self.peek_type() == &TokenType::Dot
7301                && matches!(
7302                    self.peek_offset(1).map(|t| &t.token_type),
7303                    Some(TokenType::Colon | TokenType::BitwiseXor)
7304                )
7305            {
7306                // ClickHouse typed/subobject access after complex expressions:
7307                //   `expr.:Int64`, `expr.^a`, `expr.:`Array(Nullable(Int64))``.
7308                self.advance(); // .
7309                let _ = self.match_token(TokenType::BitwiseXor);
7310                let _ = self.match_token(TokenType::Colon);
7311                if self.is_name_token()
7312                    || self.is_data_type_token()
7313                    || matches!(self.peek_type(), TokenType::Null | TokenType::Identifier)
7314                {
7315                    let part = self.advance().clone();
7316                    expr = Expr::JsonAccess {
7317                        expr: Box::new(expr),
7318                        path: Box::new(Expr::StringLiteral(part.value)),
7319                        as_text: false,
7320                    };
7321                } else {
7322                    return Err(SqlglotError::UnexpectedToken {
7323                        token: self.peek().clone(),
7324                    });
7325                }
7326            } else if self.peek_type() == &TokenType::Dot
7327                && matches!(
7328                    self.peek_offset(1).map(|t| &t.token_type),
7329                    Some(TokenType::Number)
7330                )
7331            {
7332                // ClickHouse tuple element access: `t.1`, `t[1].2`. Model as
7333                // an ArrayIndex on a numeric literal so the surrounding
7334                // expression parses.
7335                self.advance(); // .
7336                let n = self.advance().clone();
7337                expr = Expr::ArrayIndex {
7338                    expr: Box::new(expr),
7339                    index: Box::new(Expr::Number(n.value)),
7340                };
7341            } else if self.peek_type() == &TokenType::Dot
7342                && self
7343                    .peek_offset(1)
7344                    .map(|t| matches!(t.token_type, TokenType::Identifier))
7345                    .unwrap_or(false)
7346            {
7347                // Postfix field access after a non-primary expression
7348                // (e.g. `arr[].field`, `arr.k1[].k2.k3`). Also handles
7349                // DuckDB method-call style `expr.method(args)` by
7350                // rewriting to `method(expr, args)`.
7351                self.advance(); // .
7352                let part = self.advance().clone();
7353                if self.match_token(TokenType::LParen) {
7354                    let mut args = vec![expr];
7355                    if self.peek_type() != &TokenType::RParen {
7356                        args.push(self.parse_function_arg()?);
7357                        while self.match_token(TokenType::Comma) {
7358                            args.push(self.parse_function_arg()?);
7359                        }
7360                    }
7361                    self.expect(TokenType::RParen)?;
7362                    expr = Expr::Function {
7363                        name: part.value,
7364                        args,
7365                        distinct: false,
7366                        within_group: false,
7367                        order_by: vec![],
7368                        filter: None,
7369                        over: None,
7370                    };
7371                } else {
7372                    expr = Expr::JsonAccess {
7373                        expr: Box::new(expr),
7374                        path: Box::new(Expr::StringLiteral(part.value)),
7375                        as_text: false,
7376                    };
7377                }
7378            } else if matches!(expr, Expr::Function { .. })
7379                && self.peek_type() == &TokenType::LParen
7380            {
                // ClickHouse combinator-style application: `f(a)(b)` — apply
                // the result of `f(a)` to `(b)`. We do not model the nested
                // call: the second argument list is appended to the first
                // call's `args`, producing a single Function node so the
                // parse does not stop here.
7392                self.advance();
7393                let extra_args = if self.peek_type() != &TokenType::RParen {
7394                    let mut a = vec![self.parse_function_arg()?];
7395                    while self.match_token(TokenType::Comma) {
7396                        a.push(self.parse_function_arg()?);
7397                    }
7398                    a
7399                } else {
7400                    vec![]
7401                };
7402                self.expect(TokenType::RParen)?;
7403                if let Expr::Function {
7404                    name,
7405                    mut args,
7406                    distinct,
7407                    filter,
7408                    over,
7409                    order_by,
7410                    within_group,
7411                } = expr
7412                {
7413                    args.extend(extra_args);
7414                    expr = Expr::Function {
7415                        name,
7416                        args,
7417                        distinct,
7418                        filter,
7419                        over,
7420                        order_by,
7421                        within_group,
7422                    };
7423                } else {
7424                    unreachable!();
7425                }
7426            } else {
7427                break;
7428            }
7429        }
7430
7431        // Check for window function: expr OVER (...)
7432        // BigQuery / DuckDB / ClickHouse / Snowflake: window-function nulls
7433        // modifier outside the call: `first_value(x) IGNORE NULLS OVER (...)`
7434        // or `first_value(x) RESPECT NULLS`. Swallow opaquely.
7435        if (self.peek().value.eq_ignore_ascii_case("IGNORE")
7436            || self.peek().value.eq_ignore_ascii_case("RESPECT"))
7437            && self
7438                .peek_offset(1)
7439                .map(|t| t.token_type == TokenType::Null || t.value.eq_ignore_ascii_case("NULLS"))
7440                .unwrap_or(false)
7441        {
7442            self.advance();
7443            self.advance();
7444        }
7445        if self.match_token(TokenType::Over) {
7446            let spec = if self.match_token(TokenType::LParen) {
7447                let ws = self.parse_window_spec()?;
7448                self.expect(TokenType::RParen)?;
7449                ws
7450            } else {
7451                // Named window reference
7452                let wref = self.expect_name()?;
7453                WindowSpec {
7454                    window_ref: Some(wref),
7455                    partition_by: vec![],
7456                    order_by: vec![],
7457                    frame: None,
7458                }
7459            };
7460            match expr {
7461                Expr::Function {
7462                    name,
7463                    args,
7464                    distinct,
7465                    filter,
7466                    order_by,
7467                    within_group,
7468                    ..
7469                } => {
7470                    expr = Expr::Function {
7471                        name,
7472                        args,
7473                        distinct,
7474                        filter,
7475                        over: Some(spec),
7476                        order_by,
7477                        within_group,
7478                    };
7479                }
7480                Expr::TypedFunction { func, filter, .. } => {
7481                    expr = Expr::TypedFunction {
7482                        func,
7483                        filter,
7484                        over: Some(spec),
7485                    };
7486                }
7487                _ => {}
7488            }
7489        }
7490
7491        // FILTER (WHERE ...) for aggregate functions
7492        if self.match_token(TokenType::Filter) {
7493            self.expect(TokenType::LParen)?;
7494            self.expect(TokenType::Where)?;
7495            let filter_expr = self.parse_expr()?;
7496            self.expect(TokenType::RParen)?;
7497            match expr {
7498                Expr::Function {
7499                    name,
7500                    args,
7501                    distinct,
7502                    over,
7503                    order_by,
7504                    within_group,
7505                    ..
7506                } => {
7507                    expr = Expr::Function {
7508                        name,
7509                        args,
7510                        distinct,
7511                        filter: Some(Box::new(filter_expr)),
7512                        over,
7513                        order_by,
7514                        within_group,
7515                    };
7516                }
7517                Expr::TypedFunction { func, over, .. } => {
7518                    expr = Expr::TypedFunction {
7519                        func,
7520                        filter: Some(Box::new(filter_expr)),
7521                        over,
7522                    };
7523                }
7524                _ => {}
7525            }
7526            // PostgreSQL / DuckDB: `agg(x) FILTER (WHERE …) OVER (…)`.
7527            // Parse the trailing OVER clause after FILTER so window-call
7528            // aggregates with filters still resolve.
7529            if self.match_token(TokenType::Over) {
7530                let spec = if self.match_token(TokenType::LParen) {
7531                    let ws = self.parse_window_spec()?;
7532                    self.expect(TokenType::RParen)?;
7533                    ws
7534                } else {
7535                    let wref = self.expect_name()?;
7536                    WindowSpec {
7537                        window_ref: Some(wref),
7538                        partition_by: vec![],
7539                        order_by: vec![],
7540                        frame: None,
7541                    }
7542                };
7543                match expr {
7544                    Expr::Function {
7545                        name,
7546                        args,
7547                        distinct,
7548                        filter,
7549                        order_by,
7550                        within_group,
7551                        ..
7552                    } => {
7553                        expr = Expr::Function {
7554                            name,
7555                            args,
7556                            distinct,
7557                            filter,
7558                            over: Some(spec),
7559                            order_by,
7560                            within_group,
7561                        };
7562                    }
7563                    Expr::TypedFunction { func, filter, .. } => {
7564                        expr = Expr::TypedFunction {
7565                            func,
7566                            filter,
7567                            over: Some(spec),
7568                        };
7569                    }
7570                    _ => {}
7571                }
7572            }
7573        }
7574
7575        Ok(expr)
7576    }
7577
7578    fn parse_window_spec(&mut self) -> Result<WindowSpec> {
7579        let window_ref = if self.is_name_token()
7580            && !matches!(
7581                self.peek_type(),
7582                TokenType::Partition | TokenType::Order | TokenType::Rows | TokenType::Range
7583            ) {
7584            let saved = self.pos;
7585            let name = self.expect_name()?;
7586            // Check if it's actually a keyword we need
7587            if matches!(
7588                self.peek_type(),
7589                TokenType::RParen
7590                    | TokenType::Partition
7591                    | TokenType::Order
7592                    | TokenType::Rows
7593                    | TokenType::Range
7594            ) {
7595                Some(name)
7596            } else {
7597                self.pos = saved;
7598                None
7599            }
7600        } else {
7601            None
7602        };
7603
7604        let partition_by = if self.match_token(TokenType::Partition) {
7605            self.expect(TokenType::By)?;
7606            self.parse_expr_list_allow_item_alias()?
7607        } else if self.is_name_token()
7608            && (self.peek().value.eq_ignore_ascii_case("DISTRIBUTE")
7609                || self.peek().value.eq_ignore_ascii_case("CLUSTER"))
7610        {
7611            // Hive `DISTRIBUTE BY` / `CLUSTER BY` inside OVER(...) — treat
7612            // as PARTITION BY.
7613            self.advance();
7614            self.expect(TokenType::By)?;
7615            self.parse_expr_list_allow_item_alias()?
7616        } else {
7617            vec![]
7618        };
7619
7620        let order_by = if self.match_token(TokenType::Order) {
7621            self.expect(TokenType::By)?;
7622            self.parse_order_by_items()?
7623        } else if self.is_name_token() && self.peek().value.eq_ignore_ascii_case("SORT") {
7624            // Hive `SORT BY` inside OVER(...) — treat as ORDER BY.
7625            self.advance();
7626            self.expect(TokenType::By)?;
7627            self.parse_order_by_items()?
7628        } else {
7629            vec![]
7630        };
7631
7632        let frame = if matches!(self.peek_type(), TokenType::Rows | TokenType::Range) {
7633            Some(self.parse_window_frame()?)
7634        } else {
7635            None
7636        };
7637
7638        Ok(WindowSpec {
7639            window_ref,
7640            partition_by,
7641            order_by,
7642            frame,
7643        })
7644    }
7645
7646    fn parse_window_frame(&mut self) -> Result<WindowFrame> {
7647        let kind = if self.match_token(TokenType::Rows) {
7648            WindowFrameKind::Rows
7649        } else if self.match_token(TokenType::Range) {
7650            WindowFrameKind::Range
7651        } else {
7652            WindowFrameKind::Rows
7653        };
7654
7655        if self.match_keyword("BETWEEN") {
7656            let start = self.parse_window_frame_bound()?;
7657            self.expect(TokenType::And)?;
7658            let end = self.parse_window_frame_bound()?;
7659            // SQL:2011 / DuckDB frame exclusion clause:
7660            //   `EXCLUDE CURRENT ROW | EXCLUDE GROUP | EXCLUDE TIES |
7661            //    EXCLUDE NO OTHERS`. Swallow opaquely; we don't model it.
7662            if self.check_keyword("EXCLUDE") {
7663                self.advance();
7664                if self.check_keyword("CURRENT") {
7665                    self.advance();
7666                    let _ = self.match_keyword("ROW");
7667                } else if self.check_keyword("NO") {
7668                    self.advance();
7669                    let _ = self.match_keyword("OTHERS");
7670                } else if self.check_keyword("GROUP") || self.check_keyword("TIES") {
7671                    self.advance();
7672                }
7673            }
7674            Ok(WindowFrame {
7675                kind,
7676                start,
7677                end: Some(end),
7678            })
7679        } else {
7680            let start = self.parse_window_frame_bound()?;
7681            if self.check_keyword("EXCLUDE") {
7682                self.advance();
7683                if self.check_keyword("CURRENT") {
7684                    self.advance();
7685                    let _ = self.match_keyword("ROW");
7686                } else if self.check_keyword("NO") {
7687                    self.advance();
7688                    let _ = self.match_keyword("OTHERS");
7689                } else if self.check_keyword("GROUP") || self.check_keyword("TIES") {
7690                    self.advance();
7691                }
7692            }
7693            Ok(WindowFrame {
7694                kind,
7695                start,
7696                end: None,
7697            })
7698        }
7699    }
7700
7701    fn parse_window_frame_bound(&mut self) -> Result<WindowFrameBound> {
7702        if self.check_keyword("CURRENT") {
7703            self.advance();
7704            let _ = self.match_keyword("ROW");
7705            Ok(WindowFrameBound::CurrentRow)
7706        } else if self.match_token(TokenType::Unbounded) {
7707            if self.match_token(TokenType::Preceding) {
7708                Ok(WindowFrameBound::Preceding(None))
7709            } else {
7710                self.expect(TokenType::Following)?;
7711                Ok(WindowFrameBound::Following(None))
7712            }
7713        } else {
7714            let n = self.parse_expr()?;
7715            if self.match_token(TokenType::Preceding) {
7716                Ok(WindowFrameBound::Preceding(Some(Box::new(n))))
7717            } else {
7718                self.expect(TokenType::Following)?;
7719                Ok(WindowFrameBound::Following(Some(Box::new(n))))
7720            }
7721        }
7722    }
7723
7724    fn parse_primary(&mut self) -> Result<Expr> {
7725        let token = self.peek().clone();
7726
7727        // DuckDB / Spark leading-dot float literal: `.5`, `.25`. The
7728        // tokenizer emits `Dot` then `Number`; glue them back together.
7729        if matches!(token.token_type, TokenType::Dot)
7730            && matches!(
7731                self.peek_offset(1).map(|t| &t.token_type),
7732                Some(TokenType::Number)
7733            )
7734        {
7735            self.advance();
7736            let n = self.peek().value.clone();
7737            self.advance();
7738            return Ok(Expr::Number(format!("0.{}", n)));
7739        }
7740
7741        match &token.token_type {
7742            TokenType::Number => {
7743                self.advance();
7744                // Trailing-dot fractional literal: `10.` — accept the dot as
7745                // part of the number when it isn't followed by something that
7746                // would be a member access (column reference like `t.col` or
7747                // tuple element access).
7748                let mut value = token.value;
7749                if self.peek_type() == &TokenType::Dot {
7750                    let after = self.peek_offset(1).map(|t| &t.token_type);
7751                    let looks_like_member = matches!(
7752                        after,
7753                        Some(TokenType::Identifier)
7754                            | Some(TokenType::Number)
7755                            | Some(TokenType::Star)
7756                    );
7757                    if !looks_like_member {
7758                        self.advance();
7759                        value.push('.');
7760                    }
7761                }
7762                // Spark / Hive float suffixes: `10.0F`, `20L`, `3.14D`, `5BD`.
7763                // Swallow the suffix identifier so the literal parses.
7764                if self.is_name_token() {
7765                    let v = self.peek().value.as_str();
7766                    if matches!(v, "F" | "f" | "L" | "l" | "D" | "d" | "BD" | "bd") {
7767                        self.advance();
7768                    }
7769                }
7770                Ok(Expr::Number(value))
7771            }
7772            TokenType::HexString => {
7773                self.advance();
7774                Ok(Expr::Number(token.value))
7775            }
7776            TokenType::String => {
7777                self.advance();
7778                // ANSI / Oracle interval literal: `'1-2' YEAR TO MONTH`,
7779                // `'12 03:04:05.6' DAY TO SECOND(2)`. After a bare string,
7780                // accept an optional interval qualifier and swallow it so
7781                // the surrounding expression parses. Skip this when the
7782                // previous token was `INTERVAL` — that has its own path.
7783                let prev_was_interval = self
7784                    .pos
7785                    .checked_sub(2)
7786                    .and_then(|i| self.tokens.get(i))
7787                    .map(|t| matches!(t.token_type, TokenType::Interval))
7788                    .unwrap_or(false);
7789                if !prev_was_interval
7790                    && matches!(
7791                        self.peek_type(),
7792                        TokenType::Year
7793                            | TokenType::Month
7794                            | TokenType::Day
7795                            | TokenType::Hour
7796                            | TokenType::Minute
7797                            | TokenType::Second
7798                    )
7799                {
7800                    self.advance();
7801                    if self.match_token(TokenType::LParen) {
7802                        // qualifier precision: `SECOND(2)`
7803                        if matches!(self.peek_type(), TokenType::Number) {
7804                            self.advance();
7805                            if self.match_token(TokenType::Comma) {
7806                                if matches!(self.peek_type(), TokenType::Number) {
7807                                    self.advance();
7808                                }
7809                            }
7810                        }
7811                        let _ = self.match_token(TokenType::RParen);
7812                    }
7813                    if self.is_name_token() && self.peek().value.eq_ignore_ascii_case("TO") {
7814                        self.advance();
7815                        if matches!(
7816                            self.peek_type(),
7817                            TokenType::Year
7818                                | TokenType::Month
7819                                | TokenType::Day
7820                                | TokenType::Hour
7821                                | TokenType::Minute
7822                                | TokenType::Second
7823                        ) {
7824                            self.advance();
7825                            if self.match_token(TokenType::LParen) {
7826                                if matches!(self.peek_type(), TokenType::Number) {
7827                                    self.advance();
7828                                }
7829                                let _ = self.match_token(TokenType::RParen);
7830                            }
7831                        }
7832                    }
7833                    return Ok(Expr::Cast {
7834                        expr: Box::new(Expr::StringLiteral(token.value)),
7835                        data_type: DataType::Interval,
7836                    });
7837                }
7838                // SQL-92 / MySQL: adjacent string literals concatenate
7839                // (`'a' 'b'` → `'ab'`). Also fold in identifier-quoted
7840                // strings the lexer surfaces when MySQL ANSI_QUOTES is off
7841                // (`"a" "b" "c"` reaches us as a String followed by quoted
7842                // identifiers). Greedily consume any run of immediately
7843                // following String / quoted-Identifier tokens.
7844                let mut combined = token.value;
7845                loop {
7846                    let next = self.peek();
7847                    if matches!(next.token_type, TokenType::String) {
7848                        combined.push_str(&next.value);
7849                        self.advance();
7850                        continue;
7851                    }
7852                    if matches!(next.token_type, TokenType::Identifier)
7853                        && (next.quote_char == '"' || next.quote_char == '\'')
7854                    {
7855                        combined.push_str(&next.value);
7856                        self.advance();
7857                        continue;
7858                    }
7859                    break;
7860                }
7861                Ok(Expr::StringLiteral(combined))
7862            }
7863            TokenType::NationalString => {
7864                self.advance();
7865                Ok(Expr::NationalStringLiteral(token.value))
7866            }
7867            TokenType::True => {
7868                self.advance();
7869                Ok(Expr::Boolean(true))
7870            }
7871            TokenType::False => {
7872                self.advance();
7873                Ok(Expr::Boolean(false))
7874            }
7875            TokenType::Null => {
7876                self.advance();
7877                Ok(Expr::Null)
7878            }
7879            TokenType::Default => {
7880                self.advance();
7881                // MySQL `DEFAULT(col)` — emit as function call so the
7882                // surrounding tuple parses.
7883                if self.peek_type() == &TokenType::LParen {
7884                    self.advance();
7885                    let args = if self.peek_type() != &TokenType::RParen {
7886                        let mut a = vec![self.parse_function_arg()?];
7887                        while self.match_token(TokenType::Comma) {
7888                            a.push(self.parse_function_arg()?);
7889                        }
7890                        a
7891                    } else {
7892                        vec![]
7893                    };
7894                    self.expect(TokenType::RParen)?;
7895                    return Ok(Expr::Function {
7896                        name: "DEFAULT".to_string(),
7897                        args,
7898                        distinct: false,
7899                        filter: None,
7900                        over: None,
7901                        order_by: Vec::new(),
7902                        within_group: false,
7903                    });
7904                }
7905                Ok(Expr::Default)
7906            }
7907            TokenType::Star => {
7908                self.advance();
7909                Ok(Expr::Wildcard)
7910            }
7911            // ClickHouse / various: `values` used as a column name inside
7912            // expressions (e.g. `arrayExists(x -> x > 5, values)`). Accept
7913            // it as a bare column reference when it isn't followed by `(`.
7914            TokenType::Values
7915                if self.peek_offset(1).map(|t| &t.token_type) != Some(&TokenType::LParen) =>
7916            {
7917                self.advance();
7918                Ok(Expr::Column {
7919                    table: None,
7920                    name: token.value,
7921                    quote_style: QuoteStyle::None,
7922                    table_quote_style: QuoteStyle::None,
7923                })
7924            }
7925            TokenType::Parameter => {
7926                self.advance();
7927                Ok(Expr::Parameter(token.value))
7928            }
7929
7930            // ── `@var`, `@@global_var`, `:var` style placeholders ──
7931            //
7932            // MySQL/T-SQL session and global variables tokenize as a bare
7933            // `@` (or `:`) followed by an identifier. We glue the prefix and
7934            // following name into a single `Parameter` expression so the
7935            // surrounding query parses.
7936            TokenType::AtSign | TokenType::Colon => {
7937                self.advance();
7938                let mut name = match token.token_type {
7939                    TokenType::AtSign => String::from("@"),
7940                    TokenType::Colon => String::from(":"),
7941                    _ => unreachable!(),
7942                };
7943                // T-SQL `@@global` — second `@`.
7944                if matches!(token.token_type, TokenType::AtSign)
7945                    && self.peek_type() == &TokenType::AtSign
7946                {
7947                    name.push('@');
7948                    self.advance();
7949                }
7950                // Name part: identifier-or-keyword, number, or none.
7951                // T-SQL accepts reserved keywords after `@` (e.g. `@limit`,
7952                // `@order`). Accept any token that "looks like" a name.
7953                if self.is_name_token()
7954                    || matches!(
7955                        self.peek_type(),
7956                        TokenType::Limit
7957                            | TokenType::Offset
7958                            | TokenType::Order
7959                            | TokenType::Group
7960                            | TokenType::Having
7961                            | TokenType::Where
7962                            | TokenType::From
7963                            | TokenType::Select
7964                            | TokenType::Insert
7965                            | TokenType::Update
7966                            | TokenType::Delete
7967                            | TokenType::Union
7968                            | TokenType::Intersect
7969                            | TokenType::Except
7970                            | TokenType::Join
7971                            | TokenType::Inner
7972                            | TokenType::Cross
7973                            | TokenType::On
7974                            | TokenType::As
7975                            | TokenType::Distinct
7976                            | TokenType::Default
7977                            | TokenType::Null
7978                            | TokenType::True
7979                            | TokenType::False
7980                            | TokenType::Date
7981                            | TokenType::Time
7982                            | TokenType::Timestamp
7983                            | TokenType::Year
7984                            | TokenType::Month
7985                            | TokenType::Day
7986                            | TokenType::Hour
7987                            | TokenType::Minute
7988                            | TokenType::Second
7989                    )
7990                {
7991                    let nt = self.advance().clone();
7992                    name.push_str(&nt.value);
7993                } else if matches!(self.peek_type(), TokenType::Number | TokenType::Int) {
7994                    let nt = self.advance().clone();
7995                    name.push_str(&nt.value);
7996                }
7997                Ok(Expr::Parameter(name))
7998            }
7999
8000            // ── DuckDB / BigQuery struct literal: `{ key: expr, ... }` ──
8001            //
8002            // We capture the values as positional `STRUCT(...)` arguments
8003            // (keys are syntactically optional). This keeps surrounding
8004            // expressions parseable; the original AST shape is not preserved
8005            // because there is no dedicated struct-literal variant yet.
8006            TokenType::LBrace => {
8007                self.advance();
8008                let mut args = Vec::new();
8009                if self.peek_type() != &TokenType::RBrace {
8010                    loop {
8011                        // Optional `key:` prefix — discard the key, keep value.
8012                        if self.is_name_token()
8013                            && self
8014                                .peek_offset(1)
8015                                .is_some_and(|t| t.token_type == TokenType::Colon)
8016                        {
8017                            self.advance(); // key
8018                            self.advance(); // colon
8019                        } else if self.peek_type() == &TokenType::String
8020                            && self
8021                                .peek_offset(1)
8022                                .is_some_and(|t| t.token_type == TokenType::Colon)
8023                        {
8024                            self.advance(); // string key
8025                            self.advance(); // colon
8026                        }
8027                        let value = self.parse_expr()?;
8028                        args.push(value);
8029                        if !self.match_token(TokenType::Comma) {
8030                            break;
8031                        }
8032                    }
8033                }
8034                self.expect(TokenType::RBrace)?;
8035                Ok(Expr::Function {
8036                    name: "STRUCT".to_string(),
8037                    args,
8038                    distinct: false,
8039                    filter: None,
8040                    over: None,
8041                    order_by: Vec::new(),
8042                    within_group: false,
8043                })
8044            }
8045
8046            // ── CAST ────────────────────────────────────────────────
8047            TokenType::Cast
8048                if self
8049                    .peek_offset(1)
8050                    .is_some_and(|t| t.token_type == TokenType::LParen) =>
8051            {
8052                self.advance();
8053                self.expect(TokenType::LParen)?;
8054                let expr = self.parse_expr()?;
8055                // Standard form: `CAST(expr AS type)`. ClickHouse also accepts
8056                // `CAST(expr, 'TypeName')` with a string literal type.
8057                let data_type = if self.match_token(TokenType::As) {
8058                    self.parse_data_type()?
8059                } else if self.match_token(TokenType::Comma) {
8060                    if matches!(self.peek_type(), TokenType::String) {
8061                        let s = self.peek().value.clone();
8062                        self.advance();
8063                        DataType::Unknown(s)
8064                    } else {
8065                        self.parse_data_type()?
8066                    }
8067                } else {
8068                    self.expect(TokenType::As)?; // produce the canonical error
8069                    self.parse_data_type()?
8070                };
8071                // BigQuery: `CAST(expr AS type FORMAT 'fmt' [AT TIME ZONE …])`.
8072                if self.check_keyword("FORMAT") {
8073                    self.advance();
8074                    let _ = self.parse_expr();
8075                    if self.check_keyword("AT")
8076                        && self
8077                            .peek_offset(1)
8078                            .map(|t| t.value.eq_ignore_ascii_case("TIME"))
8079                            .unwrap_or(false)
8080                        && self
8081                            .peek_offset(2)
8082                            .map(|t| t.value.eq_ignore_ascii_case("ZONE"))
8083                            .unwrap_or(false)
8084                    {
8085                        self.advance();
8086                        self.advance();
8087                        self.advance();
8088                        let _ = self.parse_expr();
8089                    }
8090                }
8091                self.expect(TokenType::RParen)?;
8092                Ok(Expr::Cast {
8093                    expr: Box::new(expr),
8094                    data_type,
8095                })
8096            }
8097
8098            // ── EXTRACT ─────────────────────────────────────────────
8099            TokenType::Extract => {
8100                self.advance();
8101                self.expect(TokenType::LParen)?;
8102                let field = self.parse_datetime_field()?;
8103                self.expect(TokenType::From)?;
8104                let expr = self.parse_expr()?;
8105                // BigQuery: `EXTRACT(field FROM ts AT TIME ZONE 'tz')`.
8106                // Swallow the trailing timezone clause so the function
8107                // parses; we lose the explicit zone but keep the AST.
8108                if self.check_keyword("AT")
8109                    && self
8110                        .peek_offset(1)
8111                        .map(|t| t.value.eq_ignore_ascii_case("TIME"))
8112                        .unwrap_or(false)
8113                    && self
8114                        .peek_offset(2)
8115                        .map(|t| t.value.eq_ignore_ascii_case("ZONE"))
8116                        .unwrap_or(false)
8117                {
8118                    self.advance(); // AT
8119                    self.advance(); // TIME
8120                    self.advance(); // ZONE
8121                    let _ = self.parse_expr();
8122                }
8123                self.expect(TokenType::RParen)?;
8124                Ok(Expr::Extract {
8125                    field,
8126                    expr: Box::new(expr),
8127                })
8128            }
8129
8130            // ── CASE ────────────────────────────────────────────────
8131            TokenType::Case => self.parse_case_expr(),
8132
8133            // ── EXISTS ──────────────────────────────────────────────
8134            TokenType::Exists => {
8135                self.advance();
8136                self.expect(TokenType::LParen)?;
8137                let subquery = self.parse_statement_inner()?;
8138                self.expect(TokenType::RParen)?;
8139                Ok(Expr::Exists {
8140                    subquery: Box::new(subquery),
8141                    negated: false,
8142                })
8143            }
8144
8145            // ── NOT EXISTS ──────────────────────────────────────────
8146            TokenType::Not
8147                if {
8148                    let next_pos = self.pos + 1;
8149                    next_pos < self.tokens.len()
8150                        && self.tokens[next_pos].token_type == TokenType::Exists
8151                } =>
8152            {
8153                self.advance(); // NOT
8154                self.advance(); // EXISTS
8155                self.expect(TokenType::LParen)?;
8156                let subquery = self.parse_statement_inner()?;
8157                self.expect(TokenType::RParen)?;
8158                Ok(Expr::Exists {
8159                    subquery: Box::new(subquery),
8160                    negated: true,
8161                })
8162            }
8163
8164            // ── INTERVAL ────────────────────────────────────────────
8165            TokenType::Interval => {
8166                self.advance();
8167                // ClickHouse accepts arithmetic in the value position
8168                // (e.g. `INTERVAL number - 15 MONTH`). Parse an additive
8169                // expression instead of a single primary so the trailing
8170                // unit keyword is reached cleanly.
8171                let value = self.parse_addition()?;
8172                let unit = self.try_parse_datetime_field();
8173                // ANSI / Spark composite ranges: `INTERVAL '0-0' YEAR TO MONTH`,
8174                // `INTERVAL '15:40' HOUR TO MINUTE` etc. Swallow the trailing
8175                // `TO <unit>` clause; we keep only the leading unit.
8176                if self.check_keyword("TO") {
8177                    let saved = self.pos;
8178                    self.advance();
8179                    if self.try_parse_datetime_field().is_none() {
8180                        self.pos = saved;
8181                    }
8182                }
8183                // PostgreSQL fractional precision on the trailing unit:
8184                //   `INTERVAL '1.234' SECOND(2)`, `INTERVAL '…' MINUTE TO SECOND(2)`.
8185                // Swallow the `(N)` after the unit.
8186                if self.peek_type() == &TokenType::LParen
8187                    && self
8188                        .peek_offset(1)
8189                        .map(|t| matches!(t.token_type, TokenType::Number))
8190                        .unwrap_or(false)
8191                    && self
8192                        .peek_offset(2)
8193                        .map(|t| matches!(t.token_type, TokenType::RParen))
8194                        .unwrap_or(false)
8195                {
8196                    self.advance();
8197                    self.advance();
8198                    self.advance();
8199                }
8200                Ok(Expr::Interval {
8201                    value: Box::new(value),
8202                    unit,
8203                })
8204            }
8205
8206            // ── Parenthesized expression or subquery ────────────────
8207            TokenType::LParen => {
8208                self.advance();
8209                // Check for subquery
8210                if matches!(self.peek_type(), TokenType::Select | TokenType::With) {
8211                    let subquery = self.parse_statement_inner()?;
8212                    self.expect(TokenType::RParen)?;
8213                    Ok(Expr::Subquery(Box::new(subquery)))
8214                } else {
8215                    let expr = self.parse_expr()?;
8216                    // ClickHouse: `(expr AS alias)` — swallow the alias.
8217                    if self.match_token(TokenType::As) && self.is_name_token() {
8218                        self.advance();
8219                    }
8220                    // Tuple: (a, b, c) — also accept ClickHouse trailing
8221                    // comma `(a,)`, `(a, b,)`.
8222                    if self.match_token(TokenType::Comma) {
8223                        let mut items = vec![expr];
8224                        if self.peek_type() == &TokenType::RParen {
8225                            self.advance();
8226                            return Ok(Expr::Tuple(items));
8227                        }
8228                        let next = self.parse_expr()?;
8229                        if self.match_token(TokenType::As) && self.is_name_token() {
8230                            self.advance();
8231                        }
8232                        items.push(next);
8233                        while self.match_token(TokenType::Comma) {
8234                            if self.peek_type() == &TokenType::RParen {
8235                                break;
8236                            }
8237                            let n = self.parse_expr()?;
8238                            if self.match_token(TokenType::As) && self.is_name_token() {
8239                                self.advance();
8240                            }
8241                            items.push(n);
8242                        }
8243                        self.expect(TokenType::RParen)?;
8244                        Ok(Expr::Tuple(items))
8245                    } else {
8246                        self.expect(TokenType::RParen)?;
8247                        Ok(Expr::Nested(Box::new(expr)))
8248                    }
8249                }
8250            }
8251
8252            // ── DuckDB MAP literal: `MAP { 'k': v, ... }` ──────────
8253            // Captured as a `MAP(...)` function call with the values as
8254            // positional arguments; keys are discarded for now.
8255            TokenType::Map
8256                if self
8257                    .peek_offset(1)
8258                    .map(|t| matches!(t.token_type, TokenType::LBrace))
8259                    .unwrap_or(false) =>
8260            {
8261                self.advance(); // MAP
8262                self.advance(); // {
8263                let mut args = Vec::new();
8264                if self.peek_type() != &TokenType::RBrace {
8265                    loop {
8266                        // Optional `key:` prefix — keep the value only.
8267                        let saved = self.pos;
8268                        let _ = self.parse_expr()?;
8269                        if self.match_token(TokenType::Colon) {
8270                            let v = self.parse_expr()?;
8271                            args.push(v);
8272                        } else {
8273                            self.pos = saved;
8274                            let v = self.parse_expr()?;
8275                            args.push(v);
8276                        }
8277                        if !self.match_token(TokenType::Comma) {
8278                            break;
8279                        }
8280                    }
8281                }
8282                self.expect(TokenType::RBrace)?;
8283                Ok(Expr::Function {
8284                    name: "MAP".to_string(),
8285                    args,
8286                    distinct: false,
8287                    filter: None,
8288                    over: None,
8289                    order_by: Vec::new(),
8290                    within_group: false,
8291                })
8292            }
8293
8294            // ── Array literal: ARRAY[...] ──────────────────────────
8295            TokenType::Array => {
8296                self.advance();
8297                if self.match_token(TokenType::LBracket) {
8298                    let items = self.parse_array_items(TokenType::RBracket)?;
8299                    self.expect(TokenType::RBracket)?;
8300                    Ok(Expr::ArrayLiteral(items))
8301                } else if self.match_token(TokenType::LParen) {
8302                    // ARRAY(SELECT ...) for subqueries, or Hive
8303                    // `ARRAY(expr, expr, ...)` for inline array literals.
8304                    if matches!(self.peek_type(), TokenType::Select | TokenType::With) {
8305                        let subquery = self.parse_statement_inner()?;
8306                        self.expect(TokenType::RParen)?;
8307                        Ok(Expr::Subquery(Box::new(subquery)))
8308                    } else {
8309                        let items = self.parse_array_items(TokenType::RParen)?;
8310                        self.expect(TokenType::RParen)?;
8311                        Ok(Expr::ArrayLiteral(items))
8312                    }
8313                } else {
8314                    Ok(Expr::Column {
8315                        table: None,
8316                        name: "ARRAY".to_string(),
8317                        quote_style: QuoteStyle::None,
8318                        table_quote_style: QuoteStyle::None,
8319                    })
8320                }
8321            }
8322
8323            // ── Bracket array literal: [...] ────────────────────────
8324            TokenType::LBracket => {
8325                self.advance();
8326                let items = self.parse_array_items(TokenType::RBracket)?;
8327                // DuckDB list comprehension: `[expr FOR x IN list [IF cond]]`.
8328                // Swallow the comprehension tail opaquely; we keep the
8329                // initial expression as the AST representation.
8330                if self.peek().value.eq_ignore_ascii_case("FOR") {
8331                    let mut depth = 1_i32;
8332                    while depth > 0 && !matches!(self.peek_type(), TokenType::Eof) {
8333                        match self.peek_type() {
8334                            TokenType::LBracket | TokenType::LParen => depth += 1,
8335                            TokenType::RBracket => {
8336                                depth -= 1;
8337                                if depth == 0 {
8338                                    break;
8339                                }
8340                            }
8341                            TokenType::RParen => depth -= 1,
8342                            _ => {}
8343                        }
8344                        self.advance();
8345                    }
8346                }
8347                self.expect(TokenType::RBracket)?;
8348                Ok(Expr::ArrayLiteral(items))
8349            }
8350
8351            // ── Identifier: column ref, function call, or qualified name ─
8352            _ if self.is_name_token() || self.is_data_type_token() => {
8353                let name_token = self.advance().clone();
8354                let name = name_token.value.clone();
8355                let name_qs = quote_style_from_char(name_token.quote_char);
8356
8357                // ── ANSI typed string literals: DATE 'x', TIMESTAMP 'x', TIME 'x' ──
8358                if matches!(
8359                    name_token.token_type,
8360                    TokenType::Date
8361                        | TokenType::Timestamp
8362                        | TokenType::TimestampTz
8363                        | TokenType::Time
8364                ) {
8365                    // PG / ANSI `TIMESTAMP [WITH [LOCAL] TIME ZONE] 'lit'`
8366                    // and `TIMESTAMP WITHOUT TIME ZONE 'lit'`. Swallow the
8367                    // optional timezone modifier so the string literal
8368                    // attaches to the right typed-literal form.
8369                    let mut explicit_tz: Option<bool> = None;
8370                    if matches!(
8371                        name_token.token_type,
8372                        TokenType::Timestamp | TokenType::Time
8373                    ) && self.peek_type() == &TokenType::With
8374                    {
8375                        let saved = self.pos;
8376                        self.advance(); // WITH
8377                        let _ = self.match_keyword("LOCAL");
8378                        if self.check_keyword("TIME")
8379                            && self
8380                                .peek_offset(1)
8381                                .map(|t| t.value.eq_ignore_ascii_case("ZONE"))
8382                                .unwrap_or(false)
8383                        {
8384                            self.advance(); // TIME
8385                            self.advance(); // ZONE
8386                            explicit_tz = Some(true);
8387                        } else {
8388                            self.pos = saved;
8389                        }
8390                    } else if matches!(
8391                        name_token.token_type,
8392                        TokenType::Timestamp | TokenType::Time
8393                    ) && self.check_keyword("WITHOUT")
8394                    {
8395                        let saved = self.pos;
8396                        self.advance(); // WITHOUT
8397                        if self.check_keyword("TIME")
8398                            && self
8399                                .peek_offset(1)
8400                                .map(|t| t.value.eq_ignore_ascii_case("ZONE"))
8401                                .unwrap_or(false)
8402                        {
8403                            self.advance();
8404                            self.advance();
8405                            explicit_tz = Some(false);
8406                        } else {
8407                            self.pos = saved;
8408                        }
8409                    }
8410
8411                    if self.peek_type() == &TokenType::String {
8412                        let value_token = self.advance().clone();
8413                        let data_type = match name_token.token_type {
8414                            TokenType::Date => DataType::Date,
8415                            TokenType::Timestamp => DataType::Timestamp {
8416                                precision: None,
8417                                with_tz: explicit_tz.unwrap_or(false),
8418                            },
8419                            TokenType::TimestampTz => DataType::Timestamp {
8420                                precision: None,
8421                                with_tz: true,
8422                            },
8423                            TokenType::Time => DataType::Time { precision: None },
8424                            _ => unreachable!(),
8425                        };
8426                        return Ok(Expr::Cast {
8427                            expr: Box::new(Expr::StringLiteral(value_token.value)),
8428                            data_type,
8429                        });
8430                    }
8431                }
8432
8433                // ── ANSI / PG generic typed string literal: `TYPE 'lit'` ──
8434                // (e.g. `bool 'true'`, `int4 '42'`, `varchar 'x'`). When the
8435                // current token is a data-type keyword (not already handled
8436                // above) and a String literal follows, fold the pair into a
8437                // Cast so the surrounding expression parses.
8438                if self.is_data_type_token_kind(&name_token.token_type)
8439                    && self.peek_type() == &TokenType::String
8440                {
8441                    let value_token = self.advance().clone();
8442                    let data_type = match name_token.token_type {
8443                        TokenType::Boolean => DataType::Boolean,
8444                        TokenType::Int | TokenType::Integer => DataType::Int,
8445                        TokenType::BigInt => DataType::BigInt,
8446                        TokenType::SmallInt => DataType::SmallInt,
8447                        TokenType::TinyInt => DataType::TinyInt,
8448                        TokenType::Float => DataType::Float,
8449                        TokenType::Double => DataType::Double,
8450                        TokenType::Real => DataType::Real,
8451                        TokenType::Decimal => DataType::Decimal {
8452                            precision: None,
8453                            scale: None,
8454                        },
8455                        TokenType::Numeric => DataType::Numeric {
8456                            precision: None,
8457                            scale: None,
8458                        },
8459                        TokenType::Varchar => DataType::Varchar(None),
8460                        TokenType::Char => DataType::Char(None),
8461                        TokenType::Text => DataType::Text,
8462                        TokenType::Json => DataType::Json,
8463                        TokenType::Jsonb => DataType::Jsonb,
8464                        TokenType::Uuid => DataType::Uuid,
8465                        TokenType::Bytea => DataType::Bytea,
8466                        TokenType::Blob => DataType::Blob,
8467                        _ => DataType::Unknown(name.clone()),
8468                    };
8469                    return Ok(Expr::Cast {
8470                        expr: Box::new(Expr::StringLiteral(value_token.value)),
8471                        data_type,
8472                    });
8473                }
8474
8475                // PostgreSQL geometric / network / OID type aliases used as
8476                // typed-literal prefixes (e.g. `box '(1,2,3,4)'`,
8477                // `point '(1,2)'`, `inet '127.0.0.1'`). Recognize a curated
8478                // list of bare identifiers followed by a String literal and
8479                // fold the pair into a Cast(Unknown(name)).
8480                if name_qs == QuoteStyle::None
8481                    && self.peek_type() == &TokenType::String
8482                    && matches!(
8483                        name.to_ascii_lowercase().as_str(),
8484                        "box"
8485                            | "point"
8486                            | "circle"
8487                            | "line"
8488                            | "lseg"
8489                            | "path"
8490                            | "polygon"
8491                            | "inet"
8492                            | "cidr"
8493                            | "macaddr"
8494                            | "macaddr8"
8495                            | "money"
8496                            | "regclass"
8497                            | "regtype"
8498                            | "regproc"
8499                            | "regprocedure"
8500                            | "regrole"
8501                            | "regnamespace"
8502                            | "regoperator"
8503                            | "regoper"
8504                            | "oid"
8505                            | "xml"
8506                            | "tsvector"
8507                            | "tsquery"
8508                            | "jsonpath"
8509                            | "name"
8510                            | "bit"
8511                            | "varbit"
8512                            | "interval"
8513                            | "bool"
8514                            | "int2"
8515                            | "int4"
8516                            | "int8"
8517                            | "float4"
8518                            | "float8"
8519                    )
8520                {
8521                    let value_token = self.advance().clone();
8522                    return Ok(Expr::Cast {
8523                        expr: Box::new(Expr::StringLiteral(value_token.value)),
8524                        data_type: DataType::Unknown(name.clone()),
8525                    });
8526                }
8527
8528                // ── Bare niladic temporal keywords: CURRENT_TIME, CURRENT_DATE,
8529                //    CURRENT_TIMESTAMP, LOCALTIMESTAMP (no parens) ──
8530                // ANSI SQL allows these without parentheses. Materialize them
8531                // as typed functions so the generator can emit dialect-specific
8532                // forms (e.g. TSQL requires CAST(GETDATE() AS TIME) rather than
8533                // a bare CURRENT_TIME reserved word).
8534                if name_qs == QuoteStyle::None && self.peek_type() != &TokenType::LParen {
8535                    let upper = name.to_ascii_uppercase();
8536                    let typed = match upper.as_str() {
8537                        "CURRENT_DATE" => Some(TypedFunction::CurrentDate),
8538                        "CURRENT_TIME" => Some(TypedFunction::CurrentTime),
8539                        "CURRENT_TIMESTAMP" | "LOCALTIMESTAMP" => {
8540                            Some(TypedFunction::CurrentTimestamp)
8541                        }
8542                        _ => None,
8543                    };
8544                    if let Some(tf) = typed {
8545                        return Ok(Expr::TypedFunction {
8546                            func: tf,
8547                            filter: None,
8548                            over: None,
8549                        });
8550                    }
8551                }
8552
8553                // Function call: name(...)
8554                if self.peek_type() == &TokenType::LParen {
8555                    self.advance();
8556
8557                    // TRY_CAST / SAFE_CAST — same shape as `CAST(expr AS type)`.
8558                    // Lower to `Expr::Cast` when an `AS type` follows the first
8559                    // expression; otherwise rewind and fall back to an ordinary
8560                    // function call (e.g. comma-separated args).
8561                    if matches!(name.to_ascii_uppercase().as_str(), "TRY_CAST" | "SAFE_CAST") {
8562                        let save = self.pos;
8563                        let inner = self.parse_expr()?;
8564                        if self.match_token(TokenType::As) {
8565                            let dt = self.parse_data_type()?;
8566                            self.expect(TokenType::RParen)?;
8567                            return Ok(Expr::Cast {
8568                                expr: Box::new(inner),
8569                                data_type: dt,
8570                            });
8571                        }
8572                        self.pos = save;
8573                    }
8574
8575                    // Special: COUNT(*), COUNT(DISTINCT x)
8576                    let distinct = self.match_token(TokenType::Distinct);
8577                    // ANSI / ClickHouse `agg(ALL …)` — `ALL` is the opposite
8578                    // of DISTINCT and the default. Swallow so the args parse.
8579                    if !distinct {
8580                        let _ = self.match_token(TokenType::All);
8581                    }
8582
8583                    // Standard SQL syntactic forms for string functions:
8584                    //   SUBSTRING(expr FROM start [FOR len])
8585                    //   SUBSTRING(expr FOR len)
8586                    //   TRIM([LEADING|TRAILING|BOTH] [chars] FROM expr)
8587                    //   POSITION(needle IN haystack)
8588                    //   OVERLAY(expr PLACING str FROM start [FOR len])
8589                    let upper_name = name.to_ascii_uppercase();
8590                    if !distinct && self.peek_type() != &TokenType::RParen {
8591                        match upper_name.as_str() {
8592                            "SUBSTRING" | "SUBSTR" => {
8593                                let saved = self.pos;
8594                                let first = self.parse_expr()?;
8595                                if self.match_token(TokenType::From) {
8596                                    let start = self.parse_expr()?;
8597                                    let length = if self.check_keyword("FOR") {
8598                                        self.advance();
8599                                        Some(self.parse_expr()?)
8600                                    } else {
8601                                        None
8602                                    };
8603                                    self.expect(TokenType::RParen)?;
8604                                    let mut a = vec![first, start];
8605                                    if let Some(l) = length {
8606                                        a.push(l);
8607                                    }
8608                                    return Ok(Expr::Function {
8609                                        name: name.clone(),
8610                                        args: a,
8611                                        distinct: false,
8612                                        filter: None,
8613                                        over: None,
8614                                        order_by: Vec::new(),
8615                                        within_group: false,
8616                                    });
8617                                } else if self.check_keyword("FOR") {
8618                                    self.advance();
8619                                    let len = self.parse_expr()?;
8620                                    self.expect(TokenType::RParen)?;
8621                                    return Ok(Expr::Function {
8622                                        name: name.clone(),
8623                                        args: vec![first, len],
8624                                        distinct: false,
8625                                        filter: None,
8626                                        over: None,
8627                                        order_by: Vec::new(),
8628                                        within_group: false,
8629                                    });
8630                                }
8631                                self.pos = saved;
8632                            }
8633                            "TRIM" => {
8634                                let saved = self.pos;
8635                                if self.check_keyword("LEADING")
8636                                    || self.check_keyword("TRAILING")
8637                                    || self.check_keyword("BOTH")
8638                                {
8639                                    self.advance();
8640                                }
8641                                if self.peek_type() == &TokenType::From {
8642                                    self.advance();
8643                                    let expr = self.parse_expr()?;
8644                                    self.expect(TokenType::RParen)?;
8645                                    return Ok(Expr::Function {
8646                                        name: name.clone(),
8647                                        args: vec![expr],
8648                                        distinct: false,
8649                                        filter: None,
8650                                        over: None,
8651                                        order_by: Vec::new(),
8652                                        within_group: false,
8653                                    });
8654                                }
8655                                let chars = self.parse_expr()?;
8656                                if self.match_token(TokenType::From) {
8657                                    let expr = self.parse_expr()?;
8658                                    self.expect(TokenType::RParen)?;
8659                                    return Ok(Expr::Function {
8660                                        name: name.clone(),
8661                                        args: vec![expr, chars],
8662                                        distinct: false,
8663                                        filter: None,
8664                                        over: None,
8665                                        order_by: Vec::new(),
8666                                        within_group: false,
8667                                    });
8668                                }
8669                                self.pos = saved;
8670                            }
8671                            "POSITION" => {
8672                                let saved = self.pos;
8673                                let needle = self.parse_expr()?;
8674                                if self.match_token(TokenType::In) {
8675                                    let haystack = self.parse_expr()?;
8676                                    self.expect(TokenType::RParen)?;
8677                                    return Ok(Expr::Function {
8678                                        name: name.clone(),
8679                                        args: vec![needle, haystack],
8680                                        distinct: false,
8681                                        filter: None,
8682                                        over: None,
8683                                        order_by: Vec::new(),
8684                                        within_group: false,
8685                                    });
8686                                }
8687                                self.pos = saved;
8688                            }
8689                            "OVERLAY" => {
8690                                let saved = self.pos;
8691                                let target = self.parse_expr()?;
8692                                if self.check_keyword("PLACING") {
8693                                    self.advance();
8694                                    let placing = self.parse_expr()?;
8695                                    if self.match_token(TokenType::From) {
8696                                        let from = self.parse_expr()?;
8697                                        let len = if self.check_keyword("FOR") {
8698                                            self.advance();
8699                                            Some(self.parse_expr()?)
8700                                        } else {
8701                                            None
8702                                        };
8703                                        self.expect(TokenType::RParen)?;
8704                                        let mut a = vec![target, placing, from];
8705                                        if let Some(l) = len {
8706                                            a.push(l);
8707                                        }
8708                                        return Ok(Expr::Function {
8709                                            name: name.clone(),
8710                                            args: a,
8711                                            distinct: false,
8712                                            filter: None,
8713                                            over: None,
8714                                            order_by: Vec::new(),
8715                                            within_group: false,
8716                                        });
8717                                    }
8718                                }
8719                                self.pos = saved;
8720                            }
8721                            _ => {}
8722                        }
8723                    }
8724
8725                    // MySQL's GROUP_CONCAT has bespoke grammar
8726                    // (ORDER BY ..., SEPARATOR ...) — parse it into a typed
8727                    // expression so the structure is preserved across dialects.
8728                    if name.eq_ignore_ascii_case("GROUP_CONCAT") {
8729                        let expr = self.parse_group_concat_call(distinct)?;
8730                        self.expect(TokenType::RParen)?;
8731                        return Ok(expr);
8732                    }
8733
8734                    let args = if self.peek_type() == &TokenType::RParen {
8735                        vec![]
8736                    } else if self.peek_type() == &TokenType::Star {
8737                        self.advance();
8738                        vec![Expr::Wildcard]
8739                    } else {
8740                        let mut a = vec![self.parse_function_arg()?];
8741                        while self.match_token(TokenType::Comma) {
8742                            a.push(self.parse_function_arg()?);
8743                        }
8744                        a
8745                    };
8746
8747                    // Optional aggregate ORDER BY inside arg list (Postgres / Spark):
8748                    //   array_agg(x ORDER BY y DESC)
8749                    //   string_agg(x, ',' ORDER BY y)
8750                    let mut agg_order_by: Vec<OrderByItem> = vec![];
8751                    if self.peek_type() == &TokenType::Order {
8752                        self.advance();
8753                        self.expect(TokenType::By)?;
8754                        agg_order_by = self.parse_order_by_items()?;
8755                    }
8756                    // BigQuery / Snowflake: `ARRAY_AGG(x [ORDER BY y] LIMIT n)`.
8757                    // Swallow the trailing LIMIT clause inside the function call.
8758                    if self.peek_type() == &TokenType::Limit {
8759                        self.advance();
8760                        let _ = self.parse_expr();
8761                    }
8762                    // DuckDB aggregate-state modifier:
8763                    //   `count(1) EXPORT_STATE` returns the aggregate state
8764                    //   rather than its final value. We don't model it.
8765                    if self.check_keyword("EXPORT_STATE") {
8766                        self.advance();
8767                    }
8768                    self.expect(TokenType::RParen)?;
8769
8770                    // Optional WITHIN GROUP (ORDER BY ...) — ordered-set aggregates
8771                    //   percentile_cont(0.5) WITHIN GROUP (ORDER BY x)
8772                    //   listagg(x, ',') WITHIN GROUP (ORDER BY x)
8773                    let mut within_group = false;
8774                    let mut wg_order_by: Vec<OrderByItem> = vec![];
8775                    if self.check_keyword("WITHIN") {
8776                        self.advance();
8777                        self.expect_keyword("GROUP")?;
8778                        self.expect(TokenType::LParen)?;
8779                        self.expect(TokenType::Order)?;
8780                        self.expect(TokenType::By)?;
8781                        wg_order_by = self.parse_order_by_items()?;
8782                        self.expect(TokenType::RParen)?;
8783                        within_group = true;
8784                    }
8785
8786                    let final_order_by = if within_group {
8787                        wg_order_by
8788                    } else {
8789                        agg_order_by
8790                    };
8791
8792                    // Try to construct a typed function variant only when there are no
8793                    // aggregate-specific clauses (otherwise we lose them).
8794                    if final_order_by.is_empty()
8795                        && !within_group
8796                        && let Some(typed) = Self::try_typed_function(&name, args.clone(), distinct)
8797                    {
8798                        return Ok(typed);
8799                    }
8800
8801                    Ok(Expr::Function {
8802                        name,
8803                        args,
8804                        distinct,
8805                        filter: None,
8806                        over: None,
8807                        order_by: final_order_by,
8808                        within_group,
8809                    })
8810                }
8811                // Qualified column: table.column or table.*
8812                else if self.match_token(TokenType::Dot) {
8813                    if self.peek_type() == &TokenType::Star {
8814                        self.advance();
8815                        Ok(Expr::QualifiedWildcard { table: name })
8816                    } else {
8817                        // ClickHouse JSON subobject and typed access at the
8818                        // first dot: `json.^a`, `json.:Int64`.
8819                        let _ = self.match_token(TokenType::BitwiseXor);
8820                        let _ = self.match_token(TokenType::Colon);
8821                        let (mut col, mut col_qs) = if matches!(self.peek_type(), TokenType::Number)
8822                        {
8823                            // ClickHouse tuple index `x.1`.
8824                            let v = self.peek().value.clone();
8825                            self.advance();
8826                            (v, QuoteStyle::None)
8827                        } else if matches!(self.peek_type(), TokenType::Null) {
8828                            // ClickHouse JSON subcolumn `.null` (e.g.
8829                            // `arr.null`, `t.s.null`). Accept the keyword as
8830                            // a field name in dotted-access position.
8831                            let v = self.peek().value.clone();
8832                            self.advance();
8833                            (v, QuoteStyle::None)
8834                        } else {
8835                            self.expect_name_with_quote()?
8836                        };
8837                        // Handle 3+ part qualified names like `db.schema.table.column`
8838                        // (DuckDB, ClickHouse). We collapse everything except the
8839                        // final segment into the `table` field as a dotted string.
8840                        let mut table = name;
8841                        let mut table_qs = name_qs;
8842                        while self.match_token(TokenType::Dot) {
8843                            if self.peek_type() == &TokenType::Star {
8844                                self.advance();
8845                                let mut full = table;
8846                                full.push('.');
8847                                full.push_str(&col);
8848                                return Ok(Expr::QualifiedWildcard { table: full });
8849                            }
8850                            // ClickHouse JSON subobject (`json.^a`) and typed
8851                            // access (`json.a.:Int64`) — swallow the operator
8852                            // so the following name can be consumed normally.
8853                            let _ = self.match_token(TokenType::BitwiseXor);
8854                            let _ = self.match_token(TokenType::Colon);
8855                            // ClickHouse tuple index (`t.1`): treat number as
8856                            // a synthetic field name.
8857                            let (next_col, next_qs) =
8858                                if matches!(self.peek_type(), TokenType::Number) {
8859                                    let v = self.peek().value.clone();
8860                                    self.advance();
8861                                    (v, QuoteStyle::None)
8862                                } else if matches!(self.peek_type(), TokenType::Null) {
8863                                    let v = self.peek().value.clone();
8864                                    self.advance();
8865                                    (v, QuoteStyle::None)
8866                                } else {
8867                                    self.expect_name_with_quote()?
8868                                };
8869                            table.push('.');
8870                            table.push_str(&col);
8871                            table_qs = col_qs;
8872                            col = next_col;
8873                            col_qs = next_qs;
8874                        }
8875                        // Function call on dotted name: db.schema.func(args).
8876                        if self.peek_type() == &TokenType::LParen {
8877                            self.advance();
8878                            let mut full = table;
8879                            full.push('.');
8880                            full.push_str(&col);
8881                            let args = if self.peek_type() != &TokenType::RParen {
8882                                let mut a = vec![self.parse_function_arg()?];
8883                                while self.match_token(TokenType::Comma) {
8884                                    a.push(self.parse_function_arg()?);
8885                                }
8886                                a
8887                            } else {
8888                                vec![]
8889                            };
8890                            self.expect(TokenType::RParen)?;
8891                            return Ok(Expr::Function {
8892                                name: full,
8893                                args,
8894                                distinct: false,
8895                                filter: None,
8896                                over: None,
8897                                order_by: Vec::new(),
8898                                within_group: false,
8899                            });
8900                        }
8901                        Ok(Expr::Column {
8902                            table: Some(table),
8903                            name: col,
8904                            quote_style: col_qs,
8905                            table_quote_style: table_qs,
8906                        })
8907                    }
8908                } else {
8909                    Ok(Expr::Column {
8910                        table: None,
8911                        name,
8912                        quote_style: name_qs,
8913                        table_quote_style: QuoteStyle::None,
8914                    })
8915                }
8916            }
8917
8918            _ => {
8919                // Fallback: any other token whose value is a valid identifier
8920                // and is immediately followed by `(` is treated as a function
8921                // call. This handles reserved keywords used as Spark/Hive
8922                // built-ins (IF, ALL, ANY, EXISTS, MOD, etc.) and dialect
8923                // functions that happen to collide with token types.
8924                let v = token.value.clone();
8925                let is_word =
8926                    !v.is_empty() && v.chars().all(|c| c.is_ascii_alphanumeric() || c == '_');
8927                if is_word
8928                    && matches!(
8929                        self.peek_offset(1).map(|t| &t.token_type),
8930                        Some(TokenType::LParen)
8931                    )
8932                {
8933                    // TRY_CAST / SAFE_CAST / TRY_TO_TIMESTAMP / … — same
8934                    // shape as `CAST(expr AS type)`. Lower to `Expr::Cast`
8935                    // (or back to a function call when the form doesn't
8936                    // match).
8937                    let upper = v.to_ascii_uppercase();
8938                    if matches!(upper.as_str(), "TRY_CAST" | "SAFE_CAST") {
8939                        self.advance();
8940                        self.advance(); // consume '('
8941                        let inner = self.parse_expr()?;
8942                        if self.match_token(TokenType::As) {
8943                            let data_type = self.parse_data_type()?;
8944                            self.expect(TokenType::RParen)?;
8945                            return Ok(Expr::Cast {
8946                                expr: Box::new(inner),
8947                                data_type,
8948                            });
8949                        }
8950                        // Fall back: treat as ordinary function call.
8951                        let mut args = vec![inner];
8952                        while self.match_token(TokenType::Comma) {
8953                            args.push(self.parse_expr()?);
8954                        }
8955                        self.expect(TokenType::RParen)?;
8956                        return Ok(Expr::Function {
8957                            name: v,
8958                            args,
8959                            distinct: false,
8960                            filter: None,
8961                            over: None,
8962                            order_by: Vec::new(),
8963                            within_group: false,
8964                        });
8965                    }
8966                    self.advance();
8967                    self.advance(); // consume '('
8968                    let upper = v.to_ascii_uppercase();
8969                    // Standard SQL `SUBSTRING(expr FROM start [FOR length])`
8970                    // and MySQL `SUBSTRING(expr FROM start)` / `…FOR length`.
8971                    if matches!(upper.as_str(), "SUBSTRING" | "SUBSTR")
8972                        && self.peek_type() != &TokenType::RParen
8973                    {
8974                        let saved = self.pos;
8975                        let first = self.parse_expr()?;
8976                        if self.match_token(TokenType::From) {
8977                            let start = self.parse_expr()?;
8978                            let length = if self.check_keyword("FOR") {
8979                                self.advance();
8980                                Some(self.parse_expr()?)
8981                            } else {
8982                                None
8983                            };
8984                            self.expect(TokenType::RParen)?;
8985                            let mut args = vec![first, start];
8986                            if let Some(len) = length {
8987                                args.push(len);
8988                            }
8989                            return Ok(Expr::Function {
8990                                name: v,
8991                                args,
8992                                distinct: false,
8993                                filter: None,
8994                                over: None,
8995                                order_by: Vec::new(),
8996                                within_group: false,
8997                            });
8998                        }
8999                        if self.check_keyword("FOR") {
9000                            self.advance();
9001                            let length = self.parse_expr()?;
9002                            self.expect(TokenType::RParen)?;
9003                            return Ok(Expr::Function {
9004                                name: v,
9005                                args: vec![first, length],
9006                                distinct: false,
9007                                filter: None,
9008                                over: None,
9009                                order_by: Vec::new(),
9010                                within_group: false,
9011                            });
9012                        }
9013                        // Fall back: re-parse as comma list.
9014                        self.pos = saved;
9015                    }
9016                    // Standard `TRIM([LEADING|TRAILING|BOTH] [chars] FROM expr)`
9017                    // and `TRIM(expr [, chars])` (already covered by comma).
9018                    if upper == "TRIM" && self.peek_type() != &TokenType::RParen {
9019                        let saved = self.pos;
9020                        if self.check_keyword("LEADING")
9021                            || self.check_keyword("TRAILING")
9022                            || self.check_keyword("BOTH")
9023                        {
9024                            self.advance();
9025                        }
9026                        if self.peek_type() == &TokenType::From {
9027                            self.advance();
9028                            let expr = self.parse_expr()?;
9029                            self.expect(TokenType::RParen)?;
9030                            return Ok(Expr::Function {
9031                                name: v,
9032                                args: vec![expr],
9033                                distinct: false,
9034                                filter: None,
9035                                over: None,
9036                                order_by: Vec::new(),
9037                                within_group: false,
9038                            });
9039                        }
9040                        // chars FROM expr
9041                        let chars = self.parse_expr()?;
9042                        if self.match_token(TokenType::From) {
9043                            let expr = self.parse_expr()?;
9044                            self.expect(TokenType::RParen)?;
9045                            return Ok(Expr::Function {
9046                                name: v,
9047                                args: vec![expr, chars],
9048                                distinct: false,
9049                                filter: None,
9050                                over: None,
9051                                order_by: Vec::new(),
9052                                within_group: false,
9053                            });
9054                        }
9055                        // Plain comma list — fall back.
9056                        self.pos = saved;
9057                    }
9058                    // Standard `OVERLAY(expr PLACING str FROM start [FOR len])`.
9059                    if upper == "OVERLAY" && self.peek_type() != &TokenType::RParen {
9060                        let saved = self.pos;
9061                        let target = self.parse_expr()?;
9062                        if self.check_keyword("PLACING") {
9063                            self.advance();
9064                            let placing = self.parse_expr()?;
9065                            self.expect(TokenType::From)?;
9066                            let from = self.parse_expr()?;
9067                            let len = if self.check_keyword("FOR") {
9068                                self.advance();
9069                                Some(self.parse_expr()?)
9070                            } else {
9071                                None
9072                            };
9073                            self.expect(TokenType::RParen)?;
9074                            let mut args = vec![target, placing, from];
9075                            if let Some(l) = len {
9076                                args.push(l);
9077                            }
9078                            return Ok(Expr::Function {
9079                                name: v,
9080                                args,
9081                                distinct: false,
9082                                filter: None,
9083                                over: None,
9084                                order_by: Vec::new(),
9085                                within_group: false,
9086                            });
9087                        }
9088                        self.pos = saved;
9089                    }
9090                    // Standard `POSITION(needle IN haystack)`.
9091                    if upper == "POSITION" && self.peek_type() != &TokenType::RParen {
9092                        let saved = self.pos;
9093                        let needle = self.parse_expr()?;
9094                        if self.check_keyword("IN") {
9095                            self.advance();
9096                            let haystack = self.parse_expr()?;
9097                            self.expect(TokenType::RParen)?;
9098                            return Ok(Expr::Function {
9099                                name: v,
9100                                args: vec![needle, haystack],
9101                                distinct: false,
9102                                filter: None,
9103                                over: None,
9104                                order_by: Vec::new(),
9105                                within_group: false,
9106                            });
9107                        }
9108                        self.pos = saved;
9109                    }
9110                    let mut args = Vec::new();
9111                    if self.peek_type() != &TokenType::RParen {
9112                        args.push(self.parse_function_arg()?);
9113                        while self.match_token(TokenType::Comma) {
9114                            args.push(self.parse_function_arg()?);
9115                        }
9116                    }
9117                    self.expect(TokenType::RParen)?;
9118                    return Ok(Expr::Function {
9119                        name: v,
9120                        args,
9121                        distinct: false,
9122                        filter: None,
9123                        over: None,
9124                        order_by: Vec::new(),
9125                        within_group: false,
9126                    });
9127                }
9128                Err(SqlglotError::UnexpectedToken { token })
9129            }
9130        }
9131    }
9132
9133    /// Parse a single function-call argument. Accepts the DuckDB / PostgreSQL
9134    /// named-argument syntaxes `name := value` and `name => value` and falls
9135    /// back to a plain expression for positional arguments. The argument
9136    /// name is discarded — we don't model it in the AST.
9137    fn parse_function_arg(&mut self) -> Result<Expr> {
9138        // Hive table-valued function clause: `noop(on tbl partition by p
9139        // order by q distribute by r cluster by s sort by t)`. The arg
9140        // list begins with the `ON` keyword and is followed by a series
9141        // of windowing-style clauses we don't model. Swallow it as an
9142        // opaque payload so we don't reject the call.
9143        if matches!(self.peek_type(), TokenType::On) {
9144            let mut depth = 0usize;
9145            while !matches!(self.peek_type(), TokenType::Eof) {
9146                match self.peek_type() {
9147                    TokenType::LParen => depth += 1,
9148                    TokenType::RParen => {
9149                        if depth == 0 {
9150                            break;
9151                        }
9152                        depth -= 1;
9153                    }
9154                    TokenType::Comma if depth == 0 => break,
9155                    _ => {}
9156                }
9157                self.advance();
9158            }
9159            return Ok(Expr::Null);
9160        }
9161        if self.is_name_token()
9162            || self.is_data_type_token()
9163            || matches!(self.peek_type(), TokenType::Recursive)
9164        {
9165            let next = self.peek_offset(1).map(|t| &t.token_type);
9166            if matches!(next, Some(TokenType::Colon)) {
9167                let after = self.peek_offset(2).map(|t| &t.token_type);
9168                if matches!(after, Some(TokenType::Eq)) {
9169                    self.advance();
9170                    self.advance();
9171                    self.advance();
9172                    return self.parse_expr();
9173                }
9174            }
9175            if matches!(next, Some(TokenType::DoubleArrow)) {
9176                self.advance();
9177                self.advance();
9178                return self.parse_expr();
9179            }
9180        }
9181        // ClickHouse table functions: `view(SELECT …)`, `cluster(…)` etc.
9182        // accept a full SELECT / WITH / UNION inside the arg list. Parse
9183        // it as a Subquery so the surrounding call closes properly.
9184        if matches!(self.peek_type(), TokenType::Select | TokenType::With) {
9185            let stmt = self.parse_statement_inner()?;
9186            return Ok(Expr::Subquery(Box::new(stmt)));
9187        }
9188        let mut expr = self.parse_expr()?;
9189        // Oracle / Snowflake / MySQL `JSON_OBJECT('k' : value, ...)` and the
9190        // `JSON_OBJECTAGG(k : v)` family use `:` as a key-value separator
9191        // inside function args. After parsing the first expression, swallow
9192        // a bare `:` and parse the value side; emit the value as the arg
9193        // (we don't model JSON key-value pairs in the AST). Only fire when
9194        // the next-after-colon is not another `:` (`::` cast) and not `=`
9195        // (`:=` named arg, already handled above).
9196        if matches!(self.peek_type(), TokenType::Colon)
9197            && !matches!(
9198                self.peek_offset(1).map(|t| &t.token_type),
9199                Some(TokenType::Colon) | Some(TokenType::Eq)
9200            )
9201        {
9202            self.advance(); // :
9203            expr = self.parse_expr()?;
9204            // Optional `FORMAT JSON` suffix (Oracle).
9205            if self.peek().value.eq_ignore_ascii_case("FORMAT")
9206                && self
9207                    .peek_offset(1)
9208                    .map(|t| t.value.eq_ignore_ascii_case("JSON"))
9209                    .unwrap_or(false)
9210            {
9211                self.advance();
9212                self.advance();
9213            }
9214        }
9215        // ClickHouse: `func(expr AS alias)` — swallow the alias.
9216        if self.match_token(TokenType::As) && self.is_name_token() {
9217            self.advance();
9218        }
9219        // Spark / DataBricks UDTF call: `UDTF(TABLE(t) [PARTITION BY cols]
9220        // [ORDER BY cols])`. Swallow the table-argument modifiers opaquely.
9221        if self.peek_type() == &TokenType::Partition
9222            && self
9223                .peek_offset(1)
9224                .map(|t| matches!(t.token_type, TokenType::By))
9225                .unwrap_or(false)
9226        {
9227            self.advance(); // PARTITION
9228            self.advance(); // BY
9229            // Comma-separated expression list (column refs / exprs).
9230            let _ = self.parse_expr()?;
9231            while self.match_token(TokenType::Comma) {
9232                let _ = self.parse_expr()?;
9233            }
9234        }
9235        if self.peek_type() == &TokenType::Order
9236            && self
9237                .peek_offset(1)
9238                .map(|t| matches!(t.token_type, TokenType::By))
9239                .unwrap_or(false)
9240        {
9241            self.advance(); // ORDER
9242            self.advance(); // BY
9243            let _ = self.parse_order_by_items()?;
9244        }
9245        // BigQuery / DuckDB / Snowflake / Oracle window-function nulls
9246        // modifier: `LAST_VALUE(arg IGNORE NULLS)`, `... RESPECT NULLS`.
9247        // Swallow opaquely; we don't model it in the AST.
9248        if (self.peek().value.eq_ignore_ascii_case("IGNORE")
9249            || self.peek().value.eq_ignore_ascii_case("RESPECT"))
9250            && self
9251                .peek_offset(1)
9252                .map(|t| t.token_type == TokenType::Null || t.value.eq_ignore_ascii_case("NULLS"))
9253                .unwrap_or(false)
9254        {
9255            self.advance();
9256            self.advance();
9257        }
9258        // Postgres JSON helpers: `JSON_SERIALIZE(expr RETURNING type)`,
9259        // `JSON_QUERY(... RETURNING jsonb FORMAT JSON)`,
9260        // `JSON_VALUE(... RETURNING type DEFAULT v ON EMPTY|ERROR …)`. After
9261        // any RETURNING clause, swallow the optional FORMAT, DEFAULT, ON
9262        // EMPTY/ERROR tail so the call parses cleanly.
9263        if self.match_token(TokenType::Returning) {
9264            if self.is_data_type_token() || self.is_name_token() {
9265                let _ = self.parse_data_type();
9266            }
9267        }
9268        // SQL/JSON `PASSING v AS name [, v AS name]*` clause inside
9269        // JSON_EXISTS / JSON_VALUE / JSON_QUERY argument lists.
9270        if self.check_keyword("PASSING") {
9271            self.advance();
9272            loop {
9273                let _ = self.parse_expr()?;
9274                if self.match_token(TokenType::As) && self.is_name_token() {
9275                    self.advance();
9276                }
9277                if !self.match_token(TokenType::Comma) {
9278                    break;
9279                }
9280            }
9281        }
9282        // SQL/JSON behavior clauses: `NULL|ERROR|EMPTY [ARRAY|OBJECT]|
9283        // DEFAULT expr ON EMPTY|ERROR`. Swallow them opaquely; the
9284        // surrounding call still resolves to its primary expression.
9285        loop {
9286            let is_default = self.peek_type() == &TokenType::Default;
9287            let is_behavior_kw = self.check_keyword("ERROR")
9288                || self.check_keyword("NULL")
9289                || self.peek_type() == &TokenType::Null
9290                || self.check_keyword("EMPTY")
9291                || self.check_keyword("TRUE")
9292                || self.check_keyword("FALSE")
9293                || self.check_keyword("UNKNOWN");
9294            if !is_default && !is_behavior_kw {
9295                break;
9296            }
9297            // Look ahead: behavior keyword must be followed (possibly via
9298            // optional ARRAY/OBJECT/expr) by `ON ERROR|EMPTY` to qualify.
9299            let saved = self.pos;
9300            if is_default {
9301                self.advance();
9302                let _ = self.parse_expr();
9303            } else {
9304                self.advance();
9305                if self.check_keyword("ARRAY") || self.check_keyword("OBJECT") {
9306                    self.advance();
9307                }
9308            }
9309            if self.peek_type() == &TokenType::On
9310                && self
9311                    .peek_offset(1)
9312                    .map(|t| {
9313                        t.value.eq_ignore_ascii_case("ERROR")
9314                            || t.value.eq_ignore_ascii_case("EMPTY")
9315                    })
9316                    .unwrap_or(false)
9317            {
9318                self.advance(); // ON
9319                self.advance(); // ERROR / EMPTY
9320            } else {
9321                // Not actually a behavior clause — rewind.
9322                self.pos = saved;
9323                break;
9324            }
9325        }
9326        // MySQL `CONVERT(expr USING charset)` — swallow USING + name.
9327        if self.match_token(TokenType::Using) {
9328            if self.is_name_token() {
9329                self.advance();
9330            }
9331        }
9332        // ON EMPTY / ON ERROR / DEFAULT … ON EMPTY|ERROR / FORMAT … —
9333        // tolerated tail clauses common to JSON_VALUE / JSON_QUERY /
9334        // JSON_EXISTS. Loop while one of the recognized starters appears.
9335        loop {
9336            let starts = self.peek_type() == &TokenType::Default
9337                || self.match_keyword_clone("FORMAT")
9338                || (self.peek_type() == &TokenType::On
9339                    && self
9340                        .peek_offset(1)
9341                        .map(|t| {
9342                            t.value.eq_ignore_ascii_case("EMPTY")
9343                                || t.value.eq_ignore_ascii_case("ERROR")
9344                        })
9345                        .unwrap_or(false));
9346            if !starts {
9347                break;
9348            }
9349            // Consume up to the next top-level `,` / `)` / EOF, tracking
9350            // nesting so embedded parens (e.g. `DEFAULT ('C' COLLATE "C")`)
9351            // don't terminate prematurely.
9352            let mut depth = 0i32;
9353            while !matches!(self.peek_type(), TokenType::Eof) {
9354                match self.peek_type() {
9355                    TokenType::LParen | TokenType::LBracket => depth += 1,
9356                    TokenType::RParen | TokenType::RBracket => {
9357                        if depth == 0 {
9358                            break;
9359                        }
9360                        depth -= 1;
9361                    }
9362                    TokenType::Comma if depth == 0 => break,
9363                    _ => {}
9364                }
9365                self.advance();
9366            }
9367        }
9368        Ok(expr)
9369    }
9370
9371    /// True when the current token is a name token whose uppercase value
9372    /// equals `kw`. Does NOT advance the token cursor.
9373    fn match_keyword_clone(&self, kw: &str) -> bool {
9374        self.check_keyword(kw)
9375    }
9376
9377    fn is_data_type_token(&self) -> bool {
9378        self.is_data_type_token_kind(self.peek_type())
9379    }
9380
9381    fn is_data_type_token_kind(&self, tt: &TokenType) -> bool {
9382        matches!(
9383            tt,
9384            TokenType::Int
9385                | TokenType::Integer
9386                | TokenType::BigInt
9387                | TokenType::SmallInt
9388                | TokenType::TinyInt
9389                | TokenType::Float
9390                | TokenType::Double
9391                | TokenType::Decimal
9392                | TokenType::Numeric
9393                | TokenType::Real
9394                | TokenType::Varchar
9395                | TokenType::Char
9396                | TokenType::Text
9397                | TokenType::Boolean
9398                | TokenType::Date
9399                | TokenType::Timestamp
9400                | TokenType::TimestampTz
9401                | TokenType::Time
9402                | TokenType::Interval
9403                | TokenType::Blob
9404                | TokenType::Bytea
9405                | TokenType::Json
9406                | TokenType::Jsonb
9407                | TokenType::Uuid
9408                | TokenType::Array
9409                | TokenType::Map
9410                | TokenType::Struct
9411        )
9412    }
9413
9414    fn parse_datetime_field(&mut self) -> Result<DateTimeField> {
9415        let token = self.peek().clone();
9416        let field = match &token.token_type {
9417            TokenType::Year => DateTimeField::Year,
9418            TokenType::Month => DateTimeField::Month,
9419            TokenType::Day => DateTimeField::Day,
9420            TokenType::Hour => DateTimeField::Hour,
9421            TokenType::Minute => DateTimeField::Minute,
9422            TokenType::Second => DateTimeField::Second,
9423            TokenType::Epoch => DateTimeField::Epoch,
9424            _ => {
9425                let name = token.value.to_uppercase();
9426                match name.as_str() {
9427                    "YEAR" => DateTimeField::Year,
9428                    "QUARTER" => DateTimeField::Quarter,
9429                    "MONTH" => DateTimeField::Month,
9430                    "WEEK" => DateTimeField::Week,
9431                    "DAY" => DateTimeField::Day,
9432                    "DOW" | "DAYOFWEEK" => DateTimeField::DayOfWeek,
9433                    "DOY" | "DAYOFYEAR" => DateTimeField::DayOfYear,
9434                    "HOUR" => DateTimeField::Hour,
9435                    "MINUTE" => DateTimeField::Minute,
9436                    "SECOND" => DateTimeField::Second,
9437                    "MILLISECOND" | "MILLISECONDS" | "MS" => DateTimeField::Millisecond,
9438                    "MICROSECOND" | "MICROSECONDS" | "US" => DateTimeField::Microsecond,
9439                    "NANOSECOND" | "NANOSECONDS" | "NS" => DateTimeField::Nanosecond,
9440                    "YEARS" => DateTimeField::Year,
9441                    "QUARTERS" => DateTimeField::Quarter,
9442                    "MONTHS" => DateTimeField::Month,
9443                    "WEEKS" => DateTimeField::Week,
9444                    "DAYS" => DateTimeField::Day,
9445                    "HOURS" => DateTimeField::Hour,
9446                    "MINUTES" => DateTimeField::Minute,
9447                    "SECONDS" => DateTimeField::Second,
9448                    "EPOCH" => DateTimeField::Epoch,
9449                    "TIMEZONE" => DateTimeField::Timezone,
9450                    "TIMEZONE_HOUR" => DateTimeField::TimezoneHour,
9451                    "TIMEZONE_MINUTE" => DateTimeField::TimezoneMinute,
9452                    // MySQL composite interval units. We don't model them
9453                    // distinctly; lower to the dominant component so the
9454                    // surrounding parse completes.
9455                    "DAY_HOUR" | "DAY_MINUTE" | "DAY_SECOND" | "DAY_MICROSECOND" => {
9456                        DateTimeField::Day
9457                    }
9458                    "HOUR_MINUTE" | "HOUR_SECOND" | "HOUR_MICROSECOND" => DateTimeField::Hour,
9459                    "MINUTE_SECOND" | "MINUTE_MICROSECOND" => DateTimeField::Minute,
9460                    "SECOND_MICROSECOND" => DateTimeField::Second,
9461                    "YEAR_MONTH" => DateTimeField::Year,
9462                    _ => {
9463                        return Err(SqlglotError::ParserError {
9464                            message: format!("Unknown datetime field: {name}"),
9465                        });
9466                    }
9467                }
9468            }
9469        };
9470        self.advance();
9471        Ok(field)
9472    }
9473
9474    fn try_parse_datetime_field(&mut self) -> Option<DateTimeField> {
9475        let saved = self.pos;
9476        match self.parse_datetime_field() {
9477            Ok(field) => Some(field),
9478            Err(_) => {
9479                self.pos = saved;
9480                None
9481            }
9482        }
9483    }
9484
9485    /// Parse the inside of `GROUP_CONCAT(...)` (caller has already consumed
9486    /// the `(` and optional `DISTINCT`). Returns a typed `GroupConcat`
9487    /// expression. Does NOT consume the trailing `)`.
9488    fn parse_group_concat_call(&mut self, distinct: bool) -> Result<Expr> {
9489        let mut exprs: Vec<Expr> = Vec::new();
9490        let mut order_by: Vec<OrderByItem> = Vec::new();
9491        let mut separator: Option<Box<Expr>> = None;
9492
9493        if self.peek_type() != &TokenType::RParen {
9494            exprs.push(self.parse_expr()?);
9495            while self.peek_type() == &TokenType::Comma {
9496                // ORDER BY / SEPARATOR are alternative terminators, not args.
9497                // Peek one past the comma to disambiguate `f(a, b)` from
9498                // `f(a, b ORDER BY ...)` — but comma here always introduces
9499                // another positional arg, so just keep consuming.
9500                self.advance();
9501                exprs.push(self.parse_expr()?);
9502            }
9503
9504            if self.match_token(TokenType::Order) {
9505                self.expect(TokenType::By)?;
9506                order_by = self.parse_order_by_items()?;
9507            }
9508
9509            if self.match_keyword("SEPARATOR") {
9510                separator = Some(Box::new(self.parse_expr()?));
9511            }
9512        }
9513
9514        Ok(Expr::TypedFunction {
9515            func: TypedFunction::GroupConcat {
9516                exprs,
9517                separator,
9518                order_by,
9519                distinct,
9520            },
9521            filter: None,
9522            over: None,
9523        })
9524    }
9525
9526    /// Try to construct a typed function expression from a parsed function call.
9527    /// Returns `None` if the function name is not recognized, falling back to
9528    /// the generic `Expr::Function`.
9529    fn try_typed_function(name: &str, args: Vec<Expr>, distinct: bool) -> Option<Expr> {
9530        let upper = name.to_uppercase();
9531        let tf = match upper.as_str() {
9532            // ── Date/Time ──────────────────────────────────────────
9533            "DATE_ADD" | "DATEADD" | "TIMESTAMPADD" => {
9534                let mut it = args.into_iter();
9535                let first = it.next()?;
9536                let second = it.next()?;
9537                let third = it.next();
9538                // Handle DATEADD(unit, interval, expr) — TSQL/Snowflake arg order
9539                if upper == "DATEADD" {
9540                    if let Some(third_arg) = third {
9541                        // 3-arg: DATEADD(unit, interval, expr)
9542                        let unit = Self::expr_to_datetime_field(&first);
9543                        TypedFunction::DateAdd {
9544                            expr: Box::new(third_arg),
9545                            interval: Box::new(second),
9546                            unit,
9547                        }
9548                    } else {
9549                        TypedFunction::DateAdd {
9550                            expr: Box::new(first),
9551                            interval: Box::new(second),
9552                            unit: None,
9553                        }
9554                    }
9555                } else {
9556                    // DATE_ADD(expr, interval [, unit])
9557                    let unit = third.as_ref().and_then(Self::expr_to_datetime_field);
9558                    TypedFunction::DateAdd {
9559                        expr: Box::new(first),
9560                        interval: Box::new(second),
9561                        unit,
9562                    }
9563                }
9564            }
9565            "DATE_DIFF" | "DATEDIFF" | "TIMESTAMPDIFF" => {
9566                let mut it = args.into_iter();
9567                let first = it.next()?;
9568                let second = it.next()?;
9569                let third = it.next();
9570                if let Some(third_arg) = third {
9571                    if upper == "DATEDIFF" {
9572                        // DATEDIFF(unit, start, end) — TSQL/Snowflake
9573                        let unit = Self::expr_to_datetime_field(&first);
9574                        TypedFunction::DateDiff {
9575                            start: Box::new(second),
9576                            end: Box::new(third_arg),
9577                            unit,
9578                        }
9579                    } else {
9580                        let unit = Self::expr_to_datetime_field(&third_arg);
9581                        TypedFunction::DateDiff {
9582                            start: Box::new(first),
9583                            end: Box::new(second),
9584                            unit,
9585                        }
9586                    }
9587                } else {
9588                    TypedFunction::DateDiff {
9589                        start: Box::new(first),
9590                        end: Box::new(second),
9591                        unit: None,
9592                    }
9593                }
9594            }
9595            "DATE_TRUNC" | "DATETRUNC" => {
9596                let mut it = args.into_iter();
9597                let first = it.next()?;
9598                let second = it.next()?;
9599                // DATE_TRUNC('unit', expr) or DATE_TRUNC(unit, expr)
9600                let (unit, expr) = if let Some(u) = Self::expr_to_datetime_field(&first) {
9601                    (u, second)
9602                } else if let Some(u) = Self::expr_to_datetime_field(&second) {
9603                    (u, first)
9604                } else {
9605                    // Default: first = unit string, second = expr
9606                    return None;
9607                };
9608                TypedFunction::DateTrunc {
9609                    unit,
9610                    expr: Box::new(expr),
9611                }
9612            }
9613            "DATE_SUB" | "DATESUB" => {
9614                let mut it = args.into_iter();
9615                let first = it.next()?;
9616                let second = it.next()?;
9617                let third = it.next();
9618                let unit = third.as_ref().and_then(Self::expr_to_datetime_field);
9619                TypedFunction::DateSub {
9620                    expr: Box::new(first),
9621                    interval: Box::new(second),
9622                    unit,
9623                }
9624            }
9625            "CURRENT_DATE" => TypedFunction::CurrentDate,
9626            "CURRENT_TIME" | "CURTIME" => TypedFunction::CurrentTime,
9627            "CURRENT_TIMESTAMP" | "NOW" | "GETDATE" | "SYSDATE" => TypedFunction::CurrentTimestamp,
9628            "STR_TO_TIME" | "STR_TO_DATE" | "TO_TIMESTAMP" | "PARSE_TIMESTAMP"
9629            | "PARSE_DATETIME" => {
9630                let mut it = args.into_iter();
9631                let expr = it.next()?;
9632                let format = it.next()?;
9633                TypedFunction::StrToTime {
9634                    expr: Box::new(expr),
9635                    format: Box::new(format),
9636                }
9637            }
9638            "TIME_TO_STR" | "DATE_FORMAT" | "FORMAT_TIMESTAMP" | "FORMAT_DATETIME" | "TO_CHAR" => {
9639                let mut it = args.into_iter();
9640                let expr = it.next()?;
9641                let format = it.next()?;
9642                TypedFunction::TimeToStr {
9643                    expr: Box::new(expr),
9644                    format: Box::new(format),
9645                }
9646            }
9647            "TS_OR_DS_TO_DATE" => {
9648                let mut it = args.into_iter();
9649                TypedFunction::TsOrDsToDate {
9650                    expr: Box::new(it.next()?),
9651                }
9652            }
9653            "YEAR" => {
9654                let mut it = args.into_iter();
9655                TypedFunction::Year {
9656                    expr: Box::new(it.next()?),
9657                }
9658            }
9659            "MONTH" => {
9660                let mut it = args.into_iter();
9661                TypedFunction::Month {
9662                    expr: Box::new(it.next()?),
9663                }
9664            }
9665            "DAY" | "DAYOFMONTH" => {
9666                let mut it = args.into_iter();
9667                TypedFunction::Day {
9668                    expr: Box::new(it.next()?),
9669                }
9670            }
9671
9672            // ── String ─────────────────────────────────────────────
9673            "TRIM" => {
9674                let mut it = args.into_iter();
9675                let expr = it.next()?;
9676                TypedFunction::Trim {
9677                    expr: Box::new(expr),
9678                    trim_type: TrimType::Both,
9679                    trim_chars: None,
9680                }
9681            }
9682            "LTRIM" => {
9683                let mut it = args.into_iter();
9684                let expr = it.next()?;
9685                TypedFunction::Trim {
9686                    expr: Box::new(expr),
9687                    trim_type: TrimType::Leading,
9688                    trim_chars: None,
9689                }
9690            }
9691            "RTRIM" => {
9692                let mut it = args.into_iter();
9693                let expr = it.next()?;
9694                TypedFunction::Trim {
9695                    expr: Box::new(expr),
9696                    trim_type: TrimType::Trailing,
9697                    trim_chars: None,
9698                }
9699            }
9700            "SUBSTRING" | "SUBSTR" => {
9701                let mut it = args.into_iter();
9702                let expr = it.next()?;
9703                let start = it.next()?;
9704                let length = it.next();
9705                TypedFunction::Substring {
9706                    expr: Box::new(expr),
9707                    start: Box::new(start),
9708                    length: length.map(Box::new),
9709                }
9710            }
9711            "UPPER" | "UCASE" => {
9712                let mut it = args.into_iter();
9713                TypedFunction::Upper {
9714                    expr: Box::new(it.next()?),
9715                }
9716            }
9717            "LOWER" | "LCASE" => {
9718                let mut it = args.into_iter();
9719                TypedFunction::Lower {
9720                    expr: Box::new(it.next()?),
9721                }
9722            }
9723            "REGEXP_LIKE" | "RLIKE" => {
9724                let mut it = args.into_iter();
9725                let expr = it.next()?;
9726                let pattern = it.next()?;
9727                let flags = it.next();
9728                TypedFunction::RegexpLike {
9729                    expr: Box::new(expr),
9730                    pattern: Box::new(pattern),
9731                    flags: flags.map(Box::new),
9732                }
9733            }
9734            "REGEXP_EXTRACT" | "REGEXP_SUBSTR" => {
9735                let mut it = args.into_iter();
9736                let expr = it.next()?;
9737                let pattern = it.next()?;
9738                let group_index = it.next();
9739                TypedFunction::RegexpExtract {
9740                    expr: Box::new(expr),
9741                    pattern: Box::new(pattern),
9742                    group_index: group_index.map(Box::new),
9743                }
9744            }
9745            "REGEXP_REPLACE" => {
9746                let mut it = args.into_iter();
9747                let expr = it.next()?;
9748                let pattern = it.next()?;
9749                let replacement = it.next()?;
9750                let flags = it.next();
9751                TypedFunction::RegexpReplace {
9752                    expr: Box::new(expr),
9753                    pattern: Box::new(pattern),
9754                    replacement: Box::new(replacement),
9755                    flags: flags.map(Box::new),
9756                }
9757            }
9758            "CONCAT_WS" => {
9759                let mut it = args.into_iter();
9760                let separator = it.next()?;
9761                let exprs: Vec<Expr> = it.collect();
9762                TypedFunction::ConcatWs {
9763                    separator: Box::new(separator),
9764                    exprs,
9765                }
9766            }
9767            "SPLIT" | "STRING_SPLIT" => {
9768                let mut it = args.into_iter();
9769                let expr = it.next()?;
9770                let delimiter = it.next()?;
9771                TypedFunction::Split {
9772                    expr: Box::new(expr),
9773                    delimiter: Box::new(delimiter),
9774                }
9775            }
9776            "INITCAP" => {
9777                let mut it = args.into_iter();
9778                TypedFunction::Initcap {
9779                    expr: Box::new(it.next()?),
9780                }
9781            }
9782            "LENGTH" | "LEN" | "CHAR_LENGTH" | "CHARACTER_LENGTH" => {
9783                let mut it = args.into_iter();
9784                TypedFunction::Length {
9785                    expr: Box::new(it.next()?),
9786                }
9787            }
9788            "REPLACE" => {
9789                let mut it = args.into_iter();
9790                let expr = it.next()?;
9791                let from = it.next()?;
9792                let to = it.next()?;
9793                TypedFunction::Replace {
9794                    expr: Box::new(expr),
9795                    from: Box::new(from),
9796                    to: Box::new(to),
9797                }
9798            }
9799            "REVERSE" => {
9800                let mut it = args.into_iter();
9801                TypedFunction::Reverse {
9802                    expr: Box::new(it.next()?),
9803                }
9804            }
9805            "LEFT" => {
9806                let mut it = args.into_iter();
9807                let expr = it.next()?;
9808                let n = it.next()?;
9809                TypedFunction::Left {
9810                    expr: Box::new(expr),
9811                    n: Box::new(n),
9812                }
9813            }
9814            "RIGHT" => {
9815                let mut it = args.into_iter();
9816                let expr = it.next()?;
9817                let n = it.next()?;
9818                TypedFunction::Right {
9819                    expr: Box::new(expr),
9820                    n: Box::new(n),
9821                }
9822            }
9823            "LPAD" => {
9824                let mut it = args.into_iter();
9825                let expr = it.next()?;
9826                let length = it.next()?;
9827                let pad = it.next();
9828                TypedFunction::Lpad {
9829                    expr: Box::new(expr),
9830                    length: Box::new(length),
9831                    pad: pad.map(Box::new),
9832                }
9833            }
9834            "RPAD" => {
9835                let mut it = args.into_iter();
9836                let expr = it.next()?;
9837                let length = it.next()?;
9838                let pad = it.next();
9839                TypedFunction::Rpad {
9840                    expr: Box::new(expr),
9841                    length: Box::new(length),
9842                    pad: pad.map(Box::new),
9843                }
9844            }
9845
9846            // ── Aggregate ──────────────────────────────────────────
9847            "COUNT" => {
9848                let mut it = args.into_iter();
9849                let expr = it.next().unwrap_or(Expr::Wildcard);
9850                TypedFunction::Count {
9851                    expr: Box::new(expr),
9852                    distinct,
9853                }
9854            }
9855            "SUM" => {
9856                let mut it = args.into_iter();
9857                TypedFunction::Sum {
9858                    expr: Box::new(it.next()?),
9859                    distinct,
9860                }
9861            }
9862            "AVG" => {
9863                let mut it = args.into_iter();
9864                TypedFunction::Avg {
9865                    expr: Box::new(it.next()?),
9866                    distinct,
9867                }
9868            }
9869            "MIN" => {
9870                let mut it = args.into_iter();
9871                TypedFunction::Min {
9872                    expr: Box::new(it.next()?),
9873                }
9874            }
9875            "MAX" => {
9876                let mut it = args.into_iter();
9877                TypedFunction::Max {
9878                    expr: Box::new(it.next()?),
9879                }
9880            }
9881            "ARRAY_AGG" | "LIST" | "COLLECT_LIST" => {
9882                let mut it = args.into_iter();
9883                TypedFunction::ArrayAgg {
9884                    expr: Box::new(it.next()?),
9885                    distinct,
9886                }
9887            }
9888            "APPROX_DISTINCT" | "APPROX_COUNT_DISTINCT" => {
9889                let mut it = args.into_iter();
9890                TypedFunction::ApproxDistinct {
9891                    expr: Box::new(it.next()?),
9892                }
9893            }
9894            "VARIANCE" | "VAR_SAMP" | "VAR" => {
9895                let mut it = args.into_iter();
9896                TypedFunction::Variance {
9897                    expr: Box::new(it.next()?),
9898                }
9899            }
9900            "STDDEV" | "STDDEV_SAMP" => {
9901                let mut it = args.into_iter();
9902                TypedFunction::Stddev {
9903                    expr: Box::new(it.next()?),
9904                }
9905            }
9906
9907            // ── Array ──────────────────────────────────────────────
9908            "ARRAY_CONCAT" | "ARRAY_CAT" => TypedFunction::ArrayConcat { arrays: args },
9909            "ARRAY_CONTAINS" => {
9910                let mut it = args.into_iter();
9911                let array = it.next()?;
9912                let element = it.next()?;
9913                TypedFunction::ArrayContains {
9914                    array: Box::new(array),
9915                    element: Box::new(element),
9916                }
9917            }
9918            "ARRAY_SIZE" | "ARRAY_LENGTH" | "CARDINALITY" => {
9919                let mut it = args.into_iter();
9920                TypedFunction::ArraySize {
9921                    expr: Box::new(it.next()?),
9922                }
9923            }
9924            "EXPLODE" => {
9925                let mut it = args.into_iter();
9926                TypedFunction::Explode {
9927                    expr: Box::new(it.next()?),
9928                }
9929            }
9930            "GENERATE_SERIES" | "SEQUENCE" => {
9931                let mut it = args.into_iter();
9932                let start = it.next()?;
9933                let stop = it.next()?;
9934                let step = it.next();
9935                TypedFunction::GenerateSeries {
9936                    start: Box::new(start),
9937                    stop: Box::new(stop),
9938                    step: step.map(Box::new),
9939                }
9940            }
9941            "FLATTEN" => {
9942                let mut it = args.into_iter();
9943                TypedFunction::Flatten {
9944                    expr: Box::new(it.next()?),
9945                }
9946            }
9947
9948            // ── JSON ───────────────────────────────────────────────
9949            "JSON_EXTRACT" | "JSON_VALUE" => {
9950                let mut it = args.into_iter();
9951                let expr = it.next()?;
9952                let path = it.next()?;
9953                TypedFunction::JSONExtract {
9954                    expr: Box::new(expr),
9955                    path: Box::new(path),
9956                }
9957            }
9958            "JSON_EXTRACT_SCALAR" => {
9959                let mut it = args.into_iter();
9960                let expr = it.next()?;
9961                let path = it.next()?;
9962                TypedFunction::JSONExtractScalar {
9963                    expr: Box::new(expr),
9964                    path: Box::new(path),
9965                }
9966            }
9967            "PARSE_JSON" | "JSON_PARSE" => {
9968                let mut it = args.into_iter();
9969                TypedFunction::ParseJSON {
9970                    expr: Box::new(it.next()?),
9971                }
9972            }
9973            "JSON_FORMAT" | "TO_JSON" | "TO_JSON_STRING" => {
9974                let mut it = args.into_iter();
9975                TypedFunction::JSONFormat {
9976                    expr: Box::new(it.next()?),
9977                }
9978            }
9979
9980            // ── Window ─────────────────────────────────────────────
9981            "ROW_NUMBER" => TypedFunction::RowNumber,
9982            "RANK" => TypedFunction::Rank,
9983            "DENSE_RANK" => TypedFunction::DenseRank,
9984            "NTILE" => {
9985                let mut it = args.into_iter();
9986                TypedFunction::NTile {
9987                    n: Box::new(it.next()?),
9988                }
9989            }
9990            "LEAD" => {
9991                let mut it = args.into_iter();
9992                let expr = it.next()?;
9993                let offset = it.next();
9994                let default = it.next();
9995                TypedFunction::Lead {
9996                    expr: Box::new(expr),
9997                    offset: offset.map(Box::new),
9998                    default: default.map(Box::new),
9999                }
10000            }
10001            "LAG" => {
10002                let mut it = args.into_iter();
10003                let expr = it.next()?;
10004                let offset = it.next();
10005                let default = it.next();
10006                TypedFunction::Lag {
10007                    expr: Box::new(expr),
10008                    offset: offset.map(Box::new),
10009                    default: default.map(Box::new),
10010                }
10011            }
10012            "FIRST_VALUE" => {
10013                let mut it = args.into_iter();
10014                TypedFunction::FirstValue {
10015                    expr: Box::new(it.next()?),
10016                }
10017            }
10018            "LAST_VALUE" => {
10019                let mut it = args.into_iter();
10020                TypedFunction::LastValue {
10021                    expr: Box::new(it.next()?),
10022                }
10023            }
10024
10025            // ── Math ───────────────────────────────────────────────
10026            "ABS" => {
10027                let mut it = args.into_iter();
10028                TypedFunction::Abs {
10029                    expr: Box::new(it.next()?),
10030                }
10031            }
10032            "CEIL" | "CEILING" => {
10033                let mut it = args.into_iter();
10034                TypedFunction::Ceil {
10035                    expr: Box::new(it.next()?),
10036                }
10037            }
10038            "FLOOR" => {
10039                let mut it = args.into_iter();
10040                TypedFunction::Floor {
10041                    expr: Box::new(it.next()?),
10042                }
10043            }
10044            "ROUND" => {
10045                let mut it = args.into_iter();
10046                let expr = it.next()?;
10047                let decimals = it.next();
10048                TypedFunction::Round {
10049                    expr: Box::new(expr),
10050                    decimals: decimals.map(Box::new),
10051                }
10052            }
10053            "LOG" => {
10054                let mut it = args.into_iter();
10055                let expr = it.next()?;
10056                let base = it.next();
10057                TypedFunction::Log {
10058                    expr: Box::new(expr),
10059                    base: base.map(Box::new),
10060                }
10061            }
10062            "LN" => {
10063                let mut it = args.into_iter();
10064                TypedFunction::Ln {
10065                    expr: Box::new(it.next()?),
10066                }
10067            }
10068            "POW" | "POWER" => {
10069                let mut it = args.into_iter();
10070                let base = it.next()?;
10071                let exponent = it.next()?;
10072                TypedFunction::Pow {
10073                    base: Box::new(base),
10074                    exponent: Box::new(exponent),
10075                }
10076            }
10077            "SQRT" => {
10078                let mut it = args.into_iter();
10079                TypedFunction::Sqrt {
10080                    expr: Box::new(it.next()?),
10081                }
10082            }
10083            "GREATEST" => TypedFunction::Greatest { exprs: args },
10084            "LEAST" => TypedFunction::Least { exprs: args },
10085            "MOD" => {
10086                let mut it = args.into_iter();
10087                let left = it.next()?;
10088                let right = it.next()?;
10089                TypedFunction::Mod {
10090                    left: Box::new(left),
10091                    right: Box::new(right),
10092                }
10093            }
10094
10095            // ── Conversion ─────────────────────────────────────────
10096            "HEX" | "TO_HEX" => {
10097                let mut it = args.into_iter();
10098                TypedFunction::Hex {
10099                    expr: Box::new(it.next()?),
10100                }
10101            }
10102            "UNHEX" | "FROM_HEX" => {
10103                let mut it = args.into_iter();
10104                TypedFunction::Unhex {
10105                    expr: Box::new(it.next()?),
10106                }
10107            }
10108            "MD5" => {
10109                let mut it = args.into_iter();
10110                TypedFunction::Md5 {
10111                    expr: Box::new(it.next()?),
10112                }
10113            }
10114            "SHA" | "SHA1" => {
10115                let mut it = args.into_iter();
10116                TypedFunction::Sha {
10117                    expr: Box::new(it.next()?),
10118                }
10119            }
10120            "SHA2" | "SHA256" | "SHA512" => {
10121                let mut it = args.into_iter();
10122                let expr = it.next()?;
10123                let bit_length = it.next().unwrap_or(Expr::Number("256".to_string()));
10124                TypedFunction::Sha2 {
10125                    expr: Box::new(expr),
10126                    bit_length: Box::new(bit_length),
10127                }
10128            }
10129
10130            // Not a recognized typed function
10131            _ => return None,
10132        };
10133
10134        Some(Expr::TypedFunction {
10135            func: tf,
10136            filter: None,
10137            over: None,
10138        })
10139    }
10140
10141    /// Try to extract a DateTimeField from a column-name expression.
10142    fn expr_to_datetime_field(expr: &Expr) -> Option<DateTimeField> {
10143        match expr {
10144            Expr::Column {
10145                name, table: None, ..
10146            } => match name.to_uppercase().as_str() {
10147                "YEAR" => Some(DateTimeField::Year),
10148                "QUARTER" => Some(DateTimeField::Quarter),
10149                "MONTH" => Some(DateTimeField::Month),
10150                "WEEK" => Some(DateTimeField::Week),
10151                "DAY" => Some(DateTimeField::Day),
10152                "HOUR" => Some(DateTimeField::Hour),
10153                "MINUTE" => Some(DateTimeField::Minute),
10154                "SECOND" => Some(DateTimeField::Second),
10155                "MILLISECOND" => Some(DateTimeField::Millisecond),
10156                "MICROSECOND" => Some(DateTimeField::Microsecond),
10157                _ => None,
10158            },
10159            Expr::StringLiteral(s) | Expr::NationalStringLiteral(s) => {
10160                match s.to_uppercase().as_str() {
10161                    "YEAR" => Some(DateTimeField::Year),
10162                    "QUARTER" => Some(DateTimeField::Quarter),
10163                    "MONTH" => Some(DateTimeField::Month),
10164                    "WEEK" => Some(DateTimeField::Week),
10165                    "DAY" => Some(DateTimeField::Day),
10166                    "HOUR" => Some(DateTimeField::Hour),
10167                    "MINUTE" => Some(DateTimeField::Minute),
10168                    "SECOND" => Some(DateTimeField::Second),
10169                    "MILLISECOND" => Some(DateTimeField::Millisecond),
10170                    "MICROSECOND" => Some(DateTimeField::Microsecond),
10171                    _ => None,
10172                }
10173            }
10174            _ => None,
10175        }
10176    }
10177
10178    fn parse_case_expr(&mut self) -> Result<Expr> {
10179        self.expect(TokenType::Case)?;
10180
10181        let operand = if self.peek_type() != &TokenType::When {
10182            Some(Box::new(self.parse_expr()?))
10183        } else {
10184            None
10185        };
10186
10187        let mut when_clauses = Vec::new();
10188        while self.match_token(TokenType::When) {
10189            let condition = self.parse_expr()?;
10190            self.expect(TokenType::Then)?;
10191            let result = self.parse_expr()?;
10192            when_clauses.push((condition, result));
10193        }
10194
10195        let else_clause = if self.match_token(TokenType::Else) {
10196            Some(Box::new(self.parse_expr()?))
10197        } else {
10198            None
10199        };
10200
10201        self.expect(TokenType::End)?;
10202
10203        Ok(Expr::Case {
10204            operand,
10205            when_clauses,
10206            else_clause,
10207        })
10208    }
10209}
10210
#[cfg(test)]
mod tests {
    //! Smoke tests for the parser: each test feeds one SQL string through
    //! [`Parser`] and checks the shape of the resulting AST.

    use super::*;

    /// Parse `sql` as a single statement.
    ///
    /// Panics with the offending SQL text if tokenizing or parsing fails, so
    /// a failing test points directly at the input that broke.
    fn parse_one(sql: &str) -> Statement {
        let mut parser = Parser::new(sql)
            .unwrap_or_else(|e| panic!("failed to tokenize {:?}: {:?}", sql, e));
        parser
            .parse_statement()
            .unwrap_or_else(|e| panic!("failed to parse {:?}: {:?}", sql, e))
    }

    /// Return the expression of the first projected column.
    ///
    /// Panics unless `stmt` is a SELECT whose first select item is a plain
    /// expression. Tests use this instead of `if let`, which would silently
    /// skip their assertions when the AST has an unexpected shape.
    fn first_projection(stmt: &Statement) -> &Expr {
        match stmt {
            Statement::Select(sel) => match &sel.columns[0] {
                SelectItem::Expr { expr, .. } => expr,
                _ => panic!("Expected expression select item"),
            },
            _ => panic!("Expected SELECT"),
        }
    }

    #[test]
    fn test_parse_simple_select() {
        let stmt = parse_one("SELECT a, b FROM t");
        match stmt {
            Statement::Select(sel) => {
                assert_eq!(sel.columns.len(), 2);
                assert!(sel.from.is_some());
            }
            _ => panic!("Expected SELECT"),
        }
    }

    #[test]
    fn test_parse_select_with_where() {
        let stmt = parse_one("SELECT x FROM t WHERE x > 10");
        match stmt {
            Statement::Select(sel) => assert!(sel.where_clause.is_some()),
            _ => panic!("Expected SELECT"),
        }
    }

    #[test]
    fn test_parse_select_wildcard() {
        let stmt = parse_one("SELECT * FROM users");
        match stmt {
            Statement::Select(sel) => {
                assert_eq!(sel.columns.len(), 1);
                assert!(matches!(sel.columns[0], SelectItem::Wildcard));
            }
            _ => panic!("Expected SELECT"),
        }
    }

    #[test]
    fn test_parse_insert() {
        let stmt = parse_one("INSERT INTO t (a, b) VALUES (1, 'hello')");
        match stmt {
            Statement::Insert(ins) => {
                assert_eq!(ins.table.name, "t");
                assert_eq!(ins.columns, vec!["a", "b"]);
                match &ins.source {
                    InsertSource::Values(rows) => {
                        assert_eq!(rows.len(), 1);
                        assert_eq!(rows[0].len(), 2);
                    }
                    _ => panic!("Expected VALUES"),
                }
            }
            _ => panic!("Expected INSERT"),
        }
    }

    #[test]
    fn test_parse_delete() {
        let stmt = parse_one("DELETE FROM users WHERE id = 1");
        match stmt {
            Statement::Delete(del) => {
                assert_eq!(del.table.name, "users");
                assert!(del.where_clause.is_some());
            }
            _ => panic!("Expected DELETE"),
        }
    }

    #[test]
    fn test_parse_join() {
        let stmt = parse_one("SELECT a.id, b.name FROM a INNER JOIN b ON a.id = b.a_id");
        match stmt {
            Statement::Select(sel) => {
                assert_eq!(sel.joins.len(), 1);
                assert_eq!(sel.joins[0].join_type, JoinType::Inner);
            }
            _ => panic!("Expected SELECT"),
        }
    }

    #[test]
    fn test_parse_cte() {
        let stmt = parse_one("WITH cte AS (SELECT 1 AS x) SELECT x FROM cte");
        match stmt {
            Statement::Select(sel) => {
                assert_eq!(sel.ctes.len(), 1);
                assert_eq!(sel.ctes[0].name, "cte");
            }
            _ => panic!("Expected SELECT"),
        }
    }

    #[test]
    fn test_parse_union() {
        let stmt = parse_one("SELECT 1 UNION ALL SELECT 2");
        match stmt {
            Statement::SetOperation(sop) => {
                assert_eq!(sop.op, SetOperationType::Union);
                assert!(sop.all);
            }
            _ => panic!("Expected SetOperation"),
        }
    }

    #[test]
    fn test_parse_cast() {
        let stmt = parse_one("SELECT CAST(x AS INT) FROM t");
        assert!(matches!(first_projection(&stmt), Expr::Cast { .. }));
    }

    #[test]
    fn test_parse_subquery() {
        let stmt = parse_one("SELECT * FROM (SELECT 1 AS x) AS sub");
        match stmt {
            Statement::Select(sel) => {
                // A missing FROM clause must fail the test, not skip the check.
                let from = sel.from.as_ref().expect("Expected FROM clause");
                assert!(matches!(from.source, TableSource::Subquery { .. }));
            }
            _ => panic!("Expected SELECT"),
        }
    }

    #[test]
    fn test_parse_exists() {
        let stmt = parse_one("SELECT * FROM t WHERE EXISTS (SELECT 1 FROM t2)");
        match stmt {
            Statement::Select(sel) => {
                assert!(sel.where_clause.is_some());
            }
            _ => panic!("Expected SELECT"),
        }
    }

    #[test]
    fn test_parse_window_function() {
        let stmt = parse_one(
            "SELECT ROW_NUMBER() OVER (PARTITION BY dept ORDER BY salary DESC) FROM emp",
        );
        // Either function representation is acceptable as long as the OVER
        // clause was attached to it.
        match first_projection(&stmt) {
            Expr::TypedFunction { over, .. } => {
                assert!(over.is_some());
            }
            Expr::Function { over, .. } => {
                assert!(over.is_some());
            }
            _ => panic!("Expected function"),
        }
    }

    #[test]
    fn test_parse_multiple_statements() {
        let stmts = Parser::new("SELECT 1; SELECT 2;")
            .unwrap()
            .parse_statements()
            .unwrap();
        assert_eq!(stmts.len(), 2);
    }

    #[test]
    fn test_parse_insert_select() {
        let stmt = parse_one("INSERT INTO t SELECT * FROM s");
        match stmt {
            Statement::Insert(ins) => {
                assert!(matches!(ins.source, InsertSource::Query(_)));
            }
            _ => panic!("Expected INSERT"),
        }
    }

    #[test]
    fn test_parse_create_table_constraints() {
        let stmt =
            parse_one("CREATE TABLE t (id INT PRIMARY KEY, name VARCHAR(100) NOT NULL UNIQUE)");
        match stmt {
            Statement::CreateTable(ct) => {
                assert_eq!(ct.columns.len(), 2);
                assert!(ct.columns[0].primary_key);
                assert!(ct.columns[1].unique);
            }
            _ => panic!("Expected CREATE TABLE"),
        }
    }

    #[test]
    fn test_parse_extract() {
        let stmt = parse_one("SELECT EXTRACT(YEAR FROM created_at) FROM t");
        assert!(matches!(first_projection(&stmt), Expr::Extract { .. }));
    }

    #[test]
    fn test_parse_postgres_cast() {
        let stmt = parse_one("SELECT x::int FROM t");
        assert!(matches!(first_projection(&stmt), Expr::Cast { .. }));
    }

    #[test]
    fn test_parse_on_conflict_expression_targets() {
        let stmt = parse_one(
            "INSERT INTO t VALUES (1, 'Crowberry') ON CONFLICT (lower(fruit) collate \"C\" text_pattern_ops, key) DO NOTHING",
        );

        match stmt {
            Statement::Insert(ins) => {
                let on_conflict = ins.on_conflict.expect("Expected ON CONFLICT");
                assert_eq!(on_conflict.columns.len(), 2);
                assert!(on_conflict.columns[0].starts_with("lower"));
                assert!(on_conflict.columns[0].contains("text_pattern_ops"));
                assert_eq!(on_conflict.columns[1], "key");
            }
            _ => panic!("Expected INSERT"),
        }
    }

    #[test]
    fn test_parse_postgres_operator_sequences() {
        let cases = [
            "SELECT * FROM box_temp WHERE f1 <<| '(10,4.33334),(5,100)'",
            "SELECT * FROM box_temp WHERE f1 &<| '(10,4.3333334),(5,1)'",
            "SELECT count(*) FROM radix_text_tbl WHERE t ^@ 'Worth'",
        ];

        for sql in &cases {
            let stmt = parse_one(sql);
            assert!(
                matches!(stmt, Statement::Select(_)),
                "Expected SELECT for {:?}",
                sql
            );
        }
    }
}
10518
10519/// Attach comments to the appropriate field on a parsed statement.
10520fn attach_comments_to_statement(stmt: &mut Statement, comments: Vec<String>) {
10521    match stmt {
10522        Statement::Select(s) => s.comments = comments,
10523        Statement::Insert(s) => s.comments = comments,
10524        Statement::Update(s) => s.comments = comments,
10525        Statement::Delete(s) => s.comments = comments,
10526        Statement::CreateTable(s) => s.comments = comments,
10527        Statement::DropTable(s) => s.comments = comments,
10528        Statement::SetOperation(s) => s.comments = comments,
10529        Statement::AlterTable(s) => s.comments = comments,
10530        Statement::CreateView(s) => s.comments = comments,
10531        Statement::DropView(s) => s.comments = comments,
10532        Statement::Truncate(s) => s.comments = comments,
10533        Statement::Explain(s) => s.comments = comments,
10534        Statement::Use(s) => s.comments = comments,
10535        Statement::Merge(s) => s.comments = comments,
10536        Statement::Command(s) => s.comments = comments,
10537        // Transaction and Expression don't have comment fields
10538        Statement::Transaction(_) | Statement::Expression(_) => {}
10539    }
10540}