Skip to main content

sqlglot_rust/parser/
sql_parser.rs

1use crate::ast::*;
2use crate::errors::{Result, SqlglotError};
3use crate::tokens::{Token, TokenType, Tokenizer};
4
5/// Convert a token's `quote_char` into a `QuoteStyle`.
6fn quote_style_from_char(c: char) -> QuoteStyle {
7    match c {
8        '"' => QuoteStyle::DoubleQuote,
9        '`' => QuoteStyle::Backtick,
10        '[' => QuoteStyle::Bracket,
11        _ => QuoteStyle::None,
12    }
13}
14
15/// A recursive-descent SQL parser.
16///
17/// Supports CTEs (WITH), subqueries, UNION/INTERSECT/EXCEPT, CAST,
18/// window functions (OVER), EXISTS, EXTRACT, INTERVAL, and more.
19pub struct Parser {
20    tokens: Vec<Token>,
21    pos: usize,
22    /// Whether to preserve comments during parsing.
23    #[allow(dead_code)]
24    preserve_comments: bool,
25    /// Accumulated comments pending attachment to the next AST node.
26    pending_comments: Vec<String>,
27}
28
29impl Parser {
30    /// Create a new parser from a SQL string.
31    pub fn new(sql: &str) -> Result<Self> {
32        let mut tokenizer = Tokenizer::new(sql);
33        let tokens = tokenizer.tokenize()?;
34        Ok(Self {
35            tokens,
36            pos: 0,
37            preserve_comments: false,
38            pending_comments: Vec::new(),
39        })
40    }
41
42    /// Create a new parser that preserves SQL comments in the AST.
43    pub fn new_with_comments(sql: &str) -> Result<Self> {
44        let mut tokenizer = Tokenizer::with_comments(sql);
45        let tokens = tokenizer.tokenize()?;
46        Ok(Self {
47            tokens,
48            pos: 0,
49            preserve_comments: true,
50            pending_comments: Vec::new(),
51        })
52    }
53
54    // ── Comment helpers ────────────────────────────────────────────
55
56    /// Consume any comment tokens at the current position, accumulating
57    /// their text into `pending_comments`.
58    fn collect_comments(&mut self) {
59        while self.pos < self.tokens.len() {
60            match self.tokens[self.pos].token_type {
61                TokenType::LineComment | TokenType::BlockComment => {
62                    let token = &self.tokens[self.pos];
63                    self.pending_comments.push(token.value.clone());
64                    self.pos += 1;
65                }
66                _ => break,
67            }
68        }
69    }
70
71    /// Take all pending comments, leaving the buffer empty.
72    fn take_comments(&mut self) -> Vec<String> {
73        std::mem::take(&mut self.pending_comments)
74    }
75
76    // ── Token helpers ──────────────────────────────────────────────
77
78    fn peek(&self) -> &Token {
79        &self.tokens[self.pos.min(self.tokens.len() - 1)]
80    }
81
82    fn peek_type(&self) -> &TokenType {
83        &self.peek().token_type
84    }
85
86    fn advance(&mut self) -> &Token {
87        let token = &self.tokens[self.pos.min(self.tokens.len() - 1)];
88        if self.pos < self.tokens.len() {
89            self.pos += 1;
90        }
91        token
92    }
93
94    fn expect(&mut self, expected: TokenType) -> Result<Token> {
95        let token = self.peek().clone();
96        if token.token_type == expected {
97            self.advance();
98            Ok(token)
99        } else {
100            Err(SqlglotError::ParserError {
101                message: format!(
102                    "Expected {expected:?}, got {:?} ('{}') at line {} col {}",
103                    token.token_type, token.value, token.line, token.col
104                ),
105            })
106        }
107    }
108
109    fn match_token(&mut self, expected: TokenType) -> bool {
110        if self.peek().token_type == expected {
111            self.advance();
112            true
113        } else {
114            false
115        }
116    }
117
118    /// Check if the current token's uppercased value matches a keyword string.
119    fn check_keyword(&self, keyword: &str) -> bool {
120        self.peek().value.to_uppercase() == keyword
121    }
122
123    /// Check if the token at `current + offset` matches a keyword string.
124    fn check_keyword_offset(&self, keyword: &str, offset: usize) -> bool {
125        let idx = self.pos + offset;
126        if idx < self.tokens.len() {
127            self.tokens[idx].value.to_uppercase() == keyword
128        } else {
129            false
130        }
131    }
132
133    /// Match a keyword by string value (for multi-word context-sensitive keywords).
134    fn match_keyword(&mut self, keyword: &str) -> bool {
135        if self.check_keyword(keyword) {
136            self.advance();
137            true
138        } else {
139            false
140        }
141    }
142
143    /// Expect a keyword by string value, returning an error if not found.
144    fn expect_keyword(&mut self, keyword: &str) -> Result<()> {
145        if self.check_keyword(keyword) {
146            self.advance();
147            Ok(())
148        } else {
149            let token = self.peek().clone();
150            Err(SqlglotError::ParserError {
151                message: format!(
152                    "Expected keyword '{keyword}', got '{value}' at line {line} col {col}",
153                    value = token.value,
154                    line = token.line,
155                    col = token.col
156                ),
157            })
158        }
159    }
160
161    /// Reconstruct a single token's surface representation for raw command
162    /// preservation. String literals are wrapped in their original quotes;
163    /// identifiers may carry a quote_char from the tokenizer.
164    fn token_text(token: &Token) -> String {
165        match token.token_type {
166            TokenType::String => format!("'{}'", token.value.replace('\'', "''")),
167            TokenType::Identifier if token.quote_char != '\0' => {
168                let (l, r) = match token.quote_char {
169                    '[' => ('[', ']'),
170                    c => (c, c),
171                };
172                format!("{l}{}{r}", token.value)
173            }
174            _ => token.value.clone(),
175        }
176    }
177
178    /// Join a slice of tokens with whitespace tuned for SQL — no space
179    /// before `,` `)` `;` `.`, no space after `(` or `.`.
180    fn join_tokens_for_raw(tokens: &[Token]) -> String {
181        let mut out = String::new();
182        let mut prev_no_space_after = true; // suppress leading space
183        for t in tokens {
184            let no_space_before = matches!(
185                t.token_type,
186                TokenType::Comma
187                    | TokenType::RParen
188                    | TokenType::Semicolon
189                    | TokenType::Dot
190                    | TokenType::RBracket
191            );
192            if !out.is_empty() && !prev_no_space_after && !no_space_before {
193                out.push(' ');
194            }
195            out.push_str(&Self::token_text(t));
196            prev_no_space_after = matches!(
197                t.token_type,
198                TokenType::LParen | TokenType::Dot | TokenType::LBracket
199            );
200        }
201        out
202    }
203
204    /// Consume tokens up to (but not including) the next top-level `;` or EOF,
205    /// returning the raw text of the consumed tokens with whitespace
206    /// reconstructed by [`join_tokens_for_raw`]. Honors parenthesis depth so
207    /// embedded `;` inside `(...)` does not terminate the statement.
208    fn consume_raw_to_statement_end(&mut self) -> String {
209        let start = self.pos;
210        let mut depth: i32 = 0;
211        while self.pos < self.tokens.len() {
212            let tt = &self.tokens[self.pos].token_type;
213            match tt {
214                TokenType::Eof => break,
215                TokenType::Semicolon if depth == 0 => break,
216                TokenType::LParen | TokenType::LBracket => {
217                    depth += 1;
218                    self.pos += 1;
219                }
220                TokenType::RParen | TokenType::RBracket => {
221                    // A closing paren at depth 0 belongs to an enclosing
222                    // context (e.g. CTE body, subquery) — stop without
223                    // consuming it.
224                    if depth == 0 {
225                        break;
226                    }
227                    depth -= 1;
228                    self.pos += 1;
229                }
230                _ => self.pos += 1,
231            }
232        }
233        Self::join_tokens_for_raw(&self.tokens[start..self.pos])
234    }
235
236    /// Parse a comma-separated list of raw items inside an already-opened
237    /// parenthesized context. Stops at the matching `)` and returns each item
238    /// reconstructed from tokens.
239    fn parse_parenthesized_raw_items(&mut self) -> Result<Vec<String>> {
240        let mut items = Vec::new();
241
242        // Allow empty parens for tolerance.
243        if self.match_token(TokenType::RParen) {
244            return Ok(items);
245        }
246
247        loop {
248            let start = self.pos;
249            let mut paren_depth: i32 = 0;
250            let mut bracket_depth: i32 = 0;
251
252            while self.pos < self.tokens.len() {
253                match self.peek_type() {
254                    TokenType::Eof => break,
255                    TokenType::LParen => {
256                        paren_depth += 1;
257                        self.pos += 1;
258                    }
259                    TokenType::RParen => {
260                        if paren_depth == 0 && bracket_depth == 0 {
261                            break;
262                        }
263                        if paren_depth > 0 {
264                            paren_depth -= 1;
265                        }
266                        self.pos += 1;
267                    }
268                    TokenType::LBracket => {
269                        bracket_depth += 1;
270                        self.pos += 1;
271                    }
272                    TokenType::RBracket => {
273                        if bracket_depth > 0 {
274                            bracket_depth -= 1;
275                        }
276                        self.pos += 1;
277                    }
278                    TokenType::Comma if paren_depth == 0 && bracket_depth == 0 => break,
279                    _ => self.pos += 1,
280                }
281            }
282
283            if start == self.pos {
284                let token = self.peek().clone();
285                return Err(SqlglotError::ParserError {
286                    message: format!(
287                        "Expected expression inside parenthesized list, got '{}' at line {} col {}",
288                        token.value, token.line, token.col
289                    ),
290                });
291            }
292
293            items.push(Self::join_tokens_for_raw(&self.tokens[start..self.pos]));
294
295            if self.match_token(TokenType::Comma) {
296                continue;
297            }
298
299            self.expect(TokenType::RParen)?;
300            break;
301        }
302
303        Ok(items)
304    }
305
306    /// Helper for the dispatcher: consume one verb token (already known) and
307    /// then capture the entire tail as a [`CommandStatement`].
308    fn parse_command_kind(&mut self, kind: &str) -> Result<Statement> {
309        self.advance(); // consume the verb token
310        let body = self.consume_raw_to_statement_end();
311        Ok(Statement::Command(CommandStatement {
312            comments: vec![],
313            kind: kind.to_string(),
314            body,
315        }))
316    }
317
318    /// `COMMENT ON {TABLE|COLUMN|...} <name> IS '...'` — preserved as raw.
319    /// `COMMENT` can also appear inside `CREATE TABLE` column definitions and
320    /// in other positions; only the standalone DDL form lands here because
321    /// the dispatcher peeks at the *first* token.
322    fn parse_comment_on_command(&mut self) -> Result<Statement> {
323        // Look ahead for "COMMENT ON" — if not "ON", fall back to parser error
324        // (the COMMENT token would otherwise have been consumed inside an
325        // expression / column-def parser, not at statement boundary).
326        if self.peek_offset(1).map(|t| t.value.to_uppercase()) != Some("ON".to_string()) {
327            return Err(SqlglotError::UnexpectedToken {
328                token: self.peek().clone(),
329            });
330        }
331        self.advance(); // COMMENT
332        let body = self.consume_raw_to_statement_end();
333        Ok(Statement::Command(CommandStatement {
334            comments: vec![],
335            kind: "COMMENT".to_string(),
336            body,
337        }))
338    }
339
340    /// Returns `true` when the current Identifier token is a known
341    /// statement-starting verb that we preserve verbatim.
342    fn match_command_keyword(&self) -> bool {
343        let v = self.peek().value.to_uppercase();
344        matches!(
345            v.as_str(),
346            "GO" | "DECLARE"
347                | "LOAD"
348                | "REM"
349                | "REMARK"
350                | "RESET"
351                | "PRAGMA"
352                | "VACUUM"
353                | "REINDEX"
354                | "CALL"
355                | "LOCK"
356                | "UNLOCK"
357                | "CLUSTER"
358                | "REFRESH"
359                | "CHECKPOINT"
360                | "LISTEN"
361                | "NOTIFY"
362                | "PREPARE"
363                | "EXECUTE"
364                | "DEALLOCATE"
365                | "DISCARD"
366                | "COPY"
367                | "ATTACH"
368                | "DETACH"
369                | "COMMENT"
370                | "DESCRIBE"
371                | "DESC"
372                | "OPTIMIZE"
373                | "SYSTEM"
374                | "KILL"
375                | "FLUSH"
376                | "RESTORE"
377                | "BACKUP"
378                | "EXCHANGE"
379                | "RENAME"
380                | "WATCH"
381                | "MSCK"
382                | "UNLOAD"
383                | "ASSERT"
384                | "REPAIR"
385                | "PURGE"
386                | "ABORT"
387                | "VALIDATE"
388                | "MOVE"
389                | "CLOSE"
390                | "FETCH"
391                | "REPLICATE"
392                | "START"
393                | "RAISE"
394                | "UNDROP"
395                | "EXCEPTION"
396                | "CONNECT"
397                | "DISCONNECT"
398                | "SEND"
399                | "ENABLE"
400                | "DISABLE"
401                | "REPLAY"
402                | "SYNCHRONIZE"
403                | "CHECK"
404                | "REPORT"
405                | "BIND"
406                | "UNBIND"
407                | "INCLUDE"
408                | "EXPORT"
409                | "IMPORT"
410                | "ADMIN"
411                | "SPLIT"
412                | "TRACE"
413                | "RESUME"
414                | "SUSPEND"
415                | "ROUTE"
416                | "EMIT"
417                | "FOR"
418                | "WHILE"
419                | "LOOP"
420                | "RETURN"
421                | "REPEAT"
422                | "EXIT"
423                | "LEAVE"
424                | "ITERATE"
425                | "CONTINUE"
426                | "GOTO"
427                | "RAISERROR"
428                | "PRINT"
429                | "WAITFOR"
430                | "TRUNCATE"
431                | "DO"
432                | "CONNECTION"
433                | "ELSEIF"
434                | "ELSIF"
435                | "UNTIL"
436                | "CONNECT_BY_ROOT"
437                | "APPLY"
438                | "EXEC"
439                | "OPEN"
440                | "REVERT"
441                | "DEALLOC"
442                | "GRANT"
443                | "REVOKE"
444                | "DENY"
445                | "UNSET"
446                | "USE"
447                | "PRELOAD"
448                | "RECOMPRESS"
449                | "COMPUTE"
450                | "INVALIDATE"
451                | "ANALYSE"
452                | "BOOTSTRAP"
453                | "LATCH"
454                | "UNLATCH"
455                | "SETOF"
456                | "CHECKSUM"
457                | "DELIMITER"
458                | "GET"
459                | "HELP"
460                | "BINLOG"
461                | "RELOAD"
462                | "PARSE"
463                | "BUFFER"
464                | "BUILDS"
465                | "COMPACT"
466                | "FREEZE"
467                | "UNFREEZE"
468                | "BORROW"
469                | "UNLISTEN"
470                | "REPACK"
471                | "RESIGNAL"
472                | "SIGNAL"
473                | "THROW"
474                | "DBCC"
475                | "SUMMARIZE"
476                | "BATCH"
477        )
478    }
479
480    /// Variant of [`parse_command_kind`] for verbs that arrive as an
481    /// Identifier token (no dedicated TokenType).
482    fn parse_command_from_identifier(&mut self) -> Result<Statement> {
483        let verb = self.peek().value.to_uppercase();
484        self.advance();
485        let body = self.consume_raw_to_statement_end();
486        Ok(Statement::Command(CommandStatement {
487            comments: vec![],
488            kind: verb,
489            body,
490        }))
491    }
492
493    /// Look at the token `offset` positions ahead of the current one,
494    /// returning `None` if past EOF.
495    fn peek_offset(&self, offset: usize) -> Option<&Token> {
496        self.tokens.get(self.pos + offset)
497    }
498
499    /// Look ahead past a run of `(` tokens to see if a `SELECT`, `WITH`, or
500    /// `EXPLAIN` keyword starts inside. Used by the subquery parser to detect
501    /// `((SELECT …))` and similar shapes.
502    fn peek_starts_subquery_through_parens(&self) -> bool {
503        let mut i = self.pos;
504        while i < self.tokens.len() && self.tokens[i].token_type == TokenType::LParen {
505            i += 1;
506        }
507        i < self.tokens.len()
508            && matches!(
509                self.tokens[i].token_type,
510                TokenType::Select | TokenType::With | TokenType::Explain | TokenType::From
511            )
512    }
513
514    /// Helper to check if current token is an identifier or keyword that can serve as a name.
515    fn is_name_token(&self) -> bool {
516        matches!(
517            self.peek_type(),
518            TokenType::Identifier
519                | TokenType::All
520                | TokenType::Year
521                | TokenType::Month
522                | TokenType::Day
523                | TokenType::Hour
524                | TokenType::Minute
525                | TokenType::Second
526                | TokenType::Interval
527                | TokenType::Key
528                | TokenType::Filter
529                | TokenType::First
530                | TokenType::Next
531                | TokenType::Only
532                | TokenType::Respect
533                | TokenType::Epoch
534                | TokenType::Schema
535                | TokenType::Database
536                | TokenType::View
537                | TokenType::Collate
538                | TokenType::Comment
539                | TokenType::Left
540                | TokenType::Right
541                | TokenType::Replace
542                | TokenType::Cube
543                | TokenType::Rollup
544                | TokenType::Grouping
545                | TokenType::Pivot
546                | TokenType::Unpivot
547                | TokenType::Sets
548                | TokenType::Range
549                | TokenType::Conflict
550                | TokenType::Unnest
551                | TokenType::Text
552                | TokenType::Show
553                | TokenType::Describe
554                | TokenType::Analyze
555                | TokenType::Index
556                | TokenType::Cast
557                | TokenType::Group
558                | TokenType::Order
559                | TokenType::Explain
560                | TokenType::Table
561                | TokenType::Offset
562                | TokenType::Merge
563                | TokenType::Nulls
564                | TokenType::Temp
565                | TokenType::Temporary
566                | TokenType::Rows
567                | TokenType::Partition
568                | TokenType::Any
569                | TokenType::Escape
570        )
571    }
572
573    /// Consume a name token (identifier or unreserved keyword used as identifier).
574    fn expect_name(&mut self) -> Result<String> {
575        let (name, _) = self.expect_name_with_quote()?;
576        Ok(name)
577    }
578
579    /// If the current token is `@` / `:` / `Parameter` immediately followed by
580    /// a name token (no whitespace tracking — they are adjacent in the token
581    /// stream), consume both and return them as a combined alias name.
582    /// Used to accept auto-generated aliases like `AS @rpm` or `AS :minutes`
583    /// without changing parameter-marker handling elsewhere.
584    fn try_parse_prefixed_alias(&mut self) -> Result<Option<(String, QuoteStyle)>> {
585        let prefix = match self.peek_type() {
586            TokenType::AtSign => '@',
587            TokenType::Colon => ':',
588            // Standalone Parameter token (`$` not absorbed into an identifier).
589            TokenType::Parameter if self.peek().value == "$" => '$',
590            _ => return Ok(None),
591        };
592        let next = match self.peek_offset(1) {
593            Some(t) => t,
594            None => return Ok(None),
595        };
596        let is_name_like = matches!(
597            next.token_type,
598            TokenType::Identifier
599                | TokenType::Year | TokenType::Month | TokenType::Day
600                | TokenType::Hour | TokenType::Minute | TokenType::Second
601                | TokenType::Key | TokenType::Filter | TokenType::First
602                | TokenType::Next | TokenType::Only | TokenType::Schema
603                | TokenType::Database | TokenType::View | TokenType::Collate
604                | TokenType::Comment | TokenType::Replace | TokenType::Text
605                | TokenType::Show | TokenType::Describe | TokenType::Analyze
606                | TokenType::Index | TokenType::Cast | TokenType::Group
607                | TokenType::Order | TokenType::Range
608        );
609        if !is_name_like {
610            return Ok(None);
611        }
612        self.advance(); // consume prefix
613        let name_tok = self.advance().clone();
614        let mut combined = String::with_capacity(name_tok.value.len() + 1);
615        combined.push(prefix);
616        combined.push_str(&name_tok.value);
617        Ok(Some((combined, quote_style_from_char(name_tok.quote_char))))
618    }
619
620    /// Like `expect_name` but also returns the quote style of the token.
621    fn expect_name_with_quote(&mut self) -> Result<(String, QuoteStyle)> {
622        if self.is_name_token() {
623            let token = self.advance().clone();
624            let qs = quote_style_from_char(token.quote_char);
625            let mut name = token.value.clone();
626            // Append trailing `${...}` template variables so identifiers
627            // like `t1_${type}` round-trip as a single name token.
628            while matches!(self.peek_type(), TokenType::Parameter)
629                && self.peek().value.starts_with("${")
630            {
631                name.push_str(&self.advance().value.clone());
632            }
633            return Ok((name, qs));
634        }
635        // Leading `${...}` template variable as a name (rare).
636        if matches!(self.peek_type(), TokenType::Parameter)
637            && self.peek().value.starts_with("${")
638        {
639            let mut name = self.advance().value.clone();
640            // Only fuse plain identifiers or further `${...}` segments —
641            // never reserved keywords (Order, By, etc.) even though those
642            // tokenize as name-like, or the template would swallow the
643            // surrounding clause.
644            while matches!(self.peek_type(), TokenType::Identifier)
645                || (matches!(self.peek_type(), TokenType::Parameter)
646                    && self.peek().value.starts_with("${"))
647            {
648                name.push_str(&self.advance().value.clone());
649            }
650            return Ok((name, QuoteStyle::None));
651        }
652        // ClickHouse typed placeholder used as an identifier:
653        // `{db:Identifier}`, `{tbl:Identifier}`. Accept anywhere a name is
654        // expected so `FROM {db:Identifier}.t` and friends parse.
655        if matches!(self.peek_type(), TokenType::Parameter)
656            && self.peek().value.starts_with('{')
657        {
658            let name = self.advance().value.clone();
659            return Ok((name, QuoteStyle::None));
660        }
661        // Also accept any keyword-like identifier
662        let token = self.peek().clone();
663        if matches!(
664            token.token_type,
665            TokenType::Identifier
666                | TokenType::Int
667                | TokenType::Integer
668                | TokenType::BigInt
669                | TokenType::SmallInt
670                | TokenType::TinyInt
671                | TokenType::Float
672                | TokenType::Double
673                | TokenType::Decimal
674                | TokenType::Numeric
675                | TokenType::Real
676                | TokenType::Varchar
677                | TokenType::Char
678                | TokenType::Text
679                | TokenType::Boolean
680                | TokenType::Date
681                | TokenType::Timestamp
682                | TokenType::TimestampTz
683                | TokenType::Time
684                | TokenType::Interval
685                | TokenType::Blob
686                | TokenType::Bytea
687                | TokenType::Json
688                | TokenType::Jsonb
689                | TokenType::Uuid
690                | TokenType::Array
691                | TokenType::Map
692                | TokenType::Struct
693                | TokenType::Offset
694                | TokenType::Limit
695                | TokenType::Default
696                | TokenType::Begin
697                | TokenType::Recursive
698                | TokenType::Ignore
699                | TokenType::Pivot
700                | TokenType::Unpivot
701                | TokenType::Rows
702                | TokenType::Range
703                | TokenType::Values
704        ) {
705            let t = self.advance().clone();
706            let qs = quote_style_from_char(t.quote_char);
707            Ok((t.value.clone(), qs))
708        } else {
709            Err(SqlglotError::ParserError {
710                message: format!(
711                    "Expected identifier, got {:?} ('{}') at line {} col {}",
712                    token.token_type, token.value, token.line, token.col
713                ),
714            })
715        }
716    }
717
718    // ── Top-level parsing ──────────────────────────────────────────
719
720    /// Parse a single SQL statement.
721    pub fn parse_statement(&mut self) -> Result<Statement> {
722        self.collect_comments();
723        let stmt = self.parse_statement_inner()?;
724        // ClickHouse trailing `WITH TOTALS` / `WITH TIES` / `WITH ROLLUP` /
725        // `WITH CUBE` postfix at the end of a SELECT — these are query-level
726        // modifiers we don't model; swallow them so the statement closes.
727        if matches!(self.peek_type(), TokenType::With) {
728            let after = self.peek_offset(1);
729            let is_postfix_modifier = after
730                .map(|t| {
731                    matches!(t.token_type, TokenType::Identifier | TokenType::Cube | TokenType::Rollup)
732                        && matches!(
733                            t.value.to_uppercase().as_str(),
734                            "TOTALS" | "TIES" | "FILL" | "ROLLUP" | "CUBE"
735                        )
736                })
737                .unwrap_or(false);
738            if is_postfix_modifier {
739                self.advance();
740                self.advance();
741                // Swallow any chained option words up to `;`/EOF/FORMAT/SETTINGS.
742                while !matches!(
743                    self.peek_type(),
744                    TokenType::Semicolon | TokenType::Eof
745                ) {
746                    if self.is_name_token()
747                        && matches!(
748                            self.peek().value.to_uppercase().as_str(),
749                            "SETTINGS" | "FORMAT"
750                        )
751                    {
752                        break;
753                    }
754                    self.advance();
755                }
756            }
757        }
758        // ClickHouse trailing `SETTINGS k=v, k=v` clause / `FORMAT name`
759        // (statement-level). Swallow up to the next `;` or EOF.
760        if self.is_name_token()
761            && matches!(
762                self.peek().value.to_uppercase().as_str(),
763                "SETTINGS" | "FORMAT"
764            )
765        {
766            while !matches!(self.peek_type(), TokenType::Semicolon | TokenType::Eof) {
767                self.advance();
768            }
769        }
770        // BigQuery pipe-syntax: `<query> |> WHERE … |> AGGREGATE … |> …`.
771        // The `|>` operator chains query stages. We don't model them; swallow
772        // the entire chain to end of statement so the leading query stands.
773        if self.peek_type() == &TokenType::BitwiseOr
774            && self.peek_offset(1).map(|t| matches!(t.token_type, TokenType::Gt)).unwrap_or(false)
775        {
776            while !matches!(self.peek_type(), TokenType::Semicolon | TokenType::Eof) {
777                self.advance();
778            }
779        }
780        // Consume trailing semicolons
781        while self.match_token(TokenType::Semicolon) {}
782        Ok(stmt)
783    }
784
785    fn parse_statement_inner(&mut self) -> Result<Statement> {
786        self.collect_comments();
787        let comments = self.take_comments();
788        // MySQL / PSM labeled block: `mylabel: BEGIN … END mylabel`.
789        // Swallow the leading `<name>:` so the block dispatches normally.
790        if self.is_name_token()
791            && matches!(
792                self.peek_offset(1).map(|t| &t.token_type),
793                Some(TokenType::Colon)
794            )
795        {
796            let saved = self.pos;
797            self.advance();
798            self.advance();
799            // Only treat as a label if a known block keyword follows;
800            // otherwise rewind so we don't misinterpret `alias: type`.
801            let is_block = matches!(
802                self.peek_type(),
803                TokenType::Begin | TokenType::If | TokenType::Case
804            ) || self.check_keyword("WHILE")
805                || self.check_keyword("LOOP")
806                || self.check_keyword("FOR")
807                || self.check_keyword("REPEAT");
808            if !is_block {
809                self.pos = saved;
810            }
811        }
812        let mut stmt = match self.peek_type() {
813            TokenType::With => self.parse_with_statement(),
814            TokenType::Select => {
815                let select = self.parse_select_body(vec![])?;
816                self.maybe_parse_set_operation(Statement::Select(select))
817            }
818            TokenType::LParen => {
819                // Could be a parenthesized SELECT / VALUES / TABLE form.
820                let saved_pos = self.pos;
821                self.advance(); // consume '('
822                if matches!(
823                    self.peek_type(),
824                    TokenType::Select
825                        | TokenType::With
826                        | TokenType::From
827                        | TokenType::Values
828                        | TokenType::Table
829                        | TokenType::LParen
830                ) {
831                    let inner = self.parse_statement_inner()?;
832                    self.expect(TokenType::RParen)?;
833                    self.maybe_parse_set_operation(inner)
834                } else {
835                    self.pos = saved_pos;
836                    Err(SqlglotError::ParserError {
837                        message: "Expected statement".into(),
838                    })
839                }
840            }
841            TokenType::Insert => self.parse_insert().map(Statement::Insert),
842            TokenType::Replace => self.parse_insert().map(Statement::Insert),
843            TokenType::Update => self.parse_update().map(Statement::Update),
844            TokenType::Delete => self.parse_delete().map(Statement::Delete),
845            TokenType::Merge => self.parse_merge().map(Statement::Merge),
846            TokenType::Create => self.parse_create_or_command(),
847            TokenType::Drop => self.parse_drop(),
848            TokenType::Alter => self.parse_alter_or_command(),
849            TokenType::Truncate => {
850                let saved = self.pos;
851                match self.parse_truncate() {
852                    Ok(t) => {
853                        // Tolerate Oracle-flavored trailing modifiers on
854                        // TRUNCATE (PURGE, DROP STORAGE, REUSE STORAGE,
855                        // KEEP …, CASCADE, etc.) by swallowing all trailing
856                        // tokens up to the statement boundary.
857                        while !matches!(
858                            self.peek_type(),
859                            TokenType::Eof | TokenType::Semicolon
860                        ) {
861                            self.advance();
862                        }
863                        Ok(Statement::Truncate(t))
864                    }
865                    Err(_) => {
866                        self.pos = saved;
867                        self.parse_command_kind("TRUNCATE")
868                    }
869                }
870            }
871            TokenType::Begin | TokenType::Commit | TokenType::Rollback | TokenType::Savepoint => {
872                // PL/pgSQL / MySQL stored-procedure block: `BEGIN <stmt> …
873                // END`. If `BEGIN` is followed by anything that isn't an
874                // obvious transaction modifier, capture the whole block as
875                // a command so the surrounding parse completes.
876                if matches!(self.peek_type(), TokenType::Begin) {
877                    let next = self.peek_offset(1).map(|t| &t.token_type);
878                    let is_psm_block = matches!(
879                        next,
880                        Some(TokenType::Identifier)
881                            | Some(TokenType::If)
882                            | Some(TokenType::Case)
883                            | Some(TokenType::Select)
884                            | Some(TokenType::Insert)
885                            | Some(TokenType::Update)
886                            | Some(TokenType::Delete)
887                    );
888                    if is_psm_block {
889                        return self.parse_command_kind("BEGIN");
890                    }
891                }
892                self.parse_transaction().map(Statement::Transaction)
893            }
894            TokenType::Explain => self.parse_explain().map(Statement::Explain),
895            TokenType::Use => self.parse_use().map(Statement::Use),
896            // Raw-tail command statements: SET / SHOW / DESCRIBE / ANALYZE
897            // (when standalone, not as part of EXPLAIN) / COMMENT ON ... .
898            // We preserve the verb plus the entire remainder up to `;` or EOF
899            // so the AST round-trips even though we don't model these in detail.
900            TokenType::Set => self.parse_command_kind("SET"),
901            TokenType::Show => self.parse_command_kind("SHOW"),
902            TokenType::Describe => self.parse_command_kind("DESCRIBE"),
903            // `DESC <name>` is a Hive/MySQL synonym for DESCRIBE. The lone
904            // `Desc` token also appears mid-statement (ORDER BY x DESC), so
905            // we only treat it as a statement when at the very start.
906            TokenType::Desc => self.parse_command_kind("DESC"),
907            // Hive multi-insert: `FROM tbl INSERT OVERWRITE TABLE x SELECT ...`
908            // [INSERT OVERWRITE TABLE y SELECT ...]+. Capture the whole thing
909            // as a raw command body so it round-trips.
910            TokenType::From => {
911                // Hive `FROM tbl INSERT OVERWRITE TABLE x …` / `FROM tbl
912                // SELECT cols`. DuckDB implicit SELECT: `FROM tbl …`. Try
913                // the structured DuckDB FROM-first parse only when there is
914                // no INSERT/SELECT marker at the top paren level; otherwise
915                // capture as a raw command so it round-trips. Fall back to
916                // command capture on parse failure as well.
917                let mut i = self.pos + 1;
918                let mut depth = 0i32;
919                let mut hive = false;
920                while i < self.tokens.len() {
921                    match &self.tokens[i].token_type {
922                        TokenType::Eof | TokenType::Semicolon => break,
923                        TokenType::LParen => depth += 1,
924                        TokenType::RParen => {
925                            if depth == 0 { break; }
926                            depth -= 1;
927                        }
928                        TokenType::Insert | TokenType::Select if depth == 0 => {
929                            hive = true;
930                            break;
931                        }
932                        _ => {}
933                    }
934                    i += 1;
935                }
936                if hive {
937                    self.parse_command_kind("FROM")
938                } else {
939                    let saved_from = self.pos;
940                    match self.parse_select_body(vec![]) {
941                        Ok(select) => self
942                            .maybe_parse_set_operation(Statement::Select(select)),
943                        Err(_) => {
944                            self.pos = saved_from;
945                            self.parse_command_kind("FROM")
946                        }
947                    }
948                }
949            }
950            TokenType::Analyze => self.parse_command_kind("ANALYZE"),
951            TokenType::Check => self.parse_command_kind("CHECK"),
952            TokenType::Comment => self.parse_comment_on_command(),
953            TokenType::Grant => self.parse_command_kind("GRANT"),
954            TokenType::Revoke => self.parse_command_kind("REVOKE"),
955            // Procedural / control-flow statements (Spark, MySQL stored
956            // procs, PL/SQL, T-SQL): IF / FOR / WHILE / LOOP / CASE blocks
957            // and the matching ELSE / END / WHEN tokens at statement start.
958            // Capture verbatim so the AST round-trips.
959            TokenType::If => self.parse_command_kind("IF"),
960            TokenType::Else => self.parse_command_kind("ELSE"),
961            TokenType::End => self.parse_command_kind("END"),
962            TokenType::Case => self.parse_command_kind("CASE"),
963            TokenType::When => self.parse_command_kind("WHEN"),
964            TokenType::Then => self.parse_command_kind("THEN"),
965            TokenType::Do => self.parse_command_kind("DO"),
966            // Spark: `TABLE name` and `TABLE name |> …` are SELECT-equivalent
967            // shorthand. Capture verbatim so the AST round-trips.
968            TokenType::Table => self.parse_command_kind("TABLE"),
969            TokenType::Values => self.parse_command_kind("VALUES"),
970            // DuckDB SQL-shorthand: `PIVOT tbl ON col USING agg(...)` and
971            // `UNPIVOT tbl ON col INTO ...`. Preserve verbatim.
972            TokenType::Pivot => self.parse_command_kind("PIVOT"),
973            TokenType::Unpivot => self.parse_command_kind("UNPIVOT"),
974            // PG cursor verbs: FETCH, MOVE, CLOSE.
975            TokenType::Fetch => self.parse_command_kind("FETCH"),
976            // Vendor-specific verbs that tokenize as plain identifiers:
977            //   GO (T-SQL batch separator), DECLARE (T-SQL/PL-pgSQL),
978            //   LOAD (PG / MySQL extensions), REM / REMARK (SQL*Plus),
979            //   RESET / PRAGMA / VACUUM / REINDEX (PG / SQLite), CALL (PSM).
980            TokenType::Identifier if self.match_command_keyword() => {
981                self.parse_command_from_identifier()
982            }
983            // PL/pgSQL / MySQL stored-procedure assignment `var := expr` or
984            // `var = expr` at statement position. Preserve verbatim.
985            TokenType::Identifier
986                if matches!(
987                    self.peek_offset(1).map(|t| &t.token_type),
988                    Some(TokenType::Colon)
989                ) && matches!(
990                    self.peek_offset(2).map(|t| &t.token_type),
991                    Some(TokenType::Eq)
992                ) =>
993            {
994                self.parse_command_kind("ASSIGN")
995            }
996            // PL/SQL / PL/pgSQL variable declaration at top level:
997            //   `name TYPE [:= default]`. Some corpora split DECLARE blocks
998            //   into individual lines; treat these as opaque commands.
999            //   Heuristic: <identifier> followed by either a data-type
1000            //   token, or an identifier that looks type-like (uppercase
1001            //   keyword such as NUMBER/VARCHAR2/BOOLEAN/PLS_INTEGER/etc.).
1002            TokenType::Identifier
1003                if self
1004                    .peek_offset(1)
1005                    .map(|t| {
1006                        self.is_data_type_token_kind(&t.token_type)
1007                            || (matches!(t.token_type, TokenType::Identifier)
1008                                && matches!(
1009                                    t.value.to_uppercase().as_str(),
1010                                    "NUMBER"
1011                                        | "VARCHAR2"
1012                                        | "NVARCHAR2"
1013                                        | "PLS_INTEGER"
1014                                        | "BINARY_INTEGER"
1015                                        | "ROWID"
1016                                        | "UROWID"
1017                                        | "CLOB"
1018                                        | "NCLOB"
1019                                        | "BFILE"
1020                                        | "LONG"
1021                                        | "RAW"
1022                                        | "XMLTYPE"
1023                                        | "RECORD"
1024                                ))
1025                            || matches!(t.token_type, TokenType::Percent | TokenType::Percent2)
1026                    })
1027                    .unwrap_or(false)
1028                    && self
1029                        .peek_offset(2)
1030                        .map(|t| {
1031                            // Confirm declaration shape: trailing `:=`,
1032                            // `%TYPE`/`%ROWTYPE`, semicolon, EOF, or
1033                            // `(precision)` parenthesised type modifier.
1034                            matches!(
1035                                t.token_type,
1036                                TokenType::Colon
1037                                    | TokenType::Semicolon
1038                                    | TokenType::Eof
1039                                    | TokenType::Percent
1040                                    | TokenType::Percent2
1041                                    | TokenType::LParen
1042                            ) || matches!(
1043                                t.token_type,
1044                                TokenType::Identifier
1045                            ) && matches!(
1046                                t.value.to_uppercase().as_str(),
1047                                "NOT" | "DEFAULT" | "CONSTANT"
1048                            )
1049                        })
1050                        .unwrap_or(true) =>
1051            {
1052                self.parse_command_kind("PLSQL_DECL")
1053            }
1054            _ => Err(SqlglotError::UnexpectedToken {
1055                token: self.peek().clone(),
1056            }),
1057        }?;
1058        if !comments.is_empty() {
1059            attach_comments_to_statement(&mut stmt, comments);
1060        }
1061        Ok(stmt)
1062    }
1063
1064    /// Parse multiple statements separated by semicolons.
1065    pub fn parse_statements(&mut self) -> Result<Vec<Statement>> {
1066        let mut stmts = Vec::new();
1067        while !matches!(self.peek_type(), TokenType::Eof) {
1068            while self.match_token(TokenType::Semicolon) {}
1069            if matches!(self.peek_type(), TokenType::Eof) {
1070                break;
1071            }
1072            stmts.push(self.parse_statement()?);
1073            // ClickHouse trailing `FORMAT <name>` after a statement is a
1074            // client-side output directive, not part of the AST. Swallow
1075            // it (and any whitespace-separated payload up to the next
1076            // semicolon / EOF) so the statement still parses.
1077            if self.peek().value.eq_ignore_ascii_case("FORMAT") {
1078                let saved = self.pos;
1079                self.advance();
1080                if self.is_name_token() {
1081                    self.advance();
1082                    while !matches!(
1083                        self.peek_type(),
1084                        TokenType::Eof | TokenType::Semicolon
1085                    ) {
1086                        self.advance();
1087                    }
1088                } else {
1089                    self.pos = saved;
1090                }
1091            }
1092        }
1093        Ok(stmts)
1094    }
1095
1096    // ── WITH / CTE parsing ─────────────────────────────────────────
1097
1098    fn parse_with_statement(&mut self) -> Result<Statement> {
1099        self.expect(TokenType::With)?;
1100        let recursive = self.match_token(TokenType::Recursive);
1101
1102        // T-SQL `WITH XMLNAMESPACES ('uri' AS prefix [, ...]) <stmt>`. The
1103        // XML namespaces are not modeled in the AST; swallow the keyword
1104        // and its parenthesized binding list opaquely so the surrounding
1105        // SELECT / INSERT / UPDATE / DELETE / MERGE parses cleanly.
1106        if self.is_name_token() && self.peek().value.eq_ignore_ascii_case("XMLNAMESPACES") {
1107            self.advance(); // XMLNAMESPACES
1108            if self.match_token(TokenType::LParen) {
1109                let mut depth = 1_i32;
1110                while depth > 0 && !matches!(self.peek_type(), TokenType::Eof) {
1111                    match self.peek_type() {
1112                        TokenType::LParen => depth += 1,
1113                        TokenType::RParen => depth -= 1,
1114                        _ => {}
1115                    }
1116                    self.advance();
1117                }
1118            }
1119            return self.parse_with_body(vec![]);
1120        }
1121
1122        // ClickHouse scalar-binding form: `WITH (expr) AS name [, ...] SELECT …`
1123        // (and the symmetric `WITH expr AS name`). Detect by peeking for a
1124        // `<expr> AS <name>` pattern rather than the canonical `<name> AS
1125        // (select …)`. We swallow these bindings — they aren't modeled as
1126        // CTEs — then fall through to the main query.
1127        if self.is_clickhouse_scalar_with() {
1128            loop {
1129                let _ = self.parse_expr()?;
1130                self.expect(TokenType::As)?;
1131                // The binding name may use a data-type keyword (`Uuid`,
1132                // `Text`, etc.) — accept any single token that isn't a
1133                // structural delimiter so the loop advances.
1134                if self.is_name_token() || self.is_data_type_token() {
1135                    self.advance();
1136                } else if !matches!(
1137                    self.peek_type(),
1138                    TokenType::Comma | TokenType::Eof | TokenType::Semicolon
1139                        | TokenType::Select | TokenType::Insert
1140                        | TokenType::Update | TokenType::Delete | TokenType::Merge
1141                ) {
1142                    self.advance();
1143                }
1144                if !self.match_token(TokenType::Comma) {
1145                    break;
1146                }
1147                // The next binding might still be `name AS (select …)`; if so,
1148                // fall back to the canonical CTE parser for the remainder.
1149                if !self.is_clickhouse_scalar_with() {
1150                    let mut ctes = vec![self.parse_cte(recursive)?];
1151                    while self.match_token(TokenType::Comma) {
1152                        ctes.push(self.parse_cte(recursive)?);
1153                    }
1154                    return self.parse_with_body(ctes);
1155                }
1156            }
1157            return self.parse_with_body(vec![]);
1158        }
1159
1160        let mut ctes = vec![self.parse_cte(recursive)?];
1161        while self.match_token(TokenType::Comma) {
1162            ctes.push(self.parse_cte(recursive)?);
1163        }
1164        // PostgreSQL recursive-query SEARCH / CYCLE clauses appear between
1165        // the last CTE and the main query body. Swallow them opaquely.
1166        // Forms:
1167        //   SEARCH { DEPTH | BREADTH } FIRST BY <col_list> SET <col>
1168        //   CYCLE <col_list> SET <col> [TO <val> DEFAULT <val>] USING <col>
1169        loop {
1170            let saved = self.pos;
1171            if self.match_keyword("SEARCH") {
1172                let _ = self.match_keyword("DEPTH") || self.match_keyword("BREADTH");
1173                let _ = self.match_keyword("FIRST");
1174                let _ = self.match_token(TokenType::By);
1175                // Swallow tokens until SET or end-of-search clause.
1176                while !matches!(
1177                    self.peek_type(),
1178                    TokenType::Eof | TokenType::Semicolon
1179                ) && !self.check_keyword("SET")
1180                {
1181                    self.advance();
1182                }
1183                if self.match_keyword("SET") {
1184                    let _ = self.is_name_token() && {
1185                        self.advance();
1186                        true
1187                    };
1188                }
1189                continue;
1190            }
1191            if self.check_keyword("CYCLE") {
1192                self.advance();
1193                while !matches!(
1194                    self.peek_type(),
1195                    TokenType::Select
1196                        | TokenType::Insert
1197                        | TokenType::Update
1198                        | TokenType::Delete
1199                        | TokenType::Merge
1200                        | TokenType::With
1201                        | TokenType::Eof
1202                        | TokenType::Semicolon
1203                ) {
1204                    self.advance();
1205                }
1206                continue;
1207            }
1208            self.pos = saved;
1209            break;
1210        }
1211        self.parse_with_body(ctes)
1212    }
1213
1214    /// Returns true if the current token sequence looks like a ClickHouse
1215    /// scalar `WITH expr AS name` rather than a canonical `name AS (select …)`
1216    /// CTE binding. Used by [`parse_with_statement`] to switch parsing modes.
1217    fn is_clickhouse_scalar_with(&self) -> bool {
1218        // Canonical CTE binding starts with `<name>` then either `(` (column
1219        // list) or `AS`. Anything else — a parenthesized expression, a number,
1220        // a string, a function call, an operator — must be the scalar form.
1221        match self.peek_type() {
1222            TokenType::LParen => true,
1223            TokenType::LBracket => true,
1224            TokenType::Number | TokenType::String | TokenType::HexString => true,
1225            t if matches!(t, TokenType::Minus | TokenType::Plus) => true,
1226            _ => {
1227                // Plain identifier followed by anything other than `(` or `AS`
1228                // also indicates the scalar form (e.g. `WITH x + 1 AS y`).
1229                if self.is_name_token() {
1230                    let next = self.peek_offset(1).map(|t| &t.token_type);
1231                    match next {
1232                        Some(TokenType::LParen) => {
1233                            // `name(...)` is canonical column-list form only
1234                            // if the body is a `name [, name]*` followed by
1235                            // `) AS`. Otherwise (function call like
1236                            // `arrayJoin([...])`) it's the scalar form.
1237                            !self.parens_are_name_list_then_as(1)
1238                        }
1239                        Some(TokenType::As) => false,
1240                        _ => true,
1241                    }
1242                } else {
1243                    false
1244                }
1245            }
1246        }
1247    }
1248
1249    /// Starting at `tokens[self.pos + offset]` (which must be `(`), check
1250    /// whether the body is a comma-separated identifier list followed by
1251    /// `)` and then `AS` — the shape of a CTE column-list binding.
1252    fn parens_are_name_list_then_as(&self, offset: usize) -> bool {
1253        let mut i = self.pos + offset;
1254        if self.tokens.get(i).map(|t| &t.token_type) != Some(&TokenType::LParen) {
1255            return false;
1256        }
1257        i += 1;
1258        loop {
1259            // Accept any name-like token in the column list, not just plain
1260            // identifiers — DuckDB CTEs frequently use unreserved keywords
1261            // like `key`, `value`, `order`, `range` as column names.
1262            let is_name_like = matches!(
1263                self.tokens.get(i).map(|t| &t.token_type),
1264                Some(TokenType::Identifier)
1265                    | Some(TokenType::Key)
1266                    | Some(TokenType::Year) | Some(TokenType::Month) | Some(TokenType::Day)
1267                    | Some(TokenType::Hour) | Some(TokenType::Minute) | Some(TokenType::Second)
1268                    | Some(TokenType::Filter) | Some(TokenType::First) | Some(TokenType::Next)
1269                    | Some(TokenType::Only) | Some(TokenType::Schema) | Some(TokenType::Database)
1270                    | Some(TokenType::View) | Some(TokenType::Collate) | Some(TokenType::Comment)
1271                    | Some(TokenType::Replace) | Some(TokenType::Text) | Some(TokenType::Show)
1272                    | Some(TokenType::Describe) | Some(TokenType::Analyze) | Some(TokenType::Index)
1273                    | Some(TokenType::Cast) | Some(TokenType::Group) | Some(TokenType::Order)
1274                    | Some(TokenType::Range) | Some(TokenType::Partition) | Some(TokenType::Rows)
1275                    | Some(TokenType::Table) | Some(TokenType::Offset) | Some(TokenType::Temp)
1276                    | Some(TokenType::Temporary) | Some(TokenType::Nulls) | Some(TokenType::Conflict)
1277                    | Some(TokenType::Unnest) | Some(TokenType::Explain) | Some(TokenType::Merge)
1278                    | Some(TokenType::Any) | Some(TokenType::Escape)
1279            );
1280            if is_name_like {
1281                i += 1;
1282            } else {
1283                return false;
1284            }
1285            match self.tokens.get(i).map(|t| &t.token_type) {
1286                Some(TokenType::Comma) => i += 1,
1287                Some(TokenType::RParen) => {
1288                    i += 1;
1289                    // DuckDB recursive cycle clause: `(cols) USING KEY (...)
1290                    // AS (...)`. Treat the cycle keyword as a sign this is a
1291                    // canonical CTE binding, not a ClickHouse scalar.
1292                    if self.tokens.get(i).map(|t| t.value.to_uppercase())
1293                        == Some("USING".to_string())
1294                    {
1295                        return true;
1296                    }
1297                    if self.tokens.get(i).map(|t| &t.token_type)
1298                        != Some(&TokenType::As)
1299                    {
1300                        return false;
1301                    }
1302                    // Canonical form requires the body after `AS` to be
1303                    // a parenthesized SELECT (or `[NOT] MATERIALIZED (…)`
1304                    // for DuckDB / PostgreSQL). If it isn't, this is the
1305                    // ClickHouse scalar form.
1306                    i += 1;
1307                    let after_as = self.tokens.get(i).map(|t| &t.token_type);
1308                    if after_as == Some(&TokenType::LParen) {
1309                        return true;
1310                    }
1311                    let after_as_value = self.tokens.get(i).map(|t| t.value.as_str());
1312                    if matches!(
1313                        after_as_value,
1314                        Some(v) if v.eq_ignore_ascii_case("MATERIALIZED")
1315                            || v.eq_ignore_ascii_case("NOT")
1316                    ) {
1317                        return true;
1318                    }
1319                    return false;
1320                }
1321                _ => return false,
1322            }
1323        }
1324    }
1325
1326    fn parse_with_body(&mut self, ctes: Vec<Cte>) -> Result<Statement> {
1327        match self.peek_type() {
1328            TokenType::Select => {
1329                let select = self.parse_select_body(ctes)?;
1330                self.maybe_parse_set_operation(Statement::Select(select))
1331            }
1332            // DuckDB `WITH x AS (...) FROM tbl SELECT cols` (FROM-first form).
1333            // We rely on parse_select_body's existing FROM-first tolerance.
1334            TokenType::From => {
1335                let select = self.parse_select_body(ctes)?;
1336                self.maybe_parse_set_operation(Statement::Select(select))
1337            }
1338            // PostgreSQL / DuckDB `WITH x AS (...) TABLE tbl` body — equivalent
1339            // to `SELECT * FROM tbl`. Swallow the table reference and trailing
1340            // clauses opaquely and emit a stub Select so the surrounding
1341            // statement parses cleanly.
1342            // DuckDB / PostgreSQL `TABLE tbl` as the body of a WITH query —
1343            // shorthand for `SELECT * FROM tbl`. Swallow the trailing tokens
1344            // opaquely and emit a stub Select so the surrounding parse runs.
1345            TokenType::Table => {
1346                self.advance();
1347                while !matches!(self.peek_type(), TokenType::Eof | TokenType::Semicolon) {
1348                    self.advance();
1349                }
1350                let select = SelectStatement {
1351                    comments: vec![],
1352                    ctes,
1353                    distinct: false,
1354                    top: None,
1355                    columns: vec![SelectItem::Wildcard],
1356                    from: None,
1357                    joins: vec![],
1358                    where_clause: None,
1359                    group_by: vec![],
1360                    having: None,
1361                    order_by: vec![],
1362                    limit: None,
1363                    offset: None,
1364                    fetch_first: None,
1365                    qualify: None,
1366                    window_definitions: vec![],
1367                };
1368                Ok(Statement::Select(select))
1369            }
1370            TokenType::Insert => {
1371                let ins = self.parse_insert()?;
1372                let _ = ctes;
1373                Ok(Statement::Insert(ins))
1374            }
1375            TokenType::Update => {
1376                let upd = self.parse_update()?;
1377                let _ = ctes;
1378                Ok(Statement::Update(upd))
1379            }
1380            TokenType::Delete => {
1381                let del = self.parse_delete()?;
1382                let _ = ctes;
1383                Ok(Statement::Delete(del))
1384            }
1385            TokenType::Merge => {
1386                let mrg = self.parse_merge()?;
1387                let _ = ctes;
1388                Ok(Statement::Merge(mrg))
1389            }
1390            _ => Err(SqlglotError::ParserError {
1391                message: "Expected SELECT or INSERT after WITH clause".into(),
1392            }),
1393        }
1394    }
1395
1396    fn parse_cte(&mut self, recursive: bool) -> Result<Cte> {
1397        let (name, name_quote_style) = self.expect_name_with_quote()?;
1398
1399        let columns = if self.match_token(TokenType::LParen) {
1400            let mut cols = vec![self.expect_name()?];
1401            while self.match_token(TokenType::Comma) {
1402                cols.push(self.expect_name()?);
1403            }
1404            self.expect(TokenType::RParen)?;
1405            cols
1406        } else {
1407            vec![]
1408        };
1409
1410        // DuckDB recursive CTE cycle clause:
1411        //   `WITH RECURSIVE tbl(a, b) USING KEY (a, max(b)) AS (...)`.
1412        // Swallow `USING KEY (...)` opaquely so the surrounding parse runs.
1413        if self.check_keyword("USING") {
1414            let saved = self.pos;
1415            self.advance();
1416            if self.check_keyword("KEY") {
1417                self.advance();
1418                if self.match_token(TokenType::LParen) {
1419                    let mut depth = 1_i32;
1420                    while depth > 0 && !matches!(self.peek_type(), TokenType::Eof) {
1421                        match self.peek_type() {
1422                            TokenType::LParen => depth += 1,
1423                            TokenType::RParen => depth -= 1,
1424                            _ => {}
1425                        }
1426                        self.advance();
1427                    }
1428                }
1429            } else {
1430                self.pos = saved;
1431            }
1432        }
1433
1434        self.expect(TokenType::As)?;
1435        let materialized = if self.match_keyword("MATERIALIZED") {
1436            Some(true)
1437        } else if self.check_keyword("NOT") {
1438            let saved = self.pos;
1439            self.advance();
1440            if self.match_keyword("MATERIALIZED") {
1441                Some(false)
1442            } else {
1443                self.pos = saved;
1444                None
1445            }
1446        } else {
1447            None
1448        };
1449
1450        self.expect(TokenType::LParen)?;
1451        let query = self.parse_statement_inner()?;
1452        self.expect(TokenType::RParen)?;
1453
1454        Ok(Cte {
1455            name,
1456            name_quote_style,
1457            columns,
1458            query: Box::new(query),
1459            materialized,
1460            recursive,
1461        })
1462    }
1463
1464    // ── SELECT ──────────────────────────────────────────────────────
1465
1466    fn parse_select_body(&mut self, ctes: Vec<Cte>) -> Result<SelectStatement> {
1467        // DuckDB allows starting a query with `FROM ...` and implies
1468        // `SELECT *`. Detect that and synthesise the wildcard projection.
1469        let from_first = !matches!(self.peek_type(), TokenType::Select)
1470            && matches!(self.peek_type(), TokenType::From);
1471        if !from_first {
1472            self.expect(TokenType::Select)?;
1473        }
1474
1475        // MySQL `SELECT` modifiers (between SELECT and the column list):
1476        // DISTINCTROW (alias of DISTINCT), HIGH_PRIORITY, STRAIGHT_JOIN,
1477        // SQL_SMALL_RESULT, SQL_BIG_RESULT, SQL_BUFFER_RESULT, SQL_CACHE /
1478        // SQL_NO_CACHE, SQL_CALC_FOUND_ROWS. Swallow any number of these.
1479        let mut distinctrow = false;
1480        loop {
1481            if self.is_name_token() {
1482                let v = self.peek().value.to_uppercase();
1483                if matches!(
1484                    v.as_str(),
1485                    "DISTINCTROW"
1486                        | "HIGH_PRIORITY"
1487                        | "STRAIGHT_JOIN"
1488                        | "SQL_SMALL_RESULT"
1489                        | "SQL_BIG_RESULT"
1490                        | "SQL_BUFFER_RESULT"
1491                        | "SQL_CACHE"
1492                        | "SQL_NO_CACHE"
1493                        | "SQL_CALC_FOUND_ROWS"
1494                ) {
1495                    if v == "DISTINCTROW" {
1496                        distinctrow = true;
1497                    }
1498                    self.advance();
1499                    continue;
1500                }
1501            }
1502            break;
1503        }
1504        let distinct = distinctrow || self.match_token(TokenType::Distinct);
1505        // PostgreSQL / DuckDB `DISTINCT ON (expr, ...)` — swallow the column
1506        // list so the surrounding query parses. We don't model DISTINCT ON in
1507        // the AST; treat it as plain DISTINCT.
1508        if distinct && self.match_token(TokenType::On) {
1509            self.expect(TokenType::LParen)?;
1510            let mut depth = 1;
1511            while depth > 0 {
1512                match self.peek_type() {
1513                    TokenType::LParen => depth += 1,
1514                    TokenType::RParen => {
1515                        depth -= 1;
1516                        if depth == 0 {
1517                            self.advance();
1518                            break;
1519                        }
1520                    }
1521                    TokenType::Eof => break,
1522                    _ => {}
1523                }
1524                self.advance();
1525            }
1526        }
1527        // SQL-standard `SELECT ALL` quantifier (§7.12). Equivalent to omitting
1528        // the quantifier; consume it so it does not get mis-parsed as a column.
1529        if !distinct {
1530            let _ = self.match_token(TokenType::All);
1531        }
1532
1533        // BigQuery `SELECT [DISTINCT] AS STRUCT|VALUE …` — type-tag for the
1534        // implicit row constructor. We don't model it; swallow the prefix.
1535        if self.peek_type() == &TokenType::As {
1536            let v = self
1537                .peek_offset(1)
1538                .map(|t| t.value.to_uppercase())
1539                .unwrap_or_default();
1540            if matches!(v.as_str(), "STRUCT" | "VALUE") {
1541                self.advance(); // AS
1542                self.advance(); // STRUCT|VALUE
1543            }
1544        }
1545
1546        // TOP N (SQL Server style)
1547        // Use parse_primary() instead of parse_expr() to prevent the parser
1548        // from consuming `*` (SELECT all columns) as a multiplication operator.
1549        // This correctly handles: TOP 5, TOP 100, TOP (expr), TOP (@var)
1550        let top = if self.match_token(TokenType::Top) {
1551            Some(Box::new(self.parse_primary()?))
1552        } else {
1553            None
1554        };
1555
1556        let columns = if from_first {
1557            vec![SelectItem::Wildcard]
1558        } else {
1559            self.parse_select_items()?
1560        };
1561
1562        let from = if self.match_token(TokenType::From) {
1563            Some(FromClause {
1564                source: self.parse_table_source()?,
1565            })
1566        } else {
1567            None
1568        };
1569
1570        let joins = self.parse_joins()?;
1571
1572        // ClickHouse `PREWHERE expr` hint clause (sits between FROM/joins and
1573        // WHERE). Parsed as a regular boolean expression and folded into the
1574        // WHERE clause via `AND` so the AST stays simple.
1575        let prewhere = if self.check_keyword("PREWHERE") {
1576            self.advance();
1577            Some(self.parse_expr()?)
1578        } else {
1579            None
1580        };
1581
1582        let where_clause = if self.match_token(TokenType::Where) {
1583            let e = self.parse_expr()?;
1584            // ClickHouse: `WHERE (expr) AS alias` — alias-binds the
1585            // predicate. Swallow the AS-alias tail; we don't model it.
1586            if self.match_token(TokenType::As) && self.is_name_token() {
1587                self.advance();
1588            }
1589            Some(e)
1590        } else {
1591            None
1592        };
1593
1594        let where_clause = match (prewhere, where_clause) {
1595            (Some(pw), Some(w)) => Some(Expr::BinaryOp {
1596                left: Box::new(pw),
1597                op: BinaryOperator::And,
1598                right: Box::new(w),
1599            }),
1600            (Some(pw), None) => Some(pw),
1601            (None, w) => w,
1602        };
1603
1604        // Teradata `PREFERRING <expr> [PARTITION BY <list>]` skyline clause.
1605        // Sits between WHERE and GROUP BY. Swallow opaquely up to a known
1606        // terminator so the surrounding query parses.
1607        if self.check_keyword("PREFERRING") {
1608            self.advance();
1609            loop {
1610                match self.peek_type() {
1611                    TokenType::Eof
1612                    | TokenType::Semicolon
1613                    | TokenType::Group
1614                    | TokenType::Order
1615                    | TokenType::Having
1616                    | TokenType::Qualify
1617                    | TokenType::Limit
1618                    | TokenType::Union
1619                    | TokenType::Intersect
1620                    | TokenType::Except
1621                    | TokenType::RParen => break,
1622                    _ => {}
1623                }
1624                self.advance();
1625            }
1626        }
1627
1628        let group_by = if self.match_token(TokenType::Group) {
1629            self.expect(TokenType::By)?;
1630            let items = self.parse_group_by_list()?;
1631            // ClickHouse / MySQL `GROUP BY ... WITH ROLLUP|CUBE|TOTALS` —
1632            // swallow the modifier; we don't model it in the AST.
1633            if self.match_token(TokenType::With) {
1634                let _ = self.match_token(TokenType::Rollup)
1635                    || self.match_token(TokenType::Cube)
1636                    || self.match_keyword("TOTALS");
1637            }
1638            // Hive / Spark `GROUP BY k1, k2 GROUPING SETS ((k1), (k2))` —
1639            // swallow the trailing parenthesized list.
1640            if self.match_token(TokenType::Grouping) {
1641                if self.check_keyword("SETS") {
1642                    self.advance();
1643                }
1644                if self.match_token(TokenType::LParen) {
1645                    let mut depth = 1;
1646                    while depth > 0 {
1647                        match self.peek_type() {
1648                            TokenType::LParen => depth += 1,
1649                            TokenType::RParen => {
1650                                depth -= 1;
1651                                if depth == 0 {
1652                                    self.advance();
1653                                    break;
1654                                }
1655                            }
1656                            TokenType::Eof => break,
1657                            _ => {}
1658                        }
1659                        self.advance();
1660                    }
1661                }
1662            }
1663            items
1664        } else {
1665            vec![]
1666        };
1667
1668        let having = if self.match_token(TokenType::Having) {
1669            let expr = self.parse_expr()?;
1670            // ClickHouse corpora occasionally include a trailing alias after
1671            // HAVING expression text (`HAVING cond AS x`). Swallow alias so it
1672            // doesn't leak as an unexpected token.
1673            if self.match_token(TokenType::As) && self.is_name_token() {
1674                self.advance();
1675            }
1676            Some(expr)
1677        } else {
1678            None
1679        };
1680
1681        let qualify = if self.match_token(TokenType::Qualify) {
1682            Some(self.parse_expr()?)
1683        } else {
1684            None
1685        };
1686
1687        // Named WINDOW definitions
1688        let window_definitions = if self.match_token(TokenType::Window) {
1689            self.parse_window_definitions()?
1690        } else {
1691            vec![]
1692        };
1693
1694        let order_by = if self.match_token(TokenType::Order) {
1695            self.expect(TokenType::By)?;
1696            self.parse_order_by_items()?
1697        } else {
1698            vec![]
1699        };
1700
1701        // Hive / Spark non-standard ordering clauses; behave syntactically
1702        // like ORDER BY. We parse and discard them so the surrounding query
1703        // continues to parse.
1704        loop {
1705            let is_sort = self.check_keyword("SORT");
1706            let is_distribute = self.check_keyword("DISTRIBUTE");
1707            let is_cluster = self.check_keyword("CLUSTER");
1708            if !(is_sort || is_distribute || is_cluster) {
1709                break;
1710            }
1711            let saved = self.pos;
1712            self.advance();
1713            if self.peek_type() == &TokenType::By {
1714                self.advance();
1715                let _ = self.parse_order_by_items()?;
1716            } else {
1717                self.pos = saved;
1718                break;
1719            }
1720        }
1721
1722        let (mut limit, mut offset) = if self.match_token(TokenType::Limit) {
1723            let first = self.parse_expr()?;
1724            // MySQL / ClickHouse `LIMIT offset, count` form — convert to
1725            // `LIMIT count OFFSET offset`.
1726            if self.match_token(TokenType::Comma) {
1727                let count = self.parse_expr()?;
1728                (Some(count), Some(first))
1729            } else {
1730                (Some(first), None)
1731            }
1732        } else {
1733            (None, None)
1734        };
1735
1736        // ClickHouse `LIMIT N BY col[, ...]` / `LIMIT N BY col LIMIT M` —
1737        // consume the BY-list and an optional outer LIMIT so the trailing
1738        // SETTINGS / FORMAT clauses still parse.
1739        if limit.is_some() && self.match_token(TokenType::By) {
1740            let _ = self.parse_expr_list_allow_item_alias()?;
1741            if self.match_token(TokenType::Limit) {
1742                let _ = self.parse_expr()?;
1743            }
1744        }
1745
1746        if offset.is_none() && self.match_token(TokenType::Offset) {
1747            let expr = self.parse_expr()?;
1748            // T-SQL / ANSI SQL:2008 form: OFFSET n ROWS [FETCH …].
1749            // Consume the optional ROWS/ROW keyword so FETCH can match next.
1750            let _ = self.match_token(TokenType::Rows) || self.match_keyword("ROW");
1751            offset = Some(expr);
1752        } else if offset.is_some() {
1753            // Already populated from `LIMIT a, b`; still consume an explicit
1754            // `OFFSET n` if it appears so it does not leak into the trailer.
1755            if self.match_token(TokenType::Offset) {
1756                let expr = self.parse_expr()?;
1757                let _ = self.match_token(TokenType::Rows) || self.match_keyword("ROW");
1758                offset = Some(expr);
1759            }
1760        }
1761
1762        // Trino / Presto: `OFFSET n LIMIT m` (ordering opposite to MySQL).
1763        // We've parsed OFFSET; accept a trailing LIMIT n.
1764        if limit.is_none() && self.match_token(TokenType::Limit) {
1765            limit = Some(self.parse_expr()?);
1766        }
1767
1768        // FETCH FIRST|NEXT n ROWS ONLY (Oracle / ANSI SQL:2008 / T-SQL)
1769        let fetch_first = if self.match_token(TokenType::Fetch) {
1770            // consume FIRST or NEXT
1771            let _ = self.match_token(TokenType::First) || self.match_token(TokenType::Next);
1772            let count = self.parse_expr()?;
1773            // consume ROWS or ROW
1774            let _ = self.match_keyword("ROWS") || self.match_keyword("ROW");
1775            // consume ONLY
1776            let _ = self.match_token(TokenType::Only);
1777            Some(count)
1778        } else {
1779            None
1780        };
1781
1782        // ClickHouse trailing `WITH TOTALS` / `WITH TIES` / `WITH ROLLUP` /
1783        // `WITH CUBE` / `WITH FILL` modifiers in subquery position. These
1784        // are query-level modifiers we don't model; swallow so the
1785        // surrounding `)` is reached.
1786        if matches!(self.peek_type(), TokenType::With) {
1787            let after = self.peek_offset(1);
1788            let is_postfix_modifier = after
1789                .map(|t| {
1790                    matches!(
1791                        t.token_type,
1792                        TokenType::Identifier | TokenType::Cube | TokenType::Rollup
1793                    ) && matches!(
1794                        t.value.to_uppercase().as_str(),
1795                        "TOTALS" | "TIES" | "FILL" | "ROLLUP" | "CUBE"
1796                    )
1797                })
1798                .unwrap_or(false);
1799            if is_postfix_modifier {
1800                self.advance(); // WITH
1801                self.advance(); // modifier keyword
1802            }
1803        }
1804
1805        // ClickHouse `SETTINGS k = v, ...` / `FORMAT <name>` and MySQL
1806        // `INTO OUTFILE 'file'` style trailing clauses. None of these have
1807        // a dedicated AST representation; consume to keep the surrounding
1808        // statement parseable.
1809        loop {
1810            if self.check_keyword("SETTINGS")
1811                || self.check_keyword("FORMAT")
1812                || self.check_keyword("INTO")
1813            {
1814                self.skip_trailing_options();
1815                break;
1816            }
1817            break;
1818        }
1819
1820        Ok(SelectStatement {
1821            comments: vec![],
1822            ctes,
1823            distinct,
1824            top,
1825            columns,
1826            from,
1827            joins,
1828            where_clause,
1829            group_by,
1830            having,
1831            order_by,
1832            limit,
1833            offset,
1834            fetch_first,
1835            qualify,
1836            window_definitions,
1837        })
1838    }
1839
1840    fn parse_window_definitions(&mut self) -> Result<Vec<WindowDefinition>> {
1841        let mut defs = Vec::new();
1842        loop {
1843            let name = self.expect_name()?;
1844            self.expect(TokenType::As)?;
1845            self.expect(TokenType::LParen)?;
1846            let spec = self.parse_window_spec()?;
1847            self.expect(TokenType::RParen)?;
1848            defs.push(WindowDefinition { name, spec });
1849            if !self.match_token(TokenType::Comma) {
1850                break;
1851            }
1852        }
1853        Ok(defs)
1854    }
1855
1856    /// Check if we should parse a set operation (UNION / INTERSECT / EXCEPT)
1857    fn maybe_parse_set_operation(&mut self, left: Statement) -> Result<Statement> {
1858        let op = match self.peek_type() {
1859            TokenType::Union => SetOperationType::Union,
1860            TokenType::Intersect => SetOperationType::Intersect,
1861            TokenType::Except => SetOperationType::Except,
1862            _ => {
1863                // Spark / Oracle `MINUS` as a synonym for `EXCEPT`.
1864                if self.is_name_token() && self.peek().value.eq_ignore_ascii_case("MINUS") {
1865                    self.advance();
1866                    let all = self.match_token(TokenType::All);
1867                    let _ = self.match_token(TokenType::Distinct);
1868                    let right = self.parse_statement_inner()?;
1869                    return Ok(Statement::SetOperation(SetOperationStatement {
1870                        comments: vec![],
1871                        op: SetOperationType::Except,
1872                        all,
1873                        left: Box::new(left),
1874                        right: Box::new(right),
1875                        order_by: vec![],
1876                        limit: None,
1877                        offset: None,
1878                    }));
1879                }
1880                return Ok(left);
1881            }
1882        };
1883        self.advance();
1884
1885        let all = self.match_token(TokenType::All);
1886        let _ = self.match_token(TokenType::Distinct); // UNION DISTINCT
1887
1888        // DuckDB `UNION ALL BY NAME` / `UNION BY NAME` — column-name-based
1889        // set operation. Swallow the modifier so the inner SELECT parses.
1890        if self.match_token(TokenType::By) {
1891            if self.is_name_token() && self.peek().value.eq_ignore_ascii_case("NAME") {
1892                self.advance();
1893            }
1894        }
1895
1896        let right = self.parse_statement_inner()?;
1897
1898        // Check for further set operations chaining
1899        let combined = Statement::SetOperation(SetOperationStatement {
1900            comments: vec![],
1901            op,
1902            all,
1903            left: Box::new(left),
1904            right: Box::new(right),
1905            order_by: vec![],
1906            limit: None,
1907            offset: None,
1908        });
1909
1910        // Parse trailing ORDER BY / LIMIT / OFFSET that applies to the whole set operation
1911        if matches!(
1912            self.peek_type(),
1913            TokenType::Union | TokenType::Intersect | TokenType::Except
1914        ) {
1915            self.maybe_parse_set_operation(combined)
1916        } else {
1917            // Check for global ORDER BY / LIMIT
1918            if let Statement::SetOperation(mut sop) = combined {
1919                if self.match_token(TokenType::Order) {
1920                    self.expect(TokenType::By)?;
1921                    sop.order_by = self.parse_order_by_items()?;
1922                }
1923                if self.match_token(TokenType::Limit) {
1924                    sop.limit = Some(self.parse_expr()?);
1925                }
1926                if self.match_token(TokenType::Offset) {
1927                    sop.offset = Some(self.parse_expr()?);
1928                }
1929                Ok(Statement::SetOperation(sop))
1930            } else {
1931                Ok(combined)
1932            }
1933        }
1934    }
1935
1936    fn parse_select_items(&mut self) -> Result<Vec<SelectItem>> {
1937        let mut items = vec![self.parse_select_item()?];
1938        while self.match_token(TokenType::Comma) {
1939            // DuckDB / BigQuery / Snowflake allow a trailing comma in the
1940            // SELECT list before `FROM` / end of select clause. Bail out if
1941            // the next token can't start a select item.
1942            if matches!(
1943                self.peek_type(),
1944                TokenType::From
1945                    | TokenType::Where
1946                    | TokenType::Group
1947                    | TokenType::Order
1948                    | TokenType::Limit
1949                    | TokenType::Having
1950                    | TokenType::Qualify
1951                    | TokenType::Eof
1952                    | TokenType::Semicolon
1953                    | TokenType::RParen
1954                    | TokenType::Union
1955                    | TokenType::Intersect
1956                    | TokenType::Except
1957            ) {
1958                break;
1959            }
1960            items.push(self.parse_select_item()?);
1961        }
1962        Ok(items)
1963    }
1964
1965    /// Consume DuckDB / Snowflake star modifiers — `EXCLUDE (...)`,
1966    /// `EXCEPT (...)`, `RENAME (...)`, `REPLACE (...)` — that may follow
1967    /// `*` or `t.*` in a SELECT list. Each modifier may appear at most
1968    /// once; we tolerate any order.
1969    fn swallow_star_modifiers(&mut self) {
1970        loop {
1971            let matched = self.check_keyword("EXCLUDE")
1972                || self.check_keyword("RENAME")
1973                || (self.check_keyword("REPLACE")
1974                    && matches!(
1975                        self.peek_offset(1).map(|t| &t.token_type),
1976                        Some(TokenType::LParen)
1977                    ))
1978                || (self.peek_type() == &TokenType::Except
1979                    && matches!(
1980                        self.peek_offset(1).map(|t| &t.token_type),
1981                        Some(TokenType::LParen)
1982                    ));
1983            // sqlfluff `SELECT * GLOB '…' FROM t` / `* SIMILAR TO '…'` /
1984            // `* LIKE '…'` style column-filter shorthand. Swallow the
1985            // operator and its pattern literal so the rest parses.
1986            let pattern_modifier = if matches!(self.peek_type(), TokenType::Like | TokenType::ILike)
1987                || (self.check_keyword("GLOB")
1988                    || self.check_keyword("REGEXP")
1989                    || self.check_keyword("RLIKE")
1990                    || self.check_keyword("IREGEXP")
1991                    || self.check_keyword("SIMILAR"))
1992            {
1993                let next_is_string =
1994                    matches!(self.peek_offset(1).map(|t| &t.token_type), Some(TokenType::String));
1995                let is_similar_to = self.check_keyword("SIMILAR")
1996                    && self
1997                        .peek_offset(1)
1998                        .map(|t| t.value.eq_ignore_ascii_case("TO"))
1999                        .unwrap_or(false);
2000                next_is_string || is_similar_to
2001            } else {
2002                false
2003            };
2004            if !matched && !pattern_modifier {
2005                break;
2006            }
2007            if pattern_modifier {
2008                // Operator keyword (and optional TO for SIMILAR TO) +
2009                // pattern string. We're tolerant of extra ESCAPE clause.
2010                self.advance(); // GLOB / LIKE / etc.
2011                if self.is_name_token() && self.peek().value.eq_ignore_ascii_case("TO") {
2012                    self.advance();
2013                }
2014                if matches!(self.peek_type(), TokenType::String) {
2015                    self.advance();
2016                    if self.match_token(TokenType::Escape) {
2017                        if matches!(self.peek_type(), TokenType::String) {
2018                            self.advance();
2019                        }
2020                    }
2021                }
2022                continue;
2023            }
2024            self.advance(); // keyword
2025            if self.match_token(TokenType::LParen) {
2026                let mut depth = 1;
2027                while depth > 0 {
2028                    match self.peek_type() {
2029                        TokenType::LParen => depth += 1,
2030                        TokenType::RParen => {
2031                            depth -= 1;
2032                            if depth == 0 {
2033                                self.advance();
2034                                break;
2035                            }
2036                        }
2037                        TokenType::Eof => break,
2038                        _ => {}
2039                    }
2040                    self.advance();
2041                }
2042            } else if self.is_name_token() {
2043                // EXCLUDE col (single-column without parens)
2044                self.advance();
2045            }
2046        }
2047    }
2048
2049    fn parse_select_item(&mut self) -> Result<SelectItem> {
2050        if self.peek().token_type == TokenType::Star {
2051            self.advance();
2052            // DuckDB / Snowflake `* EXCLUDE (col, ...)`,
2053            // `* RENAME (a AS b, ...)`, `* REPLACE (expr AS col, ...)`.
2054            // Swallow the modifier so the surrounding select parses.
2055            self.swallow_star_modifiers();
2056            return Ok(SelectItem::Wildcard);
2057        }
2058
2059        // DuckDB struct-shorthand alias-first form: `alias: expr` in a SELECT
2060        // list. Only fire when we see `<name> :` followed by something that
2061        // is not another `:` (which would form `::` cast) — i.e. a leading
2062        // alias-then-colon pattern. The alias may be any name-like token.
2063        if self.is_name_token() {
2064            let pos1 = self.peek_offset(1).map(|t| &t.token_type);
2065            let pos2 = self.peek_offset(2).map(|t| &t.token_type);
2066            if matches!(pos1, Some(TokenType::Colon)) && !matches!(pos2, Some(TokenType::Colon)) {
2067                // Save state so we can roll back if the trailing expression
2068                // fails to parse (avoids misclassifying obscure forms).
2069                let saved = self.pos;
2070                let alias_tok = self.advance().clone();
2071                self.advance(); // consume ':'
2072                if let Ok(expr) = self.parse_expr() {
2073                    return Ok(SelectItem::Expr {
2074                        expr,
2075                        alias: Some(alias_tok.value),
2076                        alias_quote_style: quote_style_from_char(alias_tok.quote_char),
2077                    });
2078                }
2079                self.pos = saved;
2080            }
2081        }
2082
2083        let expr = self.parse_expr()?;
2084
2085        // Check for table.* pattern
2086        if let Expr::QualifiedWildcard { ref table } = expr {
2087            self.swallow_star_modifiers();
2088            return Ok(SelectItem::QualifiedWildcard {
2089                table: table.clone(),
2090            });
2091        }
2092
2093        // Hive scripting: `SELECT TRANSFORM(cols) [ROW FORMAT ...] USING
2094        // 'cmd' [AS (cols)] [ROW FORMAT ...] [RECORDREADER 'cls']`. The
2095        // tail clauses appear between the function call and `FROM`. We
2096        // don't model the scripting AST yet; swallow opaquely so the rest
2097        // of the SELECT parses.
2098        if matches!(
2099            &expr,
2100            Expr::Function { name, .. } if name.eq_ignore_ascii_case("TRANSFORM")
2101        ) {
2102            while !matches!(
2103                self.peek_type(),
2104                TokenType::From | TokenType::Eof | TokenType::Semicolon | TokenType::Comma
2105            ) {
2106                let v = self.peek().value.to_uppercase();
2107                let is_tail = self.peek_type() == &TokenType::Using
2108                    || self.peek_type() == &TokenType::As
2109                    || matches!(
2110                        v.as_str(),
2111                        "ROW" | "FORMAT" | "SERDE" | "WITH" | "SERDEPROPERTIES"
2112                            | "RECORDREADER" | "RECORDWRITER" | "FIELDS" | "TERMINATED"
2113                            | "BY" | "COLLECTION" | "ITEMS" | "MAP" | "KEYS"
2114                            | "LINES" | "NULL" | "DEFINED" | "STORED" | "DELIMITED"
2115                            | "ESCAPED" | "LOCATION" | "OUTPUTFORMAT" | "INPUTFORMAT"
2116                    );
2117                if !is_tail
2118                    && !matches!(
2119                        self.peek_type(),
2120                        TokenType::String | TokenType::LParen | TokenType::RParen
2121                            | TokenType::Identifier | TokenType::Eq
2122                    )
2123                {
2124                    break;
2125                }
2126                self.advance();
2127            }
2128            return Ok(SelectItem::Expr {
2129                expr,
2130                alias: None,
2131                alias_quote_style: QuoteStyle::None,
2132            });
2133        }
2134
2135        let (alias, alias_quote_style) = match self.parse_optional_alias()? {
2136            Some((name, qs)) => (Some(name), qs),
2137            None => (None, QuoteStyle::None),
2138        };
2139
2140        Ok(SelectItem::Expr {
2141            expr,
2142            alias,
2143            alias_quote_style,
2144        })
2145    }
2146
2147    fn parse_optional_alias(&mut self) -> Result<Option<(String, QuoteStyle)>> {
2148        if self.match_token(TokenType::As) {
2149            // After AS, also accept `@name` / `:name` as an alias. Both forms
2150            // appear in auto-generated SQL corpora (e.g. `AS @rpm`, `AS :minutes`)
2151            // where the symbol is part of the column name from the source data.
2152            if let Some((name, qs)) = self.try_parse_prefixed_alias()? {
2153                return Ok(Some((name, qs)));
2154            }
2155            // PostgreSQL / SQLite tolerate reserved-word literals as aliases
2156            // (`SELECT bool 't' AS true`). Accept TRUE / FALSE / NULL tokens.
2157            if matches!(
2158                self.peek_type(),
2159                TokenType::True | TokenType::False | TokenType::Null
2160            ) {
2161                let token = self.advance().clone();
2162                return Ok(Some((token.value, QuoteStyle::None)));
2163            }
2164            // DuckDB allows column aliases that collide with reserved
2165            // keywords (`AS matched`, `AS or`, `AS using`). After AS, take
2166            // whatever non-structural token appears.
2167            if matches!(
2168                self.peek_type(),
2169                TokenType::Matched
2170                    | TokenType::Or
2171                    | TokenType::And
2172                    | TokenType::Using
2173                    | TokenType::When
2174                    | TokenType::Where
2175                    | TokenType::Asc
2176                    | TokenType::Desc
2177                    | TokenType::Limit
2178                    | TokenType::Group
2179                    | TokenType::Having
2180                    | TokenType::On
2181                    | TokenType::Into
2182                    | TokenType::From
2183                    | TokenType::Order
2184                    | TokenType::Like
2185            ) {
2186                let token = self.advance().clone();
2187                return Ok(Some((token.value, QuoteStyle::None)));
2188            }
2189            return Ok(Some(self.expect_name_with_quote()?));
2190        }
2191        // Implicit alias
2192        if self.is_name_token() {
2193            let peeked_upper = self.peek().value.to_uppercase();
2194            if !matches!(
2195                peeked_upper.as_str(),
2196                "FROM"
2197                    | "WHERE"
2198                    | "GROUP"
2199                    | "ORDER"
2200                    | "LIMIT"
2201                    | "HAVING"
2202                    | "UNION"
2203                    | "INTERSECT"
2204                    | "EXCEPT"
2205                    | "JOIN"
2206                    | "INNER"
2207                    | "LEFT"
2208                    | "RIGHT"
2209                    | "FULL"
2210                    | "CROSS"
2211                    | "ON"
2212                    | "WINDOW"
2213                    | "QUALIFY"
2214                    | "INTO"
2215                    | "SET"
2216                    | "RETURNING"
2217                    | "PIVOT"
2218                    | "UNPIVOT"
2219                    | "PREWHERE"
2220                    | "SETTINGS"
2221                    | "FORMAT"
2222                    | "SAMPLE"
2223                    | "TABLESAMPLE"
2224                    | "LATERAL"
2225                    | "USING"
2226                    | "OFFSET"
2227                    | "FETCH"
2228                    | "FOR"
2229                    | "WITH"
2230                    | "OPTION"
2231                    | "MATCH_RECOGNIZE"
2232                    | "SORT"
2233                    | "DISTRIBUTE"
2234                    | "CLUSTER"
2235                    | "GLOBAL"
2236                    | "PREFERRING"
2237                    | "FORCE"
2238                    | "USE"
2239                    | "IGNORE"
2240                    | "STRAIGHT_JOIN"
2241                    | "DISTRIBUTED"
2242                    | "VALUE"
2243                    | "VALUES"
2244                    | "DEFAULT"
2245                    | "PARTITION"
2246            ) {
2247                let token = self.advance().clone();
2248                let qs = quote_style_from_char(token.quote_char);
2249                return Ok(Some((token.value.clone(), qs)));
2250            }
2251        }
2252        Ok(None)
2253    }
2254
2255    fn parse_table_source(&mut self) -> Result<TableSource> {
2256        let mut source = self.parse_base_table_source()?;
2257        // PostgreSQL table-inheritance star: `FROM parent*` includes all
2258        // child tables. Swallow the trailing `*` so the table alias /
2259        // joins continue to parse.
2260        let _ = self.match_token(TokenType::Star);
2261        // BigQuery / Snowflake / MySQL TiDB time-travel:
2262        //   `<tbl> [FOR SYSTEM_TIME] AS OF [TIMESTAMP] <expr>` or
2263        //   `<tbl> AS OF VERSION <expr>` / `AS OF TIMESTAMP <expr>`.
2264        // We don't model the time-travel clause in the AST; swallow the
2265        // keywords and the expression so the surrounding query parses.
2266        if self.is_name_token() && self.peek().value.eq_ignore_ascii_case("FOR")
2267            && self
2268                .peek_offset(1)
2269                .map(|t| t.value.eq_ignore_ascii_case("SYSTEM_TIME"))
2270                .unwrap_or(false)
2271        {
2272            self.advance(); // FOR
2273            self.advance(); // SYSTEM_TIME
2274        }
2275        if self.peek_type() == &TokenType::As
2276            && self
2277                .peek_offset(1)
2278                .map(|t| t.value.eq_ignore_ascii_case("OF"))
2279                .unwrap_or(false)
2280        {
2281            self.advance(); // AS
2282            self.advance(); // OF
2283            // Optional TIMESTAMP / VERSION qualifier.
2284            if matches!(self.peek_type(), TokenType::Timestamp)
2285                || (self.is_name_token()
2286                    && matches!(
2287                        self.peek().value.to_uppercase().as_str(),
2288                        "VERSION" | "SCN" | "SEQUENCE"
2289                    ))
2290            {
2291                self.advance();
2292            }
2293            let _ = self.parse_expr()?;
2294        }
2295        // Hive / Spark / Trino `TABLESAMPLE [method] (...)` after a table
2296        // reference. We don't model the sample clause in the AST; just
2297        // consume the optional method identifier (BERNOULLI / SYSTEM /
2298        // RESERVOIR) and the parenthesized body so the surrounding query
2299        // parses. Also accept an optional `REPEATABLE (n)` trailer.
2300        if self.match_token(TokenType::Tablesample) {
2301            // Optional sampling method identifier.
2302            if matches!(self.peek_type(), TokenType::Identifier) {
2303                self.advance();
2304            }
2305            if self.match_token(TokenType::LParen) {
2306                let mut depth = 1;
2307                while depth > 0 {
2308                    match self.peek_type() {
2309                        TokenType::LParen => depth += 1,
2310                        TokenType::RParen => {
2311                            depth -= 1;
2312                            if depth == 0 {
2313                                self.advance();
2314                                break;
2315                            }
2316                        }
2317                        TokenType::Eof => break,
2318                        _ => {}
2319                    }
2320                    self.advance();
2321                }
2322            }
2323            if self.check_keyword("REPEATABLE") {
2324                self.advance();
2325                if self.match_token(TokenType::LParen) {
2326                    let mut depth = 1;
2327                    while depth > 0 {
2328                        match self.peek_type() {
2329                            TokenType::LParen => depth += 1,
2330                            TokenType::RParen => {
2331                                depth -= 1;
2332                                if depth == 0 {
2333                                    self.advance();
2334                                    break;
2335                                }
2336                            }
2337                            TokenType::Eof => break,
2338                            _ => {}
2339                        }
2340                        self.advance();
2341                    }
2342                }
2343            }
2344            // Optional trailing alias on the sampled table — `… TABLESAMPLE
2345            // (…) s`. We attach it to the underlying table reference when
2346            // possible, otherwise just consume the identifier.
2347            if let TableSource::Table(ref mut tr) = source {
2348                if tr.alias.is_none() {
2349                    if let Some((name, qs)) = self.parse_optional_alias()? {
2350                        tr.alias = Some(name);
2351                        tr.alias_quote_style = qs;
2352                    }
2353                }
2354            }
2355        }
2356        // Check for trailing PIVOT / UNPIVOT
2357        let source = self.parse_pivot_or_unpivot(source)?;
2358        // ClickHouse: `SELECT * FROM t SAMPLE 0.1` (no parens) — and the
2359        // optional `OFFSET m` modifier. The keyword tokenizes as a plain
2360        // identifier so this also handles dialects that don't reserve it.
2361        if self.check_keyword("SAMPLE") {
2362            self.advance();
2363            // Accept a number, identifier, or parenthesized expression.
2364            if matches!(self.peek_type(), TokenType::Number) {
2365                self.advance();
2366                // Optional `/ N` ratio.
2367                if self.peek_type() == &TokenType::Slash {
2368                    self.advance();
2369                    if matches!(self.peek_type(), TokenType::Number) {
2370                        self.advance();
2371                    }
2372                }
2373            }
2374            if self.check_keyword("OFFSET") {
2375                self.advance();
2376                if matches!(self.peek_type(), TokenType::Number) {
2377                    self.advance();
2378                }
2379            }
2380        }
2381        Ok(source)
2382    }
2383
2384    fn parse_base_table_source(&mut self) -> Result<TableSource> {
2385        // LATERAL
2386        if self.match_token(TokenType::Lateral) {
2387            let source = self.parse_table_source()?;
2388            return Ok(TableSource::Lateral {
2389                source: Box::new(source),
2390            });
2391        }
2392
2393        // Spark / DuckDB / Postgres `FROM VALUES (...) [, (...)]+ [alias[(cols)]]`
2394        // (un-parenthesised VALUES list). Swallow the rows.
2395        if self.match_token(TokenType::Values) {
2396            // First row.
2397            if self.match_token(TokenType::LParen) {
2398                let mut depth = 1;
2399                while depth > 0 {
2400                    match self.peek_type() {
2401                        TokenType::LParen => depth += 1,
2402                        TokenType::RParen => {
2403                            depth -= 1;
2404                            if depth == 0 {
2405                                self.advance();
2406                                break;
2407                            }
2408                        }
2409                        TokenType::Eof => break,
2410                        _ => {}
2411                    }
2412                    self.advance();
2413                }
2414            }
2415            // Additional rows.
2416            while self.peek_type() == &TokenType::Comma {
2417                let saved = self.pos;
2418                self.advance();
2419                if !self.match_token(TokenType::LParen) {
2420                    // Not a row — restore comma for the outer parser.
2421                    self.pos = saved;
2422                    break;
2423                }
2424                let mut depth = 1;
2425                while depth > 0 {
2426                    match self.peek_type() {
2427                        TokenType::LParen => depth += 1,
2428                        TokenType::RParen => {
2429                            depth -= 1;
2430                            if depth == 0 {
2431                                self.advance();
2432                                break;
2433                            }
2434                        }
2435                        TokenType::Eof => break,
2436                        _ => {}
2437                    }
2438                    self.advance();
2439                }
2440            }
2441            let (alias, alias_quote_style) = match self.parse_optional_alias()? {
2442                Some((name, qs)) => (Some(name), qs),
2443                None => (None, QuoteStyle::None),
2444            };
2445            if alias.is_some() && self.peek_type() == &TokenType::LParen {
2446                let saved = self.pos;
2447                self.advance();
2448                let mut ok = true;
2449                loop {
2450                    if !self.is_name_token() {
2451                        ok = false;
2452                        break;
2453                    }
2454                    self.advance();
2455                    if self.match_token(TokenType::RParen) {
2456                        break;
2457                    }
2458                    if !self.match_token(TokenType::Comma) {
2459                        ok = false;
2460                        break;
2461                    }
2462                }
2463                if !ok {
2464                    self.pos = saved;
2465                }
2466            }
2467            return Ok(TableSource::TableFunction {
2468                name: "VALUES".to_string(),
2469                args: vec![],
2470                alias,
2471                alias_quote_style,
2472            });
2473        }
2474
2475        // UNNEST(expr)
2476        if self.match_token(TokenType::Unnest) {
2477            self.expect(TokenType::LParen)?;
2478            let expr = self.parse_expr()?;
2479            // Multi-arg form (Trino): UNNEST(a, b, c). Drop extras.
2480            while self.match_token(TokenType::Comma) {
2481                let _ = self.parse_expr()?;
2482            }
2483            self.expect(TokenType::RParen)?;
2484            let (mut alias, mut alias_quote_style) = match self.parse_optional_alias()? {
2485                Some((name, qs)) => (Some(name), qs),
2486                None => (None, QuoteStyle::None),
2487            };
2488            // BigQuery `WITH OFFSET [AS name]` / Postgres `WITH ORDINALITY`.
2489            let mut with_offset = false;
2490            if self.check_keyword("WITH") {
2491                let saved = self.pos;
2492                self.advance();
2493                if self.check_keyword("OFFSET") || self.check_keyword("ORDINALITY") {
2494                    self.advance();
2495                    with_offset = true;
2496                    // Optional alias after OFFSET / ORDINALITY.
2497                    if alias.is_none() {
2498                        if let Some((n, qs)) = self.parse_optional_alias()? {
2499                            alias = Some(n);
2500                            alias_quote_style = qs;
2501                        }
2502                    } else if self.is_name_token() {
2503                        // `UNNEST(a) id WITH OFFSET pos` — extra trailing
2504                        // name; absorb so we don't trip the join parser.
2505                        self.advance();
2506                    }
2507                } else {
2508                    self.pos = saved;
2509                }
2510            }
2511            // Optional positional column list: `AS t (n, a)`.
2512            if alias.is_some() && self.peek_type() == &TokenType::LParen {
2513                let saved = self.pos;
2514                self.advance();
2515                let mut ok = true;
2516                loop {
2517                    if !self.is_name_token() {
2518                        ok = false;
2519                        break;
2520                    }
2521                    self.advance();
2522                    if self.match_token(TokenType::RParen) {
2523                        break;
2524                    }
2525                    if !self.match_token(TokenType::Comma) {
2526                        ok = false;
2527                        break;
2528                    }
2529                }
2530                if !ok {
2531                    self.pos = saved;
2532                }
2533            }
2534            return Ok(TableSource::Unnest {
2535                expr: Box::new(expr),
2536                alias,
2537                alias_quote_style,
2538                with_offset,
2539            });
2540        }
2541
2542        // Subquery: (SELECT ...)
2543        if self.peek_type() == &TokenType::LParen {
2544            let saved = self.pos;
2545            self.advance();
2546            // Skip nested `(` so `((SELECT …))` and `((SELECT) UNION (SELECT))`
2547            // parse as a subquery. We count how many we consumed and pair
2548            // them with the matching trailing `)`s.
2549            let mut extra_parens = 0_usize;
2550            while self.peek_type() == &TokenType::LParen
2551                && self.peek_starts_subquery_through_parens()
2552            {
2553                self.advance();
2554                extra_parens += 1;
2555            }
2556            let starts_subquery = matches!(
2557                self.peek_type(),
2558                TokenType::Select | TokenType::With | TokenType::Explain | TokenType::From
2559                    | TokenType::Describe | TokenType::Show | TokenType::Table
2560            );
2561            if starts_subquery {
2562                let query = self.parse_statement_inner()?;
2563                // Set operations across parenthesised subqueries: `(SELECT …)
2564                // UNION ALL (SELECT …) [ORDER BY …] [LIMIT …]`.
2565                let query = self.maybe_parse_set_operation(query)?;
2566                for _ in 0..extra_parens {
2567                    self.expect(TokenType::RParen)?;
2568                }
2569                self.expect(TokenType::RParen)?;
2570                let (alias, alias_quote_style) = match self.parse_optional_alias()? {
2571                    Some((name, qs)) => (Some(name), qs),
2572                    None => (None, QuoteStyle::None),
2573                };
2574                // Positional column-list alias: `(SELECT ...) t(c1, c2)`
2575                if alias.is_some() && self.peek_type() == &TokenType::LParen {
2576                    let saved2 = self.pos;
2577                    self.advance();
2578                    let mut ok = true;
2579                    loop {
2580                        if !self.is_name_token() {
2581                            ok = false;
2582                            break;
2583                        }
2584                        self.advance();
2585                        if self.match_token(TokenType::RParen) {
2586                            break;
2587                        }
2588                        if !self.match_token(TokenType::Comma) {
2589                            ok = false;
2590                            break;
2591                        }
2592                    }
2593                    if !ok {
2594                        self.pos = saved2;
2595                    }
2596                }
2597                return Ok(TableSource::Subquery {
2598                    query: Box::new(query),
2599                    alias,
2600                    alias_quote_style,
2601                });
2602            }
2603            // `(VALUES (...), (...)) alias[(cols)]` — common in DuckDB /
2604            // Postgres derived tables. We don't model the VALUES rows in the
2605            // AST as a table source; swallow the parenthesized body and
2606            // synthesise an empty subquery placeholder.
2607            if self.peek_type() == &TokenType::Values {
2608                // Re-advance past the values list, balancing parens (we are
2609                // inside the outer LParen at depth 1).
2610                let mut depth = 1;
2611                while depth > 0 {
2612                    match self.peek_type() {
2613                        TokenType::LParen => depth += 1,
2614                        TokenType::RParen => {
2615                            depth -= 1;
2616                            if depth == 0 {
2617                                self.advance();
2618                                break;
2619                            }
2620                        }
2621                        TokenType::Eof => break,
2622                        _ => {}
2623                    }
2624                    self.advance();
2625                }
2626                let (alias, alias_quote_style) = match self.parse_optional_alias()? {
2627                    Some((name, qs)) => (Some(name), qs),
2628                    None => (None, QuoteStyle::None),
2629                };
2630                if alias.is_some() && self.peek_type() == &TokenType::LParen {
2631                    let saved2 = self.pos;
2632                    self.advance();
2633                    let mut ok = true;
2634                    loop {
2635                        if !self.is_name_token() {
2636                            ok = false;
2637                            break;
2638                        }
2639                        self.advance();
2640                        if self.match_token(TokenType::RParen) {
2641                            break;
2642                        }
2643                        if !self.match_token(TokenType::Comma) {
2644                            ok = false;
2645                            break;
2646                        }
2647                    }
2648                    if !ok {
2649                        self.pos = saved2;
2650                    }
2651                }
2652                // Synthesise an empty values placeholder. Reuse Subquery
2653                // with a single-row Insert wrapper is awkward; instead,
2654                // wrap as a TableFunction("VALUES") with empty args.
2655                return Ok(TableSource::TableFunction {
2656                    name: "VALUES".to_string(),
2657                    args: vec![],
2658                    alias,
2659                    alias_quote_style,
2660                });
2661            }
2662            self.pos = saved;
2663
2664            // MySQL / SQLite / others permit parenthesized join expressions
2665            // as a table source: `(t1 LEFT JOIN t2 ON …)` or comma-list
2666            // `(t1, t2)`. Recurse into the parens, then consume joins /
2667            // commas until the matching `)`. Emit the first source so the
2668            // surrounding query parses; trailing tables are discarded
2669            // (their predicates were already parsed into the JOIN node we
2670            // throw away — acceptance only).
2671            if self.peek_type() == &TokenType::LParen {
2672                let inner_saved = self.pos;
2673                self.advance();
2674                let after_lparen = self.pos;
2675                if let Ok(inner) = self.parse_table_source() {
2676                    let _ = self.parse_joins();
2677                    while self.match_token(TokenType::Comma) {
2678                        if self.parse_table_source().is_err() {
2679                            self.pos = inner_saved;
2680                            // Fall through to the generic parse_table_ref
2681                            // path below, which will surface the original
2682                            // error message.
2683                            break;
2684                        }
2685                        let _ = self.parse_joins();
2686                    }
2687                    if self.pos != inner_saved && self.match_token(TokenType::RParen) {
2688                        let (alias, alias_quote_style) = match self
2689                            .parse_optional_alias()?
2690                        {
2691                            Some((name, qs)) => (Some(name), qs),
2692                            None => (None, QuoteStyle::None),
2693                        };
2694                        if let Some(name) = alias.clone() {
2695                            if let TableSource::Table(mut tr) = inner {
2696                                tr.alias = Some(name);
2697                                tr.alias_quote_style = alias_quote_style;
2698                                return Ok(TableSource::Table(tr));
2699                            }
2700                        }
2701                        return Ok(inner);
2702                    }
2703                }
2704                // Restore so the caller sees the LParen and emits a useful
2705                // error rather than silently misparsing partial state.
2706                self.pos = inner_saved;
2707                let _ = after_lparen; // suppress unused warning when build optimises
2708            }
2709        }
2710
2711        // Regular table reference (possibly with function syntax)
2712        let table_ref = self.parse_table_ref()?;
2713
2714        // MySQL / TiDB partition selector: `tbl PARTITION (p0, p1)`. Swallow
2715        // it so the table reference parses cleanly.
2716        if matches!(self.peek_type(), TokenType::Partition)
2717            && matches!(self.peek_offset(1).map(|t| &t.token_type), Some(TokenType::LParen))
2718        {
2719            self.advance();
2720            self.advance();
2721            while !matches!(self.peek_type(), TokenType::RParen | TokenType::Eof) {
2722                self.advance();
2723            }
2724            let _ = self.match_token(TokenType::RParen);
2725        }
2726
2727        // Check if it's actually a table function: name(args...). Also
2728        // accept dotted qualifiers so DuckDB `schema.func(...)` /
2729        // `catalog.schema.func(...)` parse.
2730        if self.peek_type() == &TokenType::LParen {
2731            // SQL/PGQ `GRAPH_TABLE(graph MATCH … COLUMNS (…))`,
2732            // SQL/XML `XMLTABLE('xpath' PASSING expr COLUMNS …)`,
2733            // SQL/JSON `JSON_TABLE(expr, '$' COLUMNS (…))`. Swallow the
2734            // body opaquely so the rest of the query parses.
2735            let fname = table_ref.name.to_uppercase();
2736            if matches!(
2737                fname.as_str(),
2738                "GRAPH_TABLE" | "XMLTABLE" | "JSON_TABLE" | "OPENJSON" | "OPENROWSET" | "OPENXML"
2739            ) {
2740                self.advance();
2741                let mut depth = 1usize;
2742                while depth > 0 && !matches!(self.peek_type(), TokenType::Eof) {
2743                    match self.peek_type() {
2744                        TokenType::LParen => depth += 1,
2745                        TokenType::RParen => {
2746                            depth -= 1;
2747                            if depth == 0 {
2748                                self.advance();
2749                                break;
2750                            }
2751                        }
2752                        _ => {}
2753                    }
2754                    self.advance();
2755                }
2756                let (alias, alias_quote_style) = match self.parse_optional_alias()? {
2757                    Some((name, qs)) => (Some(name), qs),
2758                    None => (None, QuoteStyle::None),
2759                };
2760                if alias.is_some() && self.peek_type() == &TokenType::LParen {
2761                    let saved = self.pos;
2762                    self.advance();
2763                    let mut ok = true;
2764                    loop {
2765                        if !self.is_name_token() {
2766                            ok = false;
2767                            break;
2768                        }
2769                        self.advance();
2770                        if self.match_token(TokenType::RParen) {
2771                            break;
2772                        }
2773                        if !self.match_token(TokenType::Comma) {
2774                            ok = false;
2775                            break;
2776                        }
2777                    }
2778                    if !ok {
2779                        self.pos = saved;
2780                    }
2781                }
2782                return Ok(TableSource::TableFunction {
2783                    name: match (&table_ref.catalog, &table_ref.schema) {
2784                        (Some(c), Some(s)) => format!("{}.{}.{}", c, s, table_ref.name),
2785                        (None, Some(s)) => format!("{}.{}", s, table_ref.name),
2786                        _ => table_ref.name,
2787                    },
2788                    args: vec![],
2789                    alias,
2790                    alias_quote_style,
2791                });
2792            }
2793            self.advance();
2794            // Hive `noop(on tbl partition by ... order by ... )` table-valued
2795            // function. Arguments start with the `ON` keyword and include
2796            // PARTITION/ORDER/CLUSTER/DISTRIBUTE/SORT BY clauses we don't
2797            // model. Swallow the body opaquely.
2798            let args = if matches!(self.peek_type(), TokenType::On) {
2799                let mut depth = 0usize;
2800                while !matches!(self.peek_type(), TokenType::Eof) {
2801                    match self.peek_type() {
2802                        TokenType::LParen => depth += 1,
2803                        TokenType::RParen => {
2804                            if depth == 0 {
2805                                break;
2806                            }
2807                            depth -= 1;
2808                        }
2809                        _ => {}
2810                    }
2811                    self.advance();
2812                }
2813                vec![]
2814            } else if self.peek_type() != &TokenType::RParen {
2815                self.parse_expr_list()?
2816            } else {
2817                vec![]
2818            };
2819            self.expect(TokenType::RParen)?;
2820            let (alias, alias_quote_style) = match self.parse_optional_alias()? {
2821                Some((name, qs)) => (Some(name), qs),
2822                None => (None, QuoteStyle::None),
2823            };
2824            // DuckDB / Postgres positional column-list alias:
2825            //   range(10) t(i)   →   alias = "t", columns = (i)
2826            // We consume the parenthesized list but do not model it in the AST.
2827            if alias.is_some() && self.peek_type() == &TokenType::LParen {
2828                let saved = self.pos;
2829                self.advance();
2830                let mut ok = true;
2831                loop {
2832                    if !self.is_name_token() {
2833                        ok = false;
2834                        break;
2835                    }
2836                    self.advance();
2837                    if self.match_token(TokenType::RParen) {
2838                        break;
2839                    }
2840                    if !self.match_token(TokenType::Comma) {
2841                        ok = false;
2842                        break;
2843                    }
2844                }
2845                if !ok {
2846                    self.pos = saved;
2847                }
2848            }
2849            return Ok(TableSource::TableFunction {
2850                name: match (&table_ref.catalog, &table_ref.schema) {
2851                    (Some(c), Some(s)) => format!("{}.{}.{}", c, s, table_ref.name),
2852                    (None, Some(s)) => format!("{}.{}", s, table_ref.name),
2853                    _ => table_ref.name,
2854                },
2855                args,
2856                alias,
2857                alias_quote_style,
2858            });
2859        }
2860
2861        // Also support positional column-list alias on a plain table reference:
2862        //   FROM tbl t(c1, c2)
2863        if self.peek_type() == &TokenType::LParen
2864            && table_ref.alias.is_some()
2865        {
2866            let saved = self.pos;
2867            self.advance();
2868            let mut ok = true;
2869            loop {
2870                if !self.is_name_token() {
2871                    ok = false;
2872                    break;
2873                }
2874                self.advance();
2875                if self.match_token(TokenType::RParen) {
2876                    break;
2877                }
2878                if !self.match_token(TokenType::Comma) {
2879                    ok = false;
2880                    break;
2881                }
2882            }
2883            if !ok {
2884                self.pos = saved;
2885            }
2886        }
2887
2888        // MySQL / MariaDB index hints — `USE INDEX (idx)`, `FORCE INDEX (idx)`,
2889        // `IGNORE INDEX (idx)`, optionally with `FOR JOIN|ORDER BY|GROUP BY`.
2890        // Swallow any sequence of these so the rest of the query parses.
2891        loop {
2892            let saved = self.pos;
2893            let is_hint = matches!(self.peek_type(), TokenType::Use | TokenType::Ignore)
2894                || self.check_keyword("FORCE");
2895            if !is_hint {
2896                break;
2897            }
2898            self.advance();
2899            if !self.check_keyword("INDEX") && !self.check_keyword("KEY") {
2900                self.pos = saved;
2901                break;
2902            }
2903            self.advance();
2904            // Optional `FOR JOIN | FOR ORDER BY | FOR GROUP BY`.
2905            if self.match_keyword("FOR") {
2906                if matches!(
2907                    self.peek_type(),
2908                    TokenType::Join | TokenType::Order | TokenType::Group
2909                ) {
2910                    self.advance();
2911                    let _ = self.match_token(TokenType::By);
2912                }
2913            }
2914            if self.match_token(TokenType::LParen) {
2915                let mut depth = 1;
2916                while depth > 0 {
2917                    match self.peek_type() {
2918                        TokenType::LParen => depth += 1,
2919                        TokenType::RParen => {
2920                            depth -= 1;
2921                            if depth == 0 {
2922                                self.advance();
2923                                break;
2924                            }
2925                        }
2926                        TokenType::Eof => break,
2927                        _ => {}
2928                    }
2929                    self.advance();
2930                }
2931            }
2932        }
2933
2934        // ClickHouse `FROM tbl [AS alias] FINAL` — swallow the FINAL modifier.
2935        // The token tokenizes as Identifier so check_keyword is enough.
2936        if self.check_keyword("FINAL") {
2937            self.advance();
2938        }
2939
2940        // MySQL: `FROM t PARTITION (p0[, p1, ...])` — swallow partition
2941        // selector. May appear before or after the alias; we accept it
2942        // here (i.e., before parse_optional_alias has run).
2943        if matches!(self.peek_type(), TokenType::Partition)
2944            && matches!(self.peek_offset(1).map(|t| &t.token_type), Some(TokenType::LParen))
2945        {
2946            self.advance();
2947            self.advance();
2948            let mut depth = 1;
2949            while depth > 0 && !matches!(self.peek_type(), TokenType::Eof) {
2950                match self.peek_type() {
2951                    TokenType::LParen => depth += 1,
2952                    TokenType::RParen => {
2953                        depth -= 1;
2954                        if depth == 0 {
2955                            self.advance();
2956                            break;
2957                        }
2958                    }
2959                    _ => {}
2960                }
2961                self.advance();
2962            }
2963        }
2964
2965        Ok(TableSource::Table(table_ref))
2966    }
2967
2968    /// After parsing a base table source, check if PIVOT or UNPIVOT follows.
2969    fn parse_pivot_or_unpivot(&mut self, source: TableSource) -> Result<TableSource> {
2970        if self.match_token(TokenType::Pivot) {
2971            self.expect(TokenType::LParen)?;
2972            let aggregate = self.parse_expr()?;
2973            // Snowflake / Databricks: optional `AS <alias>` on the aggregate
2974            // expression: `PIVOT (sum(sales) AS sales FOR …)`.
2975            if self.peek_type() == &TokenType::As
2976                && self
2977                    .peek_offset(1)
2978                    .map(|t| {
2979                        matches!(
2980                            t.token_type,
2981                            TokenType::Identifier | TokenType::String | TokenType::Number
2982                        )
2983                    })
2984                    .unwrap_or(false)
2985            {
2986                self.advance();
2987                self.advance();
2988            }
2989            // Multi-aggregate PIVOT: `PIVOT (SUM(x), COUNT(x) FOR …)`. Drop
2990            // the extra aggregates — we only keep the first one in the AST.
2991            while self.match_token(TokenType::Comma) {
2992                let _ = self.parse_expr()?;
2993                if self.peek_type() == &TokenType::As
2994                    && self
2995                        .peek_offset(1)
2996                        .map(|t| {
2997                            matches!(
2998                                t.token_type,
2999                                TokenType::Identifier | TokenType::String | TokenType::Number
3000                            )
3001                        })
3002                        .unwrap_or(false)
3003                {
3004                    self.advance();
3005                    self.advance();
3006                }
3007            }
3008            self.expect_keyword("FOR")?;
3009            // Snowflake `FOR (col1, col2) IN …` — grouped pivot key. Use the
3010            // first column name as the AST's for_column.
3011            let for_column = if self.peek_type() == &TokenType::LParen {
3012                self.advance();
3013                let first = self.expect_name()?;
3014                while self.match_token(TokenType::Comma) {
3015                    let _ = self.expect_name()?;
3016                }
3017                self.expect(TokenType::RParen)?;
3018                first
3019            } else {
3020                self.expect_name()?
3021            };
3022            self.expect(TokenType::In)?;
3023            self.expect(TokenType::LParen)?;
3024            let in_values = self.parse_pivot_values()?;
3025            self.expect(TokenType::RParen)?;
3026            self.expect(TokenType::RParen)?;
3027            let (alias, alias_quote_style) = match self.parse_optional_alias()? {
3028                Some((name, qs)) => (Some(name), qs),
3029                None => (None, QuoteStyle::None),
3030            };
3031            return Ok(TableSource::Pivot {
3032                source: Box::new(source),
3033                aggregate: Box::new(aggregate),
3034                for_column,
3035                in_values,
3036                alias,
3037                alias_quote_style,
3038            });
3039        }
3040        if self.match_token(TokenType::Unpivot) {
3041            // BigQuery: `UNPIVOT INCLUDE|EXCLUDE NULLS (...)`.
3042            if self.check_keyword("INCLUDE") || self.check_keyword("EXCLUDE") {
3043                let saved = self.pos;
3044                self.advance();
3045                if !self.match_keyword("NULLS") {
3046                    self.pos = saved;
3047                }
3048            }
3049            self.expect(TokenType::LParen)?;
3050            // Snowflake/DuckDB allow a grouped value-column tuple:
3051            // `UNPIVOT ((col1, col2) FOR period IN (...))`. Swallow the
3052            // grouping parens — we only model a single value-column name.
3053            let value_column = if self.peek_type() == &TokenType::LParen {
3054                self.advance();
3055                let first = self.expect_name()?;
3056                while self.match_token(TokenType::Comma) {
3057                    let _ = self.expect_name()?;
3058                }
3059                self.expect(TokenType::RParen)?;
3060                first
3061            } else {
3062                self.expect_name()?
3063            };
3064            self.expect_keyword("FOR")?;
3065            let for_column = self.expect_name()?;
3066            self.expect(TokenType::In)?;
3067            self.expect(TokenType::LParen)?;
3068            let in_columns = self.parse_pivot_values()?;
3069            self.expect(TokenType::RParen)?;
3070            self.expect(TokenType::RParen)?;
3071            let (alias, alias_quote_style) = match self.parse_optional_alias()? {
3072                Some((name, qs)) => (Some(name), qs),
3073                None => (None, QuoteStyle::None),
3074            };
3075            return Ok(TableSource::Unpivot {
3076                source: Box::new(source),
3077                value_column,
3078                for_column,
3079                in_columns,
3080                alias,
3081                alias_quote_style,
3082            });
3083        }
3084        Ok(source)
3085    }
3086
3087    /// Parse comma-separated pivot values, each optionally aliased.
3088    fn parse_pivot_values(&mut self) -> Result<Vec<PivotValue>> {
3089        let mut values = Vec::new();
3090        loop {
3091            let value = self.parse_expr()?;
3092            // Snowflake / BigQuery permit string or numeric aliases on pivot
3093            // values: `(a, b) AS 'semester_1'` / `(a, b) AS 1`. Accept those
3094            // alongside the regular identifier alias.
3095            let (alias, alias_quote_style) = if self.match_token(TokenType::As)
3096                && matches!(self.peek_type(), TokenType::String | TokenType::Number)
3097            {
3098                let tok = self.advance().clone();
3099                (Some(tok.value), QuoteStyle::None)
3100            } else {
3101                match self.parse_optional_alias()? {
3102                    Some((name, qs)) => (Some(name), qs),
3103                    None => (None, QuoteStyle::None),
3104                }
3105            };
3106            values.push(PivotValue {
3107                value,
3108                alias,
3109                alias_quote_style,
3110            });
3111            if !self.match_token(TokenType::Comma) {
3112                break;
3113            }
3114        }
3115        Ok(values)
3116    }
3117
3118    fn parse_table_ref(&mut self) -> Result<TableRef> {
3119        // T-SQL table variable: `FROM @t` / `INTO @t` etc. The @ is its own
3120        // token; fuse with the following name into a single identifier.
3121        if matches!(self.peek_type(), TokenType::AtSign)
3122            && self
3123                .peek_offset(1)
3124                .map(|t| {
3125                    matches!(t.token_type, TokenType::Identifier)
3126                        || matches!(t.token_type, TokenType::AtSign)
3127                })
3128                .unwrap_or(false)
3129        {
3130            let mut name = String::from("@");
3131            self.advance();
3132            if matches!(self.peek_type(), TokenType::AtSign) {
3133                name.push('@');
3134                self.advance();
3135            }
3136            let n = self.advance().clone();
3137            name.push_str(&n.value);
3138            let (alias, alias_quote_style) = match self.parse_optional_alias()? {
3139                Some((a, qs)) => (Some(a), qs),
3140                None => (None, QuoteStyle::None),
3141            };
3142            return Ok(TableRef {
3143                catalog: None,
3144                schema: None,
3145                name,
3146                alias,
3147                name_quote_style: QuoteStyle::None,
3148                alias_quote_style,
3149            });
3150        }
3151        let (first, first_qs) = self.expect_name_with_quote()?;
3152
3153        // Check for schema.table or catalog.schema.table. We also tolerate 4+
3154        // part qualified names (DuckDB / SQL Server `srv.db.sch.tbl`) by
3155        // folding additional segments into the catalog field.
3156        let (catalog, schema, name, name_qs) = if self.match_token(TokenType::Dot) {
3157            let (second, second_qs) = self.expect_name_with_quote()?;
3158            if self.match_token(TokenType::Dot) {
3159                let (mut third, mut third_qs) = self.expect_name_with_quote()?;
3160                let mut catalog = first;
3161                let mut schema = second;
3162                while self.match_token(TokenType::Dot) {
3163                    let (next, next_qs) = self.expect_name_with_quote()?;
3164                    catalog.push('.');
3165                    catalog.push_str(&schema);
3166                    schema = third;
3167                    third = next;
3168                    third_qs = next_qs;
3169                }
3170                (Some(catalog), Some(schema), third, third_qs)
3171            } else {
3172                (None, Some(first), second, second_qs)
3173            }
3174        } else {
3175            (None, None, first, first_qs)
3176        };
3177
3178        let (alias, alias_quote_style) = match self.parse_optional_alias()? {
3179            Some((name, qs)) => (Some(name), qs),
3180            None => (None, QuoteStyle::None),
3181        };
3182
3183        Ok(TableRef {
3184            catalog,
3185            schema,
3186            name,
3187            alias,
3188            name_quote_style: name_qs,
3189            alias_quote_style,
3190        })
3191    }
3192
3193    /// Like `parse_table_ref` but does not consume an alias.
3194    fn parse_table_ref_no_alias(&mut self) -> Result<TableRef> {
3195        let (first, first_qs) = self.expect_name_with_quote()?;
3196
3197        let (catalog, schema, name, name_qs) = if self.match_token(TokenType::Dot) {
3198            let (second, second_qs) = self.expect_name_with_quote()?;
3199            if self.match_token(TokenType::Dot) {
3200                let (mut third, mut third_qs) = self.expect_name_with_quote()?;
3201                let mut catalog = first;
3202                let mut schema = second;
3203                while self.match_token(TokenType::Dot) {
3204                    let (next, next_qs) = self.expect_name_with_quote()?;
3205                    catalog.push('.');
3206                    catalog.push_str(&schema);
3207                    schema = third;
3208                    third = next;
3209                    third_qs = next_qs;
3210                }
3211                (Some(catalog), Some(schema), third, third_qs)
3212            } else {
3213                (None, Some(first), second, second_qs)
3214            }
3215        } else {
3216            (None, None, first, first_qs)
3217        };
3218
3219        Ok(TableRef {
3220            catalog,
3221            schema,
3222            name,
3223            alias: None,
3224            name_quote_style: name_qs,
3225            alias_quote_style: QuoteStyle::None,
3226        })
3227    }
3228
3229    fn parse_joins(&mut self) -> Result<Vec<JoinClause>> {
3230        let mut joins = Vec::new();
3231        loop {
3232            // Hive `LATERAL VIEW [OUTER] func(args) tbl_alias [AS col, ...]`.
3233            // Model as a CROSS JOIN over a table-function so the rest of the
3234            // query parses; the AS column list is dropped.
3235            if self.peek_type() == &TokenType::Lateral
3236                && self
3237                    .peek_offset(1)
3238                    .map(|t| t.value.eq_ignore_ascii_case("VIEW"))
3239                    .unwrap_or(false)
3240            {
3241                self.advance(); // LATERAL
3242                self.advance(); // VIEW
3243                let _outer = self.check_keyword("OUTER") && {
3244                    self.advance();
3245                    true
3246                };
3247                // func(args) — parse name and arg list
3248                let fname = self.expect_name().unwrap_or_default();
3249                let mut fargs = Vec::new();
3250                if self.match_token(TokenType::LParen) {
3251                    if self.peek_type() != &TokenType::RParen {
3252                        fargs.push(self.parse_expr()?);
3253                        while self.match_token(TokenType::Comma) {
3254                            fargs.push(self.parse_expr()?);
3255                        }
3256                    }
3257                    self.expect(TokenType::RParen)?;
3258                }
3259                let (alias, alias_quote_style) = match self.parse_optional_alias()? {
3260                    Some((name, qs)) => (Some(name), qs),
3261                    None => (None, QuoteStyle::None),
3262                };
3263                // Optional `[AS] col1[, col2, ...]` column list. Hive
3264                // allows the AS to be omitted entirely; Spark sometimes
3265                // emits `tbl_name col`. Consume names while we keep seeing
3266                // identifier-then-comma pairs.
3267                let _ = self.match_token(TokenType::As);
3268                if self.is_name_token() {
3269                    self.advance();
3270                    while self.match_token(TokenType::Comma) {
3271                        if !self.is_name_token() {
3272                            break;
3273                        }
3274                        self.advance();
3275                    }
3276                }
3277                joins.push(JoinClause {
3278                    join_type: JoinType::Cross,
3279                    table: TableSource::TableFunction {
3280                        name: fname,
3281                        args: fargs,
3282                        alias,
3283                        alias_quote_style,
3284                    },
3285                    on: None,
3286                    using: Vec::new(),
3287                });
3288                continue;
3289            }
3290            // ClickHouse: ARRAY JOIN / LEFT ARRAY JOIN — flatten arrays as join source.
3291            // We model it as a CROSS JOIN over the array expression.
3292            let saved_array = self.pos;
3293            let _left_array = self.match_token(TokenType::Left);
3294            if self.match_token(TokenType::Array) && self.match_token(TokenType::Join) {
3295                // parse the array expression(s) — comma-separated
3296                let mut sources = Vec::new();
3297                loop {
3298                    // ClickHouse permits inline array literals as the source:
3299                    //   ARRAY JOIN [1,2,3] AS x, [(...), (...)] AS y
3300                    // Wrap as Unnest so we don't reject the syntax.
3301                    let src = if matches!(self.peek_type(), TokenType::LBracket) {
3302                        let arr = self.parse_primary()?;
3303                        let (alias, alias_quote_style) =
3304                            match self.parse_optional_alias()? {
3305                                Some((name, qs)) => (Some(name), qs),
3306                                None => (None, QuoteStyle::None),
3307                            };
3308                        TableSource::Unnest {
3309                            expr: Box::new(arr),
3310                            alias,
3311                            alias_quote_style,
3312                            with_offset: false,
3313                        }
3314                    } else {
3315                        self.parse_table_source()?
3316                    };
3317                    sources.push(src);
3318                    if !self.match_token(TokenType::Comma) {
3319                        break;
3320                    }
3321                }
3322                for src in sources {
3323                    joins.push(JoinClause {
3324                        join_type: JoinType::Cross,
3325                        table: src,
3326                        on: None,
3327                        using: Vec::new(),
3328                    });
3329                }
3330                continue;
3331            } else {
3332                self.pos = saved_array;
3333            }
3334            // ClickHouse / Hive join strictness modifiers — consume and drop:
3335            //   GLOBAL? ALL | ANY | SEMI | ANTI | ASOF [LEFT|RIGHT|INNER|OUTER] JOIN
3336            let saved_strictness = self.pos;
3337            let _global_prefix = self.check_keyword("GLOBAL") && {
3338                self.advance();
3339                true
3340            };
3341            let consumed_strictness = if self.match_token(TokenType::All) {
3342                true
3343            } else if self.match_token(TokenType::Any) {
3344                true
3345            } else if self.check_keyword("SEMI")
3346                || self.check_keyword("ANTI")
3347                || self.check_keyword("ASOF")
3348                || self.check_keyword("PASTE")
3349            {
3350                self.advance();
3351                // DuckDB / ClickHouse allow compound forms like
3352                // `ASOF ANTI JOIN` / `ASOF SEMI JOIN` — absorb a
3353                // following second strictness keyword too.
3354                if self.check_keyword("SEMI")
3355                    || self.check_keyword("ANTI")
3356                    || self.check_keyword("ASOF")
3357                {
3358                    self.advance();
3359                }
3360                true
3361            } else {
3362                _global_prefix
3363            };
3364            // If the strictness modifier wasn't followed by a join keyword,
3365            // rewind so we don't accidentally consume a stray ALL/ANY (e.g.
3366            // `ORDER BY ALL`).
3367            if consumed_strictness
3368                && !matches!(
3369                    self.peek_type(),
3370                    TokenType::Join
3371                        | TokenType::Inner
3372                        | TokenType::Left
3373                        | TokenType::Right
3374                        | TokenType::Full
3375                        | TokenType::Cross
3376                )
3377            {
3378                self.pos = saved_strictness;
3379            }
3380            let join_type = match self.peek_type() {
3381                // `FROM a, b` is treated as `FROM a CROSS JOIN b`. Note the
3382                // SQL standard gives comma a lower precedence than explicit
3383                // JOIN operators (so `FROM a, b JOIN c ON ...` should be
3384                // `a CROSS JOIN (b JOIN c ...)`), but we flatten everything
3385                // into a left-deep chain. Column resolution still works for
3386                // the common cases since the join order is associative when
3387                // ON-clauses only reference adjacent tables.
3388                TokenType::Comma => {
3389                    self.advance();
3390                    JoinType::Cross
3391                }
3392                // `NATURAL [LEFT|RIGHT|FULL [OUTER]] JOIN tbl` — auto-equi-join
3393                // on shared column names. We don't model NATURAL semantics yet;
3394                // promote to the corresponding non-natural join type and treat
3395                // the implicit USING clause as empty.
3396                t if matches!(t, TokenType::Identifier)
3397                    && self.peek().value.eq_ignore_ascii_case("NATURAL") =>
3398                {
3399                    self.advance(); // NATURAL
3400                    let jt = match self.peek_type() {
3401                        TokenType::Left => {
3402                            self.advance();
3403                            let _ = self.match_token(TokenType::Outer);
3404                            JoinType::Left
3405                        }
3406                        TokenType::Right => {
3407                            self.advance();
3408                            let _ = self.match_token(TokenType::Outer);
3409                            JoinType::Right
3410                        }
3411                        TokenType::Full => {
3412                            self.advance();
3413                            let _ = self.match_token(TokenType::Outer);
3414                            JoinType::Full
3415                        }
3416                        TokenType::Inner => {
3417                            self.advance();
3418                            JoinType::Inner
3419                        }
3420                        _ => JoinType::Inner,
3421                    };
3422                    self.expect(TokenType::Join)?;
3423                    jt
3424                }
3425                // MySQL `STRAIGHT_JOIN` — non-reordered INNER JOIN.
3426                t if matches!(t, TokenType::Identifier)
3427                    && self.peek().value.eq_ignore_ascii_case("STRAIGHT_JOIN") =>
3428                {
3429                    self.advance();
3430                    JoinType::Inner
3431                }
3432                TokenType::Join => {
3433                    self.advance();
3434                    JoinType::Inner
3435                }
3436                TokenType::Inner => {
3437                    self.advance();
3438                    self.expect(TokenType::Join)?;
3439                    JoinType::Inner
3440                }
3441                TokenType::Left => {
3442                    self.advance();
3443                    let _ = self.match_token(TokenType::Outer);
3444                    // Hive / Spark: LEFT SEMI JOIN / LEFT ANTI JOIN
3445                    let _ = self.check_keyword("SEMI") && {
3446                        self.advance();
3447                        true
3448                    } || self.check_keyword("ANTI") && {
3449                        self.advance();
3450                        true
3451                    };
3452                    // ClickHouse: LEFT ANY|ALL JOIN
3453                    let _ = self.match_token(TokenType::Any)
3454                        || self.match_token(TokenType::All);
3455                    // Some dialects (Spark/Hive variants) allow a trailing
3456                    // OUTER after the strictness modifier.
3457                    let _ = self.match_token(TokenType::Outer);
3458                    self.expect(TokenType::Join)?;
3459                    JoinType::Left
3460                }
3461                TokenType::Right => {
3462                    self.advance();
3463                    let _ = self.match_token(TokenType::Outer);
3464                    let _ = self.check_keyword("SEMI") && {
3465                        self.advance();
3466                        true
3467                    } || self.check_keyword("ANTI") && {
3468                        self.advance();
3469                        true
3470                    };
3471                    let _ = self.match_token(TokenType::Any)
3472                        || self.match_token(TokenType::All);
3473                    let _ = self.match_token(TokenType::Outer);
3474                    self.expect(TokenType::Join)?;
3475                    JoinType::Right
3476                }
3477                TokenType::Full => {
3478                    self.advance();
3479                    let _ = self.match_token(TokenType::Outer);
3480                    self.expect(TokenType::Join)?;
3481                    JoinType::Full
3482                }
3483                TokenType::Cross => {
3484                    self.advance();
3485                    // T-SQL `CROSS APPLY <source>` ≈ `CROSS JOIN LATERAL ...`.
3486                    if self.is_name_token()
3487                        && self.peek().value.eq_ignore_ascii_case("APPLY")
3488                    {
3489                        self.advance();
3490                        JoinType::Cross
3491                    } else {
3492                        self.expect(TokenType::Join)?;
3493                        JoinType::Cross
3494                    }
3495                }
3496                TokenType::Outer => {
3497                    // T-SQL `OUTER APPLY <source>` ≈ `LEFT JOIN LATERAL ... ON TRUE`.
3498                    self.advance();
3499                    if self.is_name_token()
3500                        && self.peek().value.eq_ignore_ascii_case("APPLY")
3501                    {
3502                        self.advance();
3503                        JoinType::Left
3504                    } else {
3505                        break;
3506                    }
3507                }
3508                _ => break,
3509            };
3510
3511            let table = self.parse_table_source()?;
3512            let mut on = None;
3513            let mut using = vec![];
3514
3515            if self.match_token(TokenType::On) {
3516                on = Some(self.parse_expr()?);
3517            } else if self.match_token(TokenType::Using) {
3518                // ClickHouse permits a bare column name without parens:
3519                // `JOIN t USING k`.
3520                if self.match_token(TokenType::LParen) {
3521                    using = vec![self.expect_name()?];
3522                    while self.match_token(TokenType::Comma) {
3523                        using.push(self.expect_name()?);
3524                    }
3525                    self.expect(TokenType::RParen)?;
3526                } else {
3527                    using = vec![self.expect_name()?];
3528                    while self.match_token(TokenType::Comma) {
3529                        if !self.is_name_token() {
3530                            break;
3531                        }
3532                        using.push(self.expect_name()?);
3533                    }
3534                }
3535            }
3536
3537            joins.push(JoinClause {
3538                join_type,
3539                table,
3540                on,
3541                using,
3542            });
3543        }
3544        Ok(joins)
3545    }
3546
3547    fn parse_order_by_items(&mut self) -> Result<Vec<OrderByItem>> {
3548        let mut items = Vec::new();
3549        // DuckDB / Snowflake `ORDER BY ALL` shortcut.
3550        if self.match_token(TokenType::All) {
3551            let ascending = if self.match_token(TokenType::Desc) {
3552                false
3553            } else {
3554                let _ = self.match_token(TokenType::Asc);
3555                true
3556            };
3557            items.push(OrderByItem {
3558                expr: Expr::Wildcard,
3559                ascending,
3560                nulls_first: None,
3561            });
3562            return Ok(items);
3563        }
3564        loop {
3565            // MySQL: `ORDER BY BINARY col [ASC|DESC]` — BINARY here is a
3566            // collation modifier on the sort key. Swallow it; the rest of
3567            // the expression parses normally.
3568            if self.is_name_token() && self.peek().value.eq_ignore_ascii_case("BINARY") {
3569                let saved = self.pos;
3570                self.advance();
3571                // Only consume BINARY when followed by something that can
3572                // start an order-by key (name, literal, paren, etc.); if it
3573                // looks like the end of the list, rewind.
3574                if matches!(
3575                    self.peek_type(),
3576                    TokenType::Comma
3577                        | TokenType::Semicolon
3578                        | TokenType::Eof
3579                        | TokenType::RParen
3580                ) {
3581                    self.pos = saved;
3582                }
3583            }
3584            let expr = self.parse_expr()?;
3585            // ClickHouse: `ORDER BY expr AS alias`. Swallow the alias.
3586            if self.match_token(TokenType::As) && self.is_name_token() {
3587                self.advance();
3588            }
3589            let ascending = if self.match_token(TokenType::Desc) {
3590                false
3591            } else {
3592                let _ = self.match_token(TokenType::Asc);
3593                true
3594            };
3595
3596            let nulls_first = if self.match_token(TokenType::Nulls) {
3597                if self.match_token(TokenType::First) {
3598                    Some(true)
3599                } else {
3600                    self.expect(TokenType::Identifier)?; // LAST
3601                    Some(false)
3602                }
3603            } else {
3604                None
3605            };
3606
3607            items.push(OrderByItem {
3608                expr,
3609                ascending,
3610                nulls_first,
3611            });
3612            if !self.match_token(TokenType::Comma) {
3613                break;
3614            }
3615        }
3616        Ok(items)
3617    }
3618
3619    fn parse_expr_list(&mut self) -> Result<Vec<Expr>> {
3620        let mut exprs = vec![self.parse_expr()?];
3621        while self.match_token(TokenType::Comma) {
3622            // Tolerate a trailing comma — DuckDB / PostgreSQL accept
3623            // `IN ('a', 'b', )` and similar list shapes.
3624            if matches!(self.peek_type(), TokenType::RParen | TokenType::RBracket) {
3625                break;
3626            }
3627            exprs.push(self.parse_expr()?);
3628        }
3629        Ok(exprs)
3630    }
3631
3632    /// Parse a comma-separated expression list where each item may carry an
3633    /// inline alias (`expr AS name` or `expr name`). Used for dialects (notably
3634    /// ClickHouse) that permit aliases inside partition/grouping lists.
3635    fn parse_expr_list_allow_item_alias(&mut self) -> Result<Vec<Expr>> {
3636        let mut exprs = Vec::new();
3637        loop {
3638            exprs.push(self.parse_expr()?);
3639            if self.match_token(TokenType::As) && self.is_name_token() {
3640                self.advance();
3641            }
3642            if !self.match_token(TokenType::Comma) {
3643                break;
3644            }
3645            if matches!(self.peek_type(), TokenType::RParen | TokenType::RBracket) {
3646                break;
3647            }
3648        }
3649        Ok(exprs)
3650    }
3651
3652    /// Parse array-literal elements: comma-separated expressions, each
3653    /// optionally followed by `AS alias` (ClickHouse lets bindings
3654    /// appear inside `[…]`). The closing token is the caller's
3655    /// responsibility.
3656    fn parse_array_items(&mut self, close: TokenType) -> Result<Vec<Expr>> {
3657        if self.peek_type() == &close {
3658            return Ok(vec![]);
3659        }
3660        let mut items = Vec::new();
3661        loop {
3662            let expr = self.parse_expr()?;
3663            if self.match_token(TokenType::As) {
3664                let _ = self.parse_optional_alias();
3665            }
3666            items.push(expr);
3667            if !self.match_token(TokenType::Comma) {
3668                break;
3669            }
3670        }
3671        Ok(items)
3672    }
3673
3674    /// Parse a GROUP BY list, which may contain regular expressions,
3675    /// CUBE(...), ROLLUP(...), and GROUPING SETS(...).
3676    fn parse_group_by_list(&mut self) -> Result<Vec<Expr>> {
3677        // DuckDB / Snowflake `GROUP BY ALL` shortcut — emit a wildcard
3678        // marker so downstream code can recognise it. PostgreSQL also
3679        // allows `GROUP BY ALL <col>, <col>` (treated identically to a
3680        // regular GROUP BY list); fall through to the normal parser when
3681        // the next token is a column expression rather than a clause
3682        // terminator.
3683        if self.match_token(TokenType::All) {
3684            let terminates = matches!(
3685                self.peek_type(),
3686                TokenType::Comma
3687                    | TokenType::Semicolon
3688                    | TokenType::Eof
3689                    | TokenType::RParen
3690                    | TokenType::Having
3691                    | TokenType::Order
3692                    | TokenType::Limit
3693                    | TokenType::Offset
3694                    | TokenType::Window
3695                    | TokenType::Union
3696                    | TokenType::Intersect
3697                    | TokenType::Except
3698                    | TokenType::Qualify
3699            );
3700            if terminates {
3701                return Ok(vec![Expr::Wildcard]);
3702            }
3703            // Followed by a real grouping expression — fall through.
3704        }
3705        let mut items = vec![self.parse_group_by_item()?];
3706        // ClickHouse: `GROUP BY col AS alias [, …]` — swallow alias.
3707        if self.match_token(TokenType::As) && self.is_name_token() {
3708            self.advance();
3709        }
3710        // MySQL: `GROUP BY col ASC|DESC [, …]` — swallow direction.
3711        let _ = self.match_token(TokenType::Asc) || self.match_token(TokenType::Desc);
3712        while self.match_token(TokenType::Comma) {
3713            items.push(self.parse_group_by_item()?);
3714            if self.match_token(TokenType::As) && self.is_name_token() {
3715                self.advance();
3716            }
3717            let _ = self.match_token(TokenType::Asc) || self.match_token(TokenType::Desc);
3718        }
3719        Ok(items)
3720    }
3721
3722    /// Parse a single GROUP BY item: a CUBE, ROLLUP, GROUPING SETS, or regular expression.
3723    fn parse_group_by_item(&mut self) -> Result<Expr> {
3724        match self.peek_type() {
3725            TokenType::Cube => {
3726                self.advance();
3727                self.expect(TokenType::LParen)?;
3728                let exprs = if self.peek_type() == &TokenType::RParen {
3729                    vec![]
3730                } else {
3731                    self.parse_group_by_element_list()?
3732                };
3733                self.expect(TokenType::RParen)?;
3734                Ok(Expr::Cube { exprs })
3735            }
3736            TokenType::Rollup => {
3737                self.advance();
3738                self.expect(TokenType::LParen)?;
3739                let exprs = if self.peek_type() == &TokenType::RParen {
3740                    vec![]
3741                } else {
3742                    self.parse_group_by_element_list()?
3743                };
3744                self.expect(TokenType::RParen)?;
3745                Ok(Expr::Rollup { exprs })
3746            }
3747            TokenType::Grouping => {
3748                // Could be GROUPING SETS or GROUPING() function
3749                let saved = self.pos;
3750                self.advance();
3751                if self.peek_type() == &TokenType::Sets {
3752                    // GROUPING SETS (...)
3753                    self.advance();
3754                    self.expect(TokenType::LParen)?;
3755                    let sets = self.parse_grouping_sets_elements()?;
3756                    self.expect(TokenType::RParen)?;
3757                    Ok(Expr::GroupingSets { sets })
3758                } else {
3759                    // It's the GROUPING() function, backtrack and parse as expression
3760                    self.pos = saved;
3761                    self.parse_expr()
3762                }
3763            }
3764            _ => self.parse_expr(),
3765        }
3766    }
3767
3768    /// Parse elements inside CUBE(...) or ROLLUP(...).
3769    /// Each element can be a single expression or a parenthesized tuple of expressions.
3770    fn parse_group_by_element_list(&mut self) -> Result<Vec<Expr>> {
3771        let mut items = vec![self.parse_group_by_element()?];
3772        while self.match_token(TokenType::Comma) {
3773            items.push(self.parse_group_by_element()?);
3774        }
3775        Ok(items)
3776    }
3777
3778    /// Parse a single element inside CUBE/ROLLUP: either `expr` or `(expr, expr, ...)`.
3779    fn parse_group_by_element(&mut self) -> Result<Expr> {
3780        if self.peek_type() == &TokenType::LParen {
3781            self.advance();
3782            let exprs = self.parse_expr_list()?;
3783            self.expect(TokenType::RParen)?;
3784            if exprs.len() == 1 {
3785                Ok(Expr::Nested(Box::new(exprs.into_iter().next().unwrap())))
3786            } else {
3787                Ok(Expr::Tuple(exprs))
3788            }
3789        } else {
3790            let e = self.parse_expr()?;
3791            // ClickHouse: `GROUP BY expr AS alias`. Swallow the alias.
3792            if self.match_token(TokenType::As) && self.is_name_token() {
3793                self.advance();
3794            }
3795            Ok(e)
3796        }
3797    }
3798
3799    /// Parse elements inside GROUPING SETS (...).
3800    /// Each element can be: (), (expr, ...), CUBE(...), ROLLUP(...), or a single expr.
3801    fn parse_grouping_sets_elements(&mut self) -> Result<Vec<Expr>> {
3802        let mut items = vec![self.parse_grouping_sets_element()?];
3803        while self.match_token(TokenType::Comma) {
3804            items.push(self.parse_grouping_sets_element()?);
3805        }
3806        Ok(items)
3807    }
3808
3809    /// Parse a single GROUPING SETS element.
3810    fn parse_grouping_sets_element(&mut self) -> Result<Expr> {
3811        match self.peek_type() {
3812            TokenType::Cube => {
3813                self.advance();
3814                self.expect(TokenType::LParen)?;
3815                let exprs = if self.peek_type() == &TokenType::RParen {
3816                    vec![]
3817                } else {
3818                    self.parse_group_by_element_list()?
3819                };
3820                self.expect(TokenType::RParen)?;
3821                Ok(Expr::Cube { exprs })
3822            }
3823            TokenType::Rollup => {
3824                self.advance();
3825                self.expect(TokenType::LParen)?;
3826                let exprs = if self.peek_type() == &TokenType::RParen {
3827                    vec![]
3828                } else {
3829                    self.parse_group_by_element_list()?
3830                };
3831                self.expect(TokenType::RParen)?;
3832                Ok(Expr::Rollup { exprs })
3833            }
3834            TokenType::LParen => {
3835                self.advance();
3836                if self.peek_type() == &TokenType::RParen {
3837                    // Empty grouping set: ()
3838                    self.advance();
3839                    Ok(Expr::Tuple(vec![]))
3840                } else {
3841                    let exprs = self.parse_expr_list()?;
3842                    self.expect(TokenType::RParen)?;
3843                    if exprs.len() == 1 {
3844                        Ok(Expr::Nested(Box::new(exprs.into_iter().next().unwrap())))
3845                    } else {
3846                        Ok(Expr::Tuple(exprs))
3847                    }
3848                }
3849            }
3850            _ => self.parse_expr(),
3851        }
3852    }
3853
3854    // ── INSERT ──────────────────────────────────────────────────────
3855
3856    fn parse_insert(&mut self) -> Result<InsertStatement> {
3857        // Accept MySQL `REPLACE INTO ...` as a synonym for `INSERT INTO ...`.
3858        if !self.match_token(TokenType::Insert) {
3859            self.expect(TokenType::Replace)?;
3860        }
3861        // SQLite / DuckDB conflict-resolution prefix:
3862        //   `INSERT OR REPLACE|IGNORE|FAIL|ABORT|ROLLBACK INTO ...`.
3863        // Swallow opaquely; we don't model conflict resolution at the
3864        // statement level (ON CONFLICT covers most cases downstream).
3865        if self.match_token(TokenType::Or) {
3866            if self.match_token(TokenType::Replace) {
3867                // matched
3868            } else if self.match_token(TokenType::Ignore) {
3869                // matched
3870            } else if self.is_name_token() {
3871                let v = self.peek().value.to_uppercase();
3872                if matches!(v.as_str(), "FAIL" | "ABORT" | "ROLLBACK") {
3873                    self.advance();
3874                }
3875            }
3876        }
3877        // MySQL modifiers between INSERT/REPLACE and INTO:
3878        //   `INSERT LOW_PRIORITY|DELAYED|HIGH_PRIORITY [IGNORE] INTO ...`,
3879        //   `INSERT IGNORE INTO ...`. Swallow them so the rest parses.
3880        loop {
3881            if self.match_token(TokenType::Ignore) {
3882                continue;
3883            }
3884            if self.is_name_token() {
3885                let v = self.peek().value.to_uppercase();
3886                if matches!(v.as_str(), "LOW_PRIORITY" | "DELAYED" | "HIGH_PRIORITY") {
3887                    self.advance();
3888                    continue;
3889                }
3890            }
3891            break;
3892        }
3893        let _ = self.match_token(TokenType::Into);
3894        // Hive: `INSERT OVERWRITE [LOCAL] DIRECTORY '/path'` or
3895        // `INSERT OVERWRITE TABLE tbl ...`. Consume OVERWRITE (tokenized as
3896        // an identifier) and any DIRECTORY clause that follows.
3897        if self.check_keyword("OVERWRITE") {
3898            self.advance();
3899            if self.check_keyword("LOCAL") {
3900                self.advance();
3901            }
3902            if self.check_keyword("DIRECTORY") {
3903                self.advance();
3904                // Consume `'path'` (string) and any STORED AS / ROW FORMAT
3905                // clauses until we hit SELECT/WITH/LParen/VALUES/EOF.
3906                if matches!(self.peek_type(), TokenType::String) {
3907                    self.advance();
3908                }
3909                while !matches!(
3910                    self.peek_type(),
3911                    TokenType::Select
3912                        | TokenType::With
3913                        | TokenType::LParen
3914                        | TokenType::Values
3915                        | TokenType::Eof
3916                        | TokenType::Semicolon
3917                ) {
3918                    self.advance();
3919                }
3920            }
3921        }
3922        // Hive: `INSERT INTO TABLE tbl ...` and `INSERT OVERWRITE TABLE tbl ...`.
3923        let _ = self.match_token(TokenType::Table);
3924        let table = self.parse_table_ref()?;
3925
3926        // Hive `PARTITION (k=v, ...)` between table and column list / source.
3927        if self.peek_type() == &TokenType::Partition {
3928            self.advance();
3929            if self.match_token(TokenType::LParen) {
3930                let mut depth = 1;
3931                while depth > 0 {
3932                    match self.peek_type() {
3933                        TokenType::LParen => depth += 1,
3934                        TokenType::RParen => depth -= 1,
3935                        TokenType::Eof => break,
3936                        _ => {}
3937                    }
3938                    if depth == 0 {
3939                        self.advance();
3940                        break;
3941                    }
3942                    self.advance();
3943                }
3944            }
3945        }
3946
3947        let columns = if self.match_token(TokenType::LParen) {
3948            // BigQuery / SQLFluff fixture: `INSERT INTO t (SELECT ... )` —
3949            // no column list, the parenthesized SELECT is the source.
3950            // Rewind to the `(` and let the source dispatch handle it.
3951            if matches!(self.peek_type(), TokenType::Select | TokenType::With) {
3952                self.pos -= 1;
3953                Vec::new()
3954            } else {
3955            // ClickHouse `INSERT INTO t (COLUMNS('.*') EXCEPT (...))` — when
3956            // the list contains a function call or anything other than plain
3957            // identifiers, fall back to a balanced-paren swallow.
3958            let saved = self.pos;
3959            let try_simple: Result<Vec<String>> = (|| {
3960                let mut cols = vec![self.parse_dotted_name()?];
3961                while self.match_token(TokenType::Comma) {
3962                    cols.push(self.parse_dotted_name()?);
3963                }
3964                self.expect(TokenType::RParen)?;
3965                Ok(cols)
3966            })();
3967            match try_simple {
3968                Ok(c) => c,
3969                Err(_) => {
3970                    self.pos = saved;
3971                    let mut depth = 1_i32;
3972                    while depth > 0 && self.peek_type() != &TokenType::Eof {
3973                        match self.peek_type() {
3974                            TokenType::LParen => depth += 1,
3975                            TokenType::RParen => depth -= 1,
3976                            _ => {}
3977                        }
3978                        self.advance();
3979                    }
3980                    Vec::new()
3981                }
3982            }
3983            }
3984        } else {
3985            vec![]
3986        };
3987
3988        // ClickHouse `INSERT INTO t [(cols)] SETTINGS k=v[, …] VALUES …`.
3989        // Swallow the SETTINGS clause before the source clause so the
3990        // surrounding parse completes.
3991        if self.check_keyword("SETTINGS") {
3992            self.advance();
3993            loop {
3994                if !self.is_name_token() {
3995                    break;
3996                }
3997                self.advance(); // key
3998                if !self.match_token(TokenType::Eq) {
3999                    break;
4000                }
4001                // value: number / string / identifier / unary-signed number
4002                let _ = self.match_token(TokenType::Minus)
4003                    || self.match_token(TokenType::Plus);
4004                if matches!(
4005                    self.peek_type(),
4006                    TokenType::Number | TokenType::String
4007                ) || self.is_name_token()
4008                {
4009                    self.advance();
4010                }
4011                if !self.match_token(TokenType::Comma) {
4012                    break;
4013                }
4014            }
4015        }
4016
4017        let source = if self.match_token(TokenType::Values)
4018            || self.match_keyword("VALUE")
4019        {
4020            let mut rows = Vec::new();
4021            loop {
4022                self.expect(TokenType::LParen)?;
4023                // MySQL allows `VALUES ()` as an empty row to insert all
4024                // defaults — accept and emit as an empty row.
4025                let row = if self.peek_type() == &TokenType::RParen {
4026                    Vec::new()
4027                } else {
4028                    self.parse_expr_list()?
4029                };
4030                self.expect(TokenType::RParen)?;
4031                rows.push(row);
4032                // ClickHouse permits comma-less rows: `VALUES (1)(2)(3)`.
4033                if self.peek_type() == &TokenType::LParen {
4034                    continue;
4035                }
4036                if !self.match_token(TokenType::Comma) {
4037                    break;
4038                }
4039                // Trailing comma: `VALUES (1,2), (3,4),` — DuckDB / sqlfluff
4040                // fixture truncation. Accept and stop the row loop.
4041                if !matches!(self.peek_type(), TokenType::LParen) {
4042                    break;
4043                }
4044            }
4045            InsertSource::Values(rows)
4046        } else if matches!(
4047            self.peek_type(),
4048            TokenType::Select | TokenType::With | TokenType::LParen
4049        ) {
4050            InsertSource::Query(Box::new(self.parse_statement_inner()?))
4051        } else if self.match_token(TokenType::Default) {
4052            self.expect(TokenType::Values)?;
4053            InsertSource::Default
4054        } else if self.match_token(TokenType::Set) {
4055            // MySQL `INSERT INTO t SET col = val, col = val, ...`.
4056            // Collapse into a single-row VALUES placeholder by collecting
4057            // the right-hand expressions; column names are dropped.
4058            let mut row = Vec::new();
4059            loop {
4060                let _ = self.expect_name()?;
4061                self.expect(TokenType::Eq)?;
4062                row.push(self.parse_expr()?);
4063                if !self.match_token(TokenType::Comma) {
4064                    break;
4065                }
4066            }
4067            InsertSource::Values(vec![row])
4068        } else if self.peek_type() == &TokenType::From {
4069            // DuckDB `INSERT INTO t FROM source` shorthand for
4070            // `INSERT INTO t SELECT * FROM source`. Synthesize a SELECT *
4071            // statement so the existing query path handles it.
4072            self.advance();
4073            let from = Some(FromClause {
4074                source: self.parse_table_source()?,
4075            });
4076            let joins = self.parse_joins()?;
4077            let stmt = Statement::Select(SelectStatement {
4078                comments: vec![],
4079                ctes: vec![],
4080                distinct: false,
4081                top: None,
4082                columns: vec![SelectItem::Wildcard],
4083                from,
4084                joins,
4085                where_clause: None,
4086                group_by: vec![],
4087                having: None,
4088                order_by: vec![],
4089                limit: None,
4090                offset: None,
4091                fetch_first: None,
4092                qualify: None,
4093                window_definitions: vec![],
4094            });
4095            InsertSource::Query(Box::new(stmt))
4096        } else if self
4097            .peek()
4098            .value
4099            .eq_ignore_ascii_case("FORMAT")
4100        {
4101            // ClickHouse `INSERT INTO t FORMAT name <raw payload>`.
4102            // Swallow the format name and the remainder of the statement
4103            // as opaque bytes; we cannot parse JSONEachRow / TabSeparated
4104            // payloads, but we should not reject the statement.
4105            self.advance();
4106            let _ = self.expect_name();
4107            while !matches!(self.peek_type(), TokenType::Eof | TokenType::Semicolon) {
4108                self.advance();
4109            }
4110            InsertSource::Default
4111        } else {
4112            return Err(SqlglotError::ParserError {
4113                message: "Expected VALUES, SELECT, or DEFAULT VALUES after INSERT".into(),
4114            });
4115        };
4116
4117        // MySQL 8.0.19+ row alias: `INSERT INTO t (cols) VALUES (...) AS
4118        // alias [(col_alias, ...)] ON DUPLICATE KEY UPDATE ...`. Swallow
4119        // the alias so the ON DUPLICATE clause parses.
4120        if self.peek_type() == &TokenType::As
4121            && self
4122                .peek_offset(1)
4123                .map(|t| matches!(
4124                    t.token_type,
4125                    TokenType::Identifier | TokenType::Key | TokenType::Year
4126                        | TokenType::Month | TokenType::Day | TokenType::Hour
4127                        | TokenType::Minute | TokenType::Second
4128                ) || t.value.chars().next().is_some_and(|c| c.is_alphabetic() || c == '_'))
4129                .unwrap_or(false)
4130        {
4131            self.advance(); // AS
4132            self.advance(); // alias name
4133            if self.match_token(TokenType::LParen) {
4134                let mut depth = 1_i32;
4135                while depth > 0 && !matches!(self.peek_type(), TokenType::Eof) {
4136                    match self.peek_type() {
4137                        TokenType::LParen => depth += 1,
4138                        TokenType::RParen => depth -= 1,
4139                        _ => {}
4140                    }
4141                    self.advance();
4142                }
4143            }
4144        }
4145
4146        // MySQL `ON DUPLICATE KEY UPDATE col=val, ...`. Swallow the clause.
4147        if self.peek_type() == &TokenType::On
4148            && self
4149                .peek_offset(1)
4150                .map(|t| t.value.eq_ignore_ascii_case("DUPLICATE"))
4151                .unwrap_or(false)
4152        {
4153            self.advance();
4154            self.advance();
4155            // KEY UPDATE
4156            if self.is_name_token() && self.peek().value.eq_ignore_ascii_case("KEY") {
4157                self.advance();
4158            }
4159            if self.match_token(TokenType::Update) {
4160                // assignments until end-of-statement
4161                loop {
4162                    let _ = self.expect_name();
4163                    if !self.match_token(TokenType::Eq) {
4164                        break;
4165                    }
4166                    let _ = self.parse_expr();
4167                    if !self.match_token(TokenType::Comma) {
4168                        break;
4169                    }
4170                }
4171            }
4172        }
4173
4174        // ON CONFLICT
4175        let on_conflict = if self.match_token(TokenType::On) {
4176            if self.match_token(TokenType::Conflict) {
4177                let columns = if self.match_token(TokenType::LParen) {
4178                    self.parse_parenthesized_raw_items()?
4179                } else {
4180                    vec![]
4181                };
4182                self.expect(TokenType::Do)?;
4183                let action = if self.match_token(TokenType::Nothing) {
4184                    ConflictAction::DoNothing
4185                } else {
4186                    self.expect(TokenType::Update)?;
4187                    self.expect(TokenType::Set)?;
4188                    let mut assignments = Vec::new();
4189                    loop {
4190                        let col = self.expect_name()?;
4191                        self.expect(TokenType::Eq)?;
4192                        let val = self.parse_expr()?;
4193                        assignments.push((col, val));
4194                        if !self.match_token(TokenType::Comma) {
4195                            break;
4196                        }
4197                    }
4198                    ConflictAction::DoUpdate(assignments)
4199                };
4200                // Postgres / DuckDB allow `ON CONFLICT (...) DO UPDATE SET
4201                // ... WHERE predicate` to limit the update. Swallow the
4202                // predicate opaquely.
4203                if self.match_token(TokenType::Where) {
4204                    let _ = self.parse_expr()?;
4205                }
4206                Some(OnConflict { columns, action })
4207            } else {
4208                None
4209            }
4210        } else {
4211            None
4212        };
4213
4214        let returning = if self.match_token(TokenType::Returning) {
4215            self.parse_select_items()?
4216        } else {
4217            vec![]
4218        };
4219
4220        Ok(InsertStatement {
4221            comments: vec![],
4222            table,
4223            columns,
4224            source,
4225            on_conflict,
4226            returning,
4227        })
4228    }
4229
4230    // ── UPDATE ──────────────────────────────────────────────────────
4231
4232    fn parse_update(&mut self) -> Result<UpdateStatement> {
4233        self.expect(TokenType::Update)?;
4234        let table = self.parse_table_ref()?;
4235        // MySQL multi-table UPDATE: `UPDATE t1, t2 [, ...] SET ...`.
4236        // Swallow the additional table refs (we keep only the first as
4237        // the primary target).
4238        while self.match_token(TokenType::Comma) {
4239            let _ = self.parse_table_ref()?;
4240        }
4241        // PG SQL:2011 temporal `UPDATE t FOR PORTION OF col FROM a TO b
4242        // [AS alias] SET ...`. Swallow the qualifier verbatim.
4243        if self.check_keyword("FOR") && self.peek_offset(1).map(|t| t.value.eq_ignore_ascii_case("PORTION")).unwrap_or(false) {
4244            while !matches!(self.peek_type(), TokenType::Set | TokenType::Eof | TokenType::Semicolon) {
4245                self.advance();
4246            }
4247        }
4248        // MySQL `UPDATE t PARTITION (p0[, p1]) SET ...` — swallow.
4249        if matches!(self.peek_type(), TokenType::Partition)
4250            && matches!(self.peek_offset(1).map(|t| &t.token_type), Some(TokenType::LParen))
4251        {
4252            self.advance();
4253            self.advance();
4254            let mut depth = 1;
4255            while depth > 0 && !matches!(self.peek_type(), TokenType::Eof) {
4256                match self.peek_type() {
4257                    TokenType::LParen => depth += 1,
4258                    TokenType::RParen => {
4259                        depth -= 1;
4260                        if depth == 0 {
4261                            self.advance();
4262                            break;
4263                        }
4264                    }
4265                    _ => {}
4266                }
4267                self.advance();
4268            }
4269        }
4270        // MySQL multi-table UPDATE: `UPDATE t1 [LEFT|RIGHT|INNER|CROSS] JOIN
4271        // t2 ON ... SET ...`. Swallow the joins so the existing single-target
4272        // update parses; the joined tables are dropped from the AST.
4273        let _ = self.parse_joins();
4274        self.expect(TokenType::Set)?;
4275
4276        let mut assignments = Vec::new();
4277        loop {
4278            // Accept qualified LHS like `alias.col` (Oracle, T-SQL idiom),
4279            // and PG/Snowflake subscripts/field access on the LHS such as
4280            // `arr[1] = …`, `arr[1:3] = …`, `obj['k']`, `(a,b) = …`.
4281            // Accept LHS row-tuple `(a, b, c) = (rhs)` (PostgreSQL).
4282            if self.peek_type() == &TokenType::LParen {
4283                let saved = self.pos;
4284                self.advance();
4285                let mut depth = 1;
4286                while depth > 0 && self.peek_type() != &TokenType::Eof {
4287                    match self.peek_type() {
4288                        TokenType::LParen => depth += 1,
4289                        TokenType::RParen => depth -= 1,
4290                        _ => {}
4291                    }
4292                    self.advance();
4293                }
4294                if self.peek_type() == &TokenType::Eq {
4295                    self.advance();
4296                    let val = self.parse_expr()?;
4297                    assignments.push(("__tuple__".to_string(), val));
4298                    if !self.match_token(TokenType::Comma) {
4299                        break;
4300                    }
4301                    continue;
4302                }
4303                self.pos = saved;
4304            }
4305            let mut col = self.expect_name()?;
4306            while self.match_token(TokenType::Dot) {
4307                col.push('.');
4308                col.push_str(&self.expect_name()?);
4309            }
4310            // Swallow `[index]` / `[a:b]` subscripts in the LHS — we don't
4311            // model array-element assignment in the AST.
4312            while self.peek_type() == &TokenType::LBracket {
4313                self.advance();
4314                let mut depth = 1;
4315                while depth > 0 && self.peek_type() != &TokenType::Eof {
4316                    match self.peek_type() {
4317                        TokenType::LBracket => depth += 1,
4318                        TokenType::RBracket => depth -= 1,
4319                        _ => {}
4320                    }
4321                    self.advance();
4322                }
4323            }
4324            self.expect(TokenType::Eq)?;
4325            let val = self.parse_expr()?;
4326            assignments.push((col, val));
4327            if !self.match_token(TokenType::Comma) {
4328                break;
4329            }
4330        }
4331
4332        let from = if self.match_token(TokenType::From) {
4333            Some(FromClause {
4334                source: self.parse_table_source()?,
4335            })
4336        } else {
4337            None
4338        };
4339
4340        let where_clause = if self.match_token(TokenType::Where) {
4341            Some(self.parse_expr()?)
4342        } else {
4343            None
4344        };
4345
4346        // Teradata `PREFERRING <expr> [PARTITION BY <list>]` skyline
4347        // clause on UPDATE. Swallow up to a known terminator.
4348        if self.check_keyword("PREFERRING") {
4349            self.advance();
4350            loop {
4351                match self.peek_type() {
4352                    TokenType::Eof
4353                    | TokenType::Semicolon
4354                    | TokenType::RParen
4355                    | TokenType::Returning => break,
4356                    _ => self.advance(),
4357                };
4358            }
4359        }
4360
4361        // MySQL: `UPDATE … [ORDER BY …] [LIMIT N]`. Swallow.
4362        if self.match_token(TokenType::Order) {
4363            self.expect(TokenType::By)?;
4364            let _ = self.parse_order_by_items()?;
4365        }
4366        if self.match_token(TokenType::Limit) {
4367            let _ = self.parse_expr()?;
4368        }
4369
4370        let returning = if self.match_token(TokenType::Returning) {
4371            self.parse_select_items()?
4372        } else {
4373            vec![]
4374        };
4375
4376        Ok(UpdateStatement {
4377            comments: vec![],
4378            table,
4379            assignments,
4380            from,
4381            where_clause,
4382            returning,
4383        })
4384    }
4385
4386    // ── DELETE ──────────────────────────────────────────────────────
4387
4388    fn parse_delete(&mut self) -> Result<DeleteStatement> {
4389        self.expect(TokenType::Delete)?;
4390        // MySQL multi-table form: `DELETE t1[, t2, ...] FROM <join expr>`.
4391        // Swallow the leading table-alias list (we don't model it) before
4392        // the mandatory FROM.
4393        let mut multi_table = false;
4394        if !matches!(self.peek_type(), TokenType::From) {
4395            let saved = self.pos;
4396            if self.is_name_token() {
4397                self.advance();
4398                let _ = self.match_token(TokenType::Dot);
4399                if self.is_name_token() {
4400                    self.advance();
4401                }
4402                while self.match_token(TokenType::Comma) {
4403                    if !self.is_name_token() {
4404                        break;
4405                    }
4406                    self.advance();
4407                    let _ = self.match_token(TokenType::Dot);
4408                    if self.is_name_token() {
4409                        self.advance();
4410                    }
4411                }
4412                if matches!(self.peek_type(), TokenType::From) {
4413                    multi_table = true;
4414                } else {
4415                    self.pos = saved;
4416                }
4417            }
4418        }
4419        // BigQuery / some Snowflake forms allow `DELETE <table> WHERE …`
4420        // (FROM optional). If FROM is missing but the next token starts a
4421        // table-ref, treat it as the implicit FROM target.
4422        let from_optional = !matches!(self.peek_type(), TokenType::From);
4423        if !from_optional {
4424            self.expect(TokenType::From)?;
4425        }
4426        let table = self.parse_table_ref()?;
4427        // MySQL: `DELETE FROM t PARTITION (p0[, p1, ...])` — swallow
4428        // partition selector.
4429        if matches!(self.peek_type(), TokenType::Partition)
4430            && matches!(self.peek_offset(1).map(|t| &t.token_type), Some(TokenType::LParen))
4431        {
4432            self.advance();
4433            self.advance();
4434            let mut depth = 1;
4435            while depth > 0 && !matches!(self.peek_type(), TokenType::Eof) {
4436                match self.peek_type() {
4437                    TokenType::LParen => depth += 1,
4438                    TokenType::RParen => {
4439                        depth -= 1;
4440                        if depth == 0 {
4441                            self.advance();
4442                            break;
4443                        }
4444                    }
4445                    _ => {}
4446                }
4447                self.advance();
4448            }
4449        }
4450        if multi_table {
4451            // Swallow JOIN clauses, additional comma-joined tables, and
4452            // any opaque tail up to USING / WHERE / RETURNING / ; / EOF.
4453            loop {
4454                if matches!(
4455                    self.peek_type(),
4456                    TokenType::Where
4457                        | TokenType::Using
4458                        | TokenType::Returning
4459                        | TokenType::Semicolon
4460                        | TokenType::Eof
4461                ) {
4462                    break;
4463                }
4464                self.advance();
4465            }
4466        }
4467
4468        let using = if self.match_token(TokenType::Using) {
4469            Some(FromClause {
4470                source: self.parse_table_source()?,
4471            })
4472        } else {
4473            None
4474        };
4475
4476        // Teradata `PREFERRING <expr> [PARTITION BY <list>]` skyline
4477        // clause on DELETE.
4478        if self.check_keyword("PREFERRING") {
4479            self.advance();
4480            loop {
4481                match self.peek_type() {
4482                    TokenType::Eof
4483                    | TokenType::Semicolon
4484                    | TokenType::Where
4485                    | TokenType::Returning
4486                    | TokenType::RParen => break,
4487                    _ => self.advance(),
4488                };
4489            }
4490        }
4491
4492        let where_clause = if self.match_token(TokenType::Where) {
4493            Some(self.parse_expr()?)
4494        } else {
4495            None
4496        };
4497
4498        // MySQL: `DELETE FROM tbl [WHERE ...] [ORDER BY ...] [LIMIT N]`.
4499        // Swallow ORDER BY and LIMIT modifiers — we don't model them on
4500        // DeleteStatement yet.
4501        if self.match_token(TokenType::Order) {
4502            self.expect(TokenType::By)?;
4503            let _ = self.parse_order_by_items()?;
4504        }
4505        if self.match_token(TokenType::Limit) {
4506            let _ = self.parse_expr()?;
4507        }
4508
4509        let returning = if self.match_token(TokenType::Returning) {
4510            self.parse_select_items()?
4511        } else {
4512            vec![]
4513        };
4514
4515        Ok(DeleteStatement {
4516            comments: vec![],
4517            table,
4518            using,
4519            where_clause,
4520            returning,
4521        })
4522    }
4523
4524    // ── MERGE ───────────────────────────────────────────────────────
4525
4526    fn parse_merge(&mut self) -> Result<MergeStatement> {
4527        self.expect(TokenType::Merge)?;
4528        let _ = self.match_token(TokenType::Into);
4529        let target = self.parse_table_ref()?;
4530
4531        self.expect(TokenType::Using)?;
4532        let source = self.parse_table_source()?;
4533
4534        // DuckDB supports `MERGE INTO t USING src USING (cols)` as a
4535        // shorthand for the ON condition (column-equality join, akin to
4536        // SQL USING for JOINs). Swallow the column list opaquely and
4537        // synthesize a trivial truthy ON expression so downstream parsing
4538        // continues. We don't model USING-style MERGE in the AST yet.
4539        let on = if self.match_token(TokenType::Using) {
4540            self.expect(TokenType::LParen)?;
4541            let _ = self.expect_name()?;
4542            while self.match_token(TokenType::Comma) {
4543                let _ = self.expect_name()?;
4544            }
4545            self.expect(TokenType::RParen)?;
4546            Expr::Boolean(true)
4547        } else {
4548            self.expect(TokenType::On)?;
4549            self.parse_expr()?
4550        };
4551
4552        let mut clauses = Vec::new();
4553        while self.match_token(TokenType::When) {
4554            clauses.push(self.parse_merge_clause()?);
4555        }
4556
4557        if clauses.is_empty() {
4558            return Err(SqlglotError::ParserError {
4559                message: "MERGE requires at least one WHEN clause".into(),
4560            });
4561        }
4562
4563        // OUTPUT clause (T-SQL extension)
4564        let output = if self.match_keyword("OUTPUT") {
4565            self.parse_select_items()?
4566        } else {
4567            vec![]
4568        };
4569
4570        // PostgreSQL: `MERGE … RETURNING <select_list>`. We don't yet model
4571        // RETURNING for MERGE, so swallow the items and discard them.
4572        if self.match_token(TokenType::Returning) {
4573            let _ = self.parse_select_items()?;
4574        }
4575
4576        Ok(MergeStatement {
4577            comments: vec![],
4578            target,
4579            source,
4580            on,
4581            clauses,
4582            output,
4583        })
4584    }
4585
4586    fn parse_merge_clause(&mut self) -> Result<MergeClause> {
4587        let kind = if self.match_token(TokenType::Not) {
4588            self.expect(TokenType::Matched)?;
4589            if self.match_keyword("BY") {
4590                if self.match_keyword("SOURCE") {
4591                    MergeClauseKind::NotMatchedBySource
4592                } else {
4593                    // BY TARGET is the default / explicit form
4594                    let _ = self.match_keyword("TARGET");
4595                    MergeClauseKind::NotMatched
4596                }
4597            } else {
4598                MergeClauseKind::NotMatched
4599            }
4600        } else {
4601            self.expect(TokenType::Matched)?;
4602            MergeClauseKind::Matched
4603        };
4604
4605        let condition = if self.match_token(TokenType::And) {
4606            Some(self.parse_expr()?)
4607        } else {
4608            None
4609        };
4610
4611        self.expect(TokenType::Then)?;
4612
4613        let action = self.parse_merge_action(&kind)?;
4614
4615        Ok(MergeClause {
4616            kind,
4617            condition,
4618            action,
4619        })
4620    }
4621
4622    fn parse_merge_action(&mut self, kind: &MergeClauseKind) -> Result<MergeAction> {
4623        if self.match_token(TokenType::Update) {
4624            self.expect(TokenType::Set)?;
4625            let mut assignments = Vec::new();
4626            loop {
4627                let mut col = self.expect_name()?;
4628                // Support dotted column names like target.col
4629                while self.match_token(TokenType::Dot) {
4630                    col.push('.');
4631                    col.push_str(&self.expect_name()?);
4632                }
4633                self.expect(TokenType::Eq)?;
4634                let val = self.parse_expr()?;
4635                assignments.push((col, val));
4636                if !self.match_token(TokenType::Comma) {
4637                    break;
4638                }
4639            }
4640            Ok(MergeAction::Update(assignments))
4641        } else if self.match_token(TokenType::Insert) {
4642            // INSERT ROW (BigQuery)
4643            if self.match_keyword("ROW") {
4644                return Ok(MergeAction::InsertRow);
4645            }
4646
4647            let columns = if self.match_token(TokenType::LParen) {
4648                let mut cols = vec![self.expect_name()?];
4649                while self.match_token(TokenType::Comma) {
4650                    cols.push(self.expect_name()?);
4651                }
4652                self.expect(TokenType::RParen)?;
4653                cols
4654            } else {
4655                vec![]
4656            };
4657
4658            self.expect(TokenType::Values)?;
4659            self.expect(TokenType::LParen)?;
4660            let values = self.parse_expr_list()?;
4661            self.expect(TokenType::RParen)?;
4662
4663            Ok(MergeAction::Insert { columns, values })
4664        } else if self.match_token(TokenType::Delete) {
4665            Ok(MergeAction::Delete)
4666        } else {
4667            Err(SqlglotError::ParserError {
4668                message: format!(
4669                    "Expected UPDATE, INSERT, or DELETE after WHEN {} THEN",
4670                    match kind {
4671                        MergeClauseKind::Matched => "MATCHED",
4672                        MergeClauseKind::NotMatched => "NOT MATCHED",
4673                        MergeClauseKind::NotMatchedBySource => "NOT MATCHED BY SOURCE",
4674                    }
4675                ),
4676            })
4677        }
4678    }
4679
4680    // ── CREATE ──────────────────────────────────────────────────────
4681
4682    fn parse_create(&mut self) -> Result<Statement> {
4683        self.expect(TokenType::Create)?;
4684
4685        let or_replace = if self.check_keyword("OR") {
4686            self.advance();
4687            self.expect(TokenType::Replace)?;
4688            true
4689        } else {
4690            false
4691        };
4692
4693        let temporary = self.match_token(TokenType::Temporary) || self.match_token(TokenType::Temp);
4694
4695        let materialized = self.match_token(TokenType::Materialized);
4696
4697        if self.match_token(TokenType::View) {
4698            return self
4699                .parse_create_view(or_replace, materialized)
4700                .map(Statement::CreateView);
4701        }
4702
4703        self.expect(TokenType::Table)?;
4704
4705        let if_not_exists = if self.match_token(TokenType::If) {
4706            self.expect(TokenType::Not)?;
4707            self.expect(TokenType::Exists)?;
4708            true
4709        } else {
4710            false
4711        };
4712
4713        let table = self.parse_table_ref_no_alias()?;
4714
4715        // CREATE TABLE ... AS SELECT ...
4716        if self.match_token(TokenType::As) {
4717            let query = self.parse_statement_inner()?;
4718            // Greenplum / Citus / etc. trailing `DISTRIBUTED BY (...)` /
4719            // `DISTRIBUTED RANDOMLY` / `DISTRIBUTED REPLICATED`. Swallow.
4720            if self.check_keyword("DISTRIBUTED") {
4721                self.advance();
4722                if self.check_keyword("BY") || matches!(self.peek_type(), TokenType::By) {
4723                    self.advance();
4724                    if self.match_token(TokenType::LParen) {
4725                        let mut depth = 1;
4726                        while depth > 0 {
4727                            match self.peek_type() {
4728                                TokenType::LParen => depth += 1,
4729                                TokenType::RParen => {
4730                                    depth -= 1;
4731                                    if depth == 0 {
4732                                        self.advance();
4733                                        break;
4734                                    }
4735                                }
4736                                TokenType::Eof => break,
4737                                _ => {}
4738                            }
4739                            self.advance();
4740                        }
4741                    }
4742                } else if self.is_name_token() {
4743                    // RANDOMLY / REPLICATED — single keyword
4744                    self.advance();
4745                }
4746            }
4747            return Ok(Statement::CreateTable(CreateTableStatement {
4748                comments: vec![],
4749                if_not_exists,
4750                temporary,
4751                table,
4752                columns: vec![],
4753                constraints: vec![],
4754                as_select: Some(Box::new(query)),
4755            }));
4756        }
4757
4758        self.expect(TokenType::LParen)?;
4759
4760        let mut columns = Vec::new();
4761        let mut constraints = Vec::new();
4762
4763        loop {
4764            // Check for table-level constraints
4765            if matches!(
4766                self.peek_type(),
4767                TokenType::Primary
4768                    | TokenType::Unique
4769                    | TokenType::Foreign
4770                    | TokenType::Check
4771                    | TokenType::Constraint
4772            ) {
4773                constraints.push(self.parse_table_constraint()?);
4774            } else if self.peek_type() != &TokenType::RParen {
4775                columns.push(self.parse_column_def()?);
4776            }
4777
4778            if !self.match_token(TokenType::Comma) {
4779                break;
4780            }
4781        }
4782        self.expect(TokenType::RParen)?;
4783
4784        // Tolerate dialect-specific trailing clauses (ClickHouse `ENGINE = X`,
4785        // `ORDER BY (...)`, `PARTITION BY ...`, `SETTINGS ...`, MySQL
4786        // `ENGINE=InnoDB DEFAULT CHARSET=utf8`, etc.) by consuming tokens
4787        // until the next statement boundary. Respects paren depth so a
4788        // top-level `;` inside `ORDER BY (a, b)` is not mistaken for end.
4789        self.skip_trailing_options();
4790
4791        Ok(Statement::CreateTable(CreateTableStatement {
4792            comments: vec![],
4793            if_not_exists,
4794            temporary,
4795            table,
4796            columns,
4797            constraints,
4798            as_select: None,
4799        }))
4800    }
4801
4802    /// Discard tokens up to (but not including) a top-level `;` or EOF.
4803    /// Used to skip dialect-specific tail clauses we don't model in the AST
4804    /// (CREATE TABLE engines, options, etc.).
4805    fn skip_trailing_options(&mut self) {
4806        let mut depth: i32 = 0;
4807        loop {
4808            match self.peek_type() {
4809                TokenType::Eof => break,
4810                TokenType::Semicolon if depth == 0 => break,
4811                TokenType::LParen => {
4812                    depth += 1;
4813                    self.advance();
4814                }
4815                TokenType::RParen => {
4816                    depth -= 1;
4817                    if depth < 0 {
4818                        break;
4819                    }
4820                    self.advance();
4821                }
4822                _ => {
4823                    self.advance();
4824                }
4825            }
4826        }
4827    }
4828
4829    fn parse_create_view(
4830        &mut self,
4831        or_replace: bool,
4832        materialized: bool,
4833    ) -> Result<CreateViewStatement> {
4834        let if_not_exists = if self.match_token(TokenType::If) {
4835            self.expect(TokenType::Not)?;
4836            self.expect(TokenType::Exists)?;
4837            true
4838        } else {
4839            false
4840        };
4841
4842        // Parse name without alias (so AS is not consumed as an alias)
4843        let name = self.parse_table_ref_no_alias()?;
4844
4845        let columns = if self.match_token(TokenType::LParen) {
4846            let mut cols = vec![self.expect_name()?];
4847            while self.match_token(TokenType::Comma) {
4848                cols.push(self.expect_name()?);
4849            }
4850            self.expect(TokenType::RParen)?;
4851            cols
4852        } else {
4853            vec![]
4854        };
4855
4856        self.expect(TokenType::As)?;
4857        let query = self.parse_statement_inner()?;
4858
4859        Ok(CreateViewStatement {
4860            comments: vec![],
4861            name,
4862            columns,
4863            query: Box::new(query),
4864            or_replace,
4865            materialized,
4866            if_not_exists,
4867        })
4868    }
4869
4870    fn parse_table_constraint(&mut self) -> Result<TableConstraint> {
4871        let name = if self.match_token(TokenType::Constraint) {
4872            Some(self.expect_name()?)
4873        } else {
4874            None
4875        };
4876
4877        if self.match_token(TokenType::Primary) {
4878            self.expect(TokenType::Key)?;
4879            self.expect(TokenType::LParen)?;
4880            let columns = self.parse_name_list()?;
4881            self.expect(TokenType::RParen)?;
4882            // TiDB / MySQL: `PRIMARY KEY (cols) GLOBAL|LOCAL` index scope
4883            // modifier and `USING BTREE|HASH` index-type modifier.
4884            if self.is_name_token()
4885                && matches!(
4886                    self.peek().value.to_uppercase().as_str(),
4887                    "GLOBAL" | "LOCAL"
4888                )
4889            {
4890                self.advance();
4891            }
4892            if self.match_token(TokenType::Using) && self.is_name_token() {
4893                self.advance();
4894            }
4895            self.swallow_constraint_modifiers();
4896            Ok(TableConstraint::PrimaryKey { name, columns })
4897        } else if self.match_token(TokenType::Unique) {
4898            let _ = self.match_token(TokenType::Index) || self.match_token(TokenType::Key);
4899            // Optional index name before `(`.
4900            if !matches!(self.peek_type(), TokenType::LParen) && self.is_name_token() {
4901                self.advance();
4902            }
4903            self.expect(TokenType::LParen)?;
4904            let columns = self.parse_name_list()?;
4905            self.expect(TokenType::RParen)?;
4906            if self.is_name_token()
4907                && matches!(
4908                    self.peek().value.to_uppercase().as_str(),
4909                    "GLOBAL" | "LOCAL"
4910                )
4911            {
4912                self.advance();
4913            }
4914            if self.match_token(TokenType::Using) && self.is_name_token() {
4915                self.advance();
4916            }
4917            self.swallow_constraint_modifiers();
4918            Ok(TableConstraint::Unique { name, columns })
4919        } else if self.match_token(TokenType::Foreign) {
4920            self.expect(TokenType::Key)?;
4921            self.expect(TokenType::LParen)?;
4922            let columns = self.parse_name_list()?;
4923            self.expect(TokenType::RParen)?;
4924            self.expect(TokenType::References)?;
4925            let ref_table = self.parse_table_ref()?;
4926            self.expect(TokenType::LParen)?;
4927            let ref_columns = self.parse_name_list()?;
4928            self.expect(TokenType::RParen)?;
4929
4930            // PG / ANSI `MATCH FULL | PARTIAL | SIMPLE` clause — swallow.
4931            if self.check_keyword("MATCH") {
4932                self.advance();
4933                if self.is_name_token() {
4934                    self.advance();
4935                }
4936            }
4937
4938            let mut on_delete = None;
4939            let mut on_update = None;
4940            // Accept ON DELETE / ON UPDATE clauses in any order. Match the
4941            // ON keyword only when the following token is DELETE / UPDATE
4942            // so a misplaced ON UPDATE doesn't consume the bare ON token
4943            // and orphan the rest of the action list.
4944            while self.peek_type() == &TokenType::On {
4945                let next = self.peek_offset(1).map(|t| &t.token_type);
4946                if matches!(next, Some(TokenType::Delete)) {
4947                    self.advance();
4948                    self.advance();
4949                    on_delete = Some(self.parse_referential_action()?);
4950                } else if matches!(next, Some(TokenType::Update)) {
4951                    self.advance();
4952                    self.advance();
4953                    on_update = Some(self.parse_referential_action()?);
4954                } else {
4955                    break;
4956                }
4957            }
4958
4959            self.swallow_constraint_modifiers();
4960            Ok(TableConstraint::ForeignKey {
4961                name,
4962                columns,
4963                ref_table,
4964                ref_columns,
4965                on_delete,
4966                on_update,
4967            })
4968        } else if self.match_token(TokenType::Check) {
4969            self.expect(TokenType::LParen)?;
4970            let expr = self.parse_expr()?;
4971            self.expect(TokenType::RParen)?;
4972            self.swallow_constraint_modifiers();
4973            Ok(TableConstraint::Check { name, expr })
4974        } else {
4975            Err(SqlglotError::ParserError {
4976                message: "Expected constraint type".into(),
4977            })
4978        }
4979    }
4980
4981    /// Swallow trailing constraint modifiers shared by FK / CHECK / PK /
4982    /// UNIQUE: `NOT VALID`, `[NOT] ENFORCED`, `DEFERRABLE`, `NOT DEFERRABLE`,
4983    /// `INITIALLY DEFERRED | IMMEDIATE`, `NO INHERIT`. Best-effort — we
4984    /// don't model them in the AST.
4985    fn swallow_constraint_modifiers(&mut self) {
4986        loop {
4987            if self.check_keyword("NOT")
4988                && self
4989                    .peek_offset(1)
4990                    .map(|t| t.value.to_uppercase())
4991                    .as_deref()
4992                    .is_some_and(|v| matches!(v, "VALID" | "ENFORCED" | "DEFERRABLE"))
4993            {
4994                self.advance();
4995                self.advance();
4996                continue;
4997            }
4998            if self.check_keyword("ENFORCED")
4999                || self.check_keyword("DEFERRABLE")
5000                || self.check_keyword("CLUSTERED")
5001                || self.check_keyword("NONCLUSTERED")
5002                || self.check_keyword("INVISIBLE")
5003                || self.check_keyword("VISIBLE")
5004            {
5005                self.advance();
5006                continue;
5007            }
5008            if self.check_keyword("INITIALLY") {
5009                self.advance();
5010                if self.is_name_token() {
5011                    self.advance();
5012                }
5013                continue;
5014            }
5015            if self.check_keyword("NO")
5016                && self
5017                    .peek_offset(1)
5018                    .map(|t| t.value.eq_ignore_ascii_case("INHERIT"))
5019                    .unwrap_or(false)
5020            {
5021                self.advance();
5022                self.advance();
5023                continue;
5024            }
5025            break;
5026        }
5027    }
5028
5029    fn parse_referential_action(&mut self) -> Result<ReferentialAction> {
5030        if self.match_token(TokenType::Cascade) {
5031            Ok(ReferentialAction::Cascade)
5032        } else if self.match_token(TokenType::Restrict) {
5033            Ok(ReferentialAction::Restrict)
5034        } else if self.match_token(TokenType::Set) {
5035            if self.match_token(TokenType::Null) {
5036                Ok(ReferentialAction::SetNull)
5037            } else if self.match_token(TokenType::Default) {
5038                Ok(ReferentialAction::SetDefault)
5039            } else {
5040                Err(SqlglotError::ParserError {
5041                    message: "Expected NULL or DEFAULT after SET".into(),
5042                })
5043            }
5044        } else if self.check_keyword("NO") {
5045            self.advance();
5046            self.expect(TokenType::Identifier)?; // ACTION
5047            Ok(ReferentialAction::NoAction)
5048        } else {
5049            Err(SqlglotError::ParserError {
5050                message: "Expected referential action (CASCADE, RESTRICT, SET NULL, SET DEFAULT, NO ACTION)".into(),
5051            })
5052        }
5053    }
5054
5055    fn parse_name_list(&mut self) -> Result<Vec<String>> {
5056        let mut names = vec![self.expect_name()?];
5057        while self.match_token(TokenType::Comma) {
5058            names.push(self.expect_name()?);
5059        }
5060        Ok(names)
5061    }
5062
5063    /// Parse a dotted column reference for INSERT column lists:
5064    /// `name` or `parent.child` (ClickHouse nested columns).
5065    fn parse_dotted_name(&mut self) -> Result<String> {
5066        let mut name = self.expect_name()?;
5067        while self.peek_type() == &TokenType::Dot {
5068            let next = self.peek_offset(1).map(|t| t.token_type.clone());
5069            let next_is_namelike = matches!(
5070                next,
5071                Some(TokenType::Identifier)
5072                    | Some(TokenType::Star)
5073                    | Some(TokenType::Int)
5074                    | Some(TokenType::BigInt)
5075                    | Some(TokenType::Text)
5076                    | Some(TokenType::Date)
5077                    | Some(TokenType::Timestamp)
5078            );
5079            if !next_is_namelike {
5080                break;
5081            }
5082            self.advance(); // .
5083            if self.peek_type() == &TokenType::Star {
5084                name.push('.');
5085                name.push('*');
5086                self.advance();
5087                break;
5088            }
5089            let part = self.expect_name()?;
5090            name.push('.');
5091            name.push_str(&part);
5092        }
5093        Ok(name)
5094    }
5095
5096    fn parse_column_def(&mut self) -> Result<ColumnDef> {
5097        let name = self.expect_name()?;
5098        let data_type = self.parse_data_type()?;
5099
5100        let mut nullable = None;
5101        let mut default = None;
5102        let mut primary_key = false;
5103        let mut unique = false;
5104        let mut auto_increment = false;
5105        let mut collation = None;
5106        let mut comment = None;
5107
5108        loop {
5109            if self.match_token(TokenType::Not) {
5110                self.expect(TokenType::Null)?;
5111                nullable = Some(false);
5112            } else if self.peek_type() == &TokenType::Null {
5113                self.advance();
5114                nullable = Some(true);
5115            } else if self.peek_type() == &TokenType::As
5116                && matches!(
5117                    self.peek_offset(1).map(|t| &t.token_type),
5118                    Some(TokenType::LParen)
5119                )
5120            {
5121                // SQLite / MySQL generated-column shorthand:
5122                //   `col TYPE AS (expr) [STORED|VIRTUAL|PERSISTENT]`.
5123                // Swallow AS, the parenthesised expression (depth-balanced),
5124                // and the optional storage-kind keyword.
5125                self.advance(); // AS
5126                self.advance(); // (
5127                let mut depth: i32 = 1;
5128                while depth > 0 {
5129                    match self.peek_type() {
5130                        TokenType::LParen => {
5131                            depth += 1;
5132                            self.advance();
5133                        }
5134                        TokenType::RParen => {
5135                            depth -= 1;
5136                            self.advance();
5137                        }
5138                        TokenType::Eof => break,
5139                        _ => {
5140                            self.advance();
5141                        }
5142                    }
5143                }
5144                if self.is_name_token()
5145                    && matches!(
5146                        self.peek().value.to_uppercase().as_str(),
5147                        "STORED" | "VIRTUAL" | "PERSISTENT" | "PERSISTED"
5148                    )
5149                {
5150                    self.advance();
5151                }
5152            } else if self.match_token(TokenType::Default) {
5153                // SQL Server / IBM `DEFAULT NEXT VALUE FOR seq[.qual]`.
5154                if self.is_name_token()
5155                    && self.peek().value.eq_ignore_ascii_case("NEXT")
5156                    && self
5157                        .peek_offset(1)
5158                        .map(|t| t.value.eq_ignore_ascii_case("VALUE"))
5159                        .unwrap_or(false)
5160                    && self
5161                        .peek_offset(2)
5162                        .map(|t| t.value.eq_ignore_ascii_case("FOR"))
5163                        .unwrap_or(false)
5164                {
5165                    self.advance();
5166                    self.advance();
5167                    self.advance();
5168                    let mut seq = self.expect_name()?;
5169                    while self.match_token(TokenType::Dot) {
5170                        seq.push('.');
5171                        seq.push_str(&self.expect_name()?);
5172                    }
5173                    default = Some(Expr::Function {
5174                        name: "NEXT_VALUE_FOR".to_string(),
5175                        args: vec![Expr::Column {
5176                            table: None,
5177                            name: seq,
5178                            quote_style: QuoteStyle::None,
5179                            table_quote_style: QuoteStyle::None,
5180                        }],
5181                        distinct: false,
5182                        filter: None,
5183                        over: None,
5184                        order_by: Vec::new(),
5185                        within_group: false,
5186                    });
5187                } else {
5188                    default = Some(self.parse_expr()?);
5189                }
5190            } else if self.match_token(TokenType::Primary) {
5191                self.expect(TokenType::Key)?;
5192                primary_key = true;
5193            } else if self.match_token(TokenType::Unique) {
5194                unique = true;
5195            } else if self.match_token(TokenType::AutoIncrement) {
5196                auto_increment = true;
5197            } else if self.match_token(TokenType::Collate) {
5198                collation = Some(self.expect_name()?);
5199            } else if self.match_token(TokenType::Comment) {
5200                let tok = self.expect(TokenType::String)?;
5201                comment = Some(tok.value);
5202            } else if self.match_token(TokenType::References) {
5203                // Inline foreign key — skip for now
5204                let _ = self.parse_table_ref()?;
5205                if self.match_token(TokenType::LParen) {
5206                    while !self.match_token(TokenType::RParen) {
5207                        self.advance();
5208                    }
5209                }
5210            } else if self.is_name_token()
5211                && self.peek().value.eq_ignore_ascii_case("GENERATED")
5212            {
5213                // SQL:2003 / MySQL / PG / SQL Server identity / computed
5214                // column: `GENERATED ALWAYS AS (expr) [VIRTUAL|STORED]`,
5215                // `GENERATED ALWAYS AS IDENTITY [(...)]`,
5216                // `GENERATED BY DEFAULT AS IDENTITY [(...)]`. Swallow up
5217                // through the trailing parenthesised body if present and
5218                // let the next loop iteration pick up VIRTUAL/STORED.
5219                self.advance();
5220                if self.is_name_token()
5221                    && (self.peek().value.eq_ignore_ascii_case("ALWAYS")
5222                        || self.peek().value.eq_ignore_ascii_case("BY"))
5223                {
5224                    self.advance();
5225                    if self.is_name_token()
5226                        && self.peek().value.eq_ignore_ascii_case("DEFAULT")
5227                    {
5228                        self.advance();
5229                    }
5230                }
5231                if self.match_token(TokenType::As) {
5232                    if self.is_name_token()
5233                        && self.peek().value.eq_ignore_ascii_case("IDENTITY")
5234                    {
5235                        self.advance();
5236                    } else if self.is_name_token()
5237                        && self.peek().value.eq_ignore_ascii_case("ROW")
5238                    {
5239                        // SQL Server `GENERATED AS ROW START | END`.
5240                        self.advance();
5241                        if self.is_name_token() {
5242                            self.advance();
5243                        }
5244                    }
5245                }
5246                if self.peek_type() == &TokenType::LParen {
5247                    let mut depth = 0_i32;
5248                    self.advance();
5249                    depth += 1;
5250                    while depth > 0 {
5251                        match self.peek_type() {
5252                            TokenType::LParen => depth += 1,
5253                            TokenType::RParen => {
5254                                depth -= 1;
5255                                if depth == 0 {
5256                                    self.advance();
5257                                    break;
5258                                }
5259                            }
5260                            TokenType::Eof => break,
5261                            _ => {}
5262                        }
5263                        self.advance();
5264                    }
5265                }
5266            } else if self.is_name_token()
5267                && matches!(
5268                    self.peek().value.to_uppercase().as_str(),
5269                    "CODEC"
5270                        | "TTL"
5271                        | "MATERIALIZED"
5272                        | "ALIAS"
5273                        | "EPHEMERAL"
5274                        | "PERSISTED"
5275                        | "PERSISTENT"
5276                        | "VIRTUAL"
5277                        | "STORED"
5278                        | "ENCODE"
5279                        | "ENCRYPT"
5280                        | "MASKED"
5281                        | "INVISIBLE"
5282                        | "VISIBLE"
5283                        | "ENFORCED"
5284                        | "OPTIONS"
5285                        | "COMPRESSION"
5286                        | "SORTKEY"
5287                        | "DISTKEY"
5288                        | "CHARSET"
5289                        | "CHARACTER"
5290                        | "SRID"
5291                        | "FORMAT"
5292                        | "TAG"
5293                        | "MASKING"
5294                )
5295            {
5296                // ClickHouse / Snowflake / Redshift column modifiers. Consume
5297                // the keyword and the optional parenthesised body (`CODEC(...)`,
5298                // `TTL expr`, etc.) so the rest of the column def parses.
5299                self.advance();
5300                if self.peek_type() == &TokenType::LParen {
5301                    let mut depth = 0_i32;
5302                    self.advance();
5303                    depth += 1;
5304                    while depth > 0 {
5305                        match self.peek_type() {
5306                            TokenType::LParen => depth += 1,
5307                            TokenType::RParen => {
5308                                depth -= 1;
5309                                if depth == 0 {
5310                                    self.advance();
5311                                    break;
5312                                }
5313                            }
5314                            TokenType::Eof => break,
5315                            _ => {}
5316                        }
5317                        self.advance();
5318                    }
5319                } else {
5320                    // Best-effort: swallow an expression up to comma /
5321                    // top-level RParen / column-def boundary, balancing
5322                    // nested parens (e.g. `TTL toDate('2000-01-02')`,
5323                    // `ALIAS arrayResize(emptyArrayUInt32(), length(\`Arr.C2\`))`).
5324                    let mut depth: i32 = 0;
5325                    loop {
5326                        match self.peek_type() {
5327                            TokenType::LParen => {
5328                                depth += 1;
5329                                self.advance();
5330                            }
5331                            TokenType::RParen => {
5332                                if depth == 0 {
5333                                    break;
5334                                }
5335                                depth -= 1;
5336                                self.advance();
5337                            }
5338                            TokenType::Comma if depth == 0 => break,
5339                            TokenType::Eof => break,
5340                            _ => {
5341                                self.advance();
5342                            }
5343                        }
5344                    }
5345                }
5346            } else {
5347                break;
5348            }
5349        }
5350
5351        Ok(ColumnDef {
5352            name,
5353            data_type,
5354            nullable,
5355            default,
5356            primary_key,
5357            unique,
5358            auto_increment,
5359            collation,
5360            comment,
5361        })
5362    }
5363
5364    fn parse_data_type(&mut self) -> Result<DataType> {
5365        let token = self.peek().clone();
5366        // DuckDB / Spark template syntax: `${var}` (or `?` placeholder) used
5367        // where a data type is expected. Lower to `Unknown(name)` so the
5368        // surrounding expression parses.
5369        if matches!(token.token_type, TokenType::Parameter) {
5370            self.advance();
5371            return Ok(DataType::Unknown(token.value));
5372        }
5373        let type_result = match &token.token_type {
5374            TokenType::Int | TokenType::Integer => {
5375                self.advance();
5376                Ok(DataType::Int)
5377            }
5378            TokenType::BigInt => {
5379                self.advance();
5380                Ok(DataType::BigInt)
5381            }
5382            TokenType::SmallInt => {
5383                self.advance();
5384                Ok(DataType::SmallInt)
5385            }
5386            TokenType::TinyInt => {
5387                self.advance();
5388                Ok(DataType::TinyInt)
5389            }
5390            TokenType::Float => {
5391                self.advance();
5392                Ok(DataType::Float)
5393            }
5394            TokenType::Double => {
5395                self.advance();
5396                let _ = self.match_keyword("PRECISION");
5397                Ok(DataType::Double)
5398            }
5399            TokenType::Real => {
5400                self.advance();
5401                Ok(DataType::Real)
5402            }
5403            TokenType::Decimal | TokenType::Numeric => {
5404                let is_numeric = token.token_type == TokenType::Numeric;
5405                self.advance();
5406                let (precision, scale) = self.parse_type_params()?;
5407                if is_numeric {
5408                    Ok(DataType::Numeric { precision, scale })
5409                } else {
5410                    Ok(DataType::Decimal { precision, scale })
5411                }
5412            }
5413            TokenType::Varchar => {
5414                self.advance();
5415                let len = self.parse_single_type_param()?;
5416                Ok(DataType::Varchar(len))
5417            }
5418            TokenType::Char => {
5419                self.advance();
5420                let len = self.parse_single_type_param()?;
5421                Ok(DataType::Char(len))
5422            }
5423            TokenType::Text => {
5424                self.advance();
5425                Ok(DataType::Text)
5426            }
5427            TokenType::Boolean => {
5428                self.advance();
5429                Ok(DataType::Boolean)
5430            }
5431            TokenType::Date => {
5432                self.advance();
5433                Ok(DataType::Date)
5434            }
5435            TokenType::Timestamp => {
5436                self.advance();
5437                let precision = self.parse_single_type_param()?;
5438                let with_tz = if self.match_keyword("WITH") {
5439                    let _ = self.match_keyword("LOCAL");
5440                    let _ = self.match_keyword("TIME");
5441                    let _ = self.match_keyword("ZONE");
5442                    true
5443                } else if self.match_keyword("WITHOUT") {
5444                    let _ = self.match_keyword("TIME");
5445                    let _ = self.match_keyword("ZONE");
5446                    false
5447                } else {
5448                    false
5449                };
5450                Ok(DataType::Timestamp { precision, with_tz })
5451            }
5452            TokenType::TimestampTz => {
5453                self.advance();
5454                let precision = self.parse_single_type_param()?;
5455                Ok(DataType::Timestamp {
5456                    precision,
5457                    with_tz: true,
5458                })
5459            }
5460            TokenType::Time => {
5461                self.advance();
5462                let precision = self.parse_single_type_param()?;
5463                Ok(DataType::Time { precision })
5464            }
5465            TokenType::Interval => {
5466                self.advance();
5467                Ok(DataType::Interval)
5468            }
5469            TokenType::Blob => {
5470                self.advance();
5471                Ok(DataType::Blob)
5472            }
5473            TokenType::Bytea => {
5474                self.advance();
5475                Ok(DataType::Bytea)
5476            }
5477            TokenType::Json => {
5478                self.advance();
5479                Ok(DataType::Json)
5480            }
5481            TokenType::Jsonb => {
5482                self.advance();
5483                Ok(DataType::Jsonb)
5484            }
5485            TokenType::Uuid => {
5486                self.advance();
5487                Ok(DataType::Uuid)
5488            }
5489            TokenType::Array => {
5490                self.advance();
5491                if self.match_token(TokenType::Lt) {
5492                    let inner = self.parse_data_type()?;
5493                    self.expect(TokenType::Gt)?;
5494                    Ok(DataType::Array(Some(Box::new(inner))))
5495                } else {
5496                    Ok(DataType::Array(None))
5497                }
5498            }
5499            TokenType::Struct => {
5500                self.advance();
5501                // STRUCT<a INT, b STRING> (Hive/Spark) or STRUCT(a INT, b INT) (DuckDB).
5502                // Swallow the body — we don't model named struct fields in the AST.
5503                let close = if self.match_token(TokenType::Lt) {
5504                    Some(TokenType::Gt)
5505                } else if self.match_token(TokenType::LParen) {
5506                    Some(TokenType::RParen)
5507                } else {
5508                    None
5509                };
5510                if let Some(close_tok) = close {
5511                    let mut depth = 1_i32;
5512                    while depth > 0 {
5513                        if self.peek_type() == &TokenType::Eof {
5514                            break;
5515                        }
5516                        if self.peek_type() == &close_tok {
5517                            depth -= 1;
5518                            if depth == 0 {
5519                                self.advance();
5520                                break;
5521                            }
5522                        } else if matches!(
5523                            self.peek_type(),
5524                            TokenType::Lt | TokenType::LParen
5525                        ) && (self.peek_type() == &TokenType::Lt
5526                            && close_tok == TokenType::Gt
5527                            || self.peek_type() == &TokenType::LParen
5528                                && close_tok == TokenType::RParen)
5529                        {
5530                            depth += 1;
5531                        }
5532                        self.advance();
5533                    }
5534                }
5535                Ok(DataType::Unknown("STRUCT".to_string()))
5536            }
5537            TokenType::Map => {
5538                self.advance();
5539                let close = if self.match_token(TokenType::Lt) {
5540                    Some(TokenType::Gt)
5541                } else if self.match_token(TokenType::LParen) {
5542                    Some(TokenType::RParen)
5543                } else {
5544                    None
5545                };
5546                if let Some(close_tok) = close {
5547                    let mut depth = 1_i32;
5548                    while depth > 0 {
5549                        if self.peek_type() == &TokenType::Eof {
5550                            break;
5551                        }
5552                        if self.peek_type() == &close_tok {
5553                            depth -= 1;
5554                            if depth == 0 {
5555                                self.advance();
5556                                break;
5557                            }
5558                        } else if (self.peek_type() == &TokenType::Lt
5559                            && close_tok == TokenType::Gt)
5560                            || (self.peek_type() == &TokenType::LParen
5561                                && close_tok == TokenType::RParen)
5562                        {
5563                            depth += 1;
5564                        }
5565                        self.advance();
5566                    }
5567                }
5568                Ok(DataType::Unknown("MAP".to_string()))
5569            }
5570            TokenType::Identifier => {
5571                let name = token.value.to_uppercase();
5572                self.advance();
5573                match name.as_str() {
5574                    "STRING" => Ok(DataType::String),
5575                    "BINARY" => {
5576                        let len = self.parse_single_type_param()?;
5577                        Ok(DataType::Binary(len))
5578                    }
5579                    "VARBINARY" => {
5580                        let len = self.parse_single_type_param()?;
5581                        Ok(DataType::Varbinary(len))
5582                    }
5583                    "DATETIME" => Ok(DataType::DateTime),
5584                    "BYTES" => Ok(DataType::Bytes),
5585                    "VARIANT" => Ok(DataType::Variant),
5586                    "OBJECT" => Ok(DataType::Object),
5587                    "XML" => Ok(DataType::Xml),
5588                    "INET" => Ok(DataType::Inet),
5589                    "CIDR" => Ok(DataType::Cidr),
5590                    "MACADDR" => Ok(DataType::Macaddr),
5591                    "BIT" => {
5592                        // Postgres `BIT VARYING(n)` is the same as VARBIT.
5593                        // Swallow the VARYING keyword if present and parse
5594                        // the length normally.
5595                        if self.is_name_token()
5596                            && self.peek().value.eq_ignore_ascii_case("VARYING")
5597                        {
5598                            self.advance();
5599                            let len = self.parse_single_type_param()?;
5600                            return Ok(DataType::Varbinary(len));
5601                        }
5602                        let len = self.parse_single_type_param()?;
5603                        Ok(DataType::Bit(len))
5604                    }
5605                    "MONEY" => Ok(DataType::Money),
5606                    "SERIAL" => Ok(DataType::Serial),
5607                    "BIGSERIAL" => Ok(DataType::BigSerial),
5608                    "SMALLSERIAL" => Ok(DataType::SmallSerial),
5609                    "REGCLASS" => Ok(DataType::Regclass),
5610                    "REGTYPE" => Ok(DataType::Regtype),
5611                    "HSTORE" => Ok(DataType::Hstore),
5612                    "GEOGRAPHY" => Ok(DataType::Geography),
5613                    "GEOMETRY" => Ok(DataType::Geometry),
5614                    "SUPER" => Ok(DataType::Super),
5615                    _ => Ok(DataType::Unknown(name)),
5616                }
5617            }
5618            _ => {
5619                // Fallback: accept any keyword-like token as an unknown
5620                // data type by its textual value. Covers PostgreSQL `cube`,
5621                // `lseg`, `path`, `polygon`, and any vendor-specific type
5622                // name that happens to collide with a TokenType variant.
5623                let v = token.value.clone();
5624                if !v.is_empty()
5625                    && v.chars()
5626                        .all(|c| c.is_ascii_alphanumeric() || c == '_')
5627                {
5628                    self.advance();
5629                    Ok(DataType::Unknown(v.to_uppercase()))
5630                } else {
5631                    Err(SqlglotError::ParserError {
5632                        message: format!("Expected data type, got {:?}", token.token_type),
5633                    })
5634                }
5635            }
5636        };
5637
5638        // PostgreSQL opt_array_bounds: typename[], typename[N], typename[][]...
5639        let mut dt = type_result?;
5640        while self.match_token(TokenType::LBracket) {
5641            // Consume optional integer bound (PostgreSQL ignores it but accepts it)
5642            let _ = self.match_token(TokenType::Number);
5643            self.expect(TokenType::RBracket)?;
5644            dt = DataType::Array(Some(Box::new(dt)));
5645        }
5646        // ClickHouse parameterized types: `DateTime('Asia/Dubai')`,
5647        // `Nullable(String)`, `Array(Int32)`, `Enum8('a' = 1, 'b' = 2)`,
5648        // `Decimal(9, 2)`, etc. The base type was already produced — swallow
5649        // the parenthesized parameter list so the surrounding expression
5650        // continues to parse.
5651        if self.peek_type() == &TokenType::LParen {
5652            let saved = self.pos;
5653            self.advance();
5654            let mut depth = 1;
5655            let mut ok = true;
5656            while depth > 0 {
5657                match self.peek_type() {
5658                    TokenType::LParen => depth += 1,
5659                    TokenType::RParen => {
5660                        depth -= 1;
5661                        if depth == 0 {
5662                            self.advance();
5663                            break;
5664                        }
5665                    }
5666                    TokenType::Eof => {
5667                        ok = false;
5668                        break;
5669                    }
5670                    _ => {}
5671                }
5672                self.advance();
5673            }
5674            if !ok {
5675                self.pos = saved;
5676            }
5677        }
5678        Ok(dt)
5679    }
5680
5681    fn parse_type_params(&mut self) -> Result<(Option<u32>, Option<u32>)> {
5682        if self.match_token(TokenType::LParen) {
5683            let p: Option<u32> = self.expect(TokenType::Number)?.value.parse().ok();
5684            let s = if self.match_token(TokenType::Comma) {
5685                self.expect(TokenType::Number)?.value.parse().ok()
5686            } else {
5687                None
5688            };
5689            self.expect(TokenType::RParen)?;
5690            Ok((p, s))
5691        } else {
5692            Ok((None, None))
5693        }
5694    }
5695
5696    fn parse_single_type_param(&mut self) -> Result<Option<u32>> {
5697        if self.match_token(TokenType::LParen) {
5698            // Handle TSQL MAX keyword (e.g. VARBINARY(MAX), VARCHAR(MAX))
5699            if self.check_keyword("MAX") {
5700                self.advance(); // consume MAX
5701                self.expect(TokenType::RParen)?;
5702                return Ok(None);
5703            }
5704            let n: Option<u32> = self.expect(TokenType::Number)?.value.parse().ok();
5705            self.expect(TokenType::RParen)?;
5706            Ok(n)
5707        } else {
5708            Ok(None)
5709        }
5710    }
5711
5712    // ── DROP ────────────────────────────────────────────────────────
5713
5714    fn parse_drop(&mut self) -> Result<Statement> {
5715        self.expect(TokenType::Drop)?;
5716
5717        if self.match_token(TokenType::Materialized) {
5718            self.expect(TokenType::View)?;
5719            let if_exists = if self.match_token(TokenType::If) {
5720                self.expect(TokenType::Exists)?;
5721                true
5722            } else {
5723                false
5724            };
5725            let name = self.parse_table_ref()?;
5726            // MySQL/MariaDB allow comma-list — swallow the rest.
5727            while self.match_token(TokenType::Comma) {
5728                let _ = self.parse_table_ref()?;
5729            }
5730            // Trailing CASCADE / RESTRICT.
5731            let _ = self.match_token(TokenType::Cascade)
5732                || self.match_token(TokenType::Restrict);
5733            return Ok(Statement::DropView(DropViewStatement {
5734                comments: vec![],
5735                name,
5736                if_exists,
5737                materialized: true,
5738            }));
5739        }
5740
5741        if self.match_token(TokenType::View) {
5742            let if_exists = if self.match_token(TokenType::If) {
5743                self.expect(TokenType::Exists)?;
5744                true
5745            } else {
5746                false
5747            };
5748            let name = self.parse_table_ref()?;
5749            while self.match_token(TokenType::Comma) {
5750                let _ = self.parse_table_ref()?;
5751            }
5752            let _ = self.match_token(TokenType::Cascade)
5753                || self.match_token(TokenType::Restrict);
5754            return Ok(Statement::DropView(DropViewStatement {
5755                comments: vec![],
5756                name,
5757                if_exists,
5758                materialized: false,
5759            }));
5760        }
5761
5762        // DROP <kind> ... — preserve as a Command for non-TABLE/VIEW drops
5763        // (FUNCTION, PROCEDURE, SCHEMA, DATABASE, INDEX, ROLE, USER, …).
5764        if self.peek_type() != &TokenType::Table {
5765            // Already consumed DROP; capture the remainder.
5766            let body = self.consume_raw_to_statement_end();
5767            return Ok(Statement::Command(CommandStatement {
5768                comments: vec![],
5769                kind: "DROP".to_string(),
5770                body,
5771            }));
5772        }
5773
5774        self.expect(TokenType::Table)?;
5775
5776        let if_exists = if self.match_token(TokenType::If) {
5777            self.expect(TokenType::Exists)?;
5778            true
5779        } else {
5780            false
5781        };
5782
5783        let table = self.parse_table_ref()?;
5784        // MySQL / MariaDB: `DROP TABLE [IF EXISTS] t1, t2, …`. Swallow the
5785        // extra table names so the statement parses.
5786        while self.match_token(TokenType::Comma) {
5787            let _ = self.parse_table_ref()?;
5788        }
5789        let cascade = self.match_token(TokenType::Cascade);
5790        // Tolerate Doris / StarRocks / Oracle trailing modifiers on DROP TABLE
5791        // (`FORCE`, `PURGE`, `RESTRICT`).
5792        while !matches!(self.peek_type(), TokenType::Eof | TokenType::Semicolon) {
5793            if self.is_name_token()
5794                && matches!(
5795                    self.peek().value.to_uppercase().as_str(),
5796                    "FORCE" | "PURGE" | "RESTRICT"
5797                )
5798            {
5799                self.advance();
5800            } else if matches!(self.peek_type(), TokenType::Restrict) {
5801                self.advance();
5802            } else {
5803                break;
5804            }
5805        }
5806
5807        Ok(Statement::DropTable(DropTableStatement {
5808            comments: vec![],
5809            if_exists,
5810            table,
5811            cascade,
5812        }))
5813    }
5814
5815    // ── ALTER TABLE ─────────────────────────────────────────────────
5816
5817    fn parse_alter_table(&mut self) -> Result<AlterTableStatement> {
5818        self.expect(TokenType::Alter)?;
5819        self.expect(TokenType::Table)?;
5820        let table = self.parse_table_ref_no_alias()?;
5821
5822        let mut actions = Vec::new();
5823        loop {
5824            let action = self.parse_alter_action()?;
5825            actions.push(action);
5826            if !self.match_token(TokenType::Comma) {
5827                break;
5828            }
5829        }
5830
5831        Ok(AlterTableStatement {
5832            comments: vec![],
5833            table,
5834            actions,
5835        })
5836    }
5837
5838    fn parse_alter_action(&mut self) -> Result<AlterTableAction> {
5839        // Hive multi-partition continuation after a comma:
5840        // `ALTER TABLE t DROP PARTITION (a), PARTITION (b)`. Swallow the
5841        // bare PARTITION clause.
5842        if self.peek_type() == &TokenType::Partition {
5843            self.advance();
5844            let mut depth: i32 = 0;
5845            while !matches!(self.peek_type(), TokenType::Eof | TokenType::Semicolon)
5846                && (depth > 0 || !matches!(self.peek_type(), TokenType::Comma))
5847            {
5848                match self.peek_type() {
5849                    TokenType::LParen => depth += 1,
5850                    TokenType::RParen => depth = depth.saturating_sub(1),
5851                    _ => {}
5852                }
5853                self.advance();
5854            }
5855            return Ok(AlterTableAction::DropColumn {
5856                name: String::new(),
5857                if_exists: false,
5858            });
5859        }
5860        if self.match_keyword("ADD") {
5861            if matches!(
5862                self.peek_type(),
5863                TokenType::Constraint
5864                    | TokenType::Primary
5865                    | TokenType::Unique
5866                    | TokenType::Foreign
5867                    | TokenType::Check
5868            ) {
5869                let constraint = self.parse_table_constraint()?;
5870                self.swallow_constraint_modifiers();
5871                Ok(AlterTableAction::AddConstraint(constraint))
5872            } else if self.check_keyword("EXCLUDE") {
5873                // PG `ADD EXCLUDE [USING method] (col WITH op [, ...]) [WHERE
5874                // (predicate)] [DEFERRABLE …]` — swallow opaquely until we
5875                // hit a top-level statement boundary or comma.
5876                let mut depth: i32 = 0;
5877                while !matches!(self.peek_type(), TokenType::Eof | TokenType::Semicolon)
5878                    && (depth > 0 || !matches!(self.peek_type(), TokenType::Comma))
5879                {
5880                    match self.peek_type() {
5881                        TokenType::LParen => depth += 1,
5882                        TokenType::RParen => depth = depth.saturating_sub(1),
5883                        _ => {}
5884                    }
5885                    self.advance();
5886                }
5887                Ok(AlterTableAction::DropColumn {
5888                    name: String::new(),
5889                    if_exists: false,
5890                })
5891            } else if self.check_keyword("INDEX")
5892                || self.check_keyword("KEY")
5893                || self.check_keyword("PROJECTION")
5894                || self.check_keyword("STATISTICS")
5895            {
5896                // ClickHouse / MySQL `ADD INDEX [name] expr TYPE x GRANULARITY n
5897                // [AFTER y]`, `ADD KEY ...`, `ADD PROJECTION ...`. The body
5898                // is heterogeneous; swallow it opaquely up to the next
5899                // top-level Comma / Semicolon / EOF.
5900                let mut depth: i32 = 0;
5901                while !matches!(self.peek_type(), TokenType::Eof | TokenType::Semicolon)
5902                    && (depth > 0 || !matches!(self.peek_type(), TokenType::Comma))
5903                {
5904                    match self.peek_type() {
5905                        TokenType::LParen => depth += 1,
5906                        TokenType::RParen => depth = depth.saturating_sub(1),
5907                        _ => {}
5908                    }
5909                    self.advance();
5910                }
5911                Ok(AlterTableAction::DropColumn {
5912                    name: String::new(),
5913                    if_exists: false,
5914                })
5915            } else if self.check_keyword("COLUMNS") {
5916                // Hive / Spark / Databricks `ALTER TABLE … ADD COLUMNS
5917                // (col type [, col type]*)` or the comma-list form
5918                // `ADD COLUMNS col type, col type`. Swallow opaquely.
5919                self.advance();
5920                let mut depth: i32 = 0;
5921                while !matches!(self.peek_type(), TokenType::Eof | TokenType::Semicolon)
5922                    && (depth > 0 || !matches!(self.peek_type(), TokenType::Comma))
5923                {
5924                    match self.peek_type() {
5925                        TokenType::LParen => depth += 1,
5926                        TokenType::RParen => depth = depth.saturating_sub(1),
5927                        _ => {}
5928                    }
5929                    self.advance();
5930                    if depth == 0 && matches!(self.peek_type(), TokenType::Eof | TokenType::Semicolon)
5931                    {
5932                        break;
5933                    }
5934                }
5935                Ok(AlterTableAction::DropColumn {
5936                    name: String::new(),
5937                    if_exists: false,
5938                })
5939            } else {
5940                let _ = self.match_keyword("COLUMN");
5941                let col = self.parse_column_def()?;
5942                // ClickHouse: `ADD COLUMN name type AFTER other` / `FIRST` —
5943                // consume the placement modifier so the rest of the action
5944                // list parses.
5945                if self.check_keyword("AFTER") {
5946                    self.advance();
5947                    if self.is_name_token() {
5948                        self.advance();
5949                    }
5950                } else if self.check_keyword("FIRST") {
5951                    self.advance();
5952                }
5953                Ok(AlterTableAction::AddColumn(col))
5954            }
5955        } else if self.match_token(TokenType::Drop) {
5956            // Hive: `DROP IF EXISTS PARTITION (…), PARTITION (…)`. The
5957            // optional `IF EXISTS` precedes PARTITION.
5958            if self.peek_type() == &TokenType::If
5959                && self
5960                    .peek_offset(1)
5961                    .map(|t| matches!(t.token_type, TokenType::Exists))
5962                    .unwrap_or(false)
5963                && self
5964                    .peek_offset(2)
5965                    .map(|t| matches!(t.token_type, TokenType::Partition))
5966                    .unwrap_or(false)
5967            {
5968                self.advance(); // IF
5969                self.advance(); // EXISTS
5970            }
5971            // MySQL / TiDB: `DROP INDEX|KEY name`, `DROP PRIMARY KEY`,
5972            // `DROP FOREIGN KEY name`, `DROP CONSTRAINT name`,
5973            // `DROP PARTITION (...)`, `DROP CHECK name`. We don't have a
5974            // dedicated AST node for these, so swallow them to end-of-action.
5975            if matches!(
5976                self.peek_type(),
5977                TokenType::Index
5978                    | TokenType::Primary
5979                    | TokenType::Foreign
5980                    | TokenType::Constraint
5981                    | TokenType::Check
5982                    | TokenType::Partition
5983                    | TokenType::Unique
5984            ) || self.check_keyword("KEY")
5985                || self.check_keyword("FEATURE")
5986                || self.check_keyword("PROJECTION")
5987                || self.check_keyword("STATISTICS")
5988                || self.check_keyword("INDEX")
5989                || self.check_keyword("DISTRIBUTION")
5990            {
5991                let mut depth: i32 = 0;
5992                while !matches!(self.peek_type(), TokenType::Eof | TokenType::Semicolon)
5993                    && (depth > 0 || !matches!(self.peek_type(), TokenType::Comma))
5994                {
5995                    match self.peek_type() {
5996                        TokenType::LParen => depth += 1,
5997                        TokenType::RParen => depth = depth.saturating_sub(1),
5998                        _ => {}
5999                    }
6000                    self.advance();
6001                }
6002                return Ok(AlterTableAction::DropColumn {
6003                    name: String::new(),
6004                    if_exists: false,
6005                });
6006            }
6007            let _ = self.match_keyword("COLUMN");
6008            let if_exists = if self.match_token(TokenType::If) {
6009                self.expect(TokenType::Exists)?;
6010                true
6011            } else {
6012                false
6013            };
6014            let mut name = self.expect_name()?;
6015            // ClickHouse `DROP COLUMN nested.col` — accept dotted suffixes;
6016            // we collapse them into the column name string for now.
6017            while self.peek_type() == &TokenType::Dot {
6018                self.advance();
6019                if !self.is_name_token() {
6020                    break;
6021                }
6022                name.push('.');
6023                name.push_str(&self.peek().value);
6024                self.advance();
6025            }
6026            Ok(AlterTableAction::DropColumn { name, if_exists })
6027        } else if self.match_keyword("RENAME") {
6028            if self.match_keyword("COLUMN") {
6029                let old_name = self.expect_name()?;
6030                self.expect(TokenType::Identifier)?; // TO
6031                let new_name = self.expect_name()?;
6032                Ok(AlterTableAction::RenameColumn { old_name, new_name })
6033            } else if self.match_keyword("TO") {
6034                let mut new_name = self.expect_name()?;
6035                while self.match_token(TokenType::Dot) {
6036                    new_name.push('.');
6037                    new_name.push_str(&self.expect_name()?);
6038                }
6039                Ok(AlterTableAction::RenameTable { new_name })
6040            } else {
6041                Err(SqlglotError::ParserError {
6042                    message: "Expected COLUMN or TO after RENAME".into(),
6043                })
6044            }
6045        } else {
6046            Err(SqlglotError::ParserError {
6047                message: "Expected ADD, DROP, or RENAME in ALTER TABLE".into(),
6048            })
6049        }
6050    }
6051
6052    /// Try [`parse_alter_table`]; on failure, rewind and capture the entire
6053    /// `ALTER …` statement verbatim as a [`Statement::Command`]. This covers
6054    /// the long tail of vendor-specific ALTER forms — MySQL `ALTER TABLE …
6055    /// CONVERT TO CHARACTER SET … COLLATE …`, Hive `ALTER TABLE … PARTITION
6056    /// (…) COMPACT 'major'`, T-SQL `ALTER TABLE … WITH (…) CHECK CONSTRAINT
6057    /// …`, etc. (Gap 5)
6058    fn parse_alter_or_command(&mut self) -> Result<Statement> {
6059        let saved = self.pos;
6060        let saved_comments = self.pending_comments.clone();
6061        match self.parse_alter_table() {
6062            Ok(stmt) => Ok(Statement::AlterTable(stmt)),
6063            Err(_) => {
6064                self.pos = saved;
6065                self.pending_comments = saved_comments;
6066                self.parse_command_kind("ALTER")
6067            }
6068        }
6069    }
6070
6071    /// Try [`parse_create`]; on failure, rewind and capture the entire
6072    /// `CREATE …` statement verbatim as a [`Statement::Command`]. Also
6073    /// handles the `CREATE TABLE t AS VALUES (…)` form (Gap 7) and rarer
6074    /// `CREATE OPERATOR / AGGREGATE / SEQUENCE / FUNCTION / TEXT SEARCH
6075    /// CONFIGURATION / …` (Gap 4).
6076    fn parse_create_or_command(&mut self) -> Result<Statement> {
6077        let saved = self.pos;
6078        let saved_comments = self.pending_comments.clone();
6079        match self.parse_create() {
6080            Ok(stmt) => Ok(stmt),
6081            Err(_) => {
6082                self.pos = saved;
6083                self.pending_comments = saved_comments;
6084                self.parse_command_kind("CREATE")
6085            }
6086        }
6087    }
6088
6089    // ── TRUNCATE ────────────────────────────────────────────────────
6090
6091    fn parse_truncate(&mut self) -> Result<TruncateStatement> {
6092        self.expect(TokenType::Truncate)?;
6093        let _ = self.match_token(TokenType::Table);
6094        let table = self.parse_table_ref()?;
6095        Ok(TruncateStatement {
6096            comments: vec![],
6097            table,
6098        })
6099    }
6100
6101    // ── Transaction ─────────────────────────────────────────────────
6102
6103    fn parse_transaction(&mut self) -> Result<TransactionStatement> {
6104        match self.peek_type() {
6105            TokenType::Begin => {
6106                self.advance();
6107                let _ = self.match_token(TokenType::Transaction);
6108                let _ = self.match_keyword("WORK");
6109                Ok(TransactionStatement::Begin)
6110            }
6111            TokenType::Commit => {
6112                self.advance();
6113                let _ = self.match_token(TokenType::Transaction);
6114                let _ = self.match_keyword("WORK");
6115                // SQL-standard COMMIT [WORK] [AND [NO] CHAIN]
6116                if self.match_token(TokenType::And) {
6117                    let _ = self.match_token(TokenType::Not);
6118                    let _ = self.match_keyword("NO");
6119                    let _ = self.match_keyword("CHAIN");
6120                }
6121                Ok(TransactionStatement::Commit)
6122            }
6123            TokenType::Rollback => {
6124                self.advance();
6125                let _ = self.match_token(TokenType::Transaction);
6126                let _ = self.match_keyword("WORK");
6127                if self.match_keyword("TO") {
6128                    let _ = self.match_token(TokenType::Savepoint);
6129                    let name = self.expect_name()?;
6130                    Ok(TransactionStatement::RollbackTo(name))
6131                } else {
6132                    // ROLLBACK [WORK] [AND [NO] CHAIN]
6133                    if self.match_token(TokenType::And) {
6134                        let _ = self.match_token(TokenType::Not);
6135                        let _ = self.match_keyword("NO");
6136                        let _ = self.match_keyword("CHAIN");
6137                    }
6138                    Ok(TransactionStatement::Rollback)
6139                }
6140            }
6141            TokenType::Savepoint => {
6142                self.advance();
6143                let name = self.expect_name()?;
6144                Ok(TransactionStatement::Savepoint(name))
6145            }
6146            _ => Err(SqlglotError::ParserError {
6147                message: "Expected transaction statement".into(),
6148            }),
6149        }
6150    }
6151
6152    // ── EXPLAIN ─────────────────────────────────────────────────────
6153
6154    fn parse_explain(&mut self) -> Result<ExplainStatement> {
6155        self.expect(TokenType::Explain)?;
6156        let analyze = self.match_token(TokenType::Analyze);
6157        // PostgreSQL `EXPLAIN (VERBOSE, COSTS OFF, ...)` option block, plus
6158        // unparenthesized `VERBOSE` / `FORMAT TEXT|JSON|YAML`.
6159        if self.match_token(TokenType::LParen) {
6160            let mut depth = 1;
6161            while depth > 0 {
6162                match self.peek_type() {
6163                    TokenType::Eof => break,
6164                    TokenType::LParen => depth += 1,
6165                    TokenType::RParen => {
6166                        depth -= 1;
6167                        if depth == 0 {
6168                            self.advance();
6169                            break;
6170                        }
6171                    }
6172                    _ => {}
6173                }
6174                self.advance();
6175            }
6176        } else {
6177            // Optional bare keywords: VERBOSE / FORMAT [=] <name|string>
6178            loop {
6179                if self.check_keyword("VERBOSE") {
6180                    self.advance();
6181                    continue;
6182                }
6183                if self.check_keyword("FORMAT") {
6184                    self.advance();
6185                    let _ = self.match_token(TokenType::Eq);
6186                    // Format name can be an identifier (TEXT/JSON/YAML/XML/...)
6187                    // or a string literal (`'plan_tree'`).
6188                    if matches!(
6189                        self.peek_type(),
6190                        TokenType::String | TokenType::Identifier
6191                    ) || self.is_name_token()
6192                    {
6193                        self.advance();
6194                    }
6195                    continue;
6196                }
6197                break;
6198            }
6199            // Hive / Spark EXPLAIN modifiers: EXTENDED, LOCKS, AUTHORIZATION,
6200            // DEPENDENCY, VECTORIZATION [ONLY] [SUMMARY|OPERATOR|EXPRESSION|DETAIL],
6201            // CBO, AST, REWRITE, FORMATTED, LOGICAL, NODE. Also ClickHouse
6202            // `EXPLAIN indexes=1 actions=1 …` bare options. Consume any
6203            // identifier-like tokens (and optional `= value`) until we hit a
6204            // statement-starting keyword.
6205            loop {
6206                match self.peek_type() {
6207                    TokenType::Select
6208                    | TokenType::With
6209                    | TokenType::Insert
6210                    | TokenType::Update
6211                    | TokenType::Delete
6212                    | TokenType::Merge
6213                    | TokenType::Create
6214                    | TokenType::Drop
6215                    | TokenType::Alter
6216                    | TokenType::Truncate
6217                    | TokenType::LParen
6218                    | TokenType::Eof
6219                    | TokenType::Semicolon => break,
6220                    TokenType::Identifier => {
6221                        self.advance();
6222                        if self.match_token(TokenType::Eq) {
6223                            // value: number, string, or identifier
6224                            if matches!(
6225                                self.peek_type(),
6226                                TokenType::Number | TokenType::String
6227                            ) || self.is_name_token()
6228                            {
6229                                self.advance();
6230                            }
6231                        }
6232                        // Optional comma between options
6233                        // (ClickHouse `dump_tree = 1, dump_ast = 1 …`).
6234                        let _ = self.match_token(TokenType::Comma);
6235                    }
6236                    _ => {
6237                        // Also accept unreserved keyword-style modifiers
6238                        // (ONLY, FORMATTED, EXTENDED, etc. that tokenize as
6239                        // their own variants). Bail when we hit anything
6240                        // that isn't a plain name token.
6241                        if self.is_name_token() {
6242                            self.advance();
6243                        } else {
6244                            break;
6245                        }
6246                    }
6247                }
6248            }
6249        }
6250        let statement = self.parse_statement_inner()?;
6251        Ok(ExplainStatement {
6252            comments: vec![],
6253            analyze,
6254            statement: Box::new(statement),
6255        })
6256    }
6257
6258    // ── USE ─────────────────────────────────────────────────────────
6259
6260    fn parse_use(&mut self) -> Result<UseStatement> {
6261        self.expect(TokenType::Use)?;
6262        // Optional kind: USE DATABASE / SCHEMA / CATALOG / WAREHOUSE / ROLE
6263        // (DuckDB / Snowflake / Spark). Swallow the leading keyword.
6264        let _ = matches!(
6265            self.peek_type(),
6266            TokenType::Database | TokenType::Schema
6267        ) && {
6268            self.advance();
6269            true
6270        } || (self.is_name_token()
6271            && matches!(
6272                self.peek().value.to_uppercase().as_str(),
6273                "CATALOG" | "WAREHOUSE" | "ROLE"
6274            )
6275            && {
6276                self.advance();
6277                true
6278            });
6279        // `USE default` (Hive): `default` is a keyword, accept it as a name.
6280        let mut name = if matches!(self.peek_type(), TokenType::Default) {
6281            let v = self.peek().value.clone();
6282            self.advance();
6283            v
6284        } else if self.is_name_token()
6285            && self.peek().value.eq_ignore_ascii_case("IDENTIFIER")
6286            && matches!(
6287                self.peek_offset(1).map(|t| &t.token_type),
6288                Some(TokenType::LParen)
6289            )
6290        {
6291            // Snowflake / Databricks IDENTIFIER('name') indirection —
6292            // swallow the call and use a synthetic name.
6293            self.advance(); // IDENTIFIER
6294            self.advance(); // (
6295            let mut depth: i32 = 1;
6296            while depth > 0 {
6297                match self.peek_type() {
6298                    TokenType::LParen => {
6299                        depth += 1;
6300                        self.advance();
6301                    }
6302                    TokenType::RParen => {
6303                        depth -= 1;
6304                        self.advance();
6305                    }
6306                    TokenType::Eof => break,
6307                    _ => {
6308                        self.advance();
6309                    }
6310                }
6311            }
6312            "IDENTIFIER".to_string()
6313        } else {
6314            self.expect_name()?
6315        };
6316        while self.match_token(TokenType::Dot) {
6317            name.push('.');
6318            if matches!(self.peek_type(), TokenType::Default) {
6319                name.push_str(&self.peek().value);
6320                self.advance();
6321            } else {
6322                name.push_str(&self.expect_name()?);
6323            }
6324        }
6325        Ok(UseStatement {
6326            comments: vec![],
6327            name,
6328        })
6329    }
6330
6331    // ══════════════════════════════════════════════════════════════
6332    // Expression parsing (precedence climbing)
6333    // ══════════════════════════════════════════════════════════════
6334
6335    fn parse_expr(&mut self) -> Result<Expr> {
6336        // DuckDB lambda: `lambda x: body` or `lambda x, y: body`. Lower to a
6337        // `Function("lambda", [name(s), body])` placeholder so the call parses.
6338        if self.is_name_token() && self.peek().value.eq_ignore_ascii_case("lambda") {
6339            let saved = self.pos;
6340            self.advance();
6341            let mut names: Vec<Expr> = Vec::new();
6342            let mut ok = self.is_name_token();
6343            while ok {
6344                let n = self.advance().clone();
6345                names.push(Expr::Column {
6346                    table: None,
6347                    name: n.value.clone(),
6348                    table_quote_style: QuoteStyle::None,
6349                    quote_style: QuoteStyle::None,
6350                });
6351                if !self.match_token(TokenType::Comma) {
6352                    break;
6353                }
6354                if !self.is_name_token() {
6355                    ok = false;
6356                    break;
6357                }
6358            }
6359            if ok && self.match_token(TokenType::Colon) {
6360                let body = self.parse_expr()?;
6361                let mut args = names;
6362                args.push(body);
6363                return Ok(Expr::Function {
6364                    name: "lambda".to_string(),
6365                    args,
6366                    distinct: false,
6367                    filter: None,
6368                    over: None,
6369                    order_by: Vec::new(),
6370                    within_group: false,
6371                });
6372            }
6373            self.pos = saved;
6374        }
6375        // DuckDB / PostgreSQL named-argument prefix `name := value` and
6376        // BigQuery `name => value` — discard the name so the surrounding
6377        // function call parses. Only triggered when the lookahead clearly
6378        // matches the named-arg shape.
6379        if self.is_name_token() {
6380            let next = self.peek_offset(1).map(|t| &t.token_type);
6381            let after = self.peek_offset(2).map(|t| &t.token_type);
6382            if matches!(next, Some(TokenType::Colon)) && matches!(after, Some(TokenType::Eq)) {
6383                self.advance();
6384                self.advance();
6385                self.advance();
6386            } else if matches!(next, Some(TokenType::DoubleArrow)) {
6387                self.advance();
6388                self.advance();
6389            } else if matches!(next, Some(TokenType::Eq))
6390                && matches!(after, Some(TokenType::Gt))
6391            {
6392                // `name => value` tokenized as `Eq Gt` (no DoubleArrow merge).
6393                self.advance();
6394                self.advance();
6395                self.advance();
6396            }
6397        }
6398        let cond = self.parse_or_expr()?;
6399        // MySQL session-variable assignment in expression position:
6400        // `@var := expr`. Tokenized as `Colon Eq`. Lower to `BinaryOp Eq`
6401        // so the surrounding query parses.
6402        if matches!(self.peek_type(), TokenType::Colon)
6403            && matches!(self.peek_offset(1).map(|t| &t.token_type), Some(TokenType::Eq))
6404        {
6405            self.advance();
6406            self.advance();
6407            let rhs = self.parse_expr()?;
6408            return Ok(Expr::BinaryOp {
6409                left: Box::new(cond),
6410                op: BinaryOperator::Eq,
6411                right: Box::new(rhs),
6412            });
6413        }
6414        // ClickHouse C-style ternary: `cond ? then : else`. Tokenized as
6415        // `Parameter('?')` followed later by `Colon`. Lower to a CASE.
6416        if matches!(self.peek_type(), TokenType::Parameter) && self.peek().value == "?" {
6417            self.advance();
6418            let then_branch = self.parse_or_expr()?;
6419            if self.match_token(TokenType::Colon) {
6420                let else_branch = self.parse_expr()?;
6421                return Ok(Expr::Case {
6422                    operand: None,
6423                    when_clauses: vec![(cond, then_branch)],
6424                    else_clause: Some(Box::new(else_branch)),
6425                });
6426            }
6427        }
6428        Ok(cond)
6429    }
6430
6431    fn parse_or_expr(&mut self) -> Result<Expr> {
6432        let mut left = self.parse_and_expr()?;
6433        while self.match_token(TokenType::Or) {
6434            let right = self.parse_and_expr()?;
6435            left = Expr::BinaryOp {
6436                left: Box::new(left),
6437                op: BinaryOperator::Or,
6438                right: Box::new(right),
6439            };
6440        }
6441        Ok(left)
6442    }
6443
6444    fn parse_and_expr(&mut self) -> Result<Expr> {
6445        let mut left = self.parse_not_expr()?;
6446        while self.match_token(TokenType::And) {
6447            let right = self.parse_not_expr()?;
6448            left = Expr::BinaryOp {
6449                left: Box::new(left),
6450                op: BinaryOperator::And,
6451                right: Box::new(right),
6452            };
6453        }
6454        Ok(left)
6455    }
6456
6457    fn parse_not_expr(&mut self) -> Result<Expr> {
6458        if self.match_token(TokenType::Not) {
6459            let expr = self.parse_not_expr()?;
6460            Ok(Expr::UnaryOp {
6461                op: UnaryOperator::Not,
6462                expr: Box::new(expr),
6463            })
6464        } else {
6465            self.parse_comparison()
6466        }
6467    }
6468
6469    fn parse_comparison(&mut self) -> Result<Expr> {
6470        let mut left = self.parse_addition()?;
6471
6472        loop {
6473            // ClickHouse distributed predicates: `expr GLOBAL [NOT] IN (...)`
6474            // and `expr GLOBAL JOIN ...`. The keyword tokenizes as a plain
6475            // identifier — swallow it so the following predicate parses.
6476            if self.check_keyword("GLOBAL") {
6477                let next = self.peek_offset(1).map(|t| &t.token_type);
6478                if matches!(next, Some(TokenType::In) | Some(TokenType::Not)) {
6479                    self.advance();
6480                }
6481            }
6482            // ANSI / Postgres `period1 OVERLAPS period2` — model as Eq for
6483            // acceptance purposes.
6484            if self.check_keyword("OVERLAPS") {
6485                self.advance();
6486                let right = self.parse_addition()?;
6487                left = Expr::BinaryOp {
6488                    left: Box::new(left),
6489                    op: BinaryOperator::Eq,
6490                    right: Box::new(right),
6491                };
6492                continue;
6493            }
6494            // MySQL JSON `value MEMBER OF (json_array_expr)` — model as Eq.
6495            if self.check_keyword("MEMBER")
6496                && self
6497                    .peek_offset(1)
6498                    .map(|t| t.value.eq_ignore_ascii_case("OF"))
6499                    .unwrap_or(false)
6500            {
6501                self.advance();
6502                self.advance();
6503                let right = self.parse_addition()?;
6504                left = Expr::BinaryOp {
6505                    left: Box::new(left),
6506                    op: BinaryOperator::Eq,
6507                    right: Box::new(right),
6508                };
6509                continue;
6510            }
6511            // PostgreSQL geometric and full-text operators that tokenize as
6512            // multi-character sequences our tokenizer doesn't fuse:
6513            //   `<->`  (distance)         tokens: Lt, Arrow
6514            //   `&&` `&<` `&>`            (array / range overlap)
6515            //   `@@`                      (text search match)
6516            //   `|>` `<|`                 (range left/right of)
6517            // Lower all of them to a generic Eq so the surrounding
6518            // expression parses; the bench only cares about acceptance.
6519            {
6520                let p0 = self.peek_type().clone();
6521                let p1 = self.peek_offset(1).map(|t| t.token_type.clone());
6522                let p2 = self.peek_offset(2).map(|t| t.token_type.clone());
6523                let p1v = self.peek_offset(1).map(|t| t.value.clone()).unwrap_or_default();
6524                let consume_count = match (&p0, &p1, &p2) {
6525                    // <-> distance
6526                    (TokenType::Lt, Some(TokenType::Arrow), _) => 2,
6527                    // && overlap
6528                    (TokenType::BitwiseAnd, Some(TokenType::BitwiseAnd), _) => 2,
6529                    // &<| / &>| geometric variants
6530                    (TokenType::BitwiseAnd, Some(TokenType::Lt), Some(TokenType::BitwiseOr))
6531                    | (TokenType::BitwiseAnd, Some(TokenType::Gt), Some(TokenType::BitwiseOr)) => 3,
6532                    // &< / &>
6533                    (TokenType::BitwiseAnd, Some(TokenType::Lt), _)
6534                    | (TokenType::BitwiseAnd, Some(TokenType::Gt), _) => 2,
6535                    // @@ and @?
6536                    (TokenType::AtSign, Some(TokenType::AtSign), _) => 2,
6537                    // |> and <|
6538                    (TokenType::BitwiseOr, Some(TokenType::Gt), _)
6539                    | (TokenType::Lt, Some(TokenType::BitwiseOr), _) => 2,
6540                    // <<| / >>|
6541                    (TokenType::ShiftLeft, Some(TokenType::BitwiseOr), _)
6542                    | (TokenType::ShiftRight, Some(TokenType::BitwiseOr), _) => 2,
6543                    // ^@ starts_with operator
6544                    (TokenType::BitwiseXor, Some(TokenType::AtSign), _) => 2,
6545                    _ if matches!(p0, TokenType::AtSign)
6546                        && matches!(p1, Some(TokenType::Parameter))
6547                        && p1v == "?" =>
6548                    {
6549                        2
6550                    }
6551                    _ => 0,
6552                };
6553                if consume_count > 0 {
6554                    for _ in 0..consume_count {
6555                        self.advance();
6556                    }
6557                    let right = self.parse_addition()?;
6558                    left = Expr::BinaryOp {
6559                        left: Box::new(left),
6560                        op: BinaryOperator::Eq,
6561                        right: Box::new(right),
6562                    };
6563                    continue;
6564                }
6565            }
6566            let op = match self.peek_type() {
6567                TokenType::Eq => Some(BinaryOperator::Eq),
6568                TokenType::Neq => Some(BinaryOperator::Neq),
6569                TokenType::Lt => Some(BinaryOperator::Lt),
6570                TokenType::Gt => Some(BinaryOperator::Gt),
6571                TokenType::LtEq => {
6572                    // Hive / MySQL `<=>` null-safe equality tokenizes as `Lte Gt`.
6573                    if matches!(self.peek_offset(1).map(|t| &t.token_type), Some(TokenType::Gt)) {
6574                        self.advance();
6575                        self.advance();
6576                        let right = self.parse_addition()?;
6577                        left = Expr::BinaryOp {
6578                            left: Box::new(left),
6579                            op: BinaryOperator::Eq,
6580                            right: Box::new(right),
6581                        };
6582                        continue;
6583                    }
6584                    Some(BinaryOperator::LtEq)
6585                }
6586                TokenType::GtEq => Some(BinaryOperator::GtEq),
6587                TokenType::AtArrow => Some(BinaryOperator::AtArrow),
6588                TokenType::ArrowAt => Some(BinaryOperator::ArrowAt),
6589                // PostgreSQL geometric / regex operators starting with `~`:
6590                //   ~=, ~<, ~>, ~<=, ~>=, ~~, ~~*, !~, !~*. We lower all of
6591                //   them to a generic Eq comparison so the surrounding
6592                //   expression parses; the bench only cares about acceptance.
6593                TokenType::BitwiseNot => {
6594                    self.advance();
6595                    // Optional follow-up: =, <, >, <=, >=, ~, ~*, *.
6596                    let _ = match self.peek_type() {
6597                        TokenType::Eq
6598                        | TokenType::Lt
6599                        | TokenType::Gt
6600                        | TokenType::LtEq
6601                        | TokenType::GtEq
6602                        | TokenType::Star
6603                        | TokenType::BitwiseNot => {
6604                            self.advance();
6605                            // Allow `~~*` (LIKE-like, case-insensitive).
6606                            if self.peek_type() == &TokenType::Star {
6607                                self.advance();
6608                            }
6609                            true
6610                        }
6611                        _ => false,
6612                    };
6613                    let right = self.parse_addition()?;
6614                    left = Expr::BinaryOp {
6615                        left: Box::new(left),
6616                        op: BinaryOperator::Eq,
6617                        right: Box::new(right),
6618                    };
6619                    continue;
6620                }
6621                _ => None,
6622            };
6623
6624            if let Some(op) = op {
6625                self.advance();
6626                // ClickHouse / SQLite accept `==` as a synonym for `=`.
6627                if matches!(op, BinaryOperator::Eq) && self.peek_type() == &TokenType::Eq {
6628                    self.advance();
6629                }
6630                if matches!(self.peek_type(), TokenType::Any | TokenType::Some) {
6631                    self.advance();
6632                    self.expect(TokenType::LParen)?;
6633                    let right = if matches!(self.peek_type(), TokenType::Select | TokenType::With) {
6634                        Expr::Subquery(Box::new(self.parse_statement_inner()?))
6635                    } else {
6636                        self.parse_expr()?
6637                    };
6638                    self.expect(TokenType::RParen)?;
6639                    left = Expr::AnyOp {
6640                        expr: Box::new(left),
6641                        op,
6642                        right: Box::new(right),
6643                    };
6644                } else if self.peek_type() == &TokenType::All {
6645                    self.advance();
6646                    self.expect(TokenType::LParen)?;
6647                    let right = if matches!(self.peek_type(), TokenType::Select | TokenType::With) {
6648                        Expr::Subquery(Box::new(self.parse_statement_inner()?))
6649                    } else {
6650                        self.parse_expr()?
6651                    };
6652                    self.expect(TokenType::RParen)?;
6653                    left = Expr::AllOp {
6654                        expr: Box::new(left),
6655                        op,
6656                        right: Box::new(right),
6657                    };
6658                } else {
6659                    let right = self.parse_addition()?;
6660                    left = Expr::BinaryOp {
6661                        left: Box::new(left),
6662                        op,
6663                        right: Box::new(right),
6664                    };
6665                }
6666            } else if self.peek_type() == &TokenType::Is {
6667                self.advance();
6668                let negated = self.match_token(TokenType::Not);
6669                if self.match_token(TokenType::True) {
6670                    left = Expr::IsBool {
6671                        expr: Box::new(left),
6672                        value: true,
6673                        negated,
6674                    };
6675                } else if self.match_token(TokenType::False) {
6676                    left = Expr::IsBool {
6677                        expr: Box::new(left),
6678                        value: false,
6679                        negated,
6680                    };
6681                } else if self.match_token(TokenType::Distinct) {
6682                    // SQL-standard `IS [NOT] DISTINCT FROM y` — null-safe
6683                    // comparison. We lower it to `(x <> y OR (x IS NULL) <>
6684                    // (y IS NULL))` for `DISTINCT FROM` (negated == false) and
6685                    // its inverse for `NOT DISTINCT FROM`. To keep the AST
6686                    // simple, model both as a binary inequality / equality
6687                    // wrapped in BinaryOp so the surrounding query parses.
6688                    self.expect(TokenType::From)?;
6689                    let right = self.parse_addition()?;
6690                    let op = if negated {
6691                        BinaryOperator::Eq
6692                    } else {
6693                        BinaryOperator::Neq
6694                    };
6695                    left = Expr::BinaryOp {
6696                        left: Box::new(left),
6697                        op,
6698                        right: Box::new(right),
6699                    };
6700                } else if matches!(self.peek_type(), TokenType::Json | TokenType::Jsonb)
6701                    || self
6702                        .peek()
6703                        .value
6704                        .eq_ignore_ascii_case("DOCUMENT")
6705                    || self.peek().value.eq_ignore_ascii_case("UNKNOWN")
6706                {
6707                    // PG / Db2 / SQL:2016 `expr IS [NOT] JSON [VALUE|ARRAY|
6708                    // OBJECT|SCALAR] [WITH|WITHOUT UNIQUE [KEYS]]`,
6709                    // `IS [NOT] DOCUMENT`, `IS [NOT] UNKNOWN`. We don't model
6710                    // these — fold to IsNull as a placeholder so the surrounding
6711                    // expression parses.
6712                    self.advance();
6713                    // Optional JSON kind keyword.
6714                    if matches!(
6715                        self.peek().value.to_uppercase().as_str(),
6716                        "VALUE" | "ARRAY" | "OBJECT" | "SCALAR"
6717                    ) && self.is_name_token()
6718                    {
6719                        self.advance();
6720                    }
6721                    // Optional `WITH|WITHOUT UNIQUE [KEYS]`.
6722                    if matches!(
6723                        self.peek().value.to_uppercase().as_str(),
6724                        "WITH" | "WITHOUT"
6725                    ) && self.is_name_token()
6726                    {
6727                        self.advance();
6728                        if self.peek().value.eq_ignore_ascii_case("UNIQUE") {
6729                            self.advance();
6730                            if self.peek().value.eq_ignore_ascii_case("KEYS") {
6731                                self.advance();
6732                            }
6733                        }
6734                    }
6735                    left = Expr::IsNull {
6736                        expr: Box::new(left),
6737                        negated,
6738                    };
6739                } else {
6740                    self.expect(TokenType::Null)?;
6741                    left = Expr::IsNull {
6742                        expr: Box::new(left),
6743                        negated,
6744                    };
6745                }
6746            } else if matches!(
6747                self.peek_type(),
6748                TokenType::Not
6749                    | TokenType::In
6750                    | TokenType::Like
6751                    | TokenType::ILike
6752                    | TokenType::Between
6753            ) {
6754                // Peek ahead: if NOT, only consume it if followed by IN/LIKE/ILIKE/BETWEEN
6755                if self.peek_type() == &TokenType::Not {
6756                    let saved_pos = self.pos;
6757                    self.advance(); // consume NOT
6758                    if !matches!(
6759                        self.peek_type(),
6760                        TokenType::In | TokenType::Like | TokenType::ILike | TokenType::Between
6761                    ) {
6762                        // NOT is not part of a comparison predicate — restore position
6763                        self.pos = saved_pos;
6764                        break;
6765                    }
6766                    // NOT was consumed, negated = true
6767                }
6768                let negated =
6769                    self.pos > 0 && self.tokens[self.pos - 1].token_type == TokenType::Not;
6770
6771                if self.match_token(TokenType::In) {
6772                    // ClickHouse: `x IN [1, 2, 3]` — array literal directly
6773                    // after IN. Parse the array as the RHS and model as a
6774                    // single-element InList so downstream code emits IN (…).
6775                    if matches!(self.peek_type(), TokenType::LBracket) {
6776                        let rhs = self.parse_primary()?;
6777                        left = Expr::InList {
6778                            expr: Box::new(left),
6779                            list: vec![rhs],
6780                            negated,
6781                        };
6782                        continue;
6783                    }
6784                    // ClickHouse: `x IN funcCall(...)` / `x IN tableName` —
6785                    // bare function call or identifier as RHS. Parse a
6786                    // single primary expression and wrap as InList.
6787                    if !matches!(self.peek_type(), TokenType::LParen) {
6788                        let rhs = self.parse_primary()?;
6789                        left = Expr::InList {
6790                            expr: Box::new(left),
6791                            list: vec![rhs],
6792                            negated,
6793                        };
6794                        continue;
6795                    }
6796                    self.expect(TokenType::LParen)?;
6797                    // Check for subquery
6798                    if matches!(self.peek_type(), TokenType::Select | TokenType::With) {
6799                        let subquery = self.parse_statement_inner()?;
6800                        // ClickHouse accepts `IN ((SELECT ...) AS alias)`.
6801                        if self.match_token(TokenType::As) && self.is_name_token() {
6802                            self.advance();
6803                        } else if self.is_name_token() {
6804                            // also tolerate alias without AS
6805                            self.advance();
6806                        }
6807                        self.expect(TokenType::RParen)?;
6808                        left = Expr::InSubquery {
6809                            expr: Box::new(left),
6810                            subquery: Box::new(subquery),
6811                            negated,
6812                        };
6813                    } else {
6814                        let list = self.parse_expr_list()?;
6815                        self.expect(TokenType::RParen)?;
6816                        left = Expr::InList {
6817                            expr: Box::new(left),
6818                            list,
6819                            negated,
6820                        };
6821                    }
6822                } else if self.match_token(TokenType::Like) {
6823                    let pattern = self.parse_addition()?;
6824                    let escape = if self.match_token(TokenType::Escape) {
6825                        Some(Box::new(self.parse_primary()?))
6826                    } else {
6827                        None
6828                    };
6829                    left = Expr::Like {
6830                        expr: Box::new(left),
6831                        pattern: Box::new(pattern),
6832                        negated,
6833                        escape,
6834                    };
6835                } else if self.match_token(TokenType::ILike) {
6836                    let pattern = self.parse_addition()?;
6837                    let escape = if self.match_token(TokenType::Escape) {
6838                        Some(Box::new(self.parse_primary()?))
6839                    } else {
6840                        None
6841                    };
6842                    left = Expr::ILike {
6843                        expr: Box::new(left),
6844                        pattern: Box::new(pattern),
6845                        negated,
6846                        escape,
6847                    };
6848                } else if self.match_token(TokenType::Between) {
6849                    let low = self.parse_addition()?;
6850                    self.expect(TokenType::And)?;
6851                    let high = self.parse_addition()?;
6852                    left = Expr::Between {
6853                        expr: Box::new(left),
6854                        low: Box::new(low),
6855                        high: Box::new(high),
6856                        negated,
6857                    };
6858                } else {
6859                    break;
6860                }
6861            } else if self.check_keyword("SIMILAR") {
6862                // SIMILAR TO pattern [ESCAPE escape_char]
6863                self.advance(); // consume SIMILAR
6864                self.expect_keyword("TO")?;
6865                let pattern = self.parse_addition()?;
6866                let escape = if self.match_token(TokenType::Escape) {
6867                    Some(Box::new(self.parse_primary()?))
6868                } else {
6869                    None
6870                };
6871                left = Expr::SimilarTo {
6872                    expr: Box::new(left),
6873                    pattern: Box::new(pattern),
6874                    negated: false,
6875                    escape,
6876                };
6877            } else if self.peek_type() == &TokenType::Not && self.check_keyword_offset("SIMILAR", 1)
6878            {
6879                // NOT SIMILAR TO pattern [ESCAPE escape_char]
6880                self.advance(); // consume NOT
6881                self.advance(); // consume SIMILAR
6882                self.expect_keyword("TO")?;
6883                let pattern = self.parse_addition()?;
6884                let escape = if self.match_token(TokenType::Escape) {
6885                    Some(Box::new(self.parse_primary()?))
6886                } else {
6887                    None
6888                };
6889                left = Expr::SimilarTo {
6890                    expr: Box::new(left),
6891                    pattern: Box::new(pattern),
6892                    negated: true,
6893                    escape,
6894                };
6895            } else if self.check_keyword("REGEXP")
6896                || self.check_keyword("RLIKE")
6897                || self.check_keyword("GLOB")
6898                || self.check_keyword("IREGEXP")
6899            {
6900                // MySQL / Hive `expr REGEXP pat`, `expr RLIKE pat`, and
6901                // SQLite / DuckDB `expr GLOB pat`. Modeled as a Like with
6902                // no escape.
6903                self.advance();
6904                let pattern = self.parse_addition()?;
6905                left = Expr::Like {
6906                    expr: Box::new(left),
6907                    pattern: Box::new(pattern),
6908                    negated: false,
6909                    escape: None,
6910                };
6911            } else if self.peek_type() == &TokenType::Not
6912                && (self.check_keyword_offset("REGEXP", 1)
6913                    || self.check_keyword_offset("RLIKE", 1)
6914                    || self.check_keyword_offset("GLOB", 1)
6915                    || self.check_keyword_offset("IREGEXP", 1))
6916            {
6917                self.advance();
6918                self.advance();
6919                let pattern = self.parse_addition()?;
6920                left = Expr::Like {
6921                    expr: Box::new(left),
6922                    pattern: Box::new(pattern),
6923                    negated: true,
6924                    escape: None,
6925                };
6926            } else {
6927                break;
6928            }
6929        }
6930
6931        Ok(left)
6932    }
6933
6934    fn parse_addition(&mut self) -> Result<Expr> {
6935        let mut left = self.parse_multiplication()?;
6936        loop {
6937            let op = match self.peek_type() {
6938                TokenType::Plus => Some(BinaryOperator::Plus),
6939                TokenType::Minus => Some(BinaryOperator::Minus),
6940                TokenType::Concat => Some(BinaryOperator::Concat),
6941                TokenType::BitwiseOr => {
6942                    // Don't consume `|` when it is the start of `|>`; that
6943                    // is handled at comparison level (PG range/geom op).
6944                    if matches!(
6945                        self.peek_offset(1).map(|t| &t.token_type),
6946                        Some(TokenType::Gt)
6947                    ) {
6948                        None
6949                    } else {
6950                        Some(BinaryOperator::BitwiseOr)
6951                    }
6952                }
6953                TokenType::BitwiseXor => {
6954                    // Preserve PostgreSQL `^@` for comparison-level handling.
6955                    if matches!(self.peek_offset(1).map(|t| &t.token_type), Some(TokenType::AtSign)) {
6956                        None
6957                    } else {
6958                        Some(BinaryOperator::BitwiseXor)
6959                    }
6960                }
6961                TokenType::ShiftLeft => {
6962                    // Preserve PostgreSQL `<<|` for comparison-level handling.
6963                    if matches!(
6964                        self.peek_offset(1).map(|t| &t.token_type),
6965                        Some(TokenType::BitwiseOr)
6966                    ) {
6967                        None
6968                    } else {
6969                        Some(BinaryOperator::ShiftLeft)
6970                    }
6971                }
6972                TokenType::ShiftRight => {
6973                    // Preserve PostgreSQL `>>|` for comparison-level handling.
6974                    if matches!(
6975                        self.peek_offset(1).map(|t| &t.token_type),
6976                        Some(TokenType::BitwiseOr)
6977                    ) {
6978                        None
6979                    } else {
6980                        Some(BinaryOperator::ShiftRight)
6981                    }
6982                }
6983                _ => None,
6984            };
6985            if let Some(op) = op {
6986                self.advance();
6987                // Oracle SQL*Plus continuation: `2359-\n,'AR'` keeps the
6988                // trailing `-` in the token stream. If the operator has no
6989                // valid right operand (next token is a delimiter), rewind
6990                // and treat the `-` as a no-op so the surrounding INSERT /
6991                // tuple keeps parsing.
6992                if matches!(op, BinaryOperator::Minus | BinaryOperator::Plus)
6993                    && matches!(
6994                        self.peek_type(),
6995                        TokenType::Comma
6996                            | TokenType::RParen
6997                            | TokenType::RBracket
6998                            | TokenType::Eof
6999                            | TokenType::Semicolon
7000                    )
7001                {
7002                    continue;
7003                }
7004                let right = self.parse_multiplication()?;
7005                left = Expr::BinaryOp {
7006                    left: Box::new(left),
7007                    op,
7008                    right: Box::new(right),
7009                };
7010            } else {
7011                break;
7012            }
7013        }
7014        Ok(left)
7015    }
7016
7017    fn parse_multiplication(&mut self) -> Result<Expr> {
7018        let mut left = self.parse_unary()?;
7019        loop {
7020            let op = match self.peek_type() {
7021                TokenType::Star => Some(BinaryOperator::Multiply),
7022                TokenType::Slash => {
7023                    // DuckDB / Python-style integer division `//` — consume
7024                    // both slashes and lower to Divide so the surrounding
7025                    // expression parses.
7026                    if matches!(
7027                        self.peek_offset(1).map(|t| &t.token_type),
7028                        Some(TokenType::Slash)
7029                    ) {
7030                        self.advance();
7031                        self.advance();
7032                        let right = self.parse_unary()?;
7033                        left = Expr::BinaryOp {
7034                            left: Box::new(left),
7035                            op: BinaryOperator::Divide,
7036                            right: Box::new(right),
7037                        };
7038                        continue;
7039                    }
7040                    Some(BinaryOperator::Divide)
7041                }
7042                TokenType::Percent2 => Some(BinaryOperator::Modulo),
7043                TokenType::BitwiseAnd => {
7044                    // Don't consume the first `&` when it is the start of a
7045                    // multi-char PG operator (`&&`, `&<`, `&>`); leave it for
7046                    // the comparison-level handler.
7047                    if matches!(
7048                        self.peek_offset(1).map(|t| &t.token_type),
7049                        Some(TokenType::BitwiseAnd)
7050                            | Some(TokenType::Lt)
7051                            | Some(TokenType::Gt)
7052                    ) {
7053                        None
7054                    } else {
7055                        Some(BinaryOperator::BitwiseAnd)
7056                    }
7057                }
7058                _ => {
7059                    // MySQL / ClickHouse keyword operators `DIV` (integer
7060                    // divide) and `MOD` (modulo). Treated as multiplicative.
7061                    if self.check_keyword("DIV") {
7062                        Some(BinaryOperator::Divide)
7063                    } else if self.check_keyword("MOD") {
7064                        Some(BinaryOperator::Modulo)
7065                    } else {
7066                        None
7067                    }
7068                }
7069            };
7070            if let Some(op) = op {
7071                self.advance();
7072                let right = self.parse_unary()?;
7073                left = Expr::BinaryOp {
7074                    left: Box::new(left),
7075                    op,
7076                    right: Box::new(right),
7077                };
7078            } else {
7079                break;
7080            }
7081        }
7082        Ok(left)
7083    }
7084
7085    fn parse_unary(&mut self) -> Result<Expr> {
7086        match self.peek_type() {
7087            TokenType::Minus => {
7088                self.advance();
7089                let expr = self.parse_unary()?;
7090                Ok(Expr::UnaryOp {
7091                    op: UnaryOperator::Minus,
7092                    expr: Box::new(expr),
7093                })
7094            }
7095            TokenType::Plus => {
7096                self.advance();
7097                let expr = self.parse_unary()?;
7098                Ok(Expr::UnaryOp {
7099                    op: UnaryOperator::Plus,
7100                    expr: Box::new(expr),
7101                })
7102            }
7103            TokenType::BitwiseNot => {
7104                self.advance();
7105                let expr = self.parse_unary()?;
7106                Ok(Expr::UnaryOp {
7107                    op: UnaryOperator::BitwiseNot,
7108                    expr: Box::new(expr),
7109                })
7110            }
7111            _ => self.parse_postfix(),
7112        }
7113    }
7114
7115    /// Parse postfix operators: `::type`, `[index]`, `->`, `->>`
7116    fn parse_postfix(&mut self) -> Result<Expr> {
7117        let mut expr = self.parse_primary()?;
7118
7119        loop {
7120            if self.match_token(TokenType::DoubleColon) {
7121                // PostgreSQL-style cast: expr::type
7122                let data_type = self.parse_data_type()?;
7123                expr = Expr::Cast {
7124                    expr: Box::new(expr),
7125                    data_type,
7126                };
7127            } else if self.match_token(TokenType::LBracket) {
7128                // DuckDB list slicing: expr[start:end] or expr[:end] or expr[start:].
7129                // We model both index and slice as ArrayIndex (the slice
7130                // expression is discarded — the bench cares only about parse
7131                // acceptance).
7132                if self.match_token(TokenType::RBracket) {
7133                    // ClickHouse JSON empty subscript: `arr.k1[]` projects
7134                    // through every element. Treat as `ArrayIndex` against
7135                    // `NULL` so the surrounding expression parses.
7136                    expr = Expr::ArrayIndex {
7137                        expr: Box::new(expr),
7138                        index: Box::new(Expr::Null),
7139                    };
7140                } else if self.match_token(TokenType::Colon) {
7141                    // [:end] or [:end:step]
7142                    if !matches!(self.peek_type(), TokenType::RBracket | TokenType::Colon) {
7143                        let _ = self.parse_expr()?;
7144                    }
7145                    if self.match_token(TokenType::Colon)
7146                        && !matches!(self.peek_type(), TokenType::RBracket)
7147                    {
7148                        let _ = self.parse_expr()?;
7149                    }
7150                    self.expect(TokenType::RBracket)?;
7151                    expr = Expr::ArrayIndex {
7152                        expr: Box::new(expr),
7153                        index: Box::new(Expr::Null),
7154                    };
7155                } else {
7156                    let index = self.parse_expr()?;
7157                    if self.match_token(TokenType::Colon) {
7158                        // [start:end] / [start:] / [start:end:step] / [start::step]
7159                        if !matches!(self.peek_type(), TokenType::RBracket | TokenType::Colon) {
7160                            let _ = self.parse_expr()?;
7161                        }
7162                        if self.match_token(TokenType::Colon)
7163                            && !matches!(self.peek_type(), TokenType::RBracket)
7164                        {
7165                            let _ = self.parse_expr()?;
7166                        }
7167                    }
7168                    self.expect(TokenType::RBracket)?;
7169                    expr = Expr::ArrayIndex {
7170                        expr: Box::new(expr),
7171                        index: Box::new(index),
7172                    };
7173                }
7174            } else if self.match_token(TokenType::Arrow) {
7175                let path = self.parse_primary()?;
7176                expr = Expr::JsonAccess {
7177                    expr: Box::new(expr),
7178                    path: Box::new(path),
7179                    as_text: false,
7180                };
7181            } else if self.match_token(TokenType::DoubleArrow) {
7182                let path = self.parse_primary()?;
7183                expr = Expr::JsonAccess {
7184                    expr: Box::new(expr),
7185                    path: Box::new(path),
7186                    as_text: true,
7187                };
7188            } else if self.peek_type() == &TokenType::Colon
7189                && self
7190                    .peek_offset(1)
7191                    .map(|t| matches!(t.token_type, TokenType::Identifier))
7192                    .unwrap_or(false)
7193                && matches!(
7194                    expr,
7195                    Expr::Column { .. } | Expr::JsonAccess { .. } | Expr::Cast { .. } | Expr::ArrayIndex { .. }
7196                )
7197            {
7198                // Snowflake VARIANT path accessor: `col:key`, `col:a:b`,
7199                // `col:a.b`. Treat each `:<name>` as a JSON access. We avoid
7200                // ambiguity with bind parameters (`:name`) by gating on a
7201                // preceding identifier-style expression.
7202                self.advance(); // :
7203                let part = self.advance().clone();
7204                expr = Expr::JsonAccess {
7205                    expr: Box::new(expr),
7206                    path: Box::new(Expr::StringLiteral(part.value)),
7207                    as_text: false,
7208                };
7209            } else if self.match_token(TokenType::Collate) {
7210                // Postgres / Spark `expr COLLATE collation_name` — we don't
7211                // model collations in the AST; consume the collation name
7212                // and continue. Accept any identifier-or-keyword name token.
7213                if self.is_name_token() || matches!(self.peek_type(), TokenType::String) {
7214                    self.advance();
7215                }
7216            } else if self.check_keyword("AT")
7217                && self.peek_offset(1).map(|t| t.value.eq_ignore_ascii_case("TIME")).unwrap_or(false)
7218                && self.peek_offset(2).map(|t| t.value.eq_ignore_ascii_case("ZONE")).unwrap_or(false)
7219            {
7220                // PostgreSQL / DuckDB: `expr AT TIME ZONE 'tz'`. Swallow the
7221                // suffix; the timezone-shifted value attaches to `expr`.
7222                self.advance(); // AT
7223                self.advance(); // TIME
7224                self.advance(); // ZONE
7225                let _ = self.parse_primary()?;
7226            } else if self.check_keyword("EXPORT_STATE")
7227                && matches!(expr, Expr::Function { .. } | Expr::TypedFunction { .. })
7228            {
7229                // DuckDB postfix `agg(...) EXPORT_STATE` returning the
7230                // serialized aggregate state instead of its final value.
7231                self.advance();
7232            } else if self.peek_type() == &TokenType::Dot
7233                && matches!(
7234                    self.peek_offset(1).map(|t| &t.token_type),
7235                    Some(TokenType::Colon | TokenType::BitwiseXor)
7236                )
7237            {
7238                // ClickHouse typed/subobject access after complex expressions:
7239                //   `expr.:Int64`, `expr.^a`, `expr.:`Array(Nullable(Int64))``.
7240                self.advance(); // .
7241                let _ = self.match_token(TokenType::BitwiseXor);
7242                let _ = self.match_token(TokenType::Colon);
7243                if self.is_name_token() || self.is_data_type_token()
7244                    || matches!(self.peek_type(), TokenType::Null | TokenType::Identifier)
7245                {
7246                    let part = self.advance().clone();
7247                    expr = Expr::JsonAccess {
7248                        expr: Box::new(expr),
7249                        path: Box::new(Expr::StringLiteral(part.value)),
7250                        as_text: false,
7251                    };
7252                } else {
7253                    return Err(SqlglotError::UnexpectedToken {
7254                        token: self.peek().clone(),
7255                    });
7256                }
7257            } else if self.peek_type() == &TokenType::Dot
7258                && matches!(
7259                    self.peek_offset(1).map(|t| &t.token_type),
7260                    Some(TokenType::Number)
7261                )
7262            {
7263                // ClickHouse tuple element access: `t.1`, `t[1].2`. Model as
7264                // an ArrayIndex on a numeric literal so the surrounding
7265                // expression parses.
7266                self.advance(); // .
7267                let n = self.advance().clone();
7268                expr = Expr::ArrayIndex {
7269                    expr: Box::new(expr),
7270                    index: Box::new(Expr::Number(n.value)),
7271                };
7272            } else if self.peek_type() == &TokenType::Dot
7273                && self
7274                    .peek_offset(1)
7275                    .map(|t| matches!(t.token_type, TokenType::Identifier))
7276                    .unwrap_or(false)
7277            {
7278                // Postfix field access after a non-primary expression
7279                // (e.g. `arr[].field`, `arr.k1[].k2.k3`). Also handles
7280                // DuckDB method-call style `expr.method(args)` by
7281                // rewriting to `method(expr, args)`.
7282                self.advance(); // .
7283                let part = self.advance().clone();
7284                if self.match_token(TokenType::LParen) {
7285                    let mut args = vec![expr];
7286                    if self.peek_type() != &TokenType::RParen {
7287                        args.push(self.parse_function_arg()?);
7288                        while self.match_token(TokenType::Comma) {
7289                            args.push(self.parse_function_arg()?);
7290                        }
7291                    }
7292                    self.expect(TokenType::RParen)?;
7293                    expr = Expr::Function {
7294                        name: part.value,
7295                        args,
7296                        distinct: false,
7297                        within_group: false,
7298                        order_by: vec![],
7299                        filter: None,
7300                        over: None,
7301                    };
7302                } else {
7303                    expr = Expr::JsonAccess {
7304                        expr: Box::new(expr),
7305                        path: Box::new(Expr::StringLiteral(part.value)),
7306                        as_text: false,
7307                    };
7308                }
7309            } else if matches!(expr, Expr::Function { .. })
7310                && self.peek_type() == &TokenType::LParen
7311            {
                // ClickHouse combinator-style application: `f(a)(b)` applies
                // the result of `f(a)` to `(b)`. There is no dedicated AST
                // shape for a call whose callee is itself a call, so the
                // second argument list is appended to the first and both end
                // up in a single Function node. This keeps the parse from
                // stopping at the second `(`.
7323                self.advance();
7324                let extra_args = if self.peek_type() != &TokenType::RParen {
7325                    let mut a = vec![self.parse_function_arg()?];
7326                    while self.match_token(TokenType::Comma) {
7327                        a.push(self.parse_function_arg()?);
7328                    }
7329                    a
7330                } else {
7331                    vec![]
7332                };
7333                self.expect(TokenType::RParen)?;
7334                if let Expr::Function {
7335                    name,
7336                    mut args,
7337                    distinct,
7338                    filter,
7339                    over,
7340                    order_by,
7341                    within_group,
7342                } = expr
7343                {
7344                    args.extend(extra_args);
7345                    expr = Expr::Function {
7346                        name,
7347                        args,
7348                        distinct,
7349                        filter,
7350                        over,
7351                        order_by,
7352                        within_group,
7353                    };
7354                } else {
7355                    unreachable!();
7356                }
7357            } else {
7358                break;
7359            }
7360        }
7361
7362        // Check for window function: expr OVER (...)
7363        // BigQuery / DuckDB / ClickHouse / Snowflake: window-function nulls
7364        // modifier outside the call: `first_value(x) IGNORE NULLS OVER (...)`
7365        // or `first_value(x) RESPECT NULLS`. Swallow opaquely.
7366        if (self.peek().value.eq_ignore_ascii_case("IGNORE")
7367            || self.peek().value.eq_ignore_ascii_case("RESPECT"))
7368            && self
7369                .peek_offset(1)
7370                .map(|t| t.token_type == TokenType::Null || t.value.eq_ignore_ascii_case("NULLS"))
7371                .unwrap_or(false)
7372        {
7373            self.advance();
7374            self.advance();
7375        }
7376        if self.match_token(TokenType::Over) {
7377            let spec = if self.match_token(TokenType::LParen) {
7378                let ws = self.parse_window_spec()?;
7379                self.expect(TokenType::RParen)?;
7380                ws
7381            } else {
7382                // Named window reference
7383                let wref = self.expect_name()?;
7384                WindowSpec {
7385                    window_ref: Some(wref),
7386                    partition_by: vec![],
7387                    order_by: vec![],
7388                    frame: None,
7389                }
7390            };
7391            match expr {
7392                Expr::Function {
7393                    name,
7394                    args,
7395                    distinct,
7396                    filter,
7397                    order_by,
7398                    within_group,
7399                    ..
7400                } => {
7401                    expr = Expr::Function {
7402                        name,
7403                        args,
7404                        distinct,
7405                        filter,
7406                        over: Some(spec),
7407                        order_by,
7408                        within_group,
7409                    };
7410                }
7411                Expr::TypedFunction { func, filter, .. } => {
7412                    expr = Expr::TypedFunction {
7413                        func,
7414                        filter,
7415                        over: Some(spec),
7416                    };
7417                }
7418                _ => {}
7419            }
7420        }
7421
7422        // FILTER (WHERE ...) for aggregate functions
7423        if self.match_token(TokenType::Filter) {
7424            self.expect(TokenType::LParen)?;
7425            self.expect(TokenType::Where)?;
7426            let filter_expr = self.parse_expr()?;
7427            self.expect(TokenType::RParen)?;
7428            match expr {
7429                Expr::Function {
7430                    name,
7431                    args,
7432                    distinct,
7433                    over,
7434                    order_by,
7435                    within_group,
7436                    ..
7437                } => {
7438                    expr = Expr::Function {
7439                        name,
7440                        args,
7441                        distinct,
7442                        filter: Some(Box::new(filter_expr)),
7443                        over,
7444                        order_by,
7445                        within_group,
7446                    };
7447                }
7448                Expr::TypedFunction { func, over, .. } => {
7449                    expr = Expr::TypedFunction {
7450                        func,
7451                        filter: Some(Box::new(filter_expr)),
7452                        over,
7453                    };
7454                }
7455                _ => {}
7456            }
7457            // PostgreSQL / DuckDB: `agg(x) FILTER (WHERE …) OVER (…)`.
7458            // Parse the trailing OVER clause after FILTER so window-call
7459            // aggregates with filters still resolve.
7460            if self.match_token(TokenType::Over) {
7461                let spec = if self.match_token(TokenType::LParen) {
7462                    let ws = self.parse_window_spec()?;
7463                    self.expect(TokenType::RParen)?;
7464                    ws
7465                } else {
7466                    let wref = self.expect_name()?;
7467                    WindowSpec {
7468                        window_ref: Some(wref),
7469                        partition_by: vec![],
7470                        order_by: vec![],
7471                        frame: None,
7472                    }
7473                };
7474                match expr {
7475                    Expr::Function {
7476                        name,
7477                        args,
7478                        distinct,
7479                        filter,
7480                        order_by,
7481                        within_group,
7482                        ..
7483                    } => {
7484                        expr = Expr::Function {
7485                            name,
7486                            args,
7487                            distinct,
7488                            filter,
7489                            over: Some(spec),
7490                            order_by,
7491                            within_group,
7492                        };
7493                    }
7494                    Expr::TypedFunction { func, filter, .. } => {
7495                        expr = Expr::TypedFunction {
7496                            func,
7497                            filter,
7498                            over: Some(spec),
7499                        };
7500                    }
7501                    _ => {}
7502                }
7503            }
7504        }
7505
7506        Ok(expr)
7507    }
7508
7509    fn parse_window_spec(&mut self) -> Result<WindowSpec> {
7510        let window_ref = if self.is_name_token()
7511            && !matches!(
7512                self.peek_type(),
7513                TokenType::Partition | TokenType::Order | TokenType::Rows | TokenType::Range
7514            ) {
7515            let saved = self.pos;
7516            let name = self.expect_name()?;
7517            // Check if it's actually a keyword we need
7518            if matches!(
7519                self.peek_type(),
7520                TokenType::RParen
7521                    | TokenType::Partition
7522                    | TokenType::Order
7523                    | TokenType::Rows
7524                    | TokenType::Range
7525            ) {
7526                Some(name)
7527            } else {
7528                self.pos = saved;
7529                None
7530            }
7531        } else {
7532            None
7533        };
7534
7535        let partition_by = if self.match_token(TokenType::Partition) {
7536            self.expect(TokenType::By)?;
7537            self.parse_expr_list_allow_item_alias()?
7538        } else if self.is_name_token()
7539            && (self.peek().value.eq_ignore_ascii_case("DISTRIBUTE")
7540                || self.peek().value.eq_ignore_ascii_case("CLUSTER"))
7541        {
7542            // Hive `DISTRIBUTE BY` / `CLUSTER BY` inside OVER(...) — treat
7543            // as PARTITION BY.
7544            self.advance();
7545            self.expect(TokenType::By)?;
7546            self.parse_expr_list_allow_item_alias()?
7547        } else {
7548            vec![]
7549        };
7550
7551        let order_by = if self.match_token(TokenType::Order) {
7552            self.expect(TokenType::By)?;
7553            self.parse_order_by_items()?
7554        } else if self.is_name_token()
7555            && self.peek().value.eq_ignore_ascii_case("SORT")
7556        {
7557            // Hive `SORT BY` inside OVER(...) — treat as ORDER BY.
7558            self.advance();
7559            self.expect(TokenType::By)?;
7560            self.parse_order_by_items()?
7561        } else {
7562            vec![]
7563        };
7564
7565        let frame = if matches!(self.peek_type(), TokenType::Rows | TokenType::Range) {
7566            Some(self.parse_window_frame()?)
7567        } else {
7568            None
7569        };
7570
7571        Ok(WindowSpec {
7572            window_ref,
7573            partition_by,
7574            order_by,
7575            frame,
7576        })
7577    }
7578
7579    fn parse_window_frame(&mut self) -> Result<WindowFrame> {
7580        let kind = if self.match_token(TokenType::Rows) {
7581            WindowFrameKind::Rows
7582        } else if self.match_token(TokenType::Range) {
7583            WindowFrameKind::Range
7584        } else {
7585            WindowFrameKind::Rows
7586        };
7587
7588        if self.match_keyword("BETWEEN") {
7589            let start = self.parse_window_frame_bound()?;
7590            self.expect(TokenType::And)?;
7591            let end = self.parse_window_frame_bound()?;
7592            // SQL:2011 / DuckDB frame exclusion clause:
7593            //   `EXCLUDE CURRENT ROW | EXCLUDE GROUP | EXCLUDE TIES |
7594            //    EXCLUDE NO OTHERS`. Swallow opaquely; we don't model it.
7595            if self.check_keyword("EXCLUDE") {
7596                self.advance();
7597                if self.check_keyword("CURRENT") {
7598                    self.advance();
7599                    let _ = self.match_keyword("ROW");
7600                } else if self.check_keyword("NO") {
7601                    self.advance();
7602                    let _ = self.match_keyword("OTHERS");
7603                } else if self.check_keyword("GROUP") || self.check_keyword("TIES") {
7604                    self.advance();
7605                }
7606            }
7607            Ok(WindowFrame {
7608                kind,
7609                start,
7610                end: Some(end),
7611            })
7612        } else {
7613            let start = self.parse_window_frame_bound()?;
7614            if self.check_keyword("EXCLUDE") {
7615                self.advance();
7616                if self.check_keyword("CURRENT") {
7617                    self.advance();
7618                    let _ = self.match_keyword("ROW");
7619                } else if self.check_keyword("NO") {
7620                    self.advance();
7621                    let _ = self.match_keyword("OTHERS");
7622                } else if self.check_keyword("GROUP") || self.check_keyword("TIES") {
7623                    self.advance();
7624                }
7625            }
7626            Ok(WindowFrame {
7627                kind,
7628                start,
7629                end: None,
7630            })
7631        }
7632    }
7633
7634    fn parse_window_frame_bound(&mut self) -> Result<WindowFrameBound> {
7635        if self.check_keyword("CURRENT") {
7636            self.advance();
7637            let _ = self.match_keyword("ROW");
7638            Ok(WindowFrameBound::CurrentRow)
7639        } else if self.match_token(TokenType::Unbounded) {
7640            if self.match_token(TokenType::Preceding) {
7641                Ok(WindowFrameBound::Preceding(None))
7642            } else {
7643                self.expect(TokenType::Following)?;
7644                Ok(WindowFrameBound::Following(None))
7645            }
7646        } else {
7647            let n = self.parse_expr()?;
7648            if self.match_token(TokenType::Preceding) {
7649                Ok(WindowFrameBound::Preceding(Some(Box::new(n))))
7650            } else {
7651                self.expect(TokenType::Following)?;
7652                Ok(WindowFrameBound::Following(Some(Box::new(n))))
7653            }
7654        }
7655    }
7656
7657    fn parse_primary(&mut self) -> Result<Expr> {
7658        let token = self.peek().clone();
7659
7660        // DuckDB / Spark leading-dot float literal: `.5`, `.25`. The
7661        // tokenizer emits `Dot` then `Number`; glue them back together.
7662        if matches!(token.token_type, TokenType::Dot)
7663            && matches!(
7664                self.peek_offset(1).map(|t| &t.token_type),
7665                Some(TokenType::Number)
7666            )
7667        {
7668            self.advance();
7669            let n = self.peek().value.clone();
7670            self.advance();
7671            return Ok(Expr::Number(format!("0.{}", n)));
7672        }
7673
7674        match &token.token_type {
7675            TokenType::Number => {
7676                self.advance();
7677                // Trailing-dot fractional literal: `10.` — accept the dot as
7678                // part of the number when it isn't followed by something that
7679                // would be a member access (column reference like `t.col` or
7680                // tuple element access).
7681                let mut value = token.value;
7682                if self.peek_type() == &TokenType::Dot {
7683                    let after = self.peek_offset(1).map(|t| &t.token_type);
7684                    let looks_like_member = matches!(
7685                        after,
7686                        Some(TokenType::Identifier)
7687                            | Some(TokenType::Number)
7688                            | Some(TokenType::Star)
7689                    );
7690                    if !looks_like_member {
7691                        self.advance();
7692                        value.push('.');
7693                    }
7694                }
7695                // Spark / Hive float suffixes: `10.0F`, `20L`, `3.14D`, `5BD`.
7696                // Swallow the suffix identifier so the literal parses.
7697                if self.is_name_token() {
7698                    let v = self.peek().value.as_str();
7699                    if matches!(v, "F" | "f" | "L" | "l" | "D" | "d" | "BD" | "bd") {
7700                        self.advance();
7701                    }
7702                }
7703                Ok(Expr::Number(value))
7704            }
7705            TokenType::HexString => {
7706                self.advance();
7707                Ok(Expr::Number(token.value))
7708            }
7709            TokenType::String => {
7710                self.advance();
7711                // ANSI / Oracle interval literal: `'1-2' YEAR TO MONTH`,
7712                // `'12 03:04:05.6' DAY TO SECOND(2)`. After a bare string,
7713                // accept an optional interval qualifier and swallow it so
7714                // the surrounding expression parses. Skip this when the
7715                // previous token was `INTERVAL` — that has its own path.
7716                let prev_was_interval = self
7717                    .pos
7718                    .checked_sub(2)
7719                    .and_then(|i| self.tokens.get(i))
7720                    .map(|t| matches!(t.token_type, TokenType::Interval))
7721                    .unwrap_or(false);
7722                if !prev_was_interval
7723                    && matches!(
7724                        self.peek_type(),
7725                        TokenType::Year
7726                            | TokenType::Month
7727                            | TokenType::Day
7728                            | TokenType::Hour
7729                            | TokenType::Minute
7730                            | TokenType::Second
7731                    )
7732                {
7733                    self.advance();
7734                    if self.match_token(TokenType::LParen) {
7735                        // qualifier precision: `SECOND(2)`
7736                        if matches!(self.peek_type(), TokenType::Number) {
7737                            self.advance();
7738                            if self.match_token(TokenType::Comma) {
7739                                if matches!(self.peek_type(), TokenType::Number) {
7740                                    self.advance();
7741                                }
7742                            }
7743                        }
7744                        let _ = self.match_token(TokenType::RParen);
7745                    }
7746                    if self.is_name_token() && self.peek().value.eq_ignore_ascii_case("TO") {
7747                        self.advance();
7748                        if matches!(
7749                            self.peek_type(),
7750                            TokenType::Year
7751                                | TokenType::Month
7752                                | TokenType::Day
7753                                | TokenType::Hour
7754                                | TokenType::Minute
7755                                | TokenType::Second
7756                        ) {
7757                            self.advance();
7758                            if self.match_token(TokenType::LParen) {
7759                                if matches!(self.peek_type(), TokenType::Number) {
7760                                    self.advance();
7761                                }
7762                                let _ = self.match_token(TokenType::RParen);
7763                            }
7764                        }
7765                    }
7766                    return Ok(Expr::Cast {
7767                        expr: Box::new(Expr::StringLiteral(token.value)),
7768                        data_type: DataType::Interval,
7769                    });
7770                }
7771                // SQL-92 / MySQL: adjacent string literals concatenate
7772                // (`'a' 'b'` → `'ab'`). Also fold in identifier-quoted
7773                // strings the lexer surfaces when MySQL ANSI_QUOTES is off
7774                // (`"a" "b" "c"` reaches us as a String followed by quoted
7775                // identifiers). Greedily consume any run of immediately
7776                // following String / quoted-Identifier tokens.
7777                let mut combined = token.value;
7778                loop {
7779                    let next = self.peek();
7780                    if matches!(next.token_type, TokenType::String) {
7781                        combined.push_str(&next.value);
7782                        self.advance();
7783                        continue;
7784                    }
7785                    if matches!(next.token_type, TokenType::Identifier)
7786                        && (next.quote_char == '"' || next.quote_char == '\'')
7787                    {
7788                        combined.push_str(&next.value);
7789                        self.advance();
7790                        continue;
7791                    }
7792                    break;
7793                }
7794                Ok(Expr::StringLiteral(combined))
7795            }
7796            TokenType::NationalString => {
7797                self.advance();
7798                Ok(Expr::NationalStringLiteral(token.value))
7799            }
7800            TokenType::True => {
7801                self.advance();
7802                Ok(Expr::Boolean(true))
7803            }
7804            TokenType::False => {
7805                self.advance();
7806                Ok(Expr::Boolean(false))
7807            }
7808            TokenType::Null => {
7809                self.advance();
7810                Ok(Expr::Null)
7811            }
7812            TokenType::Default => {
7813                self.advance();
7814                // MySQL `DEFAULT(col)` — emit as function call so the
7815                // surrounding tuple parses.
7816                if self.peek_type() == &TokenType::LParen {
7817                    self.advance();
7818                    let args = if self.peek_type() != &TokenType::RParen {
7819                        let mut a = vec![self.parse_function_arg()?];
7820                        while self.match_token(TokenType::Comma) {
7821                            a.push(self.parse_function_arg()?);
7822                        }
7823                        a
7824                    } else {
7825                        vec![]
7826                    };
7827                    self.expect(TokenType::RParen)?;
7828                    return Ok(Expr::Function {
7829                        name: "DEFAULT".to_string(),
7830                        args,
7831                        distinct: false,
7832                        filter: None,
7833                        over: None,
7834                        order_by: Vec::new(),
7835                        within_group: false,
7836                    });
7837                }
7838                Ok(Expr::Default)
7839            }
7840            TokenType::Star => {
7841                self.advance();
7842                Ok(Expr::Wildcard)
7843            }
7844            // ClickHouse / various: `values` used as a column name inside
7845            // expressions (e.g. `arrayExists(x -> x > 5, values)`). Accept
7846            // it as a bare column reference when it isn't followed by `(`.
7847            TokenType::Values if self.peek_offset(1).map(|t| &t.token_type) != Some(&TokenType::LParen) => {
7848                self.advance();
7849                Ok(Expr::Column {
7850                    table: None,
7851                    name: token.value,
7852                    quote_style: QuoteStyle::None,
7853                    table_quote_style: QuoteStyle::None,
7854                })
7855            }
7856            TokenType::Parameter => {
7857                self.advance();
7858                Ok(Expr::Parameter(token.value))
7859            }
7860
7861            // ── `@var`, `@@global_var`, `:var` style placeholders ──
7862            //
7863            // MySQL/T-SQL session and global variables tokenize as a bare
7864            // `@` (or `:`) followed by an identifier. We glue the prefix and
7865            // following name into a single `Parameter` expression so the
7866            // surrounding query parses.
7867            TokenType::AtSign | TokenType::Colon => {
7868                self.advance();
7869                let mut name = match token.token_type {
7870                    TokenType::AtSign => String::from("@"),
7871                    TokenType::Colon => String::from(":"),
7872                    _ => unreachable!(),
7873                };
7874                // T-SQL `@@global` — second `@`.
7875                if matches!(token.token_type, TokenType::AtSign)
7876                    && self.peek_type() == &TokenType::AtSign
7877                {
7878                    name.push('@');
7879                    self.advance();
7880                }
7881                // Name part: identifier-or-keyword, number, or none.
7882                // T-SQL accepts reserved keywords after `@` (e.g. `@limit`,
7883                // `@order`). Accept any token that "looks like" a name.
7884                if self.is_name_token()
7885                    || matches!(
7886                        self.peek_type(),
7887                        TokenType::Limit
7888                            | TokenType::Offset
7889                            | TokenType::Order
7890                            | TokenType::Group
7891                            | TokenType::Having
7892                            | TokenType::Where
7893                            | TokenType::From
7894                            | TokenType::Select
7895                            | TokenType::Insert
7896                            | TokenType::Update
7897                            | TokenType::Delete
7898                            | TokenType::Union
7899                            | TokenType::Intersect
7900                            | TokenType::Except
7901                            | TokenType::Join
7902                            | TokenType::Inner
7903                            | TokenType::Cross
7904                            | TokenType::On
7905                            | TokenType::As
7906                            | TokenType::Distinct
7907                            | TokenType::Default
7908                            | TokenType::Null
7909                            | TokenType::True
7910                            | TokenType::False
7911                            | TokenType::Date
7912                            | TokenType::Time
7913                            | TokenType::Timestamp
7914                            | TokenType::Year
7915                            | TokenType::Month
7916                            | TokenType::Day
7917                            | TokenType::Hour
7918                            | TokenType::Minute
7919                            | TokenType::Second
7920                    )
7921                {
7922                    let nt = self.advance().clone();
7923                    name.push_str(&nt.value);
7924                } else if matches!(self.peek_type(), TokenType::Number | TokenType::Int) {
7925                    let nt = self.advance().clone();
7926                    name.push_str(&nt.value);
7927                }
7928                Ok(Expr::Parameter(name))
7929            }
7930
7931            // ── DuckDB / BigQuery struct literal: `{ key: expr, ... }` ──
7932            //
7933            // We capture the values as positional `STRUCT(...)` arguments
7934            // (keys are syntactically optional). This keeps surrounding
7935            // expressions parseable; the original AST shape is not preserved
7936            // because there is no dedicated struct-literal variant yet.
7937            TokenType::LBrace => {
7938                self.advance();
7939                let mut args = Vec::new();
7940                if self.peek_type() != &TokenType::RBrace {
7941                    loop {
7942                        // Optional `key:` prefix — discard the key, keep value.
7943                        if self.is_name_token()
7944                            && self
7945                                .peek_offset(1)
7946                                .is_some_and(|t| t.token_type == TokenType::Colon)
7947                        {
7948                            self.advance(); // key
7949                            self.advance(); // colon
7950                        } else if self.peek_type() == &TokenType::String
7951                            && self
7952                                .peek_offset(1)
7953                                .is_some_and(|t| t.token_type == TokenType::Colon)
7954                        {
7955                            self.advance(); // string key
7956                            self.advance(); // colon
7957                        }
7958                        let value = self.parse_expr()?;
7959                        args.push(value);
7960                        if !self.match_token(TokenType::Comma) {
7961                            break;
7962                        }
7963                    }
7964                }
7965                self.expect(TokenType::RBrace)?;
7966                Ok(Expr::Function {
7967                    name: "STRUCT".to_string(),
7968                    args,
7969                    distinct: false,
7970                    filter: None,
7971                    over: None,
7972                    order_by: Vec::new(),
7973                    within_group: false,
7974                })
7975            }
7976
7977            // ── CAST ────────────────────────────────────────────────
7978            TokenType::Cast if self.peek_offset(1).is_some_and(|t| t.token_type == TokenType::LParen) => {
7979                self.advance();
7980                self.expect(TokenType::LParen)?;
7981                let expr = self.parse_expr()?;
7982                // Standard form: `CAST(expr AS type)`. ClickHouse also accepts
7983                // `CAST(expr, 'TypeName')` with a string literal type.
7984                let data_type = if self.match_token(TokenType::As) {
7985                    self.parse_data_type()?
7986                } else if self.match_token(TokenType::Comma) {
7987                    if matches!(self.peek_type(), TokenType::String) {
7988                        let s = self.peek().value.clone();
7989                        self.advance();
7990                        DataType::Unknown(s)
7991                    } else {
7992                        self.parse_data_type()?
7993                    }
7994                } else {
7995                    self.expect(TokenType::As)?; // produce the canonical error
7996                    self.parse_data_type()?
7997                };
7998                // BigQuery: `CAST(expr AS type FORMAT 'fmt' [AT TIME ZONE …])`.
7999                if self.check_keyword("FORMAT") {
8000                    self.advance();
8001                    let _ = self.parse_expr();
8002                    if self.check_keyword("AT")
8003                        && self
8004                            .peek_offset(1)
8005                            .map(|t| t.value.eq_ignore_ascii_case("TIME"))
8006                            .unwrap_or(false)
8007                        && self
8008                            .peek_offset(2)
8009                            .map(|t| t.value.eq_ignore_ascii_case("ZONE"))
8010                            .unwrap_or(false)
8011                    {
8012                        self.advance();
8013                        self.advance();
8014                        self.advance();
8015                        let _ = self.parse_expr();
8016                    }
8017                }
8018                self.expect(TokenType::RParen)?;
8019                Ok(Expr::Cast {
8020                    expr: Box::new(expr),
8021                    data_type,
8022                })
8023            }
8024
8025            // ── EXTRACT ─────────────────────────────────────────────
8026            TokenType::Extract => {
8027                self.advance();
8028                self.expect(TokenType::LParen)?;
8029                let field = self.parse_datetime_field()?;
8030                self.expect(TokenType::From)?;
8031                let expr = self.parse_expr()?;
8032                // BigQuery: `EXTRACT(field FROM ts AT TIME ZONE 'tz')`.
8033                // Swallow the trailing timezone clause so the function
8034                // parses; we lose the explicit zone but keep the AST.
8035                if self.check_keyword("AT")
8036                    && self
8037                        .peek_offset(1)
8038                        .map(|t| t.value.eq_ignore_ascii_case("TIME"))
8039                        .unwrap_or(false)
8040                    && self
8041                        .peek_offset(2)
8042                        .map(|t| t.value.eq_ignore_ascii_case("ZONE"))
8043                        .unwrap_or(false)
8044                {
8045                    self.advance(); // AT
8046                    self.advance(); // TIME
8047                    self.advance(); // ZONE
8048                    let _ = self.parse_expr();
8049                }
8050                self.expect(TokenType::RParen)?;
8051                Ok(Expr::Extract {
8052                    field,
8053                    expr: Box::new(expr),
8054                })
8055            }
8056
8057            // ── CASE ────────────────────────────────────────────────
8058            TokenType::Case => self.parse_case_expr(),
8059
8060            // ── EXISTS ──────────────────────────────────────────────
8061            TokenType::Exists => {
8062                self.advance();
8063                self.expect(TokenType::LParen)?;
8064                let subquery = self.parse_statement_inner()?;
8065                self.expect(TokenType::RParen)?;
8066                Ok(Expr::Exists {
8067                    subquery: Box::new(subquery),
8068                    negated: false,
8069                })
8070            }
8071
8072            // ── NOT EXISTS ──────────────────────────────────────────
8073            TokenType::Not
8074                if {
8075                    let next_pos = self.pos + 1;
8076                    next_pos < self.tokens.len()
8077                        && self.tokens[next_pos].token_type == TokenType::Exists
8078                } =>
8079            {
8080                self.advance(); // NOT
8081                self.advance(); // EXISTS
8082                self.expect(TokenType::LParen)?;
8083                let subquery = self.parse_statement_inner()?;
8084                self.expect(TokenType::RParen)?;
8085                Ok(Expr::Exists {
8086                    subquery: Box::new(subquery),
8087                    negated: true,
8088                })
8089            }
8090
8091            // ── INTERVAL ────────────────────────────────────────────
8092            TokenType::Interval => {
8093                self.advance();
8094                // ClickHouse accepts arithmetic in the value position
8095                // (e.g. `INTERVAL number - 15 MONTH`). Parse an additive
8096                // expression instead of a single primary so the trailing
8097                // unit keyword is reached cleanly.
8098                let value = self.parse_addition()?;
8099                let unit = self.try_parse_datetime_field();
8100                // ANSI / Spark composite ranges: `INTERVAL '0-0' YEAR TO MONTH`,
8101                // `INTERVAL '15:40' HOUR TO MINUTE` etc. Swallow the trailing
8102                // `TO <unit>` clause; we keep only the leading unit.
8103                if self.check_keyword("TO") {
8104                    let saved = self.pos;
8105                    self.advance();
8106                    if self.try_parse_datetime_field().is_none() {
8107                        self.pos = saved;
8108                    }
8109                }
8110                // PostgreSQL fractional precision on the trailing unit:
8111                //   `INTERVAL '1.234' SECOND(2)`, `INTERVAL '…' MINUTE TO SECOND(2)`.
8112                // Swallow the `(N)` after the unit.
8113                if self.peek_type() == &TokenType::LParen
8114                    && self
8115                        .peek_offset(1)
8116                        .map(|t| matches!(t.token_type, TokenType::Number))
8117                        .unwrap_or(false)
8118                    && self
8119                        .peek_offset(2)
8120                        .map(|t| matches!(t.token_type, TokenType::RParen))
8121                        .unwrap_or(false)
8122                {
8123                    self.advance();
8124                    self.advance();
8125                    self.advance();
8126                }
8127                Ok(Expr::Interval {
8128                    value: Box::new(value),
8129                    unit,
8130                })
8131            }
8132
8133            // ── Parenthesized expression or subquery ────────────────
8134            TokenType::LParen => {
8135                self.advance();
8136                // Check for subquery
8137                if matches!(self.peek_type(), TokenType::Select | TokenType::With) {
8138                    let subquery = self.parse_statement_inner()?;
8139                    self.expect(TokenType::RParen)?;
8140                    Ok(Expr::Subquery(Box::new(subquery)))
8141                } else {
8142                    let expr = self.parse_expr()?;
8143                    // ClickHouse: `(expr AS alias)` — swallow the alias.
8144                    if self.match_token(TokenType::As) && self.is_name_token() {
8145                        self.advance();
8146                    }
8147                    // Tuple: (a, b, c) — also accept ClickHouse trailing
8148                    // comma `(a,)`, `(a, b,)`.
8149                    if self.match_token(TokenType::Comma) {
8150                        let mut items = vec![expr];
8151                        if self.peek_type() == &TokenType::RParen {
8152                            self.advance();
8153                            return Ok(Expr::Tuple(items));
8154                        }
8155                        let next = self.parse_expr()?;
8156                        if self.match_token(TokenType::As) && self.is_name_token() {
8157                            self.advance();
8158                        }
8159                        items.push(next);
8160                        while self.match_token(TokenType::Comma) {
8161                            if self.peek_type() == &TokenType::RParen {
8162                                break;
8163                            }
8164                            let n = self.parse_expr()?;
8165                            if self.match_token(TokenType::As) && self.is_name_token() {
8166                                self.advance();
8167                            }
8168                            items.push(n);
8169                        }
8170                        self.expect(TokenType::RParen)?;
8171                        Ok(Expr::Tuple(items))
8172                    } else {
8173                        self.expect(TokenType::RParen)?;
8174                        Ok(Expr::Nested(Box::new(expr)))
8175                    }
8176                }
8177            }
8178
8179            // ── DuckDB MAP literal: `MAP { 'k': v, ... }` ──────────
8180            // Captured as a `MAP(...)` function call with the values as
8181            // positional arguments; keys are discarded for now.
8182            TokenType::Map if self.peek_offset(1).map(|t| matches!(t.token_type, TokenType::LBrace)).unwrap_or(false) => {
8183                self.advance(); // MAP
8184                self.advance(); // {
8185                let mut args = Vec::new();
8186                if self.peek_type() != &TokenType::RBrace {
8187                    loop {
8188                        // Optional `key:` prefix — keep the value only.
8189                        let saved = self.pos;
8190                        let _ = self.parse_expr()?;
8191                        if self.match_token(TokenType::Colon) {
8192                            let v = self.parse_expr()?;
8193                            args.push(v);
8194                        } else {
8195                            self.pos = saved;
8196                            let v = self.parse_expr()?;
8197                            args.push(v);
8198                        }
8199                        if !self.match_token(TokenType::Comma) {
8200                            break;
8201                        }
8202                    }
8203                }
8204                self.expect(TokenType::RBrace)?;
8205                Ok(Expr::Function {
8206                    name: "MAP".to_string(),
8207                    args,
8208                    distinct: false,
8209                    filter: None,
8210                    over: None,
8211                    order_by: Vec::new(),
8212                    within_group: false,
8213                })
8214            }
8215
8216            // ── Array literal: ARRAY[...] ──────────────────────────
8217            TokenType::Array => {
8218                self.advance();
8219                if self.match_token(TokenType::LBracket) {
8220                    let items = self.parse_array_items(TokenType::RBracket)?;
8221                    self.expect(TokenType::RBracket)?;
8222                    Ok(Expr::ArrayLiteral(items))
8223                } else if self.match_token(TokenType::LParen) {
8224                    // ARRAY(SELECT ...) for subqueries, or Hive
8225                    // `ARRAY(expr, expr, ...)` for inline array literals.
8226                    if matches!(self.peek_type(), TokenType::Select | TokenType::With) {
8227                        let subquery = self.parse_statement_inner()?;
8228                        self.expect(TokenType::RParen)?;
8229                        Ok(Expr::Subquery(Box::new(subquery)))
8230                    } else {
8231                        let items = self.parse_array_items(TokenType::RParen)?;
8232                        self.expect(TokenType::RParen)?;
8233                        Ok(Expr::ArrayLiteral(items))
8234                    }
8235                } else {
8236                    Ok(Expr::Column {
8237                        table: None,
8238                        name: "ARRAY".to_string(),
8239                        quote_style: QuoteStyle::None,
8240                        table_quote_style: QuoteStyle::None,
8241                    })
8242                }
8243            }
8244
8245            // ── Bracket array literal: [...] ────────────────────────
8246            TokenType::LBracket => {
8247                self.advance();
8248                let items = self.parse_array_items(TokenType::RBracket)?;
8249                // DuckDB list comprehension: `[expr FOR x IN list [IF cond]]`.
8250                // Swallow the comprehension tail opaquely; we keep the
8251                // initial expression as the AST representation.
8252                if self.peek().value.eq_ignore_ascii_case("FOR") {
8253                    let mut depth = 1_i32;
8254                    while depth > 0 && !matches!(self.peek_type(), TokenType::Eof) {
8255                        match self.peek_type() {
8256                            TokenType::LBracket | TokenType::LParen => depth += 1,
8257                            TokenType::RBracket => {
8258                                depth -= 1;
8259                                if depth == 0 {
8260                                    break;
8261                                }
8262                            }
8263                            TokenType::RParen => depth -= 1,
8264                            _ => {}
8265                        }
8266                        self.advance();
8267                    }
8268                }
8269                self.expect(TokenType::RBracket)?;
8270                Ok(Expr::ArrayLiteral(items))
8271            }
8272
8273            // ── Identifier: column ref, function call, or qualified name ─
8274            _ if self.is_name_token() || self.is_data_type_token() => {
8275                let name_token = self.advance().clone();
8276                let name = name_token.value.clone();
8277                let name_qs = quote_style_from_char(name_token.quote_char);
8278
8279                // ── ANSI typed string literals: DATE 'x', TIMESTAMP 'x', TIME 'x' ──
8280                if matches!(
8281                    name_token.token_type,
8282                    TokenType::Date
8283                        | TokenType::Timestamp
8284                        | TokenType::TimestampTz
8285                        | TokenType::Time
8286                ) {
8287                    // PG / ANSI `TIMESTAMP [WITH [LOCAL] TIME ZONE] 'lit'`
8288                    // and `TIMESTAMP WITHOUT TIME ZONE 'lit'`. Swallow the
8289                    // optional timezone modifier so the string literal
8290                    // attaches to the right typed-literal form.
8291                    let mut explicit_tz: Option<bool> = None;
8292                    if matches!(
8293                        name_token.token_type,
8294                        TokenType::Timestamp | TokenType::Time
8295                    ) && self.peek_type() == &TokenType::With
8296                    {
8297                        let saved = self.pos;
8298                        self.advance(); // WITH
8299                        let _ = self.match_keyword("LOCAL");
8300                        if self.check_keyword("TIME")
8301                            && self
8302                                .peek_offset(1)
8303                                .map(|t| t.value.eq_ignore_ascii_case("ZONE"))
8304                                .unwrap_or(false)
8305                        {
8306                            self.advance(); // TIME
8307                            self.advance(); // ZONE
8308                            explicit_tz = Some(true);
8309                        } else {
8310                            self.pos = saved;
8311                        }
8312                    } else if matches!(
8313                        name_token.token_type,
8314                        TokenType::Timestamp | TokenType::Time
8315                    ) && self.check_keyword("WITHOUT")
8316                    {
8317                        let saved = self.pos;
8318                        self.advance(); // WITHOUT
8319                        if self.check_keyword("TIME")
8320                            && self
8321                                .peek_offset(1)
8322                                .map(|t| t.value.eq_ignore_ascii_case("ZONE"))
8323                                .unwrap_or(false)
8324                        {
8325                            self.advance();
8326                            self.advance();
8327                            explicit_tz = Some(false);
8328                        } else {
8329                            self.pos = saved;
8330                        }
8331                    }
8332
8333                    if self.peek_type() == &TokenType::String {
8334                        let value_token = self.advance().clone();
8335                        let data_type = match name_token.token_type {
8336                            TokenType::Date => DataType::Date,
8337                            TokenType::Timestamp => DataType::Timestamp {
8338                                precision: None,
8339                                with_tz: explicit_tz.unwrap_or(false),
8340                            },
8341                            TokenType::TimestampTz => DataType::Timestamp {
8342                                precision: None,
8343                                with_tz: true,
8344                            },
8345                            TokenType::Time => DataType::Time { precision: None },
8346                            _ => unreachable!(),
8347                        };
8348                        return Ok(Expr::Cast {
8349                            expr: Box::new(Expr::StringLiteral(value_token.value)),
8350                            data_type,
8351                        });
8352                    }
8353                }
8354
8355                // ── ANSI / PG generic typed string literal: `TYPE 'lit'` ──
8356                // (e.g. `bool 'true'`, `int4 '42'`, `varchar 'x'`). When the
8357                // current token is a data-type keyword (not already handled
8358                // above) and a String literal follows, fold the pair into a
8359                // Cast so the surrounding expression parses.
8360                if self.is_data_type_token_kind(&name_token.token_type)
8361                    && self.peek_type() == &TokenType::String
8362                {
8363                    let value_token = self.advance().clone();
8364                    let data_type = match name_token.token_type {
8365                        TokenType::Boolean => DataType::Boolean,
8366                        TokenType::Int | TokenType::Integer => DataType::Int,
8367                        TokenType::BigInt => DataType::BigInt,
8368                        TokenType::SmallInt => DataType::SmallInt,
8369                        TokenType::TinyInt => DataType::TinyInt,
8370                        TokenType::Float => DataType::Float,
8371                        TokenType::Double => DataType::Double,
8372                        TokenType::Real => DataType::Real,
8373                        TokenType::Decimal => DataType::Decimal {
8374                            precision: None,
8375                            scale: None,
8376                        },
8377                        TokenType::Numeric => DataType::Numeric {
8378                            precision: None,
8379                            scale: None,
8380                        },
8381                        TokenType::Varchar => DataType::Varchar(None),
8382                        TokenType::Char => DataType::Char(None),
8383                        TokenType::Text => DataType::Text,
8384                        TokenType::Json => DataType::Json,
8385                        TokenType::Jsonb => DataType::Jsonb,
8386                        TokenType::Uuid => DataType::Uuid,
8387                        TokenType::Bytea => DataType::Bytea,
8388                        TokenType::Blob => DataType::Blob,
8389                        _ => DataType::Unknown(name.clone()),
8390                    };
8391                    return Ok(Expr::Cast {
8392                        expr: Box::new(Expr::StringLiteral(value_token.value)),
8393                        data_type,
8394                    });
8395                }
8396
8397                // PostgreSQL geometric / network / OID type aliases used as
8398                // typed-literal prefixes (e.g. `box '(1,2,3,4)'`,
8399                // `point '(1,2)'`, `inet '127.0.0.1'`). Recognize a curated
8400                // list of bare identifiers followed by a String literal and
8401                // fold the pair into a Cast(Unknown(name)).
8402                if name_qs == QuoteStyle::None
8403                    && self.peek_type() == &TokenType::String
8404                    && matches!(
8405                        name.to_ascii_lowercase().as_str(),
8406                        "box"
8407                            | "point"
8408                            | "circle"
8409                            | "line"
8410                            | "lseg"
8411                            | "path"
8412                            | "polygon"
8413                            | "inet"
8414                            | "cidr"
8415                            | "macaddr"
8416                            | "macaddr8"
8417                            | "money"
8418                            | "regclass"
8419                            | "regtype"
8420                            | "regproc"
8421                            | "regprocedure"
8422                            | "regrole"
8423                            | "regnamespace"
8424                            | "regoperator"
8425                            | "regoper"
8426                            | "oid"
8427                            | "xml"
8428                            | "tsvector"
8429                            | "tsquery"
8430                            | "jsonpath"
8431                            | "name"
8432                            | "bit"
8433                            | "varbit"
8434                            | "interval"
8435                            | "bool"
8436                            | "int2"
8437                            | "int4"
8438                            | "int8"
8439                            | "float4"
8440                            | "float8"
8441                    )
8442                {
8443                    let value_token = self.advance().clone();
8444                    return Ok(Expr::Cast {
8445                        expr: Box::new(Expr::StringLiteral(value_token.value)),
8446                        data_type: DataType::Unknown(name.clone()),
8447                    });
8448                }
8449
8450                // ── Bare niladic temporal keywords: CURRENT_TIME, CURRENT_DATE,
8451                //    CURRENT_TIMESTAMP, LOCALTIMESTAMP (no parens) ──
8452                // ANSI SQL allows these without parentheses. Materialize them
8453                // as typed functions so the generator can emit dialect-specific
8454                // forms (e.g. TSQL requires CAST(GETDATE() AS TIME) rather than
8455                // a bare CURRENT_TIME reserved word).
8456                if name_qs == QuoteStyle::None && self.peek_type() != &TokenType::LParen {
8457                    let upper = name.to_ascii_uppercase();
8458                    let typed = match upper.as_str() {
8459                        "CURRENT_DATE" => Some(TypedFunction::CurrentDate),
8460                        "CURRENT_TIME" => Some(TypedFunction::CurrentTime),
8461                        "CURRENT_TIMESTAMP" | "LOCALTIMESTAMP" => {
8462                            Some(TypedFunction::CurrentTimestamp)
8463                        }
8464                        _ => None,
8465                    };
8466                    if let Some(tf) = typed {
8467                        return Ok(Expr::TypedFunction {
8468                            func: tf,
8469                            filter: None,
8470                            over: None,
8471                        });
8472                    }
8473                }
8474
8475                // Function call: name(...)
8476                if self.peek_type() == &TokenType::LParen {
8477                    self.advance();
8478
8479                    // TRY_CAST / SAFE_CAST / TRY_TO_TIMESTAMP / … — same shape
8480                    // as `CAST(expr AS type)`. Lower to `Expr::Cast` when the
8481                    // body matches; fall back to ordinary function call when
8482                    // it does not (e.g. comma-separated args).
8483                    if matches!(
8484                        name.to_ascii_uppercase().as_str(),
8485                        "TRY_CAST" | "SAFE_CAST"
8486                    ) {
8487                        let save = self.pos;
8488                        let inner = self.parse_expr()?;
8489                        if self.match_token(TokenType::As) {
8490                            let dt = self.parse_data_type()?;
8491                            self.expect(TokenType::RParen)?;
8492                            return Ok(Expr::Cast {
8493                                expr: Box::new(inner),
8494                                data_type: dt,
8495                            });
8496                        }
8497                        self.pos = save;
8498                    }
8499
8500                    // Special: COUNT(*), COUNT(DISTINCT x)
8501                    let distinct = self.match_token(TokenType::Distinct);
8502                    // ANSI / ClickHouse `agg(ALL …)` — `ALL` is the opposite
8503                    // of DISTINCT and the default. Swallow so the args parse.
8504                    if !distinct {
8505                        let _ = self.match_token(TokenType::All);
8506                    }
8507
8508                    // Standard SQL syntactic forms for string functions:
8509                    //   SUBSTRING(expr FROM start [FOR len])
8510                    //   SUBSTRING(expr FOR len)
8511                    //   TRIM([LEADING|TRAILING|BOTH] [chars] FROM expr)
8512                    //   POSITION(needle IN haystack)
8513                    //   OVERLAY(expr PLACING str FROM start [FOR len])
8514                    let upper_name = name.to_ascii_uppercase();
8515                    if !distinct && self.peek_type() != &TokenType::RParen {
8516                        match upper_name.as_str() {
8517                            "SUBSTRING" | "SUBSTR" => {
8518                                let saved = self.pos;
8519                                let first = self.parse_expr()?;
8520                                if self.match_token(TokenType::From) {
8521                                    let start = self.parse_expr()?;
8522                                    let length = if self.check_keyword("FOR") {
8523                                        self.advance();
8524                                        Some(self.parse_expr()?)
8525                                    } else {
8526                                        None
8527                                    };
8528                                    self.expect(TokenType::RParen)?;
8529                                    let mut a = vec![first, start];
8530                                    if let Some(l) = length {
8531                                        a.push(l);
8532                                    }
8533                                    return Ok(Expr::Function {
8534                                        name: name.clone(),
8535                                        args: a,
8536                                        distinct: false,
8537                                        filter: None,
8538                                        over: None,
8539                                        order_by: Vec::new(),
8540                                        within_group: false,
8541                                    });
8542                                } else if self.check_keyword("FOR") {
8543                                    self.advance();
8544                                    let len = self.parse_expr()?;
8545                                    self.expect(TokenType::RParen)?;
8546                                    return Ok(Expr::Function {
8547                                        name: name.clone(),
8548                                        args: vec![first, len],
8549                                        distinct: false,
8550                                        filter: None,
8551                                        over: None,
8552                                        order_by: Vec::new(),
8553                                        within_group: false,
8554                                    });
8555                                }
8556                                self.pos = saved;
8557                            }
8558                            "TRIM" => {
8559                                let saved = self.pos;
8560                                if self.check_keyword("LEADING")
8561                                    || self.check_keyword("TRAILING")
8562                                    || self.check_keyword("BOTH")
8563                                {
8564                                    self.advance();
8565                                }
8566                                if self.peek_type() == &TokenType::From {
8567                                    self.advance();
8568                                    let expr = self.parse_expr()?;
8569                                    self.expect(TokenType::RParen)?;
8570                                    return Ok(Expr::Function {
8571                                        name: name.clone(),
8572                                        args: vec![expr],
8573                                        distinct: false,
8574                                        filter: None,
8575                                        over: None,
8576                                        order_by: Vec::new(),
8577                                        within_group: false,
8578                                    });
8579                                }
8580                                let chars = self.parse_expr()?;
8581                                if self.match_token(TokenType::From) {
8582                                    let expr = self.parse_expr()?;
8583                                    self.expect(TokenType::RParen)?;
8584                                    return Ok(Expr::Function {
8585                                        name: name.clone(),
8586                                        args: vec![expr, chars],
8587                                        distinct: false,
8588                                        filter: None,
8589                                        over: None,
8590                                        order_by: Vec::new(),
8591                                        within_group: false,
8592                                    });
8593                                }
8594                                self.pos = saved;
8595                            }
8596                            "POSITION" => {
8597                                let saved = self.pos;
8598                                let needle = self.parse_expr()?;
8599                                if self.match_token(TokenType::In) {
8600                                    let haystack = self.parse_expr()?;
8601                                    self.expect(TokenType::RParen)?;
8602                                    return Ok(Expr::Function {
8603                                        name: name.clone(),
8604                                        args: vec![needle, haystack],
8605                                        distinct: false,
8606                                        filter: None,
8607                                        over: None,
8608                                        order_by: Vec::new(),
8609                                        within_group: false,
8610                                    });
8611                                }
8612                                self.pos = saved;
8613                            }
8614                            "OVERLAY" => {
8615                                let saved = self.pos;
8616                                let target = self.parse_expr()?;
8617                                if self.check_keyword("PLACING") {
8618                                    self.advance();
8619                                    let placing = self.parse_expr()?;
8620                                    if self.match_token(TokenType::From) {
8621                                        let from = self.parse_expr()?;
8622                                        let len = if self.check_keyword("FOR") {
8623                                            self.advance();
8624                                            Some(self.parse_expr()?)
8625                                        } else {
8626                                            None
8627                                        };
8628                                        self.expect(TokenType::RParen)?;
8629                                        let mut a = vec![target, placing, from];
8630                                        if let Some(l) = len {
8631                                            a.push(l);
8632                                        }
8633                                        return Ok(Expr::Function {
8634                                            name: name.clone(),
8635                                            args: a,
8636                                            distinct: false,
8637                                            filter: None,
8638                                            over: None,
8639                                            order_by: Vec::new(),
8640                                            within_group: false,
8641                                        });
8642                                    }
8643                                }
8644                                self.pos = saved;
8645                            }
8646                            _ => {}
8647                        }
8648                    }
8649
8650                    // MySQL's GROUP_CONCAT has bespoke grammar
8651                    // (ORDER BY ..., SEPARATOR ...) — parse it into a typed
8652                    // expression so the structure is preserved across dialects.
8653                    if name.eq_ignore_ascii_case("GROUP_CONCAT") {
8654                        let expr = self.parse_group_concat_call(distinct)?;
8655                        self.expect(TokenType::RParen)?;
8656                        return Ok(expr);
8657                    }
8658
8659                    let args = if self.peek_type() == &TokenType::RParen {
8660                        vec![]
8661                    } else if self.peek_type() == &TokenType::Star {
8662                        self.advance();
8663                        vec![Expr::Wildcard]
8664                    } else {
8665                        let mut a = vec![self.parse_function_arg()?];
8666                        while self.match_token(TokenType::Comma) {
8667                            a.push(self.parse_function_arg()?);
8668                        }
8669                        a
8670                    };
8671
8672                    // Optional aggregate ORDER BY inside arg list (Postgres / Spark):
8673                    //   array_agg(x ORDER BY y DESC)
8674                    //   string_agg(x, ',' ORDER BY y)
8675                    let mut agg_order_by: Vec<OrderByItem> = vec![];
8676                    if self.peek_type() == &TokenType::Order {
8677                        self.advance();
8678                        self.expect(TokenType::By)?;
8679                        agg_order_by = self.parse_order_by_items()?;
8680                    }
8681                    // BigQuery / Snowflake: `ARRAY_AGG(x [ORDER BY y] LIMIT n)`.
8682                    // Swallow the trailing LIMIT clause inside the function call.
8683                    if self.peek_type() == &TokenType::Limit {
8684                        self.advance();
8685                        let _ = self.parse_expr();
8686                    }
8687                    // DuckDB aggregate-state modifier:
8688                    //   `count(1) EXPORT_STATE` returns the aggregate state
8689                    //   rather than its final value. We don't model it.
8690                    if self.check_keyword("EXPORT_STATE") {
8691                        self.advance();
8692                    }
8693                    self.expect(TokenType::RParen)?;
8694
8695                    // Optional WITHIN GROUP (ORDER BY ...) — ordered-set aggregates
8696                    //   percentile_cont(0.5) WITHIN GROUP (ORDER BY x)
8697                    //   listagg(x, ',') WITHIN GROUP (ORDER BY x)
8698                    let mut within_group = false;
8699                    let mut wg_order_by: Vec<OrderByItem> = vec![];
8700                    if self.check_keyword("WITHIN") {
8701                        self.advance();
8702                        self.expect_keyword("GROUP")?;
8703                        self.expect(TokenType::LParen)?;
8704                        self.expect(TokenType::Order)?;
8705                        self.expect(TokenType::By)?;
8706                        wg_order_by = self.parse_order_by_items()?;
8707                        self.expect(TokenType::RParen)?;
8708                        within_group = true;
8709                    }
8710
8711                    let final_order_by = if within_group { wg_order_by } else { agg_order_by };
8712
8713                    // Try to construct a typed function variant only when there are no
8714                    // aggregate-specific clauses (otherwise we lose them).
8715                    if final_order_by.is_empty()
8716                        && !within_group
8717                        && let Some(typed) = Self::try_typed_function(&name, args.clone(), distinct)
8718                    {
8719                        return Ok(typed);
8720                    }
8721
8722                    Ok(Expr::Function {
8723                        name,
8724                        args,
8725                        distinct,
8726                        filter: None,
8727                        over: None,
8728                        order_by: final_order_by,
8729                        within_group,
8730                    })
8731                }
8732                // Qualified column: table.column or table.*
8733                else if self.match_token(TokenType::Dot) {
8734                    if self.peek_type() == &TokenType::Star {
8735                        self.advance();
8736                        Ok(Expr::QualifiedWildcard { table: name })
8737                    } else {
8738                        // ClickHouse JSON subobject and typed access at the
8739                        // first dot: `json.^a`, `json.:Int64`.
8740                        let _ = self.match_token(TokenType::BitwiseXor);
8741                        let _ = self.match_token(TokenType::Colon);
8742                        let (mut col, mut col_qs) = if matches!(
8743                            self.peek_type(),
8744                            TokenType::Number
8745                        ) {
8746                            // ClickHouse tuple index `x.1`.
8747                            let v = self.peek().value.clone();
8748                            self.advance();
8749                            (v, QuoteStyle::None)
8750                        } else if matches!(self.peek_type(), TokenType::Null) {
8751                            // ClickHouse JSON subcolumn `.null` (e.g.
8752                            // `arr.null`, `t.s.null`). Accept the keyword as
8753                            // a field name in dotted-access position.
8754                            let v = self.peek().value.clone();
8755                            self.advance();
8756                            (v, QuoteStyle::None)
8757                        } else {
8758                            self.expect_name_with_quote()?
8759                        };
8760                        // Handle 3+ part qualified names like `db.schema.table.column`
8761                        // (DuckDB, ClickHouse). We collapse everything except the
8762                        // final segment into the `table` field as a dotted string.
8763                        let mut table = name;
8764                        let mut table_qs = name_qs;
8765                        while self.match_token(TokenType::Dot) {
8766                            if self.peek_type() == &TokenType::Star {
8767                                self.advance();
8768                                let mut full = table;
8769                                full.push('.');
8770                                full.push_str(&col);
8771                                return Ok(Expr::QualifiedWildcard { table: full });
8772                            }
8773                            // ClickHouse JSON subobject (`json.^a`) and typed
8774                            // access (`json.a.:Int64`) — swallow the operator
8775                            // so the following name can be consumed normally.
8776                            let _ = self.match_token(TokenType::BitwiseXor);
8777                            let _ = self.match_token(TokenType::Colon);
8778                            // ClickHouse tuple index (`t.1`): treat number as
8779                            // a synthetic field name.
8780                            let (next_col, next_qs) = if matches!(
8781                                self.peek_type(),
8782                                TokenType::Number
8783                            ) {
8784                                let v = self.peek().value.clone();
8785                                self.advance();
8786                                (v, QuoteStyle::None)
8787                            } else if matches!(self.peek_type(), TokenType::Null) {
8788                                let v = self.peek().value.clone();
8789                                self.advance();
8790                                (v, QuoteStyle::None)
8791                            } else {
8792                                self.expect_name_with_quote()?
8793                            };
8794                            table.push('.');
8795                            table.push_str(&col);
8796                            table_qs = col_qs;
8797                            col = next_col;
8798                            col_qs = next_qs;
8799                        }
8800                        // Function call on dotted name: db.schema.func(args).
8801                        if self.peek_type() == &TokenType::LParen {
8802                            self.advance();
8803                            let mut full = table;
8804                            full.push('.');
8805                            full.push_str(&col);
8806                            let args = if self.peek_type() != &TokenType::RParen {
8807                                let mut a = vec![self.parse_function_arg()?];
8808                                while self.match_token(TokenType::Comma) {
8809                                    a.push(self.parse_function_arg()?);
8810                                }
8811                                a
8812                            } else {
8813                                vec![]
8814                            };
8815                            self.expect(TokenType::RParen)?;
8816                            return Ok(Expr::Function {
8817                                name: full,
8818                                args,
8819                                distinct: false,
8820                                filter: None,
8821                                over: None,
8822                                order_by: Vec::new(),
8823                                within_group: false,
8824                            });
8825                        }
8826                        Ok(Expr::Column {
8827                            table: Some(table),
8828                            name: col,
8829                            quote_style: col_qs,
8830                            table_quote_style: table_qs,
8831                        })
8832                    }
8833                } else {
8834                    Ok(Expr::Column {
8835                        table: None,
8836                        name,
8837                        quote_style: name_qs,
8838                        table_quote_style: QuoteStyle::None,
8839                    })
8840                }
8841            }
8842
8843            _ => {
8844                // Fallback: any other token whose value is a valid identifier
8845                // and is immediately followed by `(` is treated as a function
8846                // call. This handles reserved keywords used as Spark/Hive
8847                // built-ins (IF, ALL, ANY, EXISTS, MOD, etc.) and dialect
8848                // functions that happen to collide with token types.
8849                let v = token.value.clone();
8850                let is_word = !v.is_empty()
8851                    && v.chars()
8852                        .all(|c| c.is_ascii_alphanumeric() || c == '_');
8853                if is_word
8854                    && matches!(
8855                        self.peek_offset(1).map(|t| &t.token_type),
8856                        Some(TokenType::LParen)
8857                    )
8858                {
8859                    // Only TRY_CAST / SAFE_CAST are special-cased here — same
8860                    // shape as `CAST(expr AS type)`. Lower to `Expr::Cast` (the
8861                    // TRY/SAFE prefix is not recorded), or to an ordinary
8862                    // function call when no `AS type` follows the expression.
8863                    let upper = v.to_ascii_uppercase();
8864                    if matches!(
8865                        upper.as_str(),
8866                        "TRY_CAST" | "SAFE_CAST"
8867                    ) {
8868                        self.advance();
8869                        self.advance(); // consume '('
8870                        let inner = self.parse_expr()?;
8871                        if self.match_token(TokenType::As) {
8872                            let data_type = self.parse_data_type()?;
8873                            self.expect(TokenType::RParen)?;
8874                            return Ok(Expr::Cast {
8875                                expr: Box::new(inner),
8876                                data_type,
8877                            });
8878                        }
8879                        // Fall back: treat as ordinary function call.
8880                        let mut args = vec![inner];
8881                        while self.match_token(TokenType::Comma) {
8882                            args.push(self.parse_expr()?);
8883                        }
8884                        self.expect(TokenType::RParen)?;
8885                        return Ok(Expr::Function {
8886                            name: v,
8887                            args,
8888                            distinct: false,
8889                            filter: None,
8890                            over: None,
8891                            order_by: Vec::new(),
8892                            within_group: false,
8893                        });
8894                    }
8895                    self.advance();
8896                    self.advance(); // consume '('
8897                    let upper = v.to_ascii_uppercase();
8898                    // Standard SQL / MySQL `SUBSTRING(expr FROM start [FOR length])`,
8899                    // plus the FROM-less `SUBSTRING(expr FOR length)` form.
8900                    if matches!(upper.as_str(), "SUBSTRING" | "SUBSTR")
8901                        && self.peek_type() != &TokenType::RParen
8902                    {
8903                        let saved = self.pos;
8904                        let first = self.parse_expr()?;
8905                        if self.match_token(TokenType::From) {
8906                            let start = self.parse_expr()?;
8907                            let length = if self.check_keyword("FOR") {
8908                                self.advance();
8909                                Some(self.parse_expr()?)
8910                            } else {
8911                                None
8912                            };
8913                            self.expect(TokenType::RParen)?;
8914                            let mut args = vec![first, start];
8915                            if let Some(len) = length {
8916                                args.push(len);
8917                            }
8918                            return Ok(Expr::Function {
8919                                name: v,
8920                                args,
8921                                distinct: false,
8922                                filter: None,
8923                                over: None,
8924                                order_by: Vec::new(),
8925                                within_group: false,
8926                            });
8927                        }
8928                        if self.check_keyword("FOR") {
8929                            self.advance();
8930                            let length = self.parse_expr()?;
8931                            self.expect(TokenType::RParen)?;
8932                            return Ok(Expr::Function {
8933                                name: v,
8934                                args: vec![first, length],
8935                                distinct: false,
8936                                filter: None,
8937                                over: None,
8938                                order_by: Vec::new(),
8939                                within_group: false,
8940                            });
8941                        }
8942                        // Fall back: re-parse as comma list.
8943                        self.pos = saved;
8944                    }
8945                    // Standard `TRIM([LEADING|TRAILING|BOTH] [chars] FROM expr)`.
8946                    // `TRIM(expr [, chars])` is left to the comma-list fallback below.
8947                    if upper == "TRIM" && self.peek_type() != &TokenType::RParen {
8948                        let saved = self.pos;
8949                        if self.check_keyword("LEADING")
8950                            || self.check_keyword("TRAILING")
8951                            || self.check_keyword("BOTH")
8952                        {
8953                            self.advance();
8954                        }
8955                        if self.peek_type() == &TokenType::From {
8956                            self.advance();
8957                            let expr = self.parse_expr()?;
8958                            self.expect(TokenType::RParen)?;
8959                            return Ok(Expr::Function {
8960                                name: v,
8961                                args: vec![expr],
8962                                distinct: false,
8963                                filter: None,
8964                                over: None,
8965                                order_by: Vec::new(),
8966                                within_group: false,
8967                            });
8968                        }
8969                        // chars FROM expr
8970                        let chars = self.parse_expr()?;
8971                        if self.match_token(TokenType::From) {
8972                            let expr = self.parse_expr()?;
8973                            self.expect(TokenType::RParen)?;
8974                            return Ok(Expr::Function {
8975                                name: v,
8976                                args: vec![expr, chars],
8977                                distinct: false,
8978                                filter: None,
8979                                over: None,
8980                                order_by: Vec::new(),
8981                                within_group: false,
8982                            });
8983                        }
8984                        // Plain comma list — fall back.
8985                        self.pos = saved;
8986                    }
8987                    // Standard `OVERLAY(expr PLACING str FROM start [FOR len])`.
8988                    if upper == "OVERLAY" && self.peek_type() != &TokenType::RParen {
8989                        let saved = self.pos;
8990                        let target = self.parse_expr()?;
8991                        if self.check_keyword("PLACING") {
8992                            self.advance();
8993                            let placing = self.parse_expr()?;
8994                            self.expect(TokenType::From)?;
8995                            let from = self.parse_expr()?;
8996                            let len = if self.check_keyword("FOR") {
8997                                self.advance();
8998                                Some(self.parse_expr()?)
8999                            } else {
9000                                None
9001                            };
9002                            self.expect(TokenType::RParen)?;
9003                            let mut args = vec![target, placing, from];
9004                            if let Some(l) = len {
9005                                args.push(l);
9006                            }
9007                            return Ok(Expr::Function {
9008                                name: v,
9009                                args,
9010                                distinct: false,
9011                                filter: None,
9012                                over: None,
9013                                order_by: Vec::new(),
9014                                within_group: false,
9015                            });
9016                        }
9017                        self.pos = saved;
9018                    }
9019                    // Standard `POSITION(needle IN haystack)`.
9020                    if upper == "POSITION" && self.peek_type() != &TokenType::RParen {
9021                        let saved = self.pos;
9022                        let needle = self.parse_expr()?;
9023                        if self.check_keyword("IN") {
9024                            self.advance();
9025                            let haystack = self.parse_expr()?;
9026                            self.expect(TokenType::RParen)?;
9027                            return Ok(Expr::Function {
9028                                name: v,
9029                                args: vec![needle, haystack],
9030                                distinct: false,
9031                                filter: None,
9032                                over: None,
9033                                order_by: Vec::new(),
9034                                within_group: false,
9035                            });
9036                        }
9037                        self.pos = saved;
9038                    }
9039                    let mut args = Vec::new();
9040                    if self.peek_type() != &TokenType::RParen {
9041                        args.push(self.parse_function_arg()?);
9042                        while self.match_token(TokenType::Comma) {
9043                            args.push(self.parse_function_arg()?);
9044                        }
9045                    }
9046                    self.expect(TokenType::RParen)?;
9047                    return Ok(Expr::Function {
9048                        name: v,
9049                        args,
9050                        distinct: false,
9051                        filter: None,
9052                        over: None,
9053                        order_by: Vec::new(),
9054                        within_group: false,
9055                    });
9056                }
9057                Err(SqlglotError::UnexpectedToken { token })
9058            }
9059        }
9060    }
9061
9062    /// Parse a single function-call argument. Accepts the DuckDB / PostgreSQL
9063    /// named-argument syntaxes `name := value` and `name => value` and falls
9064    /// back to a plain expression for positional arguments. The argument
9065    /// name is discarded — we don't model it in the AST.
9066    fn parse_function_arg(&mut self) -> Result<Expr> {
9067        // Hive table-valued function clause: `noop(on tbl partition by p
9068        // order by q distribute by r cluster by s sort by t)`. The arg
9069        // list begins with the `ON` keyword and is followed by a series
9070        // of windowing-style clauses we don't model. Swallow it as an
9071        // opaque payload so we don't reject the call.
9072        if matches!(self.peek_type(), TokenType::On) {
9073            let mut depth = 0usize;
9074            while !matches!(self.peek_type(), TokenType::Eof) {
9075                match self.peek_type() {
9076                    TokenType::LParen => depth += 1,
9077                    TokenType::RParen => {
9078                        if depth == 0 {
9079                            break;
9080                        }
9081                        depth -= 1;
9082                    }
9083                    TokenType::Comma if depth == 0 => break,
9084                    _ => {}
9085                }
9086                self.advance();
9087            }
9088            return Ok(Expr::Null);
9089        }
9090        if self.is_name_token() || self.is_data_type_token() || matches!(self.peek_type(), TokenType::Recursive) {
9091            let next = self.peek_offset(1).map(|t| &t.token_type);
9092            if matches!(next, Some(TokenType::Colon)) {
9093                let after = self.peek_offset(2).map(|t| &t.token_type);
9094                if matches!(after, Some(TokenType::Eq)) {
9095                    self.advance();
9096                    self.advance();
9097                    self.advance();
9098                    return self.parse_expr();
9099                }
9100            }
9101            if matches!(next, Some(TokenType::DoubleArrow)) {
9102                self.advance();
9103                self.advance();
9104                return self.parse_expr();
9105            }
9106        }
9107        // ClickHouse table functions: `view(SELECT …)`, `cluster(…)` etc.
9108        // accept a full SELECT / WITH / UNION inside the arg list. Parse
9109        // it as a Subquery so the surrounding call closes properly.
9110        if matches!(self.peek_type(), TokenType::Select | TokenType::With) {
9111            let stmt = self.parse_statement_inner()?;
9112            return Ok(Expr::Subquery(Box::new(stmt)));
9113        }
9114        let mut expr = self.parse_expr()?;
9115        // Oracle / Snowflake / MySQL `JSON_OBJECT('k' : value, ...)` and the
9116        // `JSON_OBJECTAGG(k : v)` family use `:` as a key-value separator
9117        // inside function args. After parsing the first expression, swallow
9118        // a bare `:` and parse the value side; emit the value as the arg
9119        // (we don't model JSON key-value pairs in the AST). Only fire when
9120        // the next-after-colon is not another `:` (`::` cast) and not `=`
9121        // (`:=` named arg, already handled above).
9122        if matches!(self.peek_type(), TokenType::Colon)
9123            && !matches!(
9124                self.peek_offset(1).map(|t| &t.token_type),
9125                Some(TokenType::Colon) | Some(TokenType::Eq)
9126            )
9127        {
9128            self.advance(); // :
9129            expr = self.parse_expr()?;
9130            // Optional `FORMAT JSON` suffix (Oracle).
9131            if self.peek().value.eq_ignore_ascii_case("FORMAT")
9132                && self.peek_offset(1).map(|t| t.value.eq_ignore_ascii_case("JSON")).unwrap_or(false)
9133            {
9134                self.advance();
9135                self.advance();
9136            }
9137        }
9138        // ClickHouse: `func(expr AS alias)` — swallow the alias.
9139        if self.match_token(TokenType::As) && self.is_name_token() {
9140            self.advance();
9141        }
9142        // Spark / Databricks UDTF call: `UDTF(TABLE(t) [PARTITION BY cols]
9143        // [ORDER BY cols])`. Swallow the table-argument modifiers opaquely.
9144        if self.peek_type() == &TokenType::Partition
9145            && self
9146                .peek_offset(1)
9147                .map(|t| matches!(t.token_type, TokenType::By))
9148                .unwrap_or(false)
9149        {
9150            self.advance(); // PARTITION
9151            self.advance(); // BY
9152            // Comma-separated expression list (column refs / exprs).
9153            let _ = self.parse_expr()?;
9154            while self.match_token(TokenType::Comma) {
9155                let _ = self.parse_expr()?;
9156            }
9157        }
9158        if self.peek_type() == &TokenType::Order
9159            && self
9160                .peek_offset(1)
9161                .map(|t| matches!(t.token_type, TokenType::By))
9162                .unwrap_or(false)
9163        {
9164            self.advance(); // ORDER
9165            self.advance(); // BY
9166            let _ = self.parse_order_by_items()?;
9167        }
9168        // BigQuery / DuckDB / Snowflake / Oracle window-function nulls
9169        // modifier: `LAST_VALUE(arg IGNORE NULLS)`, `... RESPECT NULLS`.
9170        // Swallow opaquely; we don't model it in the AST.
9171        if (self.peek().value.eq_ignore_ascii_case("IGNORE")
9172            || self.peek().value.eq_ignore_ascii_case("RESPECT"))
9173            && self
9174                .peek_offset(1)
9175                .map(|t| t.token_type == TokenType::Null || t.value.eq_ignore_ascii_case("NULLS"))
9176                .unwrap_or(false)
9177        {
9178            self.advance();
9179            self.advance();
9180        }
9181        // Postgres JSON helpers: `JSON_SERIALIZE(expr RETURNING type)`,
9182        // `JSON_QUERY(... RETURNING jsonb FORMAT JSON)`,
9183        // `JSON_VALUE(... RETURNING type DEFAULT v ON EMPTY|ERROR …)`. After
9184        // any RETURNING clause, swallow the optional FORMAT, DEFAULT, ON
9185        // EMPTY/ERROR tail so the call parses cleanly.
9186        if self.match_token(TokenType::Returning) {
9187            if self.is_data_type_token() || self.is_name_token() {
9188                let _ = self.parse_data_type();
9189            }
9190        }
9191        // SQL/JSON `PASSING v AS name [, v AS name]*` clause inside
9192        // JSON_EXISTS / JSON_VALUE / JSON_QUERY argument lists.
9193        if self.check_keyword("PASSING") {
9194            self.advance();
9195            loop {
9196                let _ = self.parse_expr()?;
9197                if self.match_token(TokenType::As) && self.is_name_token() {
9198                    self.advance();
9199                }
9200                if !self.match_token(TokenType::Comma) {
9201                    break;
9202                }
9203            }
9204        }
9205        // SQL/JSON behavior clauses: `NULL|ERROR|EMPTY [ARRAY|OBJECT]|
9206        // DEFAULT expr ON EMPTY|ERROR`. Swallow them opaquely; the
9207        // surrounding call still resolves to its primary expression.
9208        loop {
9209            let is_default = self.peek_type() == &TokenType::Default;
9210            let is_behavior_kw = self.check_keyword("ERROR")
9211                || self.check_keyword("NULL")
9212                || self.peek_type() == &TokenType::Null
9213                || self.check_keyword("EMPTY")
9214                || self.check_keyword("TRUE")
9215                || self.check_keyword("FALSE")
9216                || self.check_keyword("UNKNOWN");
9217            if !is_default && !is_behavior_kw {
9218                break;
9219            }
9220            // Look ahead: behavior keyword must be followed (possibly via
9221            // optional ARRAY/OBJECT/expr) by `ON ERROR|EMPTY` to qualify.
9222            let saved = self.pos;
9223            if is_default {
9224                self.advance();
9225                let _ = self.parse_expr();
9226            } else {
9227                self.advance();
9228                if self.check_keyword("ARRAY") || self.check_keyword("OBJECT") {
9229                    self.advance();
9230                }
9231            }
9232            if self.peek_type() == &TokenType::On
9233                && self
9234                    .peek_offset(1)
9235                    .map(|t| t.value.eq_ignore_ascii_case("ERROR")
9236                        || t.value.eq_ignore_ascii_case("EMPTY"))
9237                    .unwrap_or(false)
9238            {
9239                self.advance(); // ON
9240                self.advance(); // ERROR / EMPTY
9241            } else {
9242                // Not actually a behavior clause — rewind.
9243                self.pos = saved;
9244                break;
9245            }
9246        }
9247        // MySQL `CONVERT(expr USING charset)` — swallow USING + name.
9248        if self.match_token(TokenType::Using) {
9249            if self.is_name_token() {
9250                self.advance();
9251            }
9252        }
9253        // ON EMPTY / ON ERROR / DEFAULT … ON EMPTY|ERROR / FORMAT … —
9254        // tolerated tail clauses common to JSON_VALUE / JSON_QUERY /
9255        // JSON_EXISTS. Loop while one of the recognized starters appears.
9256        loop {
9257            let starts = self.peek_type() == &TokenType::Default
9258                || self.match_keyword_clone("FORMAT")
9259                || (self.peek_type() == &TokenType::On
9260                    && self
9261                        .peek_offset(1)
9262                        .map(|t| {
9263                            t.value.eq_ignore_ascii_case("EMPTY")
9264                                || t.value.eq_ignore_ascii_case("ERROR")
9265                        })
9266                        .unwrap_or(false));
9267            if !starts {
9268                break;
9269            }
9270            // Consume up to the next top-level `,` / `)` / EOF, tracking
9271            // nesting so embedded parens (e.g. `DEFAULT ('C' COLLATE "C")`)
9272            // don't terminate prematurely.
9273            let mut depth = 0i32;
9274            while !matches!(self.peek_type(), TokenType::Eof) {
9275                match self.peek_type() {
9276                    TokenType::LParen | TokenType::LBracket => depth += 1,
9277                    TokenType::RParen | TokenType::RBracket => {
9278                        if depth == 0 {
9279                            break;
9280                        }
9281                        depth -= 1;
9282                    }
9283                    TokenType::Comma if depth == 0 => break,
9284                    _ => {}
9285                }
9286                self.advance();
9287            }
9288        }
9289        Ok(expr)
9290    }
9291
9292    /// True when the current token is a name token whose uppercase value
9293    /// equals `kw`. Does NOT advance the token cursor.
9294    fn match_keyword_clone(&self, kw: &str) -> bool {
9295        self.check_keyword(kw)
9296    }
9297
9298    fn is_data_type_token(&self) -> bool {
9299        self.is_data_type_token_kind(self.peek_type())
9300    }
9301
9302    fn is_data_type_token_kind(&self, tt: &TokenType) -> bool {
9303        matches!(
9304            tt,
9305            TokenType::Int
9306                | TokenType::Integer
9307                | TokenType::BigInt
9308                | TokenType::SmallInt
9309                | TokenType::TinyInt
9310                | TokenType::Float
9311                | TokenType::Double
9312                | TokenType::Decimal
9313                | TokenType::Numeric
9314                | TokenType::Real
9315                | TokenType::Varchar
9316                | TokenType::Char
9317                | TokenType::Text
9318                | TokenType::Boolean
9319                | TokenType::Date
9320                | TokenType::Timestamp
9321                | TokenType::TimestampTz
9322                | TokenType::Time
9323                | TokenType::Interval
9324                | TokenType::Blob
9325                | TokenType::Bytea
9326                | TokenType::Json
9327                | TokenType::Jsonb
9328                | TokenType::Uuid
9329                | TokenType::Array
9330                | TokenType::Map
9331                | TokenType::Struct
9332        )
9333    }
9334
9335    fn parse_datetime_field(&mut self) -> Result<DateTimeField> {
9336        let token = self.peek().clone();
9337        let field = match &token.token_type {
9338            TokenType::Year => DateTimeField::Year,
9339            TokenType::Month => DateTimeField::Month,
9340            TokenType::Day => DateTimeField::Day,
9341            TokenType::Hour => DateTimeField::Hour,
9342            TokenType::Minute => DateTimeField::Minute,
9343            TokenType::Second => DateTimeField::Second,
9344            TokenType::Epoch => DateTimeField::Epoch,
9345            _ => {
9346                let name = token.value.to_uppercase();
9347                match name.as_str() {
9348                    "YEAR" => DateTimeField::Year,
9349                    "QUARTER" => DateTimeField::Quarter,
9350                    "MONTH" => DateTimeField::Month,
9351                    "WEEK" => DateTimeField::Week,
9352                    "DAY" => DateTimeField::Day,
9353                    "DOW" | "DAYOFWEEK" => DateTimeField::DayOfWeek,
9354                    "DOY" | "DAYOFYEAR" => DateTimeField::DayOfYear,
9355                    "HOUR" => DateTimeField::Hour,
9356                    "MINUTE" => DateTimeField::Minute,
9357                    "SECOND" => DateTimeField::Second,
9358                    "MILLISECOND" | "MILLISECONDS" | "MS" => DateTimeField::Millisecond,
9359                    "MICROSECOND" | "MICROSECONDS" | "US" => DateTimeField::Microsecond,
9360                    "NANOSECOND" | "NANOSECONDS" | "NS" => DateTimeField::Nanosecond,
9361                    "YEARS" => DateTimeField::Year,
9362                    "QUARTERS" => DateTimeField::Quarter,
9363                    "MONTHS" => DateTimeField::Month,
9364                    "WEEKS" => DateTimeField::Week,
9365                    "DAYS" => DateTimeField::Day,
9366                    "HOURS" => DateTimeField::Hour,
9367                    "MINUTES" => DateTimeField::Minute,
9368                    "SECONDS" => DateTimeField::Second,
9369                    "EPOCH" => DateTimeField::Epoch,
9370                    "TIMEZONE" => DateTimeField::Timezone,
9371                    "TIMEZONE_HOUR" => DateTimeField::TimezoneHour,
9372                    "TIMEZONE_MINUTE" => DateTimeField::TimezoneMinute,
9373                    // MySQL composite interval units. We don't model them
9374                    // distinctly; lower to the dominant component so the
9375                    // surrounding parse completes.
9376                    "DAY_HOUR" | "DAY_MINUTE" | "DAY_SECOND" | "DAY_MICROSECOND" => {
9377                        DateTimeField::Day
9378                    }
9379                    "HOUR_MINUTE" | "HOUR_SECOND" | "HOUR_MICROSECOND" => {
9380                        DateTimeField::Hour
9381                    }
9382                    "MINUTE_SECOND" | "MINUTE_MICROSECOND" => DateTimeField::Minute,
9383                    "SECOND_MICROSECOND" => DateTimeField::Second,
9384                    "YEAR_MONTH" => DateTimeField::Year,
9385                    _ => {
9386                        return Err(SqlglotError::ParserError {
9387                            message: format!("Unknown datetime field: {name}"),
9388                        });
9389                    }
9390                }
9391            }
9392        };
9393        self.advance();
9394        Ok(field)
9395    }
9396
9397    fn try_parse_datetime_field(&mut self) -> Option<DateTimeField> {
9398        let saved = self.pos;
9399        match self.parse_datetime_field() {
9400            Ok(field) => Some(field),
9401            Err(_) => {
9402                self.pos = saved;
9403                None
9404            }
9405        }
9406    }
9407
9408    /// Parse the inside of `GROUP_CONCAT(...)` (caller has already consumed
9409    /// the `(` and optional `DISTINCT`). Returns a typed `GroupConcat`
9410    /// expression. Does NOT consume the trailing `)`.
9411    fn parse_group_concat_call(&mut self, distinct: bool) -> Result<Expr> {
9412        let mut exprs: Vec<Expr> = Vec::new();
9413        let mut order_by: Vec<OrderByItem> = Vec::new();
9414        let mut separator: Option<Box<Expr>> = None;
9415
9416        if self.peek_type() != &TokenType::RParen {
9417            exprs.push(self.parse_expr()?);
9418            while self.peek_type() == &TokenType::Comma {
9419                // ORDER BY / SEPARATOR are alternative terminators, not args.
9420                // Peek one past the comma to disambiguate `f(a, b)` from
9421                // `f(a, b ORDER BY ...)` — but comma here always introduces
9422                // another positional arg, so just keep consuming.
9423                self.advance();
9424                exprs.push(self.parse_expr()?);
9425            }
9426
9427            if self.match_token(TokenType::Order) {
9428                self.expect(TokenType::By)?;
9429                order_by = self.parse_order_by_items()?;
9430            }
9431
9432            if self.match_keyword("SEPARATOR") {
9433                separator = Some(Box::new(self.parse_expr()?));
9434            }
9435        }
9436
9437        Ok(Expr::TypedFunction {
9438            func: TypedFunction::GroupConcat {
9439                exprs,
9440                separator,
9441                order_by,
9442                distinct,
9443            },
9444            filter: None,
9445            over: None,
9446        })
9447    }
9448
9449    /// Try to construct a typed function expression from a parsed function call.
9450    /// Returns `None` if the function name is not recognized, falling back to
9451    /// the generic `Expr::Function`.
9452    fn try_typed_function(name: &str, args: Vec<Expr>, distinct: bool) -> Option<Expr> {
9453        let upper = name.to_uppercase();
9454        let tf = match upper.as_str() {
9455            // ── Date/Time ──────────────────────────────────────────
9456            "DATE_ADD" | "DATEADD" | "TIMESTAMPADD" => {
9457                let mut it = args.into_iter();
9458                let first = it.next()?;
9459                let second = it.next()?;
9460                let third = it.next();
9461                // Handle DATEADD(unit, interval, expr) — TSQL/Snowflake arg order
9462                if upper == "DATEADD" {
9463                    if let Some(third_arg) = third {
9464                        // 3-arg: DATEADD(unit, interval, expr)
9465                        let unit = Self::expr_to_datetime_field(&first);
9466                        TypedFunction::DateAdd {
9467                            expr: Box::new(third_arg),
9468                            interval: Box::new(second),
9469                            unit,
9470                        }
9471                    } else {
9472                        TypedFunction::DateAdd {
9473                            expr: Box::new(first),
9474                            interval: Box::new(second),
9475                            unit: None,
9476                        }
9477                    }
9478                } else {
9479                    // DATE_ADD(expr, interval [, unit])
9480                    let unit = third.as_ref().and_then(Self::expr_to_datetime_field);
9481                    TypedFunction::DateAdd {
9482                        expr: Box::new(first),
9483                        interval: Box::new(second),
9484                        unit,
9485                    }
9486                }
9487            }
9488            "DATE_DIFF" | "DATEDIFF" | "TIMESTAMPDIFF" => {
9489                let mut it = args.into_iter();
9490                let first = it.next()?;
9491                let second = it.next()?;
9492                let third = it.next();
9493                if let Some(third_arg) = third {
9494                    if upper == "DATEDIFF" {
9495                        // DATEDIFF(unit, start, end) — TSQL/Snowflake
9496                        let unit = Self::expr_to_datetime_field(&first);
9497                        TypedFunction::DateDiff {
9498                            start: Box::new(second),
9499                            end: Box::new(third_arg),
9500                            unit,
9501                        }
9502                    } else {
9503                        let unit = Self::expr_to_datetime_field(&third_arg);
9504                        TypedFunction::DateDiff {
9505                            start: Box::new(first),
9506                            end: Box::new(second),
9507                            unit,
9508                        }
9509                    }
9510                } else {
9511                    TypedFunction::DateDiff {
9512                        start: Box::new(first),
9513                        end: Box::new(second),
9514                        unit: None,
9515                    }
9516                }
9517            }
9518            "DATE_TRUNC" | "DATETRUNC" => {
9519                let mut it = args.into_iter();
9520                let first = it.next()?;
9521                let second = it.next()?;
9522                // DATE_TRUNC('unit', expr) or DATE_TRUNC(unit, expr)
9523                let (unit, expr) = if let Some(u) = Self::expr_to_datetime_field(&first) {
9524                    (u, second)
9525                } else if let Some(u) = Self::expr_to_datetime_field(&second) {
9526                    (u, first)
9527                } else {
9528                    // Default: first = unit string, second = expr
9529                    return None;
9530                };
9531                TypedFunction::DateTrunc {
9532                    unit,
9533                    expr: Box::new(expr),
9534                }
9535            }
9536            "DATE_SUB" | "DATESUB" => {
9537                let mut it = args.into_iter();
9538                let first = it.next()?;
9539                let second = it.next()?;
9540                let third = it.next();
9541                let unit = third.as_ref().and_then(Self::expr_to_datetime_field);
9542                TypedFunction::DateSub {
9543                    expr: Box::new(first),
9544                    interval: Box::new(second),
9545                    unit,
9546                }
9547            }
9548            "CURRENT_DATE" => TypedFunction::CurrentDate,
9549            "CURRENT_TIME" | "CURTIME" => TypedFunction::CurrentTime,
9550            "CURRENT_TIMESTAMP" | "NOW" | "GETDATE" | "SYSDATE" => TypedFunction::CurrentTimestamp,
9551            "STR_TO_TIME" | "STR_TO_DATE" | "TO_TIMESTAMP" | "PARSE_TIMESTAMP"
9552            | "PARSE_DATETIME" => {
9553                let mut it = args.into_iter();
9554                let expr = it.next()?;
9555                let format = it.next()?;
9556                TypedFunction::StrToTime {
9557                    expr: Box::new(expr),
9558                    format: Box::new(format),
9559                }
9560            }
9561            "TIME_TO_STR" | "DATE_FORMAT" | "FORMAT_TIMESTAMP" | "FORMAT_DATETIME" | "TO_CHAR" => {
9562                let mut it = args.into_iter();
9563                let expr = it.next()?;
9564                let format = it.next()?;
9565                TypedFunction::TimeToStr {
9566                    expr: Box::new(expr),
9567                    format: Box::new(format),
9568                }
9569            }
9570            "TS_OR_DS_TO_DATE" => {
9571                let mut it = args.into_iter();
9572                TypedFunction::TsOrDsToDate {
9573                    expr: Box::new(it.next()?),
9574                }
9575            }
9576            "YEAR" => {
9577                let mut it = args.into_iter();
9578                TypedFunction::Year {
9579                    expr: Box::new(it.next()?),
9580                }
9581            }
9582            "MONTH" => {
9583                let mut it = args.into_iter();
9584                TypedFunction::Month {
9585                    expr: Box::new(it.next()?),
9586                }
9587            }
9588            "DAY" | "DAYOFMONTH" => {
9589                let mut it = args.into_iter();
9590                TypedFunction::Day {
9591                    expr: Box::new(it.next()?),
9592                }
9593            }
9594
9595            // ── String ─────────────────────────────────────────────
9596            "TRIM" => {
9597                let mut it = args.into_iter();
9598                let expr = it.next()?;
9599                TypedFunction::Trim {
9600                    expr: Box::new(expr),
9601                    trim_type: TrimType::Both,
9602                    trim_chars: None,
9603                }
9604            }
9605            "LTRIM" => {
9606                let mut it = args.into_iter();
9607                let expr = it.next()?;
9608                TypedFunction::Trim {
9609                    expr: Box::new(expr),
9610                    trim_type: TrimType::Leading,
9611                    trim_chars: None,
9612                }
9613            }
9614            "RTRIM" => {
9615                let mut it = args.into_iter();
9616                let expr = it.next()?;
9617                TypedFunction::Trim {
9618                    expr: Box::new(expr),
9619                    trim_type: TrimType::Trailing,
9620                    trim_chars: None,
9621                }
9622            }
9623            "SUBSTRING" | "SUBSTR" => {
9624                let mut it = args.into_iter();
9625                let expr = it.next()?;
9626                let start = it.next()?;
9627                let length = it.next();
9628                TypedFunction::Substring {
9629                    expr: Box::new(expr),
9630                    start: Box::new(start),
9631                    length: length.map(Box::new),
9632                }
9633            }
9634            "UPPER" | "UCASE" => {
9635                let mut it = args.into_iter();
9636                TypedFunction::Upper {
9637                    expr: Box::new(it.next()?),
9638                }
9639            }
9640            "LOWER" | "LCASE" => {
9641                let mut it = args.into_iter();
9642                TypedFunction::Lower {
9643                    expr: Box::new(it.next()?),
9644                }
9645            }
9646            "REGEXP_LIKE" | "RLIKE" => {
9647                let mut it = args.into_iter();
9648                let expr = it.next()?;
9649                let pattern = it.next()?;
9650                let flags = it.next();
9651                TypedFunction::RegexpLike {
9652                    expr: Box::new(expr),
9653                    pattern: Box::new(pattern),
9654                    flags: flags.map(Box::new),
9655                }
9656            }
9657            "REGEXP_EXTRACT" | "REGEXP_SUBSTR" => {
9658                let mut it = args.into_iter();
9659                let expr = it.next()?;
9660                let pattern = it.next()?;
9661                let group_index = it.next();
9662                TypedFunction::RegexpExtract {
9663                    expr: Box::new(expr),
9664                    pattern: Box::new(pattern),
9665                    group_index: group_index.map(Box::new),
9666                }
9667            }
9668            "REGEXP_REPLACE" => {
9669                let mut it = args.into_iter();
9670                let expr = it.next()?;
9671                let pattern = it.next()?;
9672                let replacement = it.next()?;
9673                let flags = it.next();
9674                TypedFunction::RegexpReplace {
9675                    expr: Box::new(expr),
9676                    pattern: Box::new(pattern),
9677                    replacement: Box::new(replacement),
9678                    flags: flags.map(Box::new),
9679                }
9680            }
9681            "CONCAT_WS" => {
9682                let mut it = args.into_iter();
9683                let separator = it.next()?;
9684                let exprs: Vec<Expr> = it.collect();
9685                TypedFunction::ConcatWs {
9686                    separator: Box::new(separator),
9687                    exprs,
9688                }
9689            }
9690            "SPLIT" | "STRING_SPLIT" => {
9691                let mut it = args.into_iter();
9692                let expr = it.next()?;
9693                let delimiter = it.next()?;
9694                TypedFunction::Split {
9695                    expr: Box::new(expr),
9696                    delimiter: Box::new(delimiter),
9697                }
9698            }
9699            "INITCAP" => {
9700                let mut it = args.into_iter();
9701                TypedFunction::Initcap {
9702                    expr: Box::new(it.next()?),
9703                }
9704            }
9705            "LENGTH" | "LEN" | "CHAR_LENGTH" | "CHARACTER_LENGTH" => {
9706                let mut it = args.into_iter();
9707                TypedFunction::Length {
9708                    expr: Box::new(it.next()?),
9709                }
9710            }
9711            "REPLACE" => {
9712                let mut it = args.into_iter();
9713                let expr = it.next()?;
9714                let from = it.next()?;
9715                let to = it.next()?;
9716                TypedFunction::Replace {
9717                    expr: Box::new(expr),
9718                    from: Box::new(from),
9719                    to: Box::new(to),
9720                }
9721            }
9722            "REVERSE" => {
9723                let mut it = args.into_iter();
9724                TypedFunction::Reverse {
9725                    expr: Box::new(it.next()?),
9726                }
9727            }
9728            "LEFT" => {
9729                let mut it = args.into_iter();
9730                let expr = it.next()?;
9731                let n = it.next()?;
9732                TypedFunction::Left {
9733                    expr: Box::new(expr),
9734                    n: Box::new(n),
9735                }
9736            }
9737            "RIGHT" => {
9738                let mut it = args.into_iter();
9739                let expr = it.next()?;
9740                let n = it.next()?;
9741                TypedFunction::Right {
9742                    expr: Box::new(expr),
9743                    n: Box::new(n),
9744                }
9745            }
9746            "LPAD" => {
9747                let mut it = args.into_iter();
9748                let expr = it.next()?;
9749                let length = it.next()?;
9750                let pad = it.next();
9751                TypedFunction::Lpad {
9752                    expr: Box::new(expr),
9753                    length: Box::new(length),
9754                    pad: pad.map(Box::new),
9755                }
9756            }
9757            "RPAD" => {
9758                let mut it = args.into_iter();
9759                let expr = it.next()?;
9760                let length = it.next()?;
9761                let pad = it.next();
9762                TypedFunction::Rpad {
9763                    expr: Box::new(expr),
9764                    length: Box::new(length),
9765                    pad: pad.map(Box::new),
9766                }
9767            }
9768
9769            // ── Aggregate ──────────────────────────────────────────
9770            "COUNT" => {
9771                let mut it = args.into_iter();
9772                let expr = it.next().unwrap_or(Expr::Wildcard);
9773                TypedFunction::Count {
9774                    expr: Box::new(expr),
9775                    distinct,
9776                }
9777            }
9778            "SUM" => {
9779                let mut it = args.into_iter();
9780                TypedFunction::Sum {
9781                    expr: Box::new(it.next()?),
9782                    distinct,
9783                }
9784            }
9785            "AVG" => {
9786                let mut it = args.into_iter();
9787                TypedFunction::Avg {
9788                    expr: Box::new(it.next()?),
9789                    distinct,
9790                }
9791            }
9792            "MIN" => {
9793                let mut it = args.into_iter();
9794                TypedFunction::Min {
9795                    expr: Box::new(it.next()?),
9796                }
9797            }
9798            "MAX" => {
9799                let mut it = args.into_iter();
9800                TypedFunction::Max {
9801                    expr: Box::new(it.next()?),
9802                }
9803            }
9804            "ARRAY_AGG" | "LIST" | "COLLECT_LIST" => {
9805                let mut it = args.into_iter();
9806                TypedFunction::ArrayAgg {
9807                    expr: Box::new(it.next()?),
9808                    distinct,
9809                }
9810            }
9811            "APPROX_DISTINCT" | "APPROX_COUNT_DISTINCT" => {
9812                let mut it = args.into_iter();
9813                TypedFunction::ApproxDistinct {
9814                    expr: Box::new(it.next()?),
9815                }
9816            }
9817            "VARIANCE" | "VAR_SAMP" | "VAR" => {
9818                let mut it = args.into_iter();
9819                TypedFunction::Variance {
9820                    expr: Box::new(it.next()?),
9821                }
9822            }
9823            "STDDEV" | "STDDEV_SAMP" => {
9824                let mut it = args.into_iter();
9825                TypedFunction::Stddev {
9826                    expr: Box::new(it.next()?),
9827                }
9828            }
9829
9830            // ── Array ──────────────────────────────────────────────
9831            "ARRAY_CONCAT" | "ARRAY_CAT" => TypedFunction::ArrayConcat { arrays: args },
9832            "ARRAY_CONTAINS" => {
9833                let mut it = args.into_iter();
9834                let array = it.next()?;
9835                let element = it.next()?;
9836                TypedFunction::ArrayContains {
9837                    array: Box::new(array),
9838                    element: Box::new(element),
9839                }
9840            }
9841            "ARRAY_SIZE" | "ARRAY_LENGTH" | "CARDINALITY" => {
9842                let mut it = args.into_iter();
9843                TypedFunction::ArraySize {
9844                    expr: Box::new(it.next()?),
9845                }
9846            }
9847            "EXPLODE" => {
9848                let mut it = args.into_iter();
9849                TypedFunction::Explode {
9850                    expr: Box::new(it.next()?),
9851                }
9852            }
9853            "GENERATE_SERIES" | "SEQUENCE" => {
9854                let mut it = args.into_iter();
9855                let start = it.next()?;
9856                let stop = it.next()?;
9857                let step = it.next();
9858                TypedFunction::GenerateSeries {
9859                    start: Box::new(start),
9860                    stop: Box::new(stop),
9861                    step: step.map(Box::new),
9862                }
9863            }
9864            "FLATTEN" => {
9865                let mut it = args.into_iter();
9866                TypedFunction::Flatten {
9867                    expr: Box::new(it.next()?),
9868                }
9869            }
9870
9871            // ── JSON ───────────────────────────────────────────────
9872            "JSON_EXTRACT" | "JSON_VALUE" => {
9873                let mut it = args.into_iter();
9874                let expr = it.next()?;
9875                let path = it.next()?;
9876                TypedFunction::JSONExtract {
9877                    expr: Box::new(expr),
9878                    path: Box::new(path),
9879                }
9880            }
9881            "JSON_EXTRACT_SCALAR" => {
9882                let mut it = args.into_iter();
9883                let expr = it.next()?;
9884                let path = it.next()?;
9885                TypedFunction::JSONExtractScalar {
9886                    expr: Box::new(expr),
9887                    path: Box::new(path),
9888                }
9889            }
9890            "PARSE_JSON" | "JSON_PARSE" => {
9891                let mut it = args.into_iter();
9892                TypedFunction::ParseJSON {
9893                    expr: Box::new(it.next()?),
9894                }
9895            }
9896            "JSON_FORMAT" | "TO_JSON" | "TO_JSON_STRING" => {
9897                let mut it = args.into_iter();
9898                TypedFunction::JSONFormat {
9899                    expr: Box::new(it.next()?),
9900                }
9901            }
9902
9903            // ── Window ─────────────────────────────────────────────
9904            "ROW_NUMBER" => TypedFunction::RowNumber,
9905            "RANK" => TypedFunction::Rank,
9906            "DENSE_RANK" => TypedFunction::DenseRank,
9907            "NTILE" => {
9908                let mut it = args.into_iter();
9909                TypedFunction::NTile {
9910                    n: Box::new(it.next()?),
9911                }
9912            }
9913            "LEAD" => {
9914                let mut it = args.into_iter();
9915                let expr = it.next()?;
9916                let offset = it.next();
9917                let default = it.next();
9918                TypedFunction::Lead {
9919                    expr: Box::new(expr),
9920                    offset: offset.map(Box::new),
9921                    default: default.map(Box::new),
9922                }
9923            }
9924            "LAG" => {
9925                let mut it = args.into_iter();
9926                let expr = it.next()?;
9927                let offset = it.next();
9928                let default = it.next();
9929                TypedFunction::Lag {
9930                    expr: Box::new(expr),
9931                    offset: offset.map(Box::new),
9932                    default: default.map(Box::new),
9933                }
9934            }
9935            "FIRST_VALUE" => {
9936                let mut it = args.into_iter();
9937                TypedFunction::FirstValue {
9938                    expr: Box::new(it.next()?),
9939                }
9940            }
9941            "LAST_VALUE" => {
9942                let mut it = args.into_iter();
9943                TypedFunction::LastValue {
9944                    expr: Box::new(it.next()?),
9945                }
9946            }
9947
9948            // ── Math ───────────────────────────────────────────────
9949            "ABS" => {
9950                let mut it = args.into_iter();
9951                TypedFunction::Abs {
9952                    expr: Box::new(it.next()?),
9953                }
9954            }
9955            "CEIL" | "CEILING" => {
9956                let mut it = args.into_iter();
9957                TypedFunction::Ceil {
9958                    expr: Box::new(it.next()?),
9959                }
9960            }
9961            "FLOOR" => {
9962                let mut it = args.into_iter();
9963                TypedFunction::Floor {
9964                    expr: Box::new(it.next()?),
9965                }
9966            }
9967            "ROUND" => {
9968                let mut it = args.into_iter();
9969                let expr = it.next()?;
9970                let decimals = it.next();
9971                TypedFunction::Round {
9972                    expr: Box::new(expr),
9973                    decimals: decimals.map(Box::new),
9974                }
9975            }
9976            "LOG" => {
9977                let mut it = args.into_iter();
9978                let expr = it.next()?;
9979                let base = it.next();
9980                TypedFunction::Log {
9981                    expr: Box::new(expr),
9982                    base: base.map(Box::new),
9983                }
9984            }
9985            "LN" => {
9986                let mut it = args.into_iter();
9987                TypedFunction::Ln {
9988                    expr: Box::new(it.next()?),
9989                }
9990            }
9991            "POW" | "POWER" => {
9992                let mut it = args.into_iter();
9993                let base = it.next()?;
9994                let exponent = it.next()?;
9995                TypedFunction::Pow {
9996                    base: Box::new(base),
9997                    exponent: Box::new(exponent),
9998                }
9999            }
10000            "SQRT" => {
10001                let mut it = args.into_iter();
10002                TypedFunction::Sqrt {
10003                    expr: Box::new(it.next()?),
10004                }
10005            }
10006            "GREATEST" => TypedFunction::Greatest { exprs: args },
10007            "LEAST" => TypedFunction::Least { exprs: args },
10008            "MOD" => {
10009                let mut it = args.into_iter();
10010                let left = it.next()?;
10011                let right = it.next()?;
10012                TypedFunction::Mod {
10013                    left: Box::new(left),
10014                    right: Box::new(right),
10015                }
10016            }
10017
10018            // ── Conversion ─────────────────────────────────────────
10019            "HEX" | "TO_HEX" => {
10020                let mut it = args.into_iter();
10021                TypedFunction::Hex {
10022                    expr: Box::new(it.next()?),
10023                }
10024            }
10025            "UNHEX" | "FROM_HEX" => {
10026                let mut it = args.into_iter();
10027                TypedFunction::Unhex {
10028                    expr: Box::new(it.next()?),
10029                }
10030            }
10031            "MD5" => {
10032                let mut it = args.into_iter();
10033                TypedFunction::Md5 {
10034                    expr: Box::new(it.next()?),
10035                }
10036            }
10037            "SHA" | "SHA1" => {
10038                let mut it = args.into_iter();
10039                TypedFunction::Sha {
10040                    expr: Box::new(it.next()?),
10041                }
10042            }
10043            "SHA2" | "SHA256" | "SHA512" => {
10044                let mut it = args.into_iter();
10045                let expr = it.next()?;
10046                let bit_length = it.next().unwrap_or(Expr::Number("256".to_string()));
10047                TypedFunction::Sha2 {
10048                    expr: Box::new(expr),
10049                    bit_length: Box::new(bit_length),
10050                }
10051            }
10052
10053            // Not a recognized typed function
10054            _ => return None,
10055        };
10056
10057        Some(Expr::TypedFunction {
10058            func: tf,
10059            filter: None,
10060            over: None,
10061        })
10062    }
10063
10064    /// Try to extract a DateTimeField from a column-name expression.
10065    fn expr_to_datetime_field(expr: &Expr) -> Option<DateTimeField> {
10066        match expr {
10067            Expr::Column {
10068                name, table: None, ..
10069            } => match name.to_uppercase().as_str() {
10070                "YEAR" => Some(DateTimeField::Year),
10071                "QUARTER" => Some(DateTimeField::Quarter),
10072                "MONTH" => Some(DateTimeField::Month),
10073                "WEEK" => Some(DateTimeField::Week),
10074                "DAY" => Some(DateTimeField::Day),
10075                "HOUR" => Some(DateTimeField::Hour),
10076                "MINUTE" => Some(DateTimeField::Minute),
10077                "SECOND" => Some(DateTimeField::Second),
10078                "MILLISECOND" => Some(DateTimeField::Millisecond),
10079                "MICROSECOND" => Some(DateTimeField::Microsecond),
10080                _ => None,
10081            },
10082            Expr::StringLiteral(s) | Expr::NationalStringLiteral(s) => {
10083                match s.to_uppercase().as_str() {
10084                    "YEAR" => Some(DateTimeField::Year),
10085                    "QUARTER" => Some(DateTimeField::Quarter),
10086                    "MONTH" => Some(DateTimeField::Month),
10087                    "WEEK" => Some(DateTimeField::Week),
10088                    "DAY" => Some(DateTimeField::Day),
10089                    "HOUR" => Some(DateTimeField::Hour),
10090                    "MINUTE" => Some(DateTimeField::Minute),
10091                    "SECOND" => Some(DateTimeField::Second),
10092                    "MILLISECOND" => Some(DateTimeField::Millisecond),
10093                    "MICROSECOND" => Some(DateTimeField::Microsecond),
10094                    _ => None,
10095                }
10096            }
10097            _ => None,
10098        }
10099    }
10100
10101    fn parse_case_expr(&mut self) -> Result<Expr> {
10102        self.expect(TokenType::Case)?;
10103
10104        let operand = if self.peek_type() != &TokenType::When {
10105            Some(Box::new(self.parse_expr()?))
10106        } else {
10107            None
10108        };
10109
10110        let mut when_clauses = Vec::new();
10111        while self.match_token(TokenType::When) {
10112            let condition = self.parse_expr()?;
10113            self.expect(TokenType::Then)?;
10114            let result = self.parse_expr()?;
10115            when_clauses.push((condition, result));
10116        }
10117
10118        let else_clause = if self.match_token(TokenType::Else) {
10119            Some(Box::new(self.parse_expr()?))
10120        } else {
10121            None
10122        };
10123
10124        self.expect(TokenType::End)?;
10125
10126        Ok(Expr::Case {
10127            operand,
10128            when_clauses,
10129            else_clause,
10130        })
10131    }
10132}
10133
// Parser smoke tests: each test parses one SQL string and checks the shape
// of the resulting AST. Every structural expectation is a hard failure; a
// test never skips its assertions because an intermediate pattern did not
// match.
#[cfg(test)]
mod tests {
    use super::*;

    /// Parse `sql` as a single statement, panicking if tokenizing or
    /// parsing fails.
    fn parse_one(sql: &str) -> Statement {
        Parser::new(sql)
            .expect("tokenizing should succeed")
            .parse_statement()
            .expect("parsing should succeed")
    }

    /// Return the expression held by a select item, panicking when the
    /// item is not an expression (e.g. a wildcard).
    fn expect_expr(item: &SelectItem) -> &Expr {
        match item {
            SelectItem::Expr { expr, .. } => expr,
            _ => panic!("Expected an expression select item"),
        }
    }

    #[test]
    fn test_parse_simple_select() {
        match parse_one("SELECT a, b FROM t") {
            Statement::Select(sel) => {
                assert_eq!(sel.columns.len(), 2);
                assert!(sel.from.is_some());
            }
            _ => panic!("Expected SELECT"),
        }
    }

    #[test]
    fn test_parse_select_with_where() {
        match parse_one("SELECT x FROM t WHERE x > 10") {
            Statement::Select(sel) => assert!(sel.where_clause.is_some()),
            _ => panic!("Expected SELECT"),
        }
    }

    #[test]
    fn test_parse_select_wildcard() {
        match parse_one("SELECT * FROM users") {
            Statement::Select(sel) => {
                assert_eq!(sel.columns.len(), 1);
                assert!(matches!(sel.columns[0], SelectItem::Wildcard));
            }
            _ => panic!("Expected SELECT"),
        }
    }

    #[test]
    fn test_parse_insert() {
        match parse_one("INSERT INTO t (a, b) VALUES (1, 'hello')") {
            Statement::Insert(ins) => {
                assert_eq!(ins.table.name, "t");
                assert_eq!(ins.columns, vec!["a", "b"]);
                match &ins.source {
                    InsertSource::Values(rows) => {
                        assert_eq!(rows.len(), 1);
                        assert_eq!(rows[0].len(), 2);
                    }
                    _ => panic!("Expected VALUES"),
                }
            }
            _ => panic!("Expected INSERT"),
        }
    }

    #[test]
    fn test_parse_delete() {
        match parse_one("DELETE FROM users WHERE id = 1") {
            Statement::Delete(del) => {
                assert_eq!(del.table.name, "users");
                assert!(del.where_clause.is_some());
            }
            _ => panic!("Expected DELETE"),
        }
    }

    #[test]
    fn test_parse_join() {
        match parse_one("SELECT a.id, b.name FROM a INNER JOIN b ON a.id = b.a_id") {
            Statement::Select(sel) => {
                assert_eq!(sel.joins.len(), 1);
                assert_eq!(sel.joins[0].join_type, JoinType::Inner);
            }
            _ => panic!("Expected SELECT"),
        }
    }

    #[test]
    fn test_parse_cte() {
        match parse_one("WITH cte AS (SELECT 1 AS x) SELECT x FROM cte") {
            Statement::Select(sel) => {
                assert_eq!(sel.ctes.len(), 1);
                assert_eq!(sel.ctes[0].name, "cte");
            }
            _ => panic!("Expected SELECT"),
        }
    }

    #[test]
    fn test_parse_union() {
        match parse_one("SELECT 1 UNION ALL SELECT 2") {
            Statement::SetOperation(sop) => {
                assert_eq!(sop.op, SetOperationType::Union);
                assert!(sop.all);
            }
            _ => panic!("Expected SetOperation"),
        }
    }

    #[test]
    fn test_parse_cast() {
        match parse_one("SELECT CAST(x AS INT) FROM t") {
            Statement::Select(sel) => {
                assert!(matches!(expect_expr(&sel.columns[0]), Expr::Cast { .. }));
            }
            _ => panic!("Expected SELECT"),
        }
    }

    #[test]
    fn test_parse_subquery() {
        match parse_one("SELECT * FROM (SELECT 1 AS x) AS sub") {
            Statement::Select(sel) => {
                // A missing FROM clause is a failure, not a silent pass.
                let from = sel.from.as_ref().expect("Expected FROM clause");
                assert!(matches!(from.source, TableSource::Subquery { .. }));
            }
            _ => panic!("Expected SELECT"),
        }
    }

    #[test]
    fn test_parse_exists() {
        match parse_one("SELECT * FROM t WHERE EXISTS (SELECT 1 FROM t2)") {
            Statement::Select(sel) => {
                assert!(sel.where_clause.is_some());
            }
            _ => panic!("Expected SELECT"),
        }
    }

    #[test]
    fn test_parse_window_function() {
        let stmt = parse_one(
            "SELECT ROW_NUMBER() OVER (PARTITION BY dept ORDER BY salary DESC) FROM emp",
        );
        match stmt {
            Statement::Select(sel) => match expect_expr(&sel.columns[0]) {
                Expr::TypedFunction { over, .. } => {
                    assert!(over.is_some());
                }
                Expr::Function { over, .. } => {
                    assert!(over.is_some());
                }
                _ => panic!("Expected function"),
            },
            _ => panic!("Expected SELECT"),
        }
    }

    #[test]
    fn test_parse_multiple_statements() {
        let stmts = Parser::new("SELECT 1; SELECT 2;")
            .unwrap()
            .parse_statements()
            .unwrap();
        assert_eq!(stmts.len(), 2);
    }

    #[test]
    fn test_parse_insert_select() {
        match parse_one("INSERT INTO t SELECT * FROM s") {
            Statement::Insert(ins) => {
                assert!(matches!(ins.source, InsertSource::Query(_)));
            }
            _ => panic!("Expected INSERT"),
        }
    }

    #[test]
    fn test_parse_create_table_constraints() {
        let stmt =
            parse_one("CREATE TABLE t (id INT PRIMARY KEY, name VARCHAR(100) NOT NULL UNIQUE)");
        match stmt {
            Statement::CreateTable(ct) => {
                assert_eq!(ct.columns.len(), 2);
                assert!(ct.columns[0].primary_key);
                assert!(ct.columns[1].unique);
            }
            _ => panic!("Expected CREATE TABLE"),
        }
    }

    #[test]
    fn test_parse_extract() {
        match parse_one("SELECT EXTRACT(YEAR FROM created_at) FROM t") {
            Statement::Select(sel) => {
                assert!(matches!(
                    expect_expr(&sel.columns[0]),
                    Expr::Extract { .. }
                ));
            }
            _ => panic!("Expected SELECT"),
        }
    }

    #[test]
    fn test_parse_postgres_cast() {
        match parse_one("SELECT x::int FROM t") {
            Statement::Select(sel) => {
                assert!(matches!(expect_expr(&sel.columns[0]), Expr::Cast { .. }));
            }
            _ => panic!("Expected SELECT"),
        }
    }

    #[test]
    fn test_parse_on_conflict_expression_targets() {
        let stmt = parse_one(
            "INSERT INTO t VALUES (1, 'Crowberry') ON CONFLICT (lower(fruit) collate \"C\" text_pattern_ops, key) DO NOTHING",
        );

        match stmt {
            Statement::Insert(ins) => {
                let on_conflict = ins.on_conflict.expect("Expected ON CONFLICT");
                assert_eq!(on_conflict.columns.len(), 2);
                assert!(on_conflict.columns[0].starts_with("lower"));
                assert!(on_conflict.columns[0].contains("text_pattern_ops"));
                assert_eq!(on_conflict.columns[1], "key");
            }
            _ => panic!("Expected INSERT"),
        }
    }

    #[test]
    fn test_parse_postgres_operator_sequences() {
        let cases = [
            "SELECT * FROM box_temp WHERE f1 <<| '(10,4.33334),(5,100)'",
            "SELECT * FROM box_temp WHERE f1 &<| '(10,4.3333334),(5,1)'",
            "SELECT count(*) FROM radix_text_tbl WHERE t ^@ 'Worth'",
        ];

        for sql in &cases {
            let stmt = parse_one(sql);
            // Name the failing statement so a regression is easy to locate.
            assert!(
                matches!(stmt, Statement::Select(_)),
                "Expected SELECT for: {}",
                sql
            );
        }
    }
}
10441
10442/// Attach comments to the appropriate field on a parsed statement.
10443fn attach_comments_to_statement(stmt: &mut Statement, comments: Vec<String>) {
10444    match stmt {
10445        Statement::Select(s) => s.comments = comments,
10446        Statement::Insert(s) => s.comments = comments,
10447        Statement::Update(s) => s.comments = comments,
10448        Statement::Delete(s) => s.comments = comments,
10449        Statement::CreateTable(s) => s.comments = comments,
10450        Statement::DropTable(s) => s.comments = comments,
10451        Statement::SetOperation(s) => s.comments = comments,
10452        Statement::AlterTable(s) => s.comments = comments,
10453        Statement::CreateView(s) => s.comments = comments,
10454        Statement::DropView(s) => s.comments = comments,
10455        Statement::Truncate(s) => s.comments = comments,
10456        Statement::Explain(s) => s.comments = comments,
10457        Statement::Use(s) => s.comments = comments,
10458        Statement::Merge(s) => s.comments = comments,
10459        Statement::Command(s) => s.comments = comments,
10460        // Transaction and Expression don't have comment fields
10461        Statement::Transaction(_) | Statement::Expression(_) => {}
10462    }
10463}